In [1]:
import numpy as np
import csv
import random

For code to construct Phonology Vectors, see Parrish (2017):

Parrish, A. (2017, October). Poetic sound similarity vectors using Phonetic Features [Paper presentation]. 
AAAI Conference on Artificial Intelligence and Interactive Digital Entertainment, Snowbird, Utah, USA. https://www.aaai.org/ocs/index.php/AIIDE/AIIDE17/paper/view/15879/15227


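You can download the file "Phonology vectors.txt" from our OSF page at https://osf.io/6mys9/
This file contains the pre-trained phonology vectors. The notebook opens it by its bare file name, so place it in the notebook's working directory.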

In [2]:
# Read the whole pre-trained phonology vector file into memory as one string.
# Text mode is the default for open(), so no explicit mode is needed.
with open('Phonology vectors.txt') as vector_file:
    df = vector_file.read()

In [3]:
# Convert the raw text into a word list and a matrix of vectors.
# Each non-blank line is read as: <word> <value> <value> ... (whitespace separated).
# `df` (the raw text) is deliberately left untouched so this cell can be re-run.
word_list = []
word_vectors = []
for raw_line in df.split('\n'):
    # split() with no argument also strips the line and tolerates repeated spaces/tabs
    fields = raw_line.split()
    if not fields:
        # A blank line (e.g. from a trailing newline at the end of the file) would
        # otherwise add an empty word with an empty vector and make the matrix ragged.
        continue
    word_list.append(fields[0])
    word_vectors.append([float(value) for value in fields[1:]])
# Row k of word_matrix is the vector for word_list[k].
word_matrix = np.array(word_vectors, dtype=np.float32)

In [4]:
# Map each word to its row number in word_matrix so rows can be looked up by word.
# If a word occurs more than once in word_list, its last position wins.
word_dic = {word: row for row, word in enumerate(word_list)}

In [5]:
def normalize_vec(vec):
    """Scale a vector to unit Euclidean length.

    Parameters
    ----------
    vec : numpy array
        The vector to normalize; all of its elements contribute to the magnitude.

    Returns
    -------
    numpy array
        ``vec`` divided by its Euclidean magnitude. A vector whose magnitude is
        zero is returned as zeros (divided by 1) rather than NaN, matching how
        ``normalize_matrix`` treats all-zero rows.
    """
    mag = np.sqrt(np.sum(vec**2))
    if mag == 0:
        # avoid 0/0 -> NaN for an all-zero vector
        mag = 1
    return vec / mag

In [6]:
def normalize_matrix(mat):
    """Scale every row of a 2-D array to unit Euclidean length.

    Rows whose magnitude is zero are divided by 1, so they stay all-zero
    instead of becoming NaN. The input array is not modified.
    """
    row_norms = np.sqrt((mat**2).sum(axis=1))
    # guard against division by zero for all-zero rows
    row_norms[row_norms == 0] = 1
    # broadcast the per-row magnitude across the columns of each row
    return mat / row_norms[:, np.newaxis]

In [7]:
def make_memory(word_list):
    """Stack the vectors of the given words into one array, one row per word.

    Each word is looked up in the module-level ``word_dic`` to find its row in
    the module-level ``word_matrix``; a word missing from ``word_dic`` raises
    KeyError. Rows appear in the same order as ``word_list``.
    """
    rows = [word_matrix[word_dic[word]] for word in word_list]
    return np.array(rows)

In [8]:
def echo_intensity(probes, memory, tau=3):
    """Compute one summed activation value per probe against a memory matrix.

    Parameters
    ----------
    probes : 2-D numpy array
        One probe vector per row. Probes are used as given; they are not
        normalized here.
    memory : 2-D numpy array
        One memory trace per row, with the same number of columns as ``probes``.
        Rows are normalized to unit length (all-zero rows stay zero).
    tau : number, default 3
        Exponent applied to each probe/trace similarity. For ``tau == 2`` the
        sign of the similarity is preserved (``s * |s|``).

    Returns
    -------
    1-D numpy array
        For each probe, the sum over all memory traces of the activation.
    """
    normed_memory = normalize_matrix(memory)
    similarities = probes @ np.transpose(normed_memory)
    if tau == 2:
        # sign-preserving square
        activations = similarities*(abs(similarities))
    elif tau == 4:
        # NOTE(review): this product is mathematically similarities**4, so unlike
        # the tau == 2 case it does not preserve sign; confirm that is intended.
        activations = similarities*(abs(similarities))*similarities*(abs(similarities))
    else:
        activations = similarities**tau
    # Previously the tau == 2 result was overwritten by the else branch because
    # the second test was a separate `if` rather than `elif`.
    activations = np.sum(activations, axis=1)
    return activations

In [9]:
import scipy.stats
def r_sq(x, y):
    """Return the squared Pearson correlation coefficient between x and y."""
    # pearsonr returns (correlation, p-value); only the correlation is needed
    pearson_r = scipy.stats.pearsonr(x, y)[0]
    return pearson_r ** 2

def mse(x, y):
    """Return the mean squared error between x and y.

    ``x`` and ``y`` may be numpy arrays or plain sequences (lists/tuples) of
    numbers; they are converted with ``np.asarray`` so that two plain lists can
    be compared. The sum of squared differences is divided by ``len(x)``.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    return np.sum((x - y)**2) / len(x)

In [10]:
# Read the stimulus table into a list of rows (each row is a list of strings).
# newline='' lets the csv module do its own line-ending handling, as the csv
# documentation requires for file objects passed to csv.reader.
with open('Stimuli_Phonological_False_Memory.csv', newline='') as f:
    df = list(csv.reader(f))

In [11]:
# Drop the first CSV row (presumably the header row; confirm against the file).
# NOTE: this rebinds `df`, so re-running this cell on its own drops one more row each time.
df = df[1:]

In [12]:
# First 10 remaining rows of the stimulus table; used below as one group of lists.
df_a = df[:10]

In [13]:
# Every row after the first 10; used below as the other group of lists.
# The fixed slices in the simulation cells presumably rely on this also being 10 rows (confirm).
df_b = df[10:]

In [14]:
# prepare list a
# Order: columns 1+ of every df_a row, then column 0 of every df_a row,
# then columns 1+ of every df_b row, then column 0 of every df_b row.
df_test_list_a = (
    [item for row in df_a for item in row[1:]]
    + [row[0] for row in df_a]
    + [item for row in df_b for item in row[1:]]
    + [row[0] for row in df_b]
)

In [15]:
# prepare list b
# Same layout as list a with the two groups swapped: columns 1+ of every df_b
# row, column 0 of every df_b row, columns 1+ of every df_a row, column 0 of
# every df_a row.
df_test_list_b = (
    [item for row in df_b for item in row[1:]]
    + [row[0] for row in df_b]
    + [item for row in df_a for item in row[1:]]
    + [row[0] for row in df_a]
)

Simulations

In [16]:
# Empirical means that the simulated rates are compared against, in the order
# the simulation cells report them: target list/list item, target list/critical
# lure, foil list/list item, foil list/critical lure.
emp_data = [0.682     , 0.578     , 0.35033333, 0.4]

In [17]:
# Simulate recognition decisions with one fixed parameter set.
l = 0.03      # probability that each element of a studied vector is kept in memory (otherwise zeroed)
t = 3         # exponent passed to echo_intensity as tau
p_old = 51    # percentage of probes whose familiarity lies above the criterion
n_sims = 1000
# Seeded generator so that Restart & Run All reproduces the printed numbers.
rng = np.random.default_rng(2017)

# The probe vectors are the same in every simulation, so look them up once per
# test list instead of once per simulation.
probes_a = make_memory(df_test_list_a)
probes_b = make_memory(df_test_list_b)

sim_list = []
for s in range(n_sims):
    # first half of the simulations use test list a, second half test list b
    probes = probes_a if s < n_sims // 2 else probes_b
    # the first 60 probe words are the studied items
    studied = probes[0:60]
    # keep each element independently with probability l (creates a new array)
    memory = studied * rng.choice([0, 1], size=studied.shape, p=[1-l, l])
    familiarities = echo_intensity(probes, memory, tau=t)
    # criterion chosen so that roughly p_old percent of the probes exceed it
    criterion = np.percentile(familiarities, 100-p_old)
    old_items = familiarities[0:60]
    crit_lure = familiarities[60:70]
    unrel_items = familiarities[70:130]
    uncrit_items = familiarities[130:140]
    old_hits = np.sum(old_items > criterion) / 60
    crit_hits = np.sum(crit_lure > criterion) / 10
    unrel_hits = np.sum(unrel_items > criterion) / 60
    uncrit_hits = np.sum(uncrit_items > criterion) / 10
    sim_list.append([old_hits, crit_hits, unrel_hits, uncrit_hits])
means = np.mean(sim_list, axis=0)
sds = np.std(sim_list, axis=0, ddof=1)
print('itemtype:', ['target list/list item', 'target list/critical lure', 'foil list/list item', 'foil list/critical lure'])
print('means: ', means)
print('SDs: ', sds)
print('R-sq fit:', r_sq(emp_data, means))
print('Empirical means: ', emp_data)

itemtype: ['target list/list item', 'target list/critical lure', 'foil list/list item', 'foil list/critical lure']
means:  [0.65485 0.5438  0.38105 0.3408 ]
SDs:  [0.04876497 0.14806581 0.05295261 0.13643304]
R-sq fit: 0.9400511174210896
Empirical means:  [0.682, 0.578, 0.35033333, 0.4]


In [18]:
# loop to find best parameters
# Grid search over l = 0.01 ... 0.10 with t and p_old held fixed. Each entry
# appended to para_list is [l, R-squared, mean squared error, simulated means].
para_list = []
t = 3
p_old = 51
n_sims = 1000
# Seeded generator so that Restart & Run All reproduces the same parameter table.
rng = np.random.default_rng(2018)

# The probe vectors depend only on the test list, not on l or the simulation
# index, so look them up once instead of inside both loops.
probes_a = make_memory(df_test_list_a)
probes_b = make_memory(df_test_list_b)

for L in range(1, 11, 1):
    l = L / 100
    sim_list = []
    for s in range(n_sims):
        # first half of the simulations use test list a, second half test list b
        probes = probes_a if s < n_sims // 2 else probes_b
        # the first 60 probe words are the studied items
        studied = probes[0:60]
        # keep each element independently with probability l (creates a new array)
        memory = studied * rng.choice([0, 1], size=studied.shape, p=[1-l, l])
        familiarities = echo_intensity(probes, memory, tau=t)
        # criterion chosen so that roughly p_old percent of the probes exceed it
        criterion = np.percentile(familiarities, 100-p_old)
        old_items = familiarities[0:60]
        crit_lure = familiarities[60:70]
        unrel_items = familiarities[70:130]
        uncrit_items = familiarities[130:140]
        old_hits = np.sum(old_items > criterion) / 60
        crit_hits = np.sum(crit_lure > criterion) / 10
        unrel_hits = np.sum(unrel_items > criterion) / 60
        uncrit_hits = np.sum(uncrit_items > criterion) / 10
        sim_list.append([old_hits, crit_hits, unrel_hits, uncrit_hits])
    means = np.mean(sim_list, axis=0)
    sds = np.std(sim_list, axis=0, ddof=1)
    r_fit = r_sq(means, emp_data)
    mse_fit = mse(means, emp_data)
    para_list.append([l, r_fit, mse_fit, means])

In [19]:
def sel_third(x):
    """Return the element at index 2 of ``x`` (usable as a sort key)."""
    return x[2]

In [20]:
# Order the parameter sets by ascending mean squared error (the third field of
# each entry). The existing list object is updated in place; ties keep their
# original relative order.
para_list[:] = sorted(para_list, key=sel_third)

In [21]:
# Display the parameter sets; each entry is [l, R-squared, mean squared error, simulated means].
para_list

[[0.03,
  0.9501042520399086,
  0.001382640181527799,
  array([0.65961667, 0.5428    , 0.37591667, 0.344     ])],
 [0.04,
  0.9575428215657519,
  0.0023796490101944032,
  array([0.69981667, 0.5549    , 0.33978333, 0.3075    ])],
 [0.02,
  0.9245480924417957,
  0.0032959369260277325,
  array([0.60781667, 0.5301    , 0.42261667, 0.3873    ])],
 [0.05,
  0.9665573662011808,
  0.005730520192333293,
  array([0.7435    , 0.573     , 0.29906667, 0.2716    ])],
 [0.01,
  0.8454810330339829,
  0.008704389223583199,
  array([0.55805   , 0.5109    , 0.46781667, 0.4339    ])],
 [0.06,
  0.9751981214647255,
  0.008866749103194512,
  array([0.77253333, 0.5857    , 0.27058333, 0.2556    ])],
 [0.07,
  0.9730218181730962,
  0.013016062053333593,
  array([0.79906667, 0.5792    , 0.249     , 0.2324    ])],
 [0.08,
  0.9760931513520864,
  0.01792487159652829,
  array([0.82428333, 0.5932    , 0.22491667, 0.2116    ])],
 [0.09,
  0.976279884312816,
  0.022176947887917325,
  array([0.8432    , 0.5975    , 0