In [1]:
import random
import numpy as np
import csv

In [2]:
def sel_second(x):
    """Return the item at index 1 of ``x``.

    Used in this notebook as a sort key for ``(word, similarity)`` pairs.
    """
    second_item = x[1]
    return second_item

In [3]:
def normalize_vec(x):
    """Scale ``x`` to unit Euclidean length.

    When the sum of squared entries is not greater than zero, the vector
    cannot be scaled and an unmodified copy of ``x`` is returned instead.
    """
    squared_length = np.sum(x**2)
    if not (squared_length > 0):
        return x.copy()
    return x / np.sqrt(np.sum(x**2))

In [4]:
def normalize_matrix(mat):
    """Return a copy of ``mat`` in which every row has unit Euclidean length.

    Rows whose length is exactly zero are divided by 1, so they stay all-zero
    rather than turning into NaN. ``mat`` itself is not modified.
    """
    row_lengths = np.sqrt(np.sum(mat**2, axis=1))
    # A divisor of 1 leaves all-zero rows untouched.
    row_lengths[row_lengths == 0] = 1
    # Transpose so the per-row lengths broadcast along the last axis, then
    # transpose back to the original orientation.
    return (mat.T / row_lengths).T

In [5]:
def find_neighbors(word, word_dic, word_matrix, n=20):
    """Return the ``n`` words most similar to ``word``.

    Similarity is the dot product between the row of ``word_matrix`` stored
    at ``word_dic[word]`` and every row of ``word_matrix`` (this is a cosine
    when the rows have unit length).

    Parameters
    ----------
    word : key of ``word_dic`` whose neighbours are wanted.
    word_dic : dict mapping each word to its row index in ``word_matrix``.
    word_matrix : 2-D array with one row per word.
    n : maximum number of neighbours to return.

    Returns
    -------
    list of ``(word, similarity)`` tuples sorted from most to least similar.
    ``word`` itself is never part of the result.

    Raises
    ------
    KeyError if ``word`` is not a key of ``word_dic``.
    """
    similarities = word_matrix[word_dic[word]] @ np.transpose(word_matrix)
    # Look every similarity up through the word's own row index and leave the
    # target out explicitly. Dropping "whatever sorted first" is wrong when
    # another word ties with the target (e.g. an identical vector): the tie
    # could sort first, be discarded, and the target itself be returned.
    neighbors = [
        (other, similarities[row])
        for other, row in word_dic.items()
        if other != word
    ]
    neighbors.sort(key=lambda pair: pair[1], reverse=True)
    return neighbors[:n]

The .txt files for the three types of vectors (LSA semantic, phonology, and orthography) can be found on our OSF page: https://osf.io/6mys9/

In [6]:
#load LSA semantic vectors
# df holds the raw file contents as a single string
with open('LSA semantic vectors.txt', 'r') as vector_file:
    df = vector_file.read()

In [7]:
#convert raw txt file into word list and matrix
# Each non-blank line is read as: <word> <value> <value> ...
# df (the raw file text) is left untouched so this cell can be re-run.
sem_list = []
sem_matrix = []
for line in df.split('\n'):
    # split() with no argument ignores leading/trailing whitespace and
    # collapses repeated separators, so no empty fields reach float()
    fields = line.split()
    if not fields:
        # blank line (e.g. produced by a trailing newline): it would add an
        # empty word and a zero-length row, making the matrix ragged
        continue
    sem_list.append(fields[0])
    sem_matrix.append([float(value) for value in fields[1:]])
sem_matrix = np.array(sem_matrix, dtype=np.float32)

In [8]:
# set up word dictionary to index rows of matrix by word
# (if a word occurs more than once, its last row index is kept)
sem_dic = {word: row for row, word in enumerate(sem_list)}

In [9]:
#load phonology vectors
# df holds the raw file contents as a single string
with open('Phonology vectors.txt', 'r') as vector_file:
    df = vector_file.read()

In [10]:
#convert raw txt file into word list and matrix
# Each non-blank line is read as: <word> <value> <value> ...
# df (the raw file text) is left untouched so this cell can be re-run.
phon_list = []
phon_matrix = []
for line in df.split('\n'):
    # split() with no argument ignores leading/trailing whitespace and
    # collapses repeated separators, so no empty fields reach float()
    fields = line.split()
    if not fields:
        # blank line (e.g. produced by a trailing newline): it would add an
        # empty word and a zero-length row, making the matrix ragged
        continue
    phon_list.append(fields[0])
    phon_matrix.append([float(value) for value in fields[1:]])
phon_matrix = np.array(phon_matrix, dtype=np.float32)

In [11]:
# set up word dictionary to index rows of matrix by word
# (if a word occurs more than once, its last row index is kept)
phon_dic = {word: row for row, word in enumerate(phon_list)}

In [12]:
# Load orthography vectors
# df holds the raw file contents as a single string
with open('Orthography vectors.txt', 'r') as vector_file:
    df = vector_file.read()

In [13]:
#convert raw txt file into word list and matrix
# Each non-blank line is read as: <word> <value> <value> ...
# df (the raw file text) is left untouched so this cell can be re-run.
ortho_list = []
ortho_matrix = []
for line in df.split('\n'):
    # split() with no argument ignores leading/trailing whitespace and
    # collapses repeated separators, so no empty fields reach float()
    fields = line.split()
    if not fields:
        # blank line (e.g. produced by a trailing newline): it would add an
        # empty word and a zero-length row, making the matrix ragged
        continue
    ortho_list.append(fields[0])
    ortho_matrix.append([float(value) for value in fields[1:]])
ortho_matrix = np.array(ortho_matrix, dtype=np.float32)

In [14]:
# set up word dictionary to index rows of matrix by word
# (if a word occurs more than once, its last row index is kept)
ortho_dic = {word: row for row, word in enumerate(ortho_list)}

In [15]:
# normalize matrices
# every row of each source matrix is rescaled to unit length
sem_matrix, phon_matrix, ortho_matrix = [
    normalize_matrix(matrix)
    for matrix in (sem_matrix, phon_matrix, ortho_matrix)
]

In [16]:
#find words common to three matrices
# iterating over sem_dic already guarantees membership there, so only the
# other two dictionaries need checking; order follows sem_dic
combined_words = [word for word in sem_dic if word in phon_dic and word in ortho_dic]

In [17]:
# map each shared word to its position in combined_words
word_dic = {word: position for position, word in enumerate(combined_words)}

In [18]:
# one vector per shared word, laid out as
# [semantic features | phonological features | orthographic features]
word_matrix = [
    np.concatenate((
        sem_matrix[sem_dic[word]],
        phon_matrix[phon_dic[word]],
        ortho_matrix[ortho_dic[word]],
    ))
    for word in combined_words
]

In [19]:
# substitute semantic vector of 'teething' in for full 'teethe' vector
# The new row combines the semantic features of 'teething' with the
# phonological and orthographic features of 'teethe'.
# NOTE(review): presumably 'teethe' has no usable semantic vector of its own - confirm.
teethe_sem = sem_matrix[sem_dic['teething']]
teethe_phon = phon_matrix[phon_dic['teethe']]
teethe_ortho = ortho_matrix[ortho_dic['teethe']]
teethe_vec = np.concatenate((teethe_sem, teethe_phon, teethe_ortho))
# word_matrix is still a Python list here; append adds one row at the end.
# Running this cell more than once appends the row again.
word_matrix.append(teethe_vec)

In [20]:
# add 'teethe' to word dictionary
# len(word_dic) is used as the row index of the new entry
# (assumes 'teethe' is not already a key and that its vector was appended
# as the last row of word_matrix - TODO confirm).
word_dic['teethe'] = len(word_dic)

In [21]:
#normalize full matrix
# turn the list of row vectors into a 2-D array and rescale each
# concatenated row to unit length
word_matrix = normalize_matrix(np.array(word_matrix))

In [22]:
# ten nearest neighbours of 'snow' over the full concatenated vectors
find_neighbors('snow', word_dic, word_matrix, n=10)

[('snows', 0.69701505),
 ('snowy', 0.6373938),
 ('snowman', 0.6143952),
 ('snowed', 0.6124683),
 ('snowstorm', 0.5907043),
 ('snowfall', 0.5447408),
 ('snowflakes', 0.5377385),
 ('snowshoe', 0.52864766),
 ('snowball', 0.5247484),
 ('snowstorms', 0.50575155)]

In [23]:
def find_neighbors_by_space(word, word_dic, word_matrix, space='sem', n=30, dim=300):
    """Return the ``n`` nearest neighbours of ``word`` within one feature space.

    The columns of ``word_matrix`` are treated as three consecutive segments
    of ``dim`` columns each: ``'sem'`` (first), ``'phon'`` (second) and
    ``'ortho'`` (third). ``'all'`` uses every column.

    Parameters
    ----------
    word : key of ``word_dic`` whose neighbours are wanted.
    word_dic : dict mapping each word to its row index in ``word_matrix``;
        its iteration order is assumed to match the row order.
    word_matrix : 2-D array with one row per word.
    space : one of ``'sem'``, ``'phon'``, ``'ortho'``, ``'all'``.
    n : number of entries to return.
    dim : width of each of the three segments (300 by default).

    Returns
    -------
    list of ``(word, cosine)`` tuples sorted from most to least similar. The
    similarity of ``word`` to itself is set to 0 before sorting.

    Raises
    ------
    ValueError if ``space`` is not one of the four supported names.
    KeyError if ``word`` is not a key of ``word_dic``.
    """
    columns_by_space = {
        'sem': slice(0, dim),
        'phon': slice(dim, 2 * dim),
        'ortho': slice(2 * dim, 3 * dim),
        'all': slice(None),
    }
    if space not in columns_by_space:
        raise ValueError(
            "space must be one of 'sem', 'phon', 'ortho' or 'all', got %r" % (space,)
        )
    columns = columns_by_space[space]
    # The target is normalised below with normalize_vec, so no separate
    # normalisation step is needed for the 'all' case.
    target_word = word_matrix[word_dic[word]][columns]
    # Re-normalise within the selected columns so the dot product is a cosine
    # in that space.
    word_space = normalize_matrix(word_matrix[:, columns])
    word_list = [i for i in word_dic]
    cos_sim = normalize_vec(target_word) @ np.transpose(word_space)
    # Zero the target's similarity to itself so it does not rank first.
    cos_sim[word_dic[word]] = 0
    cos_list = list(zip(word_list, cos_sim))
    cos_list.sort(key=sel_second, reverse=True)
    return cos_list[:n]

In [24]:
# ten nearest neighbours of 'caught' using only the semantic columns
find_neighbors_by_space('caught', word_dic, word_matrix, space='sem', n=10)

[('catch', 0.7452618),
 ('catching', 0.6926295),
 ('threw', 0.67456704),
 ('leaped', 0.6655893),
 ('wildly', 0.6563325),
 ('tossed', 0.655919),
 ('suddenly', 0.6557272),
 ('grabbed', 0.6513512),
 ('quick', 0.64505243),
 ('out', 0.6365896)]

In [25]:
# ten nearest neighbours of 'caught' using only the phonological columns
find_neighbors_by_space('caught', word_dic, word_matrix, space='phon', n=10)

[('cot', 0.9999999),
 ('cots', 0.84441763),
 ('cotton', 0.7897006),
 ('scott', 0.78440845),
 ('scot', 0.78440845),
 ('cotter', 0.7636759),
 ('tock', 0.7596828),
 ('tot', 0.74973047),
 ('cotta', 0.7444337),
 ('got', 0.7368597)]

In [26]:
# ten nearest neighbours of 'caught' using only the orthographic columns
find_neighbors_by_space('caught', word_dic, word_matrix, space='ortho', n=10)

[('aught', 0.86199254),
 ('taught', 0.7695395),
 ('mcnaught', 0.765159),
 ('naught', 0.7614912),
 ('haughty', 0.68023616),
 ('fraught', 0.6767743),
 ('draught', 0.67655665),
 ('naughty', 0.6681263),
 ('mcgaugh', 0.6593457),
 ('waugh', 0.6241391)]

MINERVA functions

In [27]:
def make_memory(word_list):
    """Stack the vectors of the given words into a new array.

    Each word is looked up in the module-level ``word_dic`` and its row is
    taken from the module-level ``word_matrix``; rows appear in the order of
    ``word_list``. Raises KeyError for a word missing from ``word_dic``.
    """
    rows = [word_matrix[word_dic[word]] for word in word_list]
    return np.array(rows)

In [28]:
def echo_intensity(probes, memory, tau=3):
    """Return one summed activation value per probe.

    Each row of ``memory`` is rescaled to unit length, every probe is
    compared with every memory row by dot product, each similarity is turned
    into an activation according to ``tau``, and the activations are summed
    over memory rows.

    Parameters
    ----------
    probes : 2-D array, one probe per row (used as given, not re-normalised).
    memory : 2-D array, one stored trace per row, same number of columns as
        ``probes``.
    tau : exponent applied to the similarities. For ``tau == 2`` the sign of
        the similarity is preserved (``s * |s|``).

    Returns
    -------
    1-D array with one value per row of ``probes``.
    """
    normed_memory = normalize_matrix(memory)
    similarities = probes @ np.transpose(normed_memory)
    if tau == 2:
        activations = similarities*(abs(similarities))
    elif tau == 4:
        # elif (not a second if): otherwise the trailing else also ran for
        # tau == 2 and replaced the signed result with similarities**2.
        # NOTE(review): this product equals similarities**4 and therefore does
        # not keep the sign, unlike the tau == 2 branch - confirm the intent.
        activations = similarities*(abs(similarities))*similarities*(abs(similarities))
    else:
        activations = similarities**tau
    activations = np.sum(activations, axis=1)
    return activations

In [29]:
import scipy.stats

def r_sq(x, y):
    """Return the squared Pearson correlation coefficient between ``x`` and ``y``."""
    # pearsonr returns (correlation, p-value); only the correlation is used
    correlation, _p_value = scipy.stats.pearsonr(x, y)
    return correlation**2

def mse(x, y):
    """Return the sum of squared differences between ``x`` and ``y`` divided by ``len(x)``.

    Both arguments may be NumPy arrays or plain sequences of numbers; they
    are converted with ``np.asarray`` first, so two Python lists work too
    (subtracting one list from another would otherwise raise TypeError).
    """
    x = np.asarray(x)
    y = np.asarray(y)
    return np.sum((x - y)**2) / len(x)

Simulation Exp 1 -- Semantics

In [30]:
# read every row of the stimulus file into a list of lists of strings
# newline='' lets the csv module handle line endings itself, as its
# documentation requires for files passed to csv.reader
with open('Stimuli_Semantic_False_Memory.csv', 'r', newline='') as f:
    df = list(csv.reader(f, delimiter=','))

In [31]:
# substitute American spellings
# NOTE(review): the cells are addressed by hard-coded [row][column] position;
# presumably they hold the British spellings of these two words - verify
# against the CSV before changing the stimulus file.
df[5][6] = 'organization'
df[20][3] = 'meter'

In [32]:
# lower-case every cell; slice assignment replaces the rows inside the
# existing df list rather than binding a new list
df[:] = [[cell.lower() for cell in row] for row in df]

In [33]:
# rows 1-10 and rows 11 onward of the stimulus table (row 0 is skipped)
df_a, df_b = df[1:11], df[11:]

In [34]:
#Set up list A for Simulation
# In each row, column 1 is taken as the critical item and columns 2 onward
# as the list items; every entry is stripped and lower-cased.
target_rows = df[1:11]   # rows supplying the "old" items for list A
foil_rows = df[11:]      # rows supplying the "new" items for list A
list_old = [item.strip().lower() for row in target_rows for item in row[2:]]
crit_lure = [row[1].strip().lower() for row in target_rows]
list_new = [item.strip().lower() for row in foil_rows for item in row[2:]]
crit_new = [row[1].strip().lower() for row in foil_rows]
all_items_a = list_old + crit_lure + list_new + crit_new

In [35]:
#Set up list B for Simulation
# Same layout as list A with the two groups of rows swapped: column 1 is
# taken as the critical item and columns 2 onward as the list items.
target_rows = df[11:]    # rows supplying the "old" items for list B
foil_rows = df[1:11]     # rows supplying the "new" items for list B
list_old = [item.strip().lower() for row in target_rows for item in row[2:]]
crit_lure = [row[1].strip().lower() for row in target_rows]
list_new = [item.strip().lower() for row in foil_rows for item in row[2:]]
crit_new = [row[1].strip().lower() for row in foil_rows]
all_items_b = list_old + crit_lure + list_new + crit_new

In [36]:
# Empirical means
# compared below, in this order, with the simulated means for: target list
# items, target critical lures, foil list items, foil critical lures
emp_means = np.array([
    0.684375,
    0.44166667,
    0.20069444,
    0.23333333,
])

In [37]:
l = .75   # Learning rate: probability that a feature of a studied item is kept in memory
t = 1      # Retrieval exponent
p_old = 43   # Percentage of items to be deemed old
n_sims = 1000   # simulated runs; the first half use list A, the second half list B
n_list = 60     # number of list items in each half of the test set
n_crit = 10     # number of critical lures in each half of the test set
rng = np.random.default_rng(0)   # fixed seed so re-running the cell gives the same numbers

# Positions of the four item types inside a test set:
# [list items | critical lures | foil list items | foil critical lures]
end_old = n_list
end_crit = end_old + n_crit
end_new = end_crit + n_list
end_crit_new = end_new + n_crit

# The probe vectors are the same on every run, so build them once per list
# instead of once per iteration.
probes_a = make_memory(all_items_a)
probes_b = make_memory(all_items_b)

sim_list = []
for s in range(0, n_sims):
    if s < n_sims // 2:
        stims, probes = all_items_a, probes_a
    else:
        stims, probes = all_items_b, probes_b
    # make_memory returns a fresh array, so the in-place masking below never
    # touches word_matrix or the probes
    memory = make_memory(stims[0:n_list])
    # each stored feature is kept with probability l and zeroed otherwise
    memory *= rng.choice([0, 1], size=memory.shape, p=[1-l, l])
    familiarities = echo_intensity(probes, memory, tau=t)
    # items whose familiarity exceeds this percentile are called "old"
    criterion = np.percentile(familiarities, 100-p_old)
    list_rel_hits = np.sum(familiarities[:end_old] > criterion) / n_list
    crit_rel_hits = np.sum(familiarities[end_old:end_crit] > criterion) / n_crit
    list_new_hits = np.sum(familiarities[end_crit:end_new] > criterion) / n_list
    crit_new_hits = np.sum(familiarities[end_new:end_crit_new] > criterion) / n_crit
    sim_list.append([list_rel_hits, crit_rel_hits, list_new_hits, crit_new_hits])
means = np.mean(sim_list, axis=0)
sds = np.std(sim_list, axis=0, ddof=1)
print('itemtype:', ['target list/list item', 'target list/critical lure', 'foil list/list item', 'foil list/critical lure'])
print('means: ', means)
print('SDs: ', sds)
print('R-squared fit: ', r_sq(emp_means, means))
print('Empirical means: ', emp_means)

itemtype: ['target list/list item', 'target list/critical lure', 'foil list/list item', 'foil list/critical lure']
means:  [0.6605     0.4088     0.22873333 0.2558    ]
SDs:  [0.0341862  0.10456335 0.05013003 0.0530016 ]
R-squared fit:  0.9925707963827805
Empirical means:  [0.684375   0.44166667 0.20069444 0.23333333]


Simulation Exp 2 -- Phonology

In [38]:
#load stimuli
# read every row of the stimulus file into a list of lists of strings
# newline='' lets the csv module handle line endings itself, as its
# documentation requires for files passed to csv.reader
with open('Stimuli_Phonological_False_Memory.csv', newline='') as f:
    df = list(csv.reader(f))

In [39]:
# drop the first row (presumably the header - confirm against the CSV)
# NOTE: re-running this cell removes one more row each time
df = df[1:]

In [40]:
# first ten remaining rows
df_a = df[:10]

In [41]:
# all rows after the first ten
df_b = df[10:]

In [42]:
#set up list for simulation
# order: df_a columns 1 onward, df_a column 0, df_b columns 1 onward, df_b column 0
df_test_list_a = (
    [item for row in df_a for item in row[1:]]
    + [row[0] for row in df_a]
    + [item for row in df_b for item in row[1:]]
    + [row[0] for row in df_b]
)

In [43]:
#set up list for simulation
# order: df_b columns 1 onward, df_b column 0, df_a columns 1 onward, df_a column 0
df_test_list_b = (
    [item for row in df_b for item in row[1:]]
    + [row[0] for row in df_b]
    + [item for row in df_a for item in row[1:]]
    + [row[0] for row in df_a]
)

In [44]:
# empirical values the simulated means are compared with, in the order:
# target list items, target critical lures, foil list items, foil critical lures
emp_data = [0.682, 0.578, 0.35033333, 0.4]

In [46]:
l = 0.05    # Learning rate: probability that a feature of a studied item is kept in memory
t = 1       # Retrieval exponent
p_old = 51   # Percentage of old items
n_sims = 1000   # simulated runs; the first half use list A, the second half list B
n_list = 60     # number of list items in each half of the test set
n_crit = 10     # number of critical lures in each half of the test set
rng = np.random.default_rng(0)   # fixed seed so re-running the cell gives the same numbers

# Positions of the four item types inside a test set:
# [list items | critical lures | foil list items | foil critical lures]
end_old = n_list
end_crit = end_old + n_crit
end_unrel = end_crit + n_list
end_uncrit = end_unrel + n_crit

# The probe vectors are the same on every run, so build them once per list
# instead of once per iteration.
probes_a = make_memory(df_test_list_a)
probes_b = make_memory(df_test_list_b)

sim_list = []
for s in range(0, n_sims):
    if s < n_sims // 2:
        stims, probes = df_test_list_a, probes_a
    else:
        stims, probes = df_test_list_b, probes_b
    memory = make_memory(stims[0:n_list])
    # each stored feature is kept with probability l and zeroed otherwise
    memory = memory * rng.choice([0, 1], size=memory.shape, p=[1-l, l])
    familiarities = echo_intensity(probes, memory, tau=t)
    # items whose familiarity exceeds this percentile are called "old"
    criterion = np.percentile(familiarities, 100-p_old)
    old_items = familiarities[0:end_old]
    crit_lure = familiarities[end_old:end_crit]
    unrel_items = familiarities[end_crit:end_unrel]
    uncrit_items = familiarities[end_unrel:end_uncrit]
    old_hits = np.sum(old_items > criterion) / n_list
    crit_hits = np.sum(crit_lure > criterion) / n_crit
    unrel_hits = np.sum(unrel_items > criterion) / n_list
    uncrit_hits = np.sum(uncrit_items > criterion) / n_crit
    sim_list.append([old_hits, crit_hits, unrel_hits, uncrit_hits])
means = np.mean(sim_list, axis=0)
sds = np.std(sim_list, axis=0, ddof=1)
print('itemtype:', ['target list/list item', 'target list/critical lure', 'foil list/list item', 'foil list/critical lure'])
print('means: ', means)
print('SDs: ', sds)
print('R-squared fit: ', r_sq(emp_data, means))
print('Empirical means: ', emp_data)

itemtype: ['target list/list item', 'target list/critical lure', 'foil list/list item', 'foil list/critical lure']
means:  [0.67465    0.6283     0.34246667 0.369     ]
SDs:  [0.04748589 0.15333826 0.04833939 0.12827882]
R-squared fit:  0.9668482932684399
Empirical means:  [0.682, 0.578, 0.35033333, 0.4]
