In [11]:
import numpy as np
import time
import csv
import random

You can download "LSA semantic vectors.txt" from our OSF page at https://osf.io/6mys9/. This file contains the pre-trained LSA vectors trained on TASA.

In [12]:
# Read the whole LSA semantic vectors file into one string (parsed in the next cell)
with open('LSA semantic vectors.txt', 'r') as lsa_file:
    df = lsa_file.read()

In [13]:
#convert raw txt file into word list and matrix
# Each non-empty line has the form "<word> <v1> <v2> ...": the first field is the
# word, the remaining fields are its vector components.
# The raw text in `df` is not overwritten, so this cell can be re-run while
# `df` still holds the file contents.
word_list = []
word_matrix = []
for line in df.split('\n'):
    # split() with no argument tolerates repeated spaces/tabs and surrounding whitespace
    fields = line.split()
    if not fields:
        # skip blank lines (e.g. the empty string after a trailing newline),
        # which would otherwise add an empty word and an empty vector
        continue
    word_list.append(fields[0])
    word_matrix.append([float(value) for value in fields[1:]])
word_matrix = np.array(word_matrix, dtype=np.float32)

In [14]:
# Map every word to the index of its row in word_matrix
# (if a word occurs more than once, the last occurrence wins)
word_dic = {word: row for row, word in enumerate(word_list)}

In [15]:
import scipy.stats

def r_sq(x, y):
    """Return the squared Pearson correlation coefficient between x and y."""
    r, _p_value = scipy.stats.pearsonr(x, y)
    return r**2

def mse(x, y):
    """Return the mean squared error between x and y.

    Both inputs are passed through np.asarray, so plain Python sequences are
    accepted as well as NumPy arrays. The sum of squared differences is
    divided by len(x), i.e. the length of the first axis of x.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    return np.sum((x - y)**2) / len(x)

In [16]:
def normalize_vec(vec):
    """Scale vec to unit Euclidean length.

    vec is passed through np.asarray, so a plain sequence is accepted.
    A vector whose magnitude is 0 is divided by 1 instead of 0, so it comes
    back as all zeros rather than NaN; this matches how normalize_matrix
    treats all-zero rows.
    """
    vec = np.asarray(vec)
    mag = np.sqrt(np.sum(vec**2))
    if mag == 0:
        # avoid 0/0 -> NaN for the zero vector
        mag = 1
    return vec / mag

In [17]:
def normalize_matrix(mat):
    """Scale every row of mat to unit Euclidean length.

    Rows whose magnitude is 0 are divided by 1 instead, so they stay
    all-zero rather than turning into NaN. The input array is not modified.
    """
    row_norms = np.sqrt(np.sum(mat**2, axis=1))
    row_norms[row_norms == 0] = 1
    # broadcast the per-row norms across the columns
    return mat / row_norms[:, np.newaxis]

In [18]:
def make_memory(word_list):
    """Stack the word_matrix rows of the given words into a new array.

    Each word is looked up in word_dic to find its row in word_matrix; rows
    are returned in the order of word_list. A word missing from word_dic
    raises KeyError. The result is a fresh array, so modifying it in place
    does not change word_matrix.
    """
    rows = [word_matrix[word_dic[word]] for word in word_list]
    return np.array(rows)

In [19]:
def echo_intensity(probes, memory, tau=3):
    """Return one summed activation value per probe.

    The rows of memory are normalized with normalize_matrix; probes are used
    as given. Similarities are the dot products of every probe with every
    normalized memory row. Each similarity is raised to the power tau and
    the results are summed over the memory rows.

    For tau == 2 the square is computed as similarity * |similarity| so that
    negative similarities stay negative.
    """
    normed_memory = normalize_matrix(memory)
    similarities = probes @ np.transpose(normed_memory)
    # The branches must be one if/elif/else chain: with two separate `if`
    # statements the final `else` also ran for tau == 2 and replaced the
    # sign-preserving result with similarities**2.
    if tau == 2:
        activations = similarities*(abs(similarities))
    elif tau == 4:
        # NOTE(review): this product equals similarities**4 and is never
        # negative; confirm whether a sign-preserving form was intended.
        activations = similarities*(abs(similarities))*similarities*(abs(similarities))
    else:
        activations = similarities**tau
    activations = np.sum(activations, axis=1)
    return activations

In [20]:
# Read the stimulus table; every CSV row (including the header) becomes a list of strings.
# newline='' lets the csv module handle line endings itself, as its documentation
# requires for files passed to csv.reader.
with open('Stimuli_Semantic_False_Memory.csv', 'r', newline='') as f:
    df = list(csv.reader(f, delimiter=','))

In [21]:
# Show the loaded rows: df[0] is the header, each later row is one word list
df

[['List',
  'Target Word',
  'Stimulus 01',
  'Stimulus 02',
  'Stimulus 03',
  'Stimulus 04',
  'Stimulus 05',
  'Stimulus 06'],
 ['1', 'BASS', 'guitar', 'treble', 'drum', 'fish', 'music ', 'boom'],
 ['2', 'BRAKE', 'stop', 'pedal', 'car', 'clutch', 'accelerate ', 'speed'],
 ['3', 'CLAM', 'oyster', 'seafood', 'shell', 'chowder', 'pearl', 'mussel '],
 ['4', 'CLAMP', 'hold', 'tight', 'vise', 'chisel', 'tool', 'metal'],
 ['5', 'CLUB', 'golf', 'member', 'ball', 'dance ', 'organisation', 'house'],
 ['6', 'DOVE', 'bird', 'peace', 'white', 'beak', 'bar', 'feather'],
 ['7', 'FEAR', 'scared', 'fright', 'terror', 'anxiety', 'monster', 'snake'],
 ['8', 'PLANE', 'jet', 'air', 'fly', 'sky', 'travel', 'geometry'],
 ['9', 'PINK', 'panther', 'pretty', 'purple', 'lemonade ', 'rose', 'dress'],
 ['10', 'GIN', 'tonic', 'alcohol', 'vodka', 'drink', 'liquor', 'drunk'],
 ['11', 'FRAUD', 'fake', 'cheat', 'lie', 'crime', 'false ', 'money '],
 ['12',
  'FILE',
  'cabinet',
  'paper',
  'folder',
  'drawer',
  '

In [22]:
#substitute American spellings
# keys are (row, column) positions in df, values are the replacement words
american_spellings = {(5, 6): 'organization', (20, 3): 'meter'}
for (row, col), word in american_spellings.items():
    df[row][col] = word

In [23]:
#set up list A: rows 1-10 of df supply the old items, rows 11 onward the new items
old_rows = df[1:11]
new_rows = df[11:]
# column 1 of a row is its critical word, columns 2 onward are its list items;
# every word is stripped of surrounding whitespace and lower-cased
crit_lure = [row[1].strip().lower() for row in old_rows]
list_old = [item.strip().lower() for row in old_rows for item in row[2:]]
crit_new = [row[1].strip().lower() for row in new_rows]
list_new = [item.strip().lower() for row in new_rows for item in row[2:]]

In [24]:
# Order matters: the simulation cells slice this list by position
# (old list items, critical lures, new list items, new critical words)
all_items_a = [*list_old, *crit_lure, *list_new, *crit_new]

In [25]:
#set up list B: the reverse assignment, rows 11 onward supply the old items, rows 1-10 the new items
old_rows = df[11:]
new_rows = df[1:11]
# column 1 of a row is its critical word, columns 2 onward are its list items;
# every word is stripped of surrounding whitespace and lower-cased
crit_lure = [row[1].strip().lower() for row in old_rows]
list_old = [item.strip().lower() for row in old_rows for item in row[2:]]
crit_new = [row[1].strip().lower() for row in new_rows]
list_new = [item.strip().lower() for row in new_rows for item in row[2:]]

In [26]:
# Order matters: the simulation cells slice this list by position
# (old list items, critical lures, new list items, new critical words)
all_items_b = [*list_old, *crit_lure, *list_new, *crit_new]

In [27]:
# normalize LSA matrix before simulation
# (echo_intensity normalizes only the memory matrix; probes are built from
# word_matrix rows as-is, so the rows are scaled to unit length here)
word_matrix = normalize_matrix(word_matrix)

Simulations

In [29]:
#empirical means
# compared against the simulated means, whose column order is
# [list_rel_hits, crit_rel_hits, list_new_hits, crit_new_hits]
emp_means = np.array([0.684375, 0.44166667, 0.20069444, 0.23333333])

In [30]:
l = 0.22    # learning rate: probability that each stored feature is kept
t = 3       # retrieval exponent
p_old = 43  # percentage of items to be deemed old
# NOTE(review): no random seed is set, so the printed numbers vary between runs
sim_list = []
for s in range(1000):
    # the first 500 runs use list A, the remaining 500 use list B
    stims = (all_items_a if s < 500 else all_items_b).copy()
    # memory holds the first 60 items; each feature is kept with probability l
    memory = make_memory(stims[:60])
    encoding_mask = np.random.choice([0, 1], size=memory.shape, p=[1-l, l])
    memory *= encoding_mask
    # every item, old and new, serves as a probe
    probes = make_memory(stims)
    familiarities = echo_intensity(probes, memory, tau=t)
    # probes whose familiarity exceeds the (100 - p_old)th percentile are called old
    criterion = np.percentile(familiarities, 100-p_old)
    called_old = familiarities > criterion
    list_rel_hits = np.sum(called_old[:60]) / 60
    crit_rel_hits = np.sum(called_old[60:70]) / 10
    list_new_hits = np.sum(called_old[70:130]) / 60
    crit_new_hits = np.sum(called_old[130:140]) / 10
    sim_list.append([list_rel_hits, crit_rel_hits, list_new_hits, crit_new_hits])
print('means: ', np.mean(sim_list, axis=0))
print('SDs: ', np.std(sim_list, axis=0))

means:  [0.69208333 0.4457     0.19416667 0.2368    ]
SDs:  [0.06508408 0.12418337 0.06108168 0.08582401]


In [31]:
# Squared correlation between the empirical means and the simulated means
mean_hit_rates = np.mean(sim_list, axis=0)
r_sq(emp_means, mean_hit_rates)

0.9997159710793717

In [84]:
#loop to find best parameters
# learning rates 0.01 to 0.30 are each simulated 1000 times with t and p_old held fixed
# NOTE(review): no random seed is set, so the fitted values vary between runs
t = 3
p_old = 43
para_list = []
for percent in range(1, 31):
    l = percent / 100
    sim_list = []
    for s in range(1000):
        # the first 500 runs use list A, the remaining 500 use list B
        stims = (all_items_a if s < 500 else all_items_b).copy()
        # memory holds the first 60 items; each feature is kept with probability l
        memory = make_memory(stims[:60])
        encoding_mask = np.random.choice([0, 1], size=memory.shape, p=[1-l, l])
        memory *= encoding_mask
        probes = make_memory(stims)
        familiarities = echo_intensity(probes, memory, tau=t)
        # probes whose familiarity exceeds the (100 - p_old)th percentile are called old
        criterion = np.percentile(familiarities, 100-p_old)
        called_old = familiarities > criterion
        list_rel_hits = np.sum(called_old[:60]) / 60
        crit_rel_hits = np.sum(called_old[60:70]) / 10
        list_new_hits = np.sum(called_old[70:130]) / 60
        crit_new_hits = np.sum(called_old[130:140]) / 10
        sim_list.append([list_rel_hits, crit_rel_hits, list_new_hits, crit_new_hits])
    # one entry per learning rate: [l, r squared, mse, simulated means]
    sim_means = np.mean(sim_list, axis=0)
    r_sq_sim = r_sq(emp_means, sim_means)
    mse_sim = mse(emp_means, sim_means)
    para_list.append([l, r_sq_sim, mse_sim, sim_means])

In [85]:
def sel_third(x):
    """Return the element at index 2 of x (used as a sort key for para_list)."""
    third = x[2]
    return third

In [86]:
# Sort in place, ascending by index 2 of each [l, r_sq_sim, mse_sim, sim_means]
# entry, so the learning rate with the lowest MSE comes first
para_list.sort(key=sel_third)

In [None]:
# Display the sorted [l, r_sq_sim, mse_sim, sim_means] entries
para_list