In [77]:
import string
import numpy as np
import time
import csv
import stanza
import random
import pickle

In [78]:
from nltk.corpus import stopwords
# Set of English stop words from NLTK; get_sent_vec drops any token found in
# this set before summing word vectors.
stoplist = set(stopwords.words('english'))

In [79]:
import scipy.stats

In [80]:
def strip_punctuation(x):
    """Return ``x`` with every element found in ``string.punctuation`` removed.

    The remaining elements (characters, when ``x`` is a string) are joined,
    in their original order, into a single string.
    """
    return ''.join(ch for ch in x if ch not in string.punctuation)

In [81]:
def r_sq(x, y):
    """Return the squared Pearson correlation coefficient between ``x`` and ``y``."""
    # pearsonr's first element is the correlation coefficient r.
    return scipy.stats.pearsonr(x, y)[0] ** 2

In [24]:
def normalize_vec(vec):
    """Scale ``vec`` to unit Euclidean length.

    Parameters
    ----------
    vec : numpy.ndarray
        Vector to normalize.

    Returns
    -------
    numpy.ndarray
        ``vec`` divided by its Euclidean magnitude. An all-zero vector is
        returned as zeros rather than NaNs, which is the same zero-magnitude
        convention normalize_matrix applies to all-zero rows.
    """
    mag = np.sqrt(np.sum(vec**2))
    if mag == 0:
        # 0/0 would yield NaNs (and a RuntimeWarning); dividing by 1 keeps the
        # zero vector as zeros.
        mag = 1
    return vec / mag

In [25]:
def normalize_matrix(mat):
    """Return ``mat`` with each row scaled to unit Euclidean length.

    Rows whose magnitude is zero are divided by 1 instead, so they stay
    all-zero rather than becoming NaN. The input array is not modified.
    """
    row_norms = np.sqrt(np.sum(mat**2, axis=1))
    row_norms[row_norms == 0] = 1
    # Divide in transposed form so the per-row norms broadcast across columns.
    return np.transpose(np.transpose(mat) / row_norms)

In [26]:
def make_memory(word_list):
    """Stack the word vectors for ``word_list`` into one array.

    Each word is mapped to a row index through the global ``word_dic`` and
    that row of the global ``word_matrix`` is taken. Rows of the result follow
    the order of ``word_list``.
    """
    return np.array([word_matrix[word_dic[word]] for word in word_list])

In [27]:
def echo_intensity(probes, memory, tau=3):
    """Return one summed activation (echo intensity) per probe row.

    Every row of ``memory`` is scaled to unit length, its dot product with
    each probe row gives a similarity, the similarities are raised to the
    power ``tau``, and the results are summed over memory rows.

    Parameters
    ----------
    probes : numpy.ndarray
        2-D array with one probe per row. Probes are NOT normalized here;
        callers pass already-normalized probes.
    memory : numpy.ndarray
        2-D array with one memory trace per row.
    tau : int, optional
        Exponent applied to the similarities (default 3). ``tau == 2`` uses
        ``s * |s|`` so the sign of each similarity is kept.

    Returns
    -------
    numpy.ndarray
        1-D array holding the echo intensity of each probe.
    """
    normed_memory = normalize_matrix(memory)
    similarities = probes @ np.transpose(normed_memory)
    if tau == 2:
        # Sign-preserving square.
        activations = similarities*(abs(similarities))
    elif tau == 4:
        # Must be `elif`: with a plain `if` here, the trailing `else` also ran
        # for tau == 2 and replaced the signed square with similarities**2.
        # NOTE(review): this product equals similarities**4 mathematically and
        # is therefore never negative -- confirm that is intended.
        activations = similarities*(abs(similarities))*similarities*(abs(similarities))
    else:
        activations = similarities**tau
    # Sum over memory traces (axis 1) to get one value per probe.
    return np.sum(activations, axis=1)

In [100]:
# Read the DRM lists from the CSV and drop the header row.
with open('R&M1995.csv') as f:
    df = list(csv.reader(f))[1:]
# Flatten every row into one word list: the word in column 0 followed by the
# ', '-separated words held in column 1.
all_drm_words = []
for row in df:
    all_drm_words.append(row[0])
    all_drm_words.extend(row[1].split(', '))

In [101]:
def make_random_stims(df):
    """Build one randomized stimulus list from the rows of ``df``.

    Each row is ``[critical word, 'item, item, ...']``. Row indices 0-23 are
    shuffled; the first five shuffled rows are treated as presented lists and
    the next five as not-presented lists.

    Returns a flat list ordered as: items of the presented lists, critical
    words of the presented lists, items of the not-presented lists, critical
    words of the not-presented lists.
    """
    order = list(range(0, 24))
    random.shuffle(order)
    presented_rows = [df[k] for k in order[0:5]]
    not_presented_rows = [df[k] for k in order[5:10]]

    def _split(rows):
        # Separate the critical words (column 0) from the list items (column 1).
        critical = [row[0] for row in rows]
        items = [word for row in rows for word in row[1].split(', ')]
        return critical, items

    crit_lures, old = _split(presented_rows)
    crit_unrelated, unrelated = _split(not_presented_rows)
    return old + crit_lures + unrelated + crit_unrelated

In [30]:
# Load LSA. These are the pre-trained vectors from Fritz Gunther's website,
# downloadable from https://sites.google.com/site/fritzgntr/software-resources/semantic_spaces
# The TASA matrix was converted for use in Python.
# word_dic includes all words in the corpus and is used to index the rows of the word matrix.
# word_matrix is a 92393 (words) by 300 (dimensions) numpy array.
# pickle is already imported in the notebook's first import cell.
# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# these two files if they come from a trusted source.
with open('LSA word matrix april 19', 'rb') as f:
    word_matrix = pickle.load(f)
with open('LSA word dic april 19', 'rb') as f:
    word_dic = pickle.load(f)

In [56]:
def get_sent_vec(sent):
    """Return the normalized sum of the word vectors for the content words of ``sent``.

    ``sent`` is split on single spaces, tokens found in the global
    ``stoplist`` are dropped, and each remaining token is looked up through
    the globals ``word_dic`` / ``word_matrix``. The summed vector is passed
    through normalize_vec before being returned.
    """
    content_words = [tok for tok in sent.split(' ') if tok not in stoplist]
    total = np.zeros(len(word_matrix[0]))
    # Accumulate one word vector at a time, in sentence order.
    for tok in content_words:
        total += word_matrix[word_dic[tok]]
    return normalize_vec(total)

Simulation 1: Arndt and Hirshman

In [102]:
# Empirical means.
# Each array holds four values in the same order as the simulation output:
# target list/list item, target list/critical lure, foil list/list item,
# foil list/critical lure. r_3000 is the array compared with the 3000ms
# simulation; the other suffixes presumably name the other study rates in ms.
r_300 = np.array([.51, .61, .21, .28])
r_500 = np.array([.60, .75, .17, .23])
r_800 = np.array([.65, .72, .14, .18])
r_3000 = np.array([.77, .76, .10, .16])

In [103]:
# Simulation for 3000ms study rate (change l to simulate other study rates)
# l: probability that a stored feature is kept (the 0/1 mask below is 1 with probability l).
l = 0.05
# t: exponent (tau) passed to echo_intensity.
t = 3
# p_old: percentage of test items called "old"; the criterion is the (100 - p_old)th percentile.
p_old = 47
sim_list = []
for s in range(0, 1000):
    stims = make_random_stims(df)
    # Only the first 75 stimuli are stored in memory
    # (assumes 5 presented lists of 15 items each -- TODO confirm against the CSV).
    memory = make_memory(stims[0:75])
    # Randomly zero out individual features of every stored trace.
    memory = memory * np.random.choice([0, 1], size=(len(memory), len(memory[0])), p=[1-l, l])
    # Every stimulus, stored or not, is used as a test probe.
    probes = make_memory(stims)
    probes = normalize_matrix(probes)
    familiarities = echo_intensity(probes, memory, tau=t)
    criterion = np.percentile(familiarities, 100-p_old)
    # The slices follow make_random_stims' output order: presented items,
    # presented critical words, not-presented items, not-presented critical words.
    old_items = familiarities[0:75]
    crit_lure = familiarities[75:80]
    unrel_items = familiarities[80:155]
    uncrit_items = familiarities[155:160]
    # Proportion of each item type whose familiarity exceeds the criterion.
    old_hits = np.sum(old_items > criterion) / 75
    crit_hits = np.sum(crit_lure > criterion) / 5
    unrel_hits = np.sum(unrel_items > criterion) / 75
    uncrit_hits = np.sum(uncrit_items > criterion) / 5
    sim_list.append([old_hits, crit_hits, unrel_hits, uncrit_hits])
# Mean and standard deviation of each proportion across the 1000 runs.
means = np.mean(sim_list, axis=0)
sds = np.std(sim_list, axis=0)
print('itemtype:', ['target list/list item', 'target list/critical lure', 'foil list/list item', 'foil list/critical lure'])
print('means: ', means)
print('SDs: ', sds)

itemtype: ['target list/list item', 'target list/critical lure', 'foil list/list item', 'foil list/critical lure']
means:  [0.789  0.7028 0.1542 0.1492]
SDs:  [0.04098428 0.19243742 0.04100033 0.15355572]


In [104]:
# R-squared between the empirical 3000ms means and the simulated means
# (column-wise mean of sim_list from the simulation cell above).
r_sq(r_3000, np.mean(sim_list, axis=0))

0.9865007707615485

Simulation 2: Bransford and Franks

In [46]:
# Read every row of the CSV into df; later cells skip the first row with df[1:].
with open('BF1971Exp2.csv') as f:
    df = list(csv.reader(f, delimiter = ','))

In [47]:
def mem_matrix(sent_list):
    """Return an array with one get_sent_vec sentence vector per entry of ``sent_list``.

    Rows follow the order of ``sent_list``.
    """
    return np.array([get_sent_vec(sentence) for sentence in sent_list])

In [48]:
# Acquisition sentences: column 4 of every non-header row whose column 2 is '1'.
acquisition = [row[4] for row in df[1:] if row[2] == '1']

In [49]:
# Test sentences (column 4) grouped by the label in column 1. Only non-header
# rows with column 2 == '0' and column 3 == '1' are used.
fours, threes, twos, ones, noncase = [], [], [], [], []
by_label = {
    'four': fours,
    'three': threes,
    'two': twos,
    'one': ones,
    'noncase': noncase,
}
for row in df[1:]:
    if row[2] == '0' and row[3] == '1' and row[1] in by_label:
        by_label[row[1]].append(row[4])

In [57]:
# Sentence-vector matrices: the acquisition set first, then each test group.
acq, four, three, two, one, ncase = [
    mem_matrix(group)
    for group in (acquisition, fours, threes, twos, ones, noncase)
]

In [58]:
# Empirical means, in the same order as the simulated means they are
# correlated with: item types '4', '3', '2', '1', 'NC'.
bf_emp = [3.395,  2.000,  1.405, -1.060, -4.240]

In [69]:
#Tau = 3 simulation
# l: probability that a stored feature is kept (the 0/1 mask below is 1 with probability l).
l = 0.6
# t: exponent (tau) passed to echo_intensity.
t = 3
sim_list = []
for s in range(0, 1000):
    # Fresh random feature mask over the acquisition sentence vectors on every run.
    acq_mem = acq * np.random.choice([0, 1], size=(len(acq), len(acq[0])), p=[1-l, l])
    # Echo intensity of every test sentence in each group against the masked memory.
    four_echo = echo_intensity(four, acq_mem, tau=t)
    three_echo = echo_intensity(three, acq_mem, tau=t)
    two_echo = echo_intensity(two, acq_mem, tau=t)
    one_echo = echo_intensity(one, acq_mem, tau=t)
    noncase_echo = echo_intensity(ncase, acq_mem, tau=t)
    # One row per run: the mean echo intensity of each group.
    sim_list.append([np.mean(four_echo), np.mean(three_echo), np.mean(two_echo), np.mean(one_echo), np.mean(noncase_echo)])
print('item type: ', np.array(['4', '3', '2', '1', 'NC']))
print('means: ', np.mean(sim_list, axis=0))

item type:  ['4' '3' '2' '1' 'NC']
means:  [1.64219126 1.49089284 1.4090732  1.09290584 1.0119236 ]


In [70]:
# R-squared between the empirical means and the Tau = 3 simulated means
# (column-wise mean of sim_list from the cell above).
r_sq(bf_emp, np.mean(sim_list, axis=0))

0.9224744512097227

In [72]:
#Tau = 7 simulation
# l: probability that a stored feature is kept (the 0/1 mask below is 1 with probability l).
l = 0.6
# t: exponent (tau) passed to echo_intensity.
t = 7
sim_list = []
for s in range(0, 1000):
    # Fresh random feature mask over the acquisition sentence vectors on every run.
    acq_mem = acq * np.random.choice([0, 1], size=(len(acq), len(acq[0])), p=[1-l, l])
    # Echo intensity of every test sentence in each group against the masked memory.
    four_echo = echo_intensity(four, acq_mem, tau=t)
    three_echo = echo_intensity(three, acq_mem, tau=t)
    two_echo = echo_intensity(two, acq_mem, tau=t)
    one_echo = echo_intensity(one, acq_mem, tau=t)
    noncase_echo = echo_intensity(ncase, acq_mem, tau=t)
    # One row per run: the mean echo intensity of each group.
    sim_list.append([np.mean(four_echo), np.mean(three_echo), np.mean(two_echo), np.mean(one_echo), np.mean(noncase_echo)])
print('item type: ', np.array(['4', '3', '2', '1', 'NC']))
print('means: ', np.mean(sim_list, axis=0))

item type:  ['4' '3' '2' '1' 'NC']
means:  [0.41542668 0.35206077 0.33515607 0.23358504 0.12070609]


In [73]:
# R-squared between the empirical means and the Tau = 7 simulated means
# (column-wise mean of sim_list from the cell above).
r_sq(bf_emp, np.mean(sim_list, axis=0))

0.9982337193975938

Simulation 3: Reid and Katz

In [82]:
# Read every row of the CSV into df.
with open('drmcmt.csv') as f:
    df = list(csv.reader(f, delimiter = ','))

In [83]:
# Read every row of the lure CSV into luredf.
with open('cmtlures.csv') as f:
    luredf = list(csv.reader(f, delimiter = ','))

In [84]:
# Collect the three lure lists from columns 0, 1 and 2 of luredf. Blank cells
# are skipped; every kept cell is lower-cased and stripped of punctuation.
crit_new, met_lure, lit_lure = [], [], []
for row in luredf:
    for col, bucket in enumerate((crit_new, met_lure, lit_lure)):
        if row[col] != '':
            bucket.append(strip_punctuation(row[col].lower()))

In [85]:
# Turn column 0 of df rows 1-5 into lists of cleaned sentences.
study_lists = []
for row in df[1:6]:
    # Lower-case, and treat '?' like '.' so one split handles both endings.
    passage = row[0].lower().replace('?', '.')
    parts = passage.split('. ')
    # Drop the last character of the sentence at index 14
    # (presumably the final period left over after splitting).
    parts[14] = parts[14][0:-1]
    study_lists.append([strip_punctuation(part) for part in parts])

In [86]:
# The sentences at positions 4, 6 and 12 of each study list, lower-cased,
# in study-list order.
old_items = [
    study_list[pos].lower()
    for study_list in study_lists
    for pos in (4, 6, 12)
]

In [87]:
# Flatten all study lists into one list of sentences, keeping their order.
studied_items = [sentence for study_list in study_lists for sentence in study_list]

In [88]:
# "condo" is not in TASA. We replaced the word "condo" with "condominium" to maintain the same meaning.
# This overwrites the entry at index 5 of lit_lure with the substituted sentence.
lit_lure[5] = 'her condominium has three rooms'

In [89]:
# Sanity check: three old items are taken from each study list.
len(old_items)

15

In [90]:
# Sanity check: total number of studied sentences across all study lists.
len(studied_items)

75

In [91]:
# Sentence-vector matrices: the studied sentences (memory) first, then one
# probe matrix per test item type.
mem, old_probes, crit_probes, met_probes, lit_probes = [
    mem_matrix(items)
    for items in (studied_items, old_items, crit_new, met_lure, lit_lure)
]

In [92]:
# Empirical means, in the same order as the simulated proportions they are
# correlated with: old items, crit_new lures, met_lure lures, lit_lure lures.
emp_means = np.array([.5593, .2298, .0766, .0362])

In [96]:
# l: probability that a stored feature is kept (the 0/1 mask below is 1 with probability l).
l = .05
# t: exponent (tau) passed to echo_intensity.
t = 3
# p_old: percentage of test items called "old"; the criterion is the (100 - p_old)th percentile.
p_old = 30
sim_list = []
for s in range(0, 1000):
    # Fresh random feature mask over the studied sentence vectors on every run.
    updated_mem = mem * np.random.choice([0, 1], size=(len(mem), len(mem[0])), p=[1-l, l])
    old_echos = echo_intensity(old_probes, updated_mem, tau=t)
    crit_echos = echo_intensity(crit_probes, updated_mem, tau=t)
    met_echos = echo_intensity(met_probes, updated_mem, tau=t)
    lit_echos = echo_intensity(lit_probes, updated_mem, tau=t)
    # Pool all probe intensities so a single criterion is shared by every item type.
    total_echos = []
    total_echos.extend(old_echos)
    total_echos.extend(crit_echos)
    total_echos.extend(met_echos)
    total_echos.extend(lit_echos)
    total_echos = np.array(total_echos)
    criterion = np.percentile(total_echos, 100-p_old)
    # Proportion of each item type above the criterion.
    # NOTE(review): the denominators 15, 5, 5 and 10 are hard-coded; confirm they
    # match len(old_items), len(crit_new), len(met_lure) and len(lit_lure).
    old_hits = np.sum(old_echos > criterion) / 15
    crit_hits = np.sum(crit_echos > criterion) / 5
    met_hits = np.sum(met_echos > criterion) / 5
    lit_hits = np.sum(lit_echos > criterion) / 10
    sim_list.append([old_hits, crit_hits, met_hits, lit_hits])
sim_list = np.array(sim_list)
# Mean proportion of each item type across the 1000 runs.
sim_means = np.mean(sim_list, axis=0)
print(sim_means)

[0.6074 0.2168 0.064  0.0485]


In [97]:
# R-squared between the empirical means and the simulated means
# (column-wise mean of sim_list from the simulation cell above).
r_sq(emp_means, np.mean(sim_list, axis=0))

0.9947636109894209