In [1]:
import numpy as np
import csv
import random

For code to construct Phonology Vectors, see Parrish (2017):

Parrish, A. (2017, October). Poetic sound similarity vectors using Phonetic Features [Paper presentation]. 
AAAI Conference on Artificial Intelligence and Interactive Digital Entertainment, Snowbird, Utah, USA. https://www.aaai.org/ocs/index.php/AIIDE/AIIDE17/paper/view/15879/15227


You can download the file "Phonology vectors.txt" from our OSF page (https://osf.io/6mys9/).
This file contains the pre-trained phonology vectors.

In [6]:
# Read the whole phonology-vector file into one string; it is parsed in the next cell.
with open('Phonology vectors.txt') as vector_file:
    df = vector_file.read()

In [7]:
# Convert the raw text into a word list and a matrix of word vectors.
# Each non-empty line is parsed as "<word> <v1> <v2> ..." with single-space separators;
# row k of word_matrix is the vector for word_list[k].
df = df.split('\n')
df = [i.strip() for i in df]
word_list = []
word_matrix = []
for i in df:
    # Skip blank lines (e.g. the empty string left behind by a trailing newline).
    # Without this, an empty row makes the list ragged and np.array() below fails.
    if not i:
        continue
    array = i.split(' ')
    word_list.append(array[0])
    word_matrix.append([float(x) for x in array[1:]])
word_matrix = np.array(word_matrix, dtype=np.float32)

In [8]:
# Map each word to the index of its row in word_matrix
# (if a word occurs more than once, the last occurrence wins).
word_dic = {word: row for row, word in enumerate(word_list)}

In [9]:
def normalize_vec(vec):
    """Scale a vector to unit Euclidean length.

    Parameters
    ----------
    vec : array-like
        The vector to normalise. Lists/tuples are accepted and converted
        with ``np.asarray``.

    Returns
    -------
    numpy.ndarray
        ``vec`` divided by its Euclidean norm. An all-zero vector is returned
        as zeros (divided by 1) rather than NaN, matching how
        ``normalize_matrix`` treats zero-magnitude rows.
    """
    vec = np.asarray(vec)
    mag = np.sqrt(np.sum(vec**2))
    # Guard against division by zero for the zero vector.
    if mag == 0:
        mag = 1
    return vec / mag

In [10]:
def normalize_matrix(mat):
    """Return a copy of ``mat`` with every row scaled to unit Euclidean length.

    Rows whose magnitude is zero are divided by 1, i.e. left as zeros,
    so no NaN values are produced.
    """
    row_norms = np.sqrt((mat**2).sum(axis=1))
    # Replace zero magnitudes by 1 so all-zero rows divide cleanly.
    row_norms[row_norms == 0] = 1
    # Dividing the transpose broadcasts one norm per original row.
    return (mat.T / row_norms).T

In [11]:
def make_memory(word_list):
    """Stack the vectors of the given words into a matrix.

    Each word is looked up in the module-level ``word_dic`` to find its row
    in the module-level ``word_matrix``; rows are returned in the order of
    ``word_list``. A word missing from ``word_dic`` raises ``KeyError``.
    """
    return np.array([word_matrix[word_dic[word]] for word in word_list])

In [12]:
def echo_intensity(probes, memory, tau=3):
    """Compute one summed activation value per probe against a memory matrix.

    The rows of ``memory`` are normalised to unit length with
    ``normalize_matrix``; ``probes`` are used as given (not normalised here).
    Each probe/trace dot product is raised to the power ``tau`` and the
    results are summed over traces.

    Parameters
    ----------
    probes : numpy.ndarray
        2-D array with one probe vector per row.
    memory : numpy.ndarray
        2-D array with one memory trace per row.
    tau : number, default 3
        Exponent applied to each similarity. For the even exponents 2 and 4,
        which the original code special-cased, the sign of the similarity is
        kept (``sign(s) * |s|**tau``) so that negative similarities still
        contribute negatively, as they do for odd exponents.

    Returns
    -------
    numpy.ndarray
        1-D array with one value per probe.

    Notes
    -----
    Fixes two defects in the earlier version: the ``tau == 2`` result was
    overwritten by the ``else`` branch of the separate ``tau == 4`` test, and
    the ``tau == 4`` expression ``s*|s|*s*|s|`` is simply ``s**4``, so neither
    branch actually kept the sign.
    """
    normed_memory = normalize_matrix(memory)
    similarities = probes @ np.transpose(normed_memory)
    if tau in (2, 4):
        # Even power: re-apply the sign that plain exponentiation would drop.
        activations = np.sign(similarities) * np.abs(similarities) ** tau
    else:
        activations = similarities**tau
    return np.sum(activations, axis=1)

In [13]:
import scipy.stats
def r_sq(x, y):
    """Return the squared Pearson correlation coefficient between ``x`` and ``y``."""
    r, _p_value = scipy.stats.pearsonr(x, y)
    return r**2

def mse(x, y):
    """Return the mean squared error between ``x`` and ``y``.

    Both arguments may be any array-like (lists, tuples, NumPy arrays); they
    are converted with ``np.asarray`` so that two plain lists can be compared,
    just as ``r_sq`` allows. The sum of squared differences is divided by
    ``len(x)``.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    return np.sum((x - y)**2) / len(x)

In [14]:
# Read the stimulus CSV into a list of rows; each row is a list of strings.
# The file is opened with newline='' as the csv module documentation requires,
# so that newlines inside quoted fields are handled correctly.
# Note: this reuses the name `df`, which earlier held the phonology-vector lines.
with open('Stimuli_Phonological_False_Memory.csv', newline='') as f:
    df = list(csv.reader(f))

In [15]:
# Drop the first row read from the CSV (presumably the header row -- confirm against the file).
# NOTE: not idempotent -- re-running this cell removes one more row each time.
df = df[1:]

In [16]:
# First 10 stimulus rows; their words come first in df_test_list_a.
df_a = df[:10]

In [17]:
# All remaining stimulus rows; their words come first in df_test_list_b.
df_b = df[10:]

In [18]:
# Build test list A as four consecutive blocks:
#   1. every non-first column of each df_a row,
#   2. the first column of each df_a row,
#   3. every non-first column of each df_b row,
#   4. the first column of each df_b row.
df_test_list_a = (
    [word for row in df_a for word in row[1:]]
    + [row[0] for row in df_a]
    + [word for row in df_b for word in row[1:]]
    + [row[0] for row in df_b]
)

In [19]:
# Build test list B: same four-block layout as list A, but with the df_b rows first.
#   1. every non-first column of each df_b row,
#   2. the first column of each df_b row,
#   3. every non-first column of each df_a row,
#   4. the first column of each df_a row.
df_test_list_b = (
    [word for row in df_b for word in row[1:]]
    + [row[0] for row in df_b]
    + [word for row in df_a for word in row[1:]]
    + [row[0] for row in df_a]
)

Simulations

In [20]:
# Reference values the simulated means are compared against (via r_sq / mse).
# Order matches the simulated means: target-list items, target-list critical lures,
# foil-list items, foil-list critical lures.
emp_data = [0.682, 0.578, 0.35033333, 0.4]

In [21]:
# Single simulation at fixed parameter values.
l = 0.03        # probability that each feature of a studied trace is kept in memory
t = 3           # exponent passed to echo_intensity as tau
p_old = 51      # percentage of test items whose familiarity should exceed the criterion
n_sims = 1000   # simulated runs: first half uses test list A, second half test list B
n_list = 60     # list items per block of a test list (assumes 10 rows x 6 items -- confirm against the CSV)
n_lure = 10     # critical lures per block of a test list

# Seeded local generator so that re-running this cell reproduces the same results
# without touching NumPy's global random state.
rng = np.random.default_rng(0)

sim_list = []
for s in range(n_sims):
    # The lists are only read, never modified, so no copy is needed.
    stims = df_test_list_a if s < n_sims // 2 else df_test_list_b
    # Only the first block of list items is stored in memory.
    memory = make_memory(stims[:n_list])
    # Keep each stored feature with probability l, zero it otherwise.
    memory = memory * rng.choice([0, 1], size=memory.shape, p=[1 - l, l])
    probes = make_memory(stims)
    familiarities = echo_intensity(probes, memory, tau=t)
    # Items above this percentile are counted as judged "old".
    criterion = np.percentile(familiarities, 100 - p_old)
    # Test-list layout: stored list items, their critical lures,
    # non-stored list items, their critical lures.
    old_items = familiarities[:n_list]
    crit_lure = familiarities[n_list:n_list + n_lure]
    unrel_items = familiarities[n_list + n_lure:2 * n_list + n_lure]
    uncrit_items = familiarities[2 * n_list + n_lure:2 * (n_list + n_lure)]
    old_hits = np.sum(old_items > criterion) / n_list
    crit_hits = np.sum(crit_lure > criterion) / n_lure
    unrel_hits = np.sum(unrel_items > criterion) / n_list
    uncrit_hits = np.sum(uncrit_items > criterion) / n_lure
    sim_list.append([old_hits, crit_hits, unrel_hits, uncrit_hits])
means = np.mean(sim_list, axis=0)
sds = np.std(sim_list, axis=0)
print('itemtype:', ['target list/list item', 'target list/critical lure', 'foil list/list item', 'foil list/critical lure'])
print('means: ', means)
print('SDs: ', sds)
print('R-sq fit:', r_sq(emp_data, means))

itemtype: ['target list/list item', 'target list/critical lure', 'foil list/list item', 'foil list/critical lure']
means:  [0.6565     0.5434     0.37888333 0.3443    ]
SDs:  [0.0501218  0.15052056 0.05470555 0.13640935]
R-sq fit: 0.9468866119795487


In [27]:
# Sweep the feature-retention probability l over 0.01 ... 0.10 and record how well
# the simulated means fit emp_data for each value.
para_list = []
t = 3           # exponent passed to echo_intensity as tau
p_old = 51      # percentage of test items whose familiarity should exceed the criterion
n_sims = 1000   # simulated runs per parameter value: first half list A, second half list B
n_list = 60     # list items per block of a test list (assumes 10 rows x 6 items -- confirm against the CSV)
n_lure = 10     # critical lures per block of a test list

# Seeded local generator so that re-running this cell reproduces the same results
# without touching NumPy's global random state.
rng = np.random.default_rng(0)

for L in range(1, 11):
    l = L / 100
    sim_list = []
    for s in range(n_sims):
        # The lists are only read, never modified, so no copy is needed.
        stims = df_test_list_a if s < n_sims // 2 else df_test_list_b
        # Only the first block of list items is stored in memory.
        memory = make_memory(stims[:n_list])
        # Keep each stored feature with probability l, zero it otherwise.
        memory = memory * rng.choice([0, 1], size=memory.shape, p=[1 - l, l])
        probes = make_memory(stims)
        familiarities = echo_intensity(probes, memory, tau=t)
        # Items above this percentile are counted as judged "old".
        criterion = np.percentile(familiarities, 100 - p_old)
        # Test-list layout: stored list items, their critical lures,
        # non-stored list items, their critical lures.
        old_items = familiarities[:n_list]
        crit_lure = familiarities[n_list:n_list + n_lure]
        unrel_items = familiarities[n_list + n_lure:2 * n_list + n_lure]
        uncrit_items = familiarities[2 * n_list + n_lure:2 * (n_list + n_lure)]
        old_hits = np.sum(old_items > criterion) / n_list
        crit_hits = np.sum(crit_lure > criterion) / n_lure
        unrel_hits = np.sum(unrel_items > criterion) / n_list
        uncrit_hits = np.sum(uncrit_items > criterion) / n_lure
        sim_list.append([old_hits, crit_hits, unrel_hits, uncrit_hits])
    means = np.mean(sim_list, axis=0)
    sds = np.std(sim_list, axis=0)
    r_fit = r_sq(means, emp_data)
    mse_fit = mse(means, emp_data)
    # One entry per parameter value: [l, R-squared, MSE, simulated means].
    para_list.append([l, r_fit, mse_fit, means])

In [28]:
def sel_third(x):
    """Return the element at index 2 of ``x`` (used as a sort key)."""
    third_index = 2
    return x[third_index]

In [29]:
# Order the entries in place by their third field (the mean squared error), smallest first.
para_list.sort(key=lambda entry: entry[2])

In [30]:
# Display the sweep results; each entry is [l, R-squared, MSE, simulated means].
para_list

[[0.03,
  0.946843158830262,
  0.0013847595579166757,
  array([0.65783333, 0.5526    , 0.37675   , 0.3399    ])],
 [0.04,
  0.951867764278088,
  0.0028158469944443527,
  array([0.70201667, 0.5574    , 0.33866667, 0.2985    ])],
 [0.02,
  0.9271806276941146,
  0.003411603804527679,
  array([0.60466667, 0.5327    , 0.42471667, 0.391     ])],
 [0.05,
  0.9671184587772628,
  0.005540265819444397,
  array([0.74121667, 0.5778    , 0.30033333, 0.2729    ])],
 [0.01,
  0.9069826507274845,
  0.008684409706944321,
  array([0.55681667, 0.5175    , 0.46616667, 0.4446    ])],
 [0.06,
  0.9726962520120538,
  0.00929376222819444,
  array([0.77328333, 0.5865    , 0.27058333, 0.2503    ])],
 [0.07,
  0.9747983139237092,
  0.01313021767669463,
  array([0.79843333, 0.5897    , 0.24801667, 0.2316    ])],
 [0.08,
  0.9741979915012103,
  0.018019124307611618,
  array([0.82328333, 0.5922    , 0.22656667, 0.2087    ])],
 [0.09,
  0.9780627975772402,
  0.021874143165583993,
  array([0.8417    , 0.6043    , 0.2