In [None]:
# -*- coding: utf-8 -*-
from __future__ import print_function
from __future__ import absolute_import

import codecs
import io
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
from urllib import request
import zipfile

from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPRegressor

if sys.version_info[0] == 2:
    import cPickle as pickle
else:
    import pickle

import wbai_aphasia

In [None]:
# Data file for PMSP96: a pickled mapping that holds per-word 'orthography',
# 'phonology' and 'semantics' vectors plus the 'words' and 'phoneme' entries.
filename = 'pmsp96.pk'
# NOTE(security): pickle.load can execute arbitrary code -- only open a file
# that comes from a trusted source.
with open(filename, 'rb') as f:  # plain binary read; no codec layer is needed
    data = pickle.load(f)

O, P, S = data['orthography'], data['phonology'], data['semantics']
words, phoneme = data['words'], data['phoneme']

# Fix the row order once (sorted keys of O) and take the vector widths from the
# first word instead of relying on a hard-coded probe key being present.
word_order = sorted(O)
if not word_order:
    raise ValueError('no words found in {!r}'.format(filename))
first_word = word_order[0]

rows = len(word_order)
o_cols, s_cols, p_cols = len(O[first_word]), len(S[first_word]), len(P[first_word])
Oa = np.zeros((rows, o_cols), dtype=np.float32)
Pa = np.zeros((rows, p_cols), dtype=np.float32)
Sa = np.zeros((rows, s_cols), dtype=np.float32)

# Row i of every matrix belongs to the i-th word in sorted order.
for i, word in enumerate(word_order):
    Oa[i], Pa[i], Sa[i] = O[word], P[word], S[word]

print(type(Oa), type(Pa), type(Sa), Oa.shape, Pa.shape, Sa.shape)
# Orthography and semantics side by side: one combined input row per word.
OS = np.concatenate((Oa, Sa), axis=1)

In [None]:
# Modeling from orthography + semantics to phonology.
# A fixed random_state makes both the train/test split and the network's
# initialisation repeatable across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(OS, Pa, test_size=0.2, random_state=42)
model = MLPClassifier(verbose=False, max_iter=3000, random_state=42)
model.fit(X_train, y_train)
# A bare expression in the middle of a cell is discarded, so print the score.
print('test score:', model.score(X_test, y_test))
y_test_ = model.predict(X_test)
# Mean squared difference between predicted and target phonology vectors;
# as the cell's last expression this is what the notebook displays.
np.mean((y_test_ - y_test)**2)

In [None]:
# Modeling from orthography to phonology.
# A fixed random_state makes both the train/test split and the network's
# initialisation repeatable across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(Oa, Pa, test_size=0.2, random_state=42)
model = MLPClassifier(verbose=False, max_iter=3000, random_state=42)
model.fit(X_train, y_train)
# A bare expression in the middle of a cell is discarded, so print the score.
print('test score:', model.score(X_test, y_test))
y_test_ = model.predict(X_test)
# Mean squared difference between predicted and target phonology vectors;
# as the cell's last expression this is what the notebook displays.
np.mean((y_test_ - y_test)**2)

In [None]:
# Modeling from semantics to phonology.
# A fixed random_state makes both the train/test split and the network's
# initialisation repeatable across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(Sa, Pa, test_size=0.2, random_state=42)
model = MLPClassifier(verbose=False, max_iter=3000, random_state=42)
model.fit(X_train, y_train)
# A bare expression in the middle of a cell is discarded, so print the score.
print('test score:', model.score(X_test, y_test))
y_test_ = model.predict(X_test)
# Mean squared difference between predicted and target phonology vectors;
# as the cell's last expression this is what the notebook displays.
np.mean((y_test_ - y_test)**2)

In [None]:
# Scan `words` in iteration order and report every entry whose stored value is
# not exactly one more than the previous entry's value (the first entry is
# compared against 0).
# NOTE(review): presumably `words` maps each word to an integer index -- confirm.
x_prev = -1
for word in words:
    x = words[word]
    expected = x_prev + 1
    x_prev = x
    if x != expected:
        # Numbering jumps here: show the value that was found and its word.
        print(x, word)