In [1]:
# -*- coding: utf-8 -*-
from __future__ import print_function
from __future__ import absolute_import

import codecs
import io
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
from urllib import request
import zipfile

from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPRegressor

if sys.version_info[0] == 2:
    import cPickle as pickle
else:
    import pickle

import wbai_aphasia

In [2]:
#data file for PMSP96
filename = 'pmsp96.pk'
with codecs.open(filename,'rb') as f:
    data = pickle.load(f)       

O, P, S, words = data['orthography'], data['phonology'], data['semantics'], data['words']

rows, o_cols, s_cols, p_cols = len(O), len(O['ace']), len(S['ace']), len(P['ace'])
Oa = np.zeros((rows, o_cols), dtype=np.float32)
Pa = np.zeros((rows, p_cols), dtype=np.float32)
Sa = np.zeros((rows, s_cols), dtype=np.float32)

for i, word in enumerate(sorted(O)):
    Oa[i], Pa[i], Sa[i] = O[word], P[word], S[word]

print(type(Oa), type(Pa), type(Sa), Oa.shape, Pa.shape, Sa.shape)
OS = np.concatenate((Oa,Sa),axis=1)

<class 'numpy.ndarray'> <class 'numpy.ndarray'> <class 'numpy.ndarray'> (2985, 105) (2985, 61) (2985, 200)


In [3]:
# modeling from othography + semantics to phonology
X_train, X_test, y_train, y_test = train_test_split(OS, Pa, test_size=0.2)
model = MLPClassifier(verbose=False,max_iter=3000)
model.fit(X_train,y_train)
model.score(X_test,y_test)
y_test_ = model.predict(X_test)
np.mean((y_test_ - y_test)**2) 

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=3000, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

0.5728643216080402

0.011176099074608013

In [4]:
# modeling from orthography to phonology
X_train, X_test, y_train, y_test = train_test_split(Oa, Pa, test_size=0.2)
model = MLPClassifier(verbose=False,max_iter=3000)
model.fit(X_train,y_train)
model.score(X_test,y_test)
y_test_ = model.predict(X_test)
np.mean((y_test_ - y_test)**2) 

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=3000, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

0.7319932998324958

0.006864925721503694

In [5]:
# modeling from semantics to phonology
X_train, X_test, y_train, y_test = train_test_split(Sa, Pa, test_size=0.2)
model = MLPClassifier(verbose=False,max_iter=3000)
model.fit(X_train,y_train)
model.score(X_test,y_test)
y_test_ = model.predict(X_test)
np.mean((y_test_ - y_test)**2) 

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=3000, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

0.0

0.10849328610264437

In [None]:
x_prev = -1
for word in words:
    x = words[word]
    if (x_prev + 1) != x:
        print(x, word)
    x_prev = x