# WBAI Aphasia (Pure Alexia) Handson 00
<!-- green '#007879' -->

<br>
<div align='center'>
    <font size='+2' color='#0070FF' align='right'>17/Sep/2018</font><br><br>
<!--<font size='+2' color='#0070FF' align='center'><strong>浅川 伸一</strong> &lt;asakawa@ieee.org&gt;</font>-->
    <font size='+2' color='#0070FF' align='center'><strong><a href="http://www.cis.twcu.ac.jp/~asakawa/">浅川 伸一</a> &lt;asakawa@ieee.org&gt;</strong></font>
    <br><br>
</div>
<br>
<img src='https://wba-initiative.org/wp-content/uploads/2015/05/logo.png' width='29%' align='center'>
<br>

In [None]:
# -*- coding: utf-8 -*-
import sys
import numpy as np
import codecs
from sklearn.neural_network import MLPClassifier
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
### Display defaults for the figures and array printouts in this notebook
plt.rcParams.update({
    'figure.figsize': (12, 8),         # default figure size
    'image.interpolation': 'nearest',  # draw image pixels without smoothing
    'image.cmap': 'gray',              # default colormap for images
})

# Print floats with 3 decimals and without scientific notation.
np.set_printoptions(suppress=True, precision=3)

In [None]:
import wbai_aphasia as handson
from wbai_aphasia import tanh, sigmoid, relu
from wbai_aphasia import xavier_initializer

In [None]:
# Record the environment in which this notebook was run.
# Each line is a shell escape; the trailing printf emits a blank separator line.
!date; printf '\n'
!uname -a; printf '\n'
!type python; printf '\n'
!python --version; printf '\n'
!gcc --version; printf '\n'
!conda --version; printf '\n'

In [None]:
# Read the raw PMSP96 corpus file into `lines`, one string per text line.
# The encoding is pinned to UTF-8 (the encoding this notebook also uses when it
# previews the same file) so the result does not depend on the platform's
# locale default. Built-in open() is what codecs.open() delegates to when no
# encoding is given, so text-mode newline handling is unchanged.
original_file = '../data/PMSP96.orig'
with open(original_file, 'r', encoding='utf-8') as f:
    lines = f.readlines()

---
<img src='./assets/pmsp96Fig1.png' align='center' width='39%'>
<div align='center'>Plaut et al. (1996) Fig. 1</div>
<p>

- Plaut, D., McClelland, J. L., Seidenberg, M. S., & Patterson, K. (1996). Understanding normal and impaired word reading: Computational principles in quasi-regular domains. _Psychological Review_, 103, 56-115.

<font color='green'>We will refer to the above as PMSP96 henceforth.</font>

---


---
<img src="./assets/pmsp96Tab2.png" width="74%" align="center">
<div align='center'>PMSP96 Table 2</div>

---                                                      

In [None]:
# Preview the first four raw lines of the corpus file to show its layout.
filename = '../data/PMSP96.orig'
# Use a context manager so the file handle is closed as soon as the lines
# have been read, instead of being left open until garbage collection.
with codecs.open(filename, 'r', 'utf-8') as f:
    x = f.readlines()
x[:4]

In [None]:
# Parse the corpus held in `lines`. Records come in groups of three lines:
#   line 0 of a group: whitespace-separated header tokens
#       token 2 -> inpStr, token 3 -> outStr, token 4 -> freq,
#       token 5 -> class label, tallied in wrd_class
#   line 1 of a group: integer vector appended to `inp`
#   line 2 of a group: integer vector appended to `out`
# NOTE(review): presumably `inp` is the orthographic pattern and `out` the
# phonological pattern of each word -- confirm against the data file.
inp, inpStr, out, outStr, freq = [], [], [], [], []
wrd_class = {}  # class label -> number of header lines carrying that label
for i, line in enumerate(lines):
    # A dedicated name is used for the tokens so the loop neither clobbers
    # other notebook variables named `x` nor gets confused with the matrix `X`.
    fields = line.strip().split()
    position = i % 3  # position of this line inside its three-line record
    if position == 0:
        inpStr.append(fields[2])
        outStr.append(fields[3])
        freq.append(fields[4])  # stored as the raw string token
        wrd_class[fields[5]] = wrd_class.get(fields[5], 0) + 1
    elif position == 1:
        inp.append(np.array(fields, dtype=np.int32))
    else:
        out.append(np.array(fields, dtype=np.int32))

# Stack the per-word vectors into 2-D arrays (one row per word).
X = np.array(inp)
y = np.array(out)

In [None]:
# Print the number of words in each class (sorted by class label), followed by
# the total over every class except the one labelled '#'.
n = 0
for k in sorted(wrd_class):
    # Compare by value: `is not` tests object identity, which is not
    # guaranteed to hold for two equal strings.
    if k != '#':
        n += wrd_class[k]
    print('{0:7s} {1:03d}'.format(k, wrd_class[k]))
print('---\nTotal: {} except for #'.format(n))

---
<img src="./assets/pmsp96A1.png" width="74%" align="center">
<div align='center'>PMSP96 Appendix A</div>

---                                                      

---
# Today's task: reproduce the results shown below

<img src='./assets/pmsp96Tab7.png' align='center' width='74%'>
<div align='center'>PMSP96 Table 7</div>

---

<img src='./assets/1979GlushkoA1.jpg' align='center' width='74%'>
<div align='center'>Glushko (1979) Appendix Table 1</div>

- Glushko, R. J. (1979). The organization and activation of orthographic knowledge in reading aloud. _Journal of Experimental Psychology: Human Perception and Performance_, 5, 674-691.

---

In [None]:
# Give every sample a fold index in 0 .. nKfold-1: shuffle the sample indices
# with a fixed seed, then take each shuffled index modulo the number of folds.
nKfold = 5
random_state = 2
fold_rng = np.random.RandomState(random_state)
perms = fold_rng.permutation(len(X)) % nKfold

In [None]:
# Hyper-parameter sets and matching plot styles for the four configurations
# compared below: {SGD, Adam} x {relu, tanh}. The three lists are parallel:
# entry k of `labels`, `params` and `plot_args` describes the same run.
hidden_layers = (128, 128)

# Settings shared by both SGD runs and by both Adam runs, respectively.
_sgd_common = dict(hidden_layer_sizes=hidden_layers,
                   solver='sgd',
                   learning_rate='constant',
                   momentum=0.5,
                   nesterovs_momentum=False,
                   learning_rate_init=0.1)
_adam_common = dict(hidden_layer_sizes=hidden_layers,
                    solver='adam',
                    learning_rate_init=0.01)

# Keyword arguments passed on to the classifier constructor.
params = [
    dict(_sgd_common, activation='relu'),
    dict(_sgd_common, activation='tanh'),
    dict(_adam_common, activation='relu'),
    dict(_adam_common, activation='tanh'),
]

labels = ['SGD, relu', 'SGD, tanh', 'Adam, relu', 'Adam, tanh']

# Keyword arguments passed on to plt.plot for each configuration's curves.
plot_args = [
    dict(c='red', linestyle='-', label='SGD, relu', linewidth=1),
    dict(c='green', linestyle='--', label='SGD, tanh', linewidth=3),
    dict(c='blue', linestyle='-', label='Adam, relu', linewidth=1),
    dict(c='black', linestyle='--', label='Adam, tanh', linewidth=3),
]

In [None]:
# Train one MLP per configuration with nKfold-fold cross-validation.
# For every fold the training-loss curve is drawn and the training / test
# accuracies are printed; all curves share a single figure.
mlps = []  # one classifier per configuration, as left after its last fold
for label, param, plot_arg in zip(labels, params, plot_args):
    print('-' * 16)
    print('training: {}'.format(label))
    print('-' * 16)

    mlp = MLPClassifier(max_iter=200,
                        alpha=1e-4,
                        verbose=False,
                        tol=1e-4,
                        random_state=0,
                        early_stopping=False,
                        **param)

    # Separate the legend label from the line style so that only the first
    # fold of each configuration is labelled; otherwise the legend would list
    # every configuration nKfold times.
    line_style = {key: value for key, value in plot_arg.items() if key != 'label'}
    legend_label = plot_arg.get('label', label)

    for i in range(nKfold):
        # perms holds one fold index per sample; fold i is held out for testing.
        X_train, y_train = X[perms != i], y[perms != i]
        X_test, y_test = X[perms == i], y[perms == i]
        mlp.fit(X_train, y_train)
        # Labels starting with an underscore are skipped by plt.legend.
        plt.plot(mlp.loss_curve_,
                 label=legend_label if i == 0 else '_nolegend_',
                 **line_style)
        print("Training accuracy: {:.3f}".format(mlp.score(X_train, y_train)), end=' ')
        print("Test accuracy: {:.3f}".format(mlp.score(X_test, y_test)))

    # The same estimator object is refitted for each fold, so the stored
    # classifier reflects the fit on the final fold.
    mlps.append(mlp)

plt.xlabel('iteration')
plt.ylabel('training loss')
plt.legend(loc='upper right')