In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from time import time

In [2]:
# Load the optdigits training and test sets (CSV files without a header row).
import os

# Directory holding the CSV files. Defaults to the original author's location;
# set the OPTDIGITS_DATA_DIR environment variable to run the notebook elsewhere.
DATA_DIR = os.environ.get(
    "OPTDIGITS_DATA_DIR",
    "/Users/fabbas1/Google Drive/study/Phd/Machine Learning/assignment/ITCS6156_SLProject/DigitRecognition",
)

# reading training set
# NOTE(review): "optdigits_raining.csv" looks like a typo for "optdigits_training.csv";
# the name is kept unchanged because it has to match the file on disk - confirm.
df_train = pd.read_csv(os.path.join(DATA_DIR, "optdigits_raining.csv"), header=None)
# reading test set
df_test = pd.read_csv(os.path.join(DATA_DIR, "optdigits_test.csv"), header=None)

In [3]:
# Separate the 64 feature columns (0-63) from the target column(s) starting at index 64.
# The target is taken with a slice, so y_train stays two-dimensional.
X_train = df_train.iloc[:, 0:64]
y_train = df_train.iloc[:, 64:]
for frame in (y_train, X_train):
    print(frame.shape)

(3823, 1)
(3823, 64)


In [4]:
# Split the test data the same way: columns 0-63 are features, the rest is the target.
X_test, y_test = df_test.iloc[:, 0:64], df_test.iloc[:, 64:]

In [5]:
# Build the standardizer (centering and scaling both enabled, which are the defaults).
scaler = StandardScaler(with_mean=True, with_std=True)

In [6]:
# Standardize the features to zero mean and unit standard deviation per column.
# Note that this does not bound the values to [-1, 1].
# The scaling statistics are learned from the training set only and reused for the test set.
X_train_normalize = scaler.fit_transform(X_train)
X_test_normalize = scaler.transform(X_test)

In [7]:
# Baseline network: two hidden layers with 2 units each, L-BFGS solver, fixed seed.
clf = MLPClassifier(
    solver='lbfgs',
    alpha=1e-5,
    hidden_layer_sizes=(2, 2),
    random_state=1,
)

In [9]:
# Compare training on raw vs. standardized features with the same classifier.
# The clock is read right after fit(), so the reported seconds cover training only:
# inside print(...) the arguments are evaluated left to right, which means clf.score()
# would otherwise run before time() is read and be included in the measurement.
train_labels = y_train.values.ravel()

start = time()
clf.fit(X_train, train_labels)
fit_seconds = time() - start
print( "Train - Without normalizing " , clf.score(X_train, train_labels) , " in " , fit_seconds , " seconds")

start = time()
clf.fit(X_train_normalize, train_labels)
fit_seconds = time() - start
print( "Train - With normalizing " , clf.score(X_train_normalize, train_labels) , " in " , fit_seconds , " seconds" )

Train - Without normalizing  0.101752550353  in  0.062100887298583984  seconds
Train - With normalizing  0.606853256605  in  0.7990338802337646  seconds


In [12]:
# Compare the hidden-layer activation functions on the same (2, 2) architecture,
# once on the raw features and once on the standardized features.
activation_functions = ['identity', 'logistic', 'tanh', 'relu']
for activation_function in activation_functions:
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5, activation=activation_function,
                        hidden_layer_sizes=(2, 2), random_state=1)
    print(activation_function)

    # Read the clock right after fit() so the reported seconds cover training only;
    # print() evaluates clf.score() before any later time() argument would be read.
    start = time()
    clf.fit(X_train, y_train.values.ravel())
    fit_seconds = time() - start
    print( "Without normalizing " , clf.score(X_train,y_train.values.ravel()) , " in " , fit_seconds , " seconds")

    start = time()
    clf.fit(X_train_normalize, y_train.values.ravel())
    fit_seconds = time() - start
    print( "With normalizing " , clf.score(X_train_normalize,y_train.values.ravel()) , " in " , fit_seconds , " seconds" )

identity
Without normalizing  0.732409102799  in  0.9414200782775879  seconds
With normalizing  0.803557415642  in  0.6035029888153076  seconds
logistic
Without normalizing  0.29296364112  in  0.7990288734436035  seconds
With normalizing  0.404394454617  in  0.6328220367431641  seconds
tanh
Without normalizing  0.208998168977  in  0.8986148834228516  seconds
With normalizing  0.517656290871  in  0.8204119205474854  seconds
relu
Without normalizing  0.101752550353  in  0.03566789627075195  seconds
With normalizing  0.606853256605  in  0.6361198425292969  seconds


In [14]:
# Compare the weight-optimization solvers ('sgd', 'lbfgs', 'adam') using the identity
# activation and the (2, 2) architecture on the standardized features.
weight_optimizer = ['sgd', 'lbfgs', 'adam' ]
for solver in weight_optimizer:
    clf = MLPClassifier(solver=solver, alpha=1e-5, activation='identity',
                        hidden_layer_sizes=(2, 2), random_state=1)
    print(solver)

    # Read the clock right after fit() so the reported seconds cover training only;
    # print() evaluates clf.score() before any later time() argument would be read.
    start = time()
    clf.fit(X_train_normalize, y_train.values.ravel())
    fit_seconds = time() - start
    print( "With normalizing " , clf.score(X_train_normalize,y_train.values.ravel()) , " in " , fit_seconds , " seconds" )

sgd




With normalizing  0.662568663353  in  1.7647180557250977  seconds
lbfgs
With normalizing  0.803557415642  in  0.6178781986236572  seconds
adam
With normalizing  0.75621239864  in  1.8466999530792236  seconds


In [26]:
from sklearn.model_selection import ShuffleSplit, cross_val_score

# Sweep the width of a single hidden layer (1 to 19 units). For each width, print the
# width, the mean cross-validation score over 5 shuffled splits (30% held out each time)
# and the seconds spent in cross_val_score.
cv = ShuffleSplit(n_splits=5, test_size=0.3, random_state=0)
mlp_settings = dict(solver='lbfgs', alpha=1e-5, activation='identity', random_state=1)
for i in range(1, 20):
    clf = MLPClassifier(hidden_layer_sizes=(i,), **mlp_settings)
    start = time()
    scores = cross_val_score(clf, X_train_normalize, y_train.values.ravel(), cv=cv)
    print(str(i) , " " , scores.mean() , " " , time() - start)

1   0.459285091543   2.3443610668182373
2   0.77593722755   2.2074170112609863
3   0.887358326068   2.131673812866211
4   0.903400174368   2.1041691303253174
5   0.920139494333   2.148458957672119
6   0.934960767219   0.714695930480957
7   0.949782040105   0.61136794090271
8   0.949433304272   0.5179600715637207
9   0.949782040105   0.5154130458831787
10   0.95658238884   0.5122199058532715
11   0.951700087184   0.5258491039276123
12   0.954664341761   0.523129940032959
13   0.953966870096   0.5029819011688232
14   0.952223190933   0.507411003112793
15   0.956233653008   0.5046780109405518
16   0.95658238884   0.5056488513946533
17   0.95379250218   0.5607068538665771
18   0.95658238884   0.5135438442230225
19   0.95379250218   0.5195498466491699


In [24]:
from sklearn.model_selection import ShuffleSplit, cross_val_score

# Grid over two hidden layers with 1 to 10 units each, scored with the same 5 shuffled
# splits (30% held out). The network is built as (i, j): i is the first hidden layer and
# j the second. Each printed row is "j  i  mean score  seconds", i.e. the outer-loop
# (second-layer) size comes first.
cv = ShuffleSplit(n_splits=5, test_size=0.3, random_state=0)
mlp_settings = dict(solver='lbfgs', alpha=1e-5, activation='identity', random_state=1)
for j in range(1, 11):
    for i in range(1, 11):
        clf = MLPClassifier(hidden_layer_sizes=(i, j), **mlp_settings)
        start = time()
        scores = cross_val_score(clf, X_train_normalize, y_train.values.ravel(), cv=cv)
        print(str(j) , " " , str(i) , " " , scores.mean() , " " , time() - start)

1   1   0.455972101133   2.127701997756958
1   2   0.427027027027   2.0518689155578613
1   3   0.458238884045   2.074265956878662
1   4   0.463469921534   2.055495023727417
1   5   0.451089799477   2.0879600048065186
1   6   0.426503923278   2.142798900604248
1   7   0.424760244115   2.222460985183716
1   8   0.443068875327   2.1504440307617188
1   9   0.420924149956   2.342406988143921
1   10   0.392327811683   2.2957539558410645
2   1   0.445510026155   1.8914971351623535
2   2   0.786399302528   2.5541698932647705
2   3   0.770531822145   2.29443097114563
2   4   0.768265039233   2.3217930793762207
2   5   0.752223190933   2.6641299724578857
2   6   0.773147340889   2.398036003112793
2   7   0.740191804708   2.475847005844116
2   8   0.774367916303   2.6544439792633057
2   9   0.77262423714   2.3639779090881348
2   10   0.744376634699   2.426102876663208
3   1   0.458936355711   2.2066948413848877
3   2   0.766870095902   2.261491060256958
3   3   0.901133391456   2.3562159538269043

In [31]:
from sklearn.model_selection import ShuffleSplit, cross_val_score

# Final model: a single hidden layer of 10 units with the identity activation, fitted on
# the whole standardized training set and scored on the standardized test set.
clf = MLPClassifier(solver='lbfgs', alpha=1e-5, activation='identity',
                    hidden_layer_sizes=(10,), random_state=1, max_iter=200)
clf.fit(X_train_normalize, y_train.values.ravel())
# y_test was sliced with iloc[:, 64:] and is therefore two-dimensional; flatten it the
# same way the training labels are flattened so both calls receive 1-D label arrays.
clf.score(X_test_normalize, y_test.values.ravel())


0.9393433500278241