In [58]:
import warnings
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split,KFold
from sklearn.metrics import accuracy_score,log_loss
from sklearn.neural_network import multilayer_perceptron
from sklearn.model_selection import GridSearchCV,PredefinedSplit
warnings.filterwarnings('ignore')

In [59]:
training_df=pd.read_csv("data/optdigits.tra",header=None)
training_df=training_df[((training_df.iloc[:,-1]==1) | (training_df.iloc[:,-1]==7))]
training_df.replace({7:0},inplace=True)
DIMENSIONS=64
models=[]
data_partitions=[]
kfold=KFold(n_splits=10,shuffle=True,random_state=2)
for train_index,test_index in kfold.split(training_df):
    data_partitions.append((training_df.iloc[train_index,:-1].values,training_df.iloc[train_index,-1].values,training_df.iloc[test_index,:-1].values,training_df.iloc[test_index,-1].values))

In [60]:
class LogisticRegression:
    def __init__(self,learning_rate,momentum,dimensions,a,b):
        self.learning_rate=learning_rate
        self.momentum=momentum
        self.dimensions=dimensions
        self.weights=np.random.uniform(low=0,high=0.0001,size=(dimensions,1))
        self.a=a
        self.b=b
    def print_model_parameters(self,i):
        print("*"*45)
        print("Run:{0}\nInitial learning rate:{1}\nInitial momentum:{2}".format(i,self.learning_rate,self.momentum))
    def sigmoid_activation(self,x):
        return 1/(1+np.exp(-x))
    def cross_entropy(self,y_true,y_pred):
        return log_loss(y_true,y_pred)
    def train(self,x_train,y_train,n_iter=50):
        #print(x_train.shape)
        errors=[]
        converged=False
        while converged==False and n_iter!=0:
            error=0
            n_iter-=1
            predictions=[]
            temp=np.zeros(shape=(self.dimensions))
            d_weights=np.zeros(shape=(self.dimensions))
            for i in range(x_train.shape[0]):
                o=0
                for j in range(x_train.shape[1]):
                    o+=(self.weights[j]*x_train[i,j])
                y_pred=self.sigmoid_activation(o)
                for j in range(x_train.shape[1]):
                    #d_weights[j]+=learning_rate*(y_train[i]-y_pred)*x_train[i,j]
                    d_weights[j]=(self.learning_rate*(y_train[i]-y_pred)*x_train[i,j])+(self.momentum*d_weights[j])
                predictions.append(y_train[i]-y_pred)
            isDecreasing=True
            errors.append(self.cross_entropy(y_train,np.array(predictions)))
            current_error=errors[-1]
            for i in range(len(errors)-1):
                if errors[i]<=current_error:
                    isDecreasing=False
                    break
            if isDecreasing:
                self.learning_rate+=self.a
            else:
                self.learning_rate-=(self.b*self.learning_rate)
            for j in range(self.dimensions):
                #self.weights[j]+=self.learning_rate*d_weights[j]
                self.weights[j]+=(d_weights[j])
            try:
                if errors[-2]==errors[-1]:
                    converged=True
            except IndexError:
                continue
        self.evaluate("Training",y_train,self.predict(x_train))
    def predict(self,x):
        y_pred=self.sigmoid_activation(np.sum(x*self.weights.T,axis=1))
        y_pred=np.array(y_pred>0.5,dtype=np.int16)
        return y_pred
    def evaluate(self,string,y_true,y_pred):
        print("{0} error rate for run :{1}".format(string,1-accuracy_score(y_true,y_pred)))

In [62]:
for i in range(10):
    learning_rate=np.random.uniform(low=0.0001,high=0.001)
    momentum=np.random.uniform(low=0.9,high=0.99)
    model=LogisticRegression(learning_rate,momentum,DIMENSIONS,0.0001,0.0002)
    x_train,y_train,x_test,y_test=data_partitions[i]
    model.print_model_parameters(i)
    model.train(x_train,y_train,10)
    models.append(model)
    model.evaluate("Test",y_test,model.predict(x_test))

*********************************************
Run:0
Initial learning rate:0.00040144162936975737
Initial momentum:0.9737286382324217
Training error rate for run :0.017191977077363862
Test error rate for run :0.012820512820512775
*********************************************
Run:1
Initial learning rate:0.0005401586661375765
Initial momentum:0.9799547161610317
Training error rate for run :0.015759312320916874
Test error rate for run :0.012820512820512775
*********************************************
Run:2
Initial learning rate:0.00021777353675207426
Initial momentum:0.9507496855473396
Training error rate for run :0.014326647564469885
Test error rate for run :0.012820512820512775
*********************************************
Run:3
Initial learning rate:0.00011092765858462998
Initial momentum:0.964516466744894
Training error rate for run :0.011461318051575908
Test error rate for run :0.012820512820512775
*********************************************
Run:4
Initial learning rate:0.0004564738

In [63]:
abs_avg_weights=[]
for model in models:
    abs_avg_weights.append(np.abs(model.weights))
abs_avg_weights=np.array(abs_avg_weights)
abs_avg_weights=abs_avg_weights.sum(axis=0)


In [64]:
after_elimination_10=abs_avg_weights.argsort()[:int(0.9*DIMENSIONS)]
after_elimination_25=abs_avg_weights.argsort()[:int(0.75*DIMENSIONS)]
after_elimination_50=abs_avg_weights.argsort()[:int(0.5*DIMENSIONS)]

In [65]:
for i in range(10):
    learning_rate=np.random.uniform(low=0.0001,high=0.001)
    momentum=np.random.uniform(low=0.9,high=0.99)
    model=LogisticRegression(learning_rate,momentum,after_elimination_10.shape[0],0.0001,0.0002)
    x_train,y_train,x_test,y_test=data_partitions[i]
    model.print_model_parameters(i)
    model.train(x_train[:,after_elimination_10].reshape(x_train.shape[0],after_elimination_10.shape[0]),y_train,10)
    model.evaluate("Test",y_test,model.predict(x_test[:,after_elimination_10].reshape(x_test.shape[0],after_elimination_10.shape[0])))

*********************************************
Run:0
Initial learning rate:0.00045883598948382365
Initial momentum:0.9427174532111307
Training error rate for run :0.504297994269341
Test error rate for run :0.47435897435897434
*********************************************
Run:1
Initial learning rate:0.0003679213169230272
Initial momentum:0.9434088191525474
Training error rate for run :0.501432664756447
Test error rate for run :0.5
*********************************************
Run:2
Initial learning rate:0.0006294009103371869
Initial momentum:0.9154931717148976
Training error rate for run :0.49426934097421205
Test error rate for run :0.5641025641025641
*********************************************
Run:3
Initial learning rate:0.00014431588068329843
Initial momentum:0.9353279091127247
Training error rate for run :0.497134670487106
Test error rate for run :0.5384615384615384
*********************************************
Run:4
Initial learning rate:0.0009440398279544215
Initial momentum:0.954

In [66]:
for i in range(10):
    learning_rate=np.random.uniform(low=0.0001,high=0.001)
    momentum=np.random.uniform(low=0.9,high=0.99)
    model=LogisticRegression(learning_rate,momentum,after_elimination_25.shape[0],0.0001,0.0002)
    x_train,y_train,x_test,y_test=data_partitions[i]
    model.print_model_parameters(i)
    model.train(x_train[:,after_elimination_25].reshape(x_train.shape[0],after_elimination_25.shape[0]),y_train,10)
    model.evaluate("Test",y_test,model.predict(x_test[:,after_elimination_25].reshape(x_test.shape[0],after_elimination_25.shape[0])))

*********************************************
Run:0
Initial learning rate:0.00031044320610571386
Initial momentum:0.9084721609927934
Training error rate for run :0.504297994269341
Test error rate for run :0.47435897435897434
*********************************************
Run:1
Initial learning rate:0.0007898039233347815
Initial momentum:0.963099391206883
Training error rate for run :0.501432664756447
Test error rate for run :0.5
*********************************************
Run:2
Initial learning rate:0.00011865751397149439
Initial momentum:0.9014998382086711
Training error rate for run :0.49426934097421205
Test error rate for run :0.5641025641025641
*********************************************
Run:3
Initial learning rate:0.0003708332262990524
Initial momentum:0.9081733311307482
Training error rate for run :0.497134670487106
Test error rate for run :0.5384615384615384
*********************************************
Run:4
Initial learning rate:0.000554611613489805
Initial momentum:0.97566

In [67]:
for i in range(10):
    learning_rate=np.random.uniform(low=0.0001,high=0.001)
    momentum=np.random.uniform(low=0.9,high=0.99)
    model=LogisticRegression(learning_rate,momentum,after_elimination_50.shape[0],0.0001,0.0002)
    x_train,y_train,x_test,y_test=data_partitions[i]
    model.print_model_parameters(i)
    model.train(x_train[:,after_elimination_50].reshape(x_train.shape[0],after_elimination_50.shape[0]),y_train,10)
    model.evaluate("Test",y_test,model.predict(x_test[:,after_elimination_50].reshape(x_test.shape[0],after_elimination_50.shape[0])))

*********************************************
Run:0
Initial learning rate:0.0004314317124762433
Initial momentum:0.9114046390542171
Training error rate for run :0.504297994269341
Test error rate for run :0.47435897435897434
*********************************************
Run:1
Initial learning rate:0.0008677861727984523
Initial momentum:0.9637681711920087
Training error rate for run :0.501432664756447
Test error rate for run :0.5
*********************************************
Run:2
Initial learning rate:0.000596276055638226
Initial momentum:0.9012985840717456
Training error rate for run :0.49426934097421205
Test error rate for run :0.5641025641025641
*********************************************
Run:3
Initial learning rate:0.0009466935931311976
Initial momentum:0.979663329896773
Training error rate for run :0.497134670487106
Test error rate for run :0.5384615384615384
*********************************************
Run:4
Initial learning rate:0.0008375454300433501
Initial momentum:0.9554767

In [68]:
parameters={
    'hidden_layer_sizes':[(10,20),(50,30),(100,10,2),(30,10,2)],
    'solver':['adam','sgd'],
    'alpha':[0.0001,0.00001,0.001]
}
skmodels=[]
for i,(x_train,y_train,x_test,y_test) in enumerate(data_partitions):
    print("\nRun {0}".format(i))
    model=GridSearchCV(MLPClassifier(),param_grid=parameters,n_jobs=-1,cv=2,verbose=3,scoring='neg_log_loss')
    model.fit(x_train,y_train)
    worst_training_error=model.cv_results_['mean_test_score']
    print("Training error:{0}".format(-np.min(worst_training_error)))
    test_model=MLPClassifier(**model.cv_results_['params'][np.argmin(worst_training_error)])
    test_model.fit(x_train,y_train)
    print("Testing error:{0}".format(1-accuracy_score(y_test,test_model.predict(x_test))))
    print("Parameters:{0}".format(model.cv_results_['params'][np.argmin(worst_training_error)]))    
    skmodels.append(model)


Run 0
Fitting 2 folds for each of 24 candidates, totalling 48 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    1.7s
[Parallel(n_jobs=-1)]: Done  42 out of  48 | elapsed:    3.0s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  48 out of  48 | elapsed:    3.3s finished


Training error:0.5274580533023601
Testing error:0.012820512820512775
Parameters:{'alpha': 0.0001, 'hidden_layer_sizes': (30, 10, 2), 'solver': 'adam'}

Run 1
Fitting 2 folds for each of 24 candidates, totalling 48 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done  42 out of  48 | elapsed:    1.9s remaining:    0.2s
[Parallel(n_jobs=-1)]: Done  48 out of  48 | elapsed:    2.2s finished


Training error:0.44966264147299695
Testing error:0.0
Parameters:{'alpha': 0.0001, 'hidden_layer_sizes': (100, 10, 2), 'solver': 'adam'}

Run 2
Fitting 2 folds for each of 24 candidates, totalling 48 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done  42 out of  48 | elapsed:    2.0s remaining:    0.2s
[Parallel(n_jobs=-1)]: Done  48 out of  48 | elapsed:    2.2s finished


Training error:0.4319434804122697
Testing error:0.5641025641025641
Parameters:{'alpha': 1e-05, 'hidden_layer_sizes': (30, 10, 2), 'solver': 'adam'}

Run 3
Fitting 2 folds for each of 24 candidates, totalling 48 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done  42 out of  48 | elapsed:    2.1s remaining:    0.2s
[Parallel(n_jobs=-1)]: Done  48 out of  48 | elapsed:    2.5s finished


Training error:0.3978948397602106
Testing error:0.012820512820512775
Parameters:{'alpha': 0.0001, 'hidden_layer_sizes': (100, 10, 2), 'solver': 'adam'}

Run 4
Fitting 2 folds for each of 24 candidates, totalling 48 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done  42 out of  48 | elapsed:    2.5s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  48 out of  48 | elapsed:    3.1s finished


Training error:0.4516806479833174
Testing error:0.012820512820512775
Parameters:{'alpha': 1e-05, 'hidden_layer_sizes': (30, 10, 2), 'solver': 'sgd'}

Run 5
Fitting 2 folds for each of 24 candidates, totalling 48 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done  42 out of  48 | elapsed:    2.6s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  48 out of  48 | elapsed:    3.0s finished


Training error:0.6203101711222482
Testing error:0.5769230769230769
Parameters:{'alpha': 1e-05, 'hidden_layer_sizes': (100, 10, 2), 'solver': 'adam'}

Run 6
Fitting 2 folds for each of 24 candidates, totalling 48 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done  42 out of  48 | elapsed:    2.5s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  48 out of  48 | elapsed:    2.9s finished


Training error:0.5255274826102947
Testing error:0.0
Parameters:{'alpha': 1e-05, 'hidden_layer_sizes': (30, 10, 2), 'solver': 'sgd'}

Run 7
Fitting 2 folds for each of 24 candidates, totalling 48 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done  42 out of  48 | elapsed:    2.8s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  48 out of  48 | elapsed:    3.2s finished


Training error:0.4476013065839399
Testing error:0.0
Parameters:{'alpha': 0.001, 'hidden_layer_sizes': (100, 10, 2), 'solver': 'adam'}

Run 8
Fitting 2 folds for each of 24 candidates, totalling 48 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done  42 out of  48 | elapsed:    2.6s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  48 out of  48 | elapsed:    2.7s finished


Training error:0.5691939539983523
Testing error:0.012987012987012991
Parameters:{'alpha': 0.001, 'hidden_layer_sizes': (30, 10, 2), 'solver': 'adam'}

Run 9
Fitting 2 folds for each of 24 candidates, totalling 48 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done  42 out of  48 | elapsed:    2.9s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  48 out of  48 | elapsed:    3.1s finished


Training error:0.42985905756328013
Testing error:0.0
Parameters:{'alpha': 1e-05, 'hidden_layer_sizes': (30, 10, 2), 'solver': 'adam'}


Fitting 2 folds for each of 24 candidates, totalling 48 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done  42 out of  48 | elapsed:    1.7s remaining:    0.2s
[Parallel(n_jobs=-1)]: Done  48 out of  48 | elapsed:    1.8s finished


GridSearchCV(cv=2, error_score=nan,
             estimator=MLPClassifier(activation='relu', alpha=0.0001,
                                     batch_size='auto', beta_1=0.9,
                                     beta_2=0.999, early_stopping=False,
                                     epsilon=1e-08, hidden_layer_sizes=(100,),
                                     learning_rate='constant',
                                     learning_rate_init=0.001, max_fun=15000,
                                     max_iter=200, momentum=0.9,
                                     n_iter_no_change=10,
                                     nesterovs_momentum=True, power_t=0.5,
                                     random_state=None, shuffle=True,
                                     solver='adam', tol=0.0001,
                                     validation_fraction=0.1, verbose=False,
                                     warm_start=False),
             iid='deprecated', n_jobs=-1,
             param_grid={'

In [50]:
model.cv_results_['params'][np.argmin(model.cv_results_['mean_test_score'])]

{'alpha': 1e-05, 'hidden_layer_sizes': (100, 10, 2), 'solver': 'adam'}

In [52]:
model.estimator

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)