In [1]:
import sklearn
import sklearn.tree
import numpy as np
from io import StringIO
from sklearn.neural_network import MLPClassifier
import warnings
warnings.filterwarnings('ignore')

In [2]:
file = open('OnlineNewsPopularity.csv','r')

In [3]:
filelist = list()

for line in file:
    newLine = line.rstrip()
    x = np.array(newLine.split(","))
    filelist.append(x)

file.close()

In [4]:
filelist[1]

array(['http://mashable.com/2013/01/07/amazon-instant-video-browser/',
       ' 731.0', ' 12.0', ' 219.0', ' 0.663594466988', ' 0.999999992308',
       ' 0.815384609112', ' 4.0', ' 2.0', ' 1.0', ' 0.0', ' 4.6803652968',
       ' 5.0', ' 0.0', ' 1.0', ' 0.0', ' 0.0', ' 0.0', ' 0.0', ' 0.0',
       ' 0.0', ' 0.0', ' 0.0', ' 0.0', ' 0.0', ' 0.0', ' 0.0', ' 0.0',
       ' 496.0', ' 496.0', ' 496.0', ' 1.0', ' 0.0', ' 0.0', ' 0.0',
       ' 0.0', ' 0.0', ' 0.0', ' 0.0', ' 0.500331204081',
       ' 0.378278929586', ' 0.0400046751006', ' 0.0412626477296',
       ' 0.0401225435029', ' 0.521617145481', ' 0.0925619834711',
       ' 0.0456621004566', ' 0.013698630137', ' 0.769230769231',
       ' 0.230769230769', ' 0.378636363636', ' 0.1', ' 0.7', ' -0.35',
       ' -0.6', ' -0.2', ' 0.5', ' -0.1875', ' 0.0', ' 0.1875', ' 593'],
      dtype='|S60')

In [5]:
filearray = np.array(filelist)


In [6]:
filearray.shape

(39645L, 61L)

In [7]:
data = filearray[1:,:-1]
y = filearray[1:,-1]

In [8]:
print (y)

[' 593' ' 711' ' 1500' ... ' 1900' ' 1100' ' 1300']


In [9]:
y = [x.strip(' ') for x in y]

In [10]:
y = [float(x) for x in y]

In [11]:
type(y[3])

float

In [12]:
Y = [1 if x >1400 else 0 for x in y]

In [13]:
Y = np.array(Y)

In [14]:
mlp_clf = MLPClassifier(hidden_layer_sizes=(30,30,30))
mlp_clf.fit(data[:,1:].astype(np.float), Y)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(30, 30, 30), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [15]:
print("predicted:", mlp_clf.predict(data[-2:,1:].astype(float)))
print("truth", Y[-2:])

('predicted:', array([0, 0]))
('truth', array([0, 0]))


In [16]:
#1-1 Use 2 hidden layers with 20 nodes in each layer

mlp_clf = MLPClassifier(hidden_layer_sizes=(20,20))
mlp_clf.fit(data[:,1:].astype(np.float), Y)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(20, 20), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [17]:
print("Best Loss:", mlp_clf.best_loss_)
print("Number of Layers:", mlp_clf.n_layers_)
print("Output Layer Activation:", mlp_clf.out_activation_)
print("---------------------------------------------------------------------------------------------------------------")
print("Shape of the network weights for each layer:")
print("The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)")
print("Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)")
print("Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)")
print("Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)")
print([coef.shape for coef in mlp_clf.coefs_])
print("----------------------------------------------------------------------------------------------------------------")

('Best Loss:', 10.708644951557833)
('Number of Layers:', 4)
('Output Layer Activation:', 'logistic')
---------------------------------------------------------------------------------------------------------------
Shape of the network weights for each layer:
The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)
Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)
Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)
Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)
[(59L, 20L), (20L, 20L), (20L, 1L)]
----------------------------------------------------------------------------------------------------------------


In [18]:
#1-2 Use 2 hidden layers with 20 nodes in each layer, tanh

mlp_clf = MLPClassifier(hidden_layer_sizes=(20,20), activation = 'tanh')
mlp_clf.fit(data[:,1:].astype(np.float), Y)

MLPClassifier(activation='tanh', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(20, 20), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [19]:
print("Best Loss:", mlp_clf.best_loss_)
print("Number of Layers:", mlp_clf.n_layers_)
print("Output Layer Activation:", mlp_clf.out_activation_)
print("---------------------------------------------------------------------------------------------------------------")
print("Shape of the network weights for each layer:")
print("The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)")
print("Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)")
print("Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)")
print("Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)")
print([coef.shape for coef in mlp_clf.coefs_])
print("----------------------------------------------------------------------------------------------------------------")

('Best Loss:', 0.6914463112885956)
('Number of Layers:', 4)
('Output Layer Activation:', 'logistic')
---------------------------------------------------------------------------------------------------------------
Shape of the network weights for each layer:
The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)
Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)
Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)
Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)
[(59L, 20L), (20L, 20L), (20L, 1L)]
----------------------------------------------------------------------------------------------------------------


In [20]:
#2-1 Use 2 hidden layers with 100 nodes in each layer.

mlp_clf = MLPClassifier(hidden_layer_sizes=(100,100))
mlp_clf.fit(data[:,1:].astype(np.float), Y)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100, 100), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [21]:
print("Best Loss:", mlp_clf.best_loss_)
print("Number of Layers:", mlp_clf.n_layers_)
print("Output Layer Activation:", mlp_clf.out_activation_)
print("---------------------------------------------------------------------------------------------------------------")
print("Shape of the network weights for each layer:")
print("The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)")
print("Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)")
print("Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)")
print("Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)")
print([coef.shape for coef in mlp_clf.coefs_])
print("----------------------------------------------------------------------------------------------------------------")

('Best Loss:', 10.768781244523826)
('Number of Layers:', 4)
('Output Layer Activation:', 'logistic')
---------------------------------------------------------------------------------------------------------------
Shape of the network weights for each layer:
The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)
Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)
Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)
Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)
[(59L, 100L), (100L, 100L), (100L, 1L)]
----------------------------------------------------------------------------------------------------------------


In [22]:
#2-2 Use 2 hidden layers with 100 nodes in each layer. tanh

mlp_clf = MLPClassifier(hidden_layer_sizes=(100,100), activation = 'tanh')
mlp_clf.fit(data[:,1:].astype(np.float), Y)

MLPClassifier(activation='tanh', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100, 100), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [23]:
print("Best Loss:", mlp_clf.best_loss_)
print("Number of Layers:", mlp_clf.n_layers_)
print("Output Layer Activation:", mlp_clf.out_activation_)
print("---------------------------------------------------------------------------------------------------------------")
print("Shape of the network weights for each layer:")
print("The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)")
print("Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)")
print("Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)")
print("Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)")
print([coef.shape for coef in mlp_clf.coefs_])
print("----------------------------------------------------------------------------------------------------------------")

('Best Loss:', 0.6902934092321684)
('Number of Layers:', 4)
('Output Layer Activation:', 'logistic')
---------------------------------------------------------------------------------------------------------------
Shape of the network weights for each layer:
The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)
Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)
Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)
Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)
[(59L, 100L), (100L, 100L), (100L, 1L)]
----------------------------------------------------------------------------------------------------------------


In [24]:
#3-1 Use 5 hidden layers with 20 nodes in each layer.

mlp_clf = MLPClassifier(hidden_layer_sizes=(20,20,20,20,20))
mlp_clf.fit(data[:,1:].astype(np.float), Y)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(20, 20, 20, 20, 20), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [25]:
print("Best Loss:", mlp_clf.best_loss_)
print("Number of Layers:", mlp_clf.n_layers_)
print("Output Layer Activation:", mlp_clf.out_activation_)
print("---------------------------------------------------------------------------------------------------------------")
print("Shape of the network weights for each layer:")
print("The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)")
print("Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)")
print("Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)")
print("Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)")
print([coef.shape for coef in mlp_clf.coefs_])
print("----------------------------------------------------------------------------------------------------------------")

('Best Loss:', 0.7061767349969924)
('Number of Layers:', 7)
('Output Layer Activation:', 'logistic')
---------------------------------------------------------------------------------------------------------------
Shape of the network weights for each layer:
The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)
Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)
Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)
Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)
[(59L, 20L), (20L, 20L), (20L, 20L), (20L, 20L), (20L, 20L), (20L, 1L)]
----------------------------------------------------------------------------------------------------------------


In [26]:
#3-2 Use 5 hidden layers with 20 nodes in each layer.

mlp_clf = MLPClassifier(hidden_layer_sizes=(20,20,20,20,20), activation = 'tanh')
mlp_clf.fit(data[:,1:].astype(np.float), Y)

MLPClassifier(activation='tanh', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(20, 20, 20, 20, 20), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [27]:
print("Best Loss:", mlp_clf.best_loss_)
print("Number of Layers:", mlp_clf.n_layers_)
print("Output Layer Activation:", mlp_clf.out_activation_)
print("---------------------------------------------------------------------------------------------------------------")
print("Shape of the network weights for each layer:")
print("The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)")
print("Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)")
print("Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)")
print("Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)")
print([coef.shape for coef in mlp_clf.coefs_])
print("----------------------------------------------------------------------------------------------------------------")

('Best Loss:', 0.6932473980527416)
('Number of Layers:', 7)
('Output Layer Activation:', 'logistic')
---------------------------------------------------------------------------------------------------------------
Shape of the network weights for each layer:
The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)
Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)
Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)
Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)
[(59L, 20L), (20L, 20L), (20L, 20L), (20L, 20L), (20L, 20L), (20L, 1L)]
----------------------------------------------------------------------------------------------------------------


In [28]:
#4-1 Use 5 hidden with 100 nodes in each layer.

mlp_clf = MLPClassifier(hidden_layer_sizes=(100,100,100,100,100))
mlp_clf.fit(data[:,1:].astype(np.float), Y)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100, 100, 100, 100, 100),
       learning_rate='constant', learning_rate_init=0.001, max_iter=200,
       momentum=0.9, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [29]:
print("Best Loss:", mlp_clf.best_loss_)
print("Number of Layers:", mlp_clf.n_layers_)
print("Output Layer Activation:", mlp_clf.out_activation_)
print("---------------------------------------------------------------------------------------------------------------")
print("Shape of the network weights for each layer:")
print("The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)")
print("Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)")
print("Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)")
print("Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)")
print([coef.shape for coef in mlp_clf.coefs_])
print("----------------------------------------------------------------------------------------------------------------")

('Best Loss:', 5.91001618872255)
('Number of Layers:', 7)
('Output Layer Activation:', 'logistic')
---------------------------------------------------------------------------------------------------------------
Shape of the network weights for each layer:
The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)
Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)
Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)
Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)
[(59L, 100L), (100L, 100L), (100L, 100L), (100L, 100L), (100L, 100L), (100L, 1L)]
----------------------------------------------------------------------------------------------------------------


In [30]:
#4-2 Use 5 hidden with 100 nodes in each layer. tanh

mlp_clf = MLPClassifier(hidden_layer_sizes=(100,100,100,100,100), activation = 'tanh')
mlp_clf.fit(data[:,1:].astype(np.float), Y)

MLPClassifier(activation='tanh', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100, 100, 100, 100, 100),
       learning_rate='constant', learning_rate_init=0.001, max_iter=200,
       momentum=0.9, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [31]:
print("Best Loss:", mlp_clf.best_loss_)
print("Number of Layers:", mlp_clf.n_layers_)
print("Output Layer Activation:", mlp_clf.out_activation_)
print("---------------------------------------------------------------------------------------------------------------")
print("Shape of the network weights for each layer:")
print("The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)")
print("Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)")
print("Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)")
print("Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)")
print([coef.shape for coef in mlp_clf.coefs_])
print("----------------------------------------------------------------------------------------------------------------")

('Best Loss:', 0.6920912550769159)
('Number of Layers:', 7)
('Output Layer Activation:', 'logistic')
---------------------------------------------------------------------------------------------------------------
Shape of the network weights for each layer:
The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)
Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)
Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)
Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)
[(59L, 100L), (100L, 100L), (100L, 100L), (100L, 100L), (100L, 100L), (100L, 1L)]
----------------------------------------------------------------------------------------------------------------


In [32]:
from sklearn.model_selection import KFold, cross_val_score
from sklearn import metrics

kf = KFold(n_splits=5)
kf.get_n_splits(data[:,1:])

5

In [33]:
#1-1 Use 2 hidden layers with 20 nodes in each layer
datax = data[:,1:]
Y = np.array(Y)

for train_index, test_index in kf.split(datax):
    x_train, x_test = datax[train_index], datax[test_index]
    y_train, y_test = Y[train_index], Y[test_index]

    mlp_clf = MLPClassifier(hidden_layer_sizes=(20,20))
    mlp_clf.fit(x_train.astype(np.float), y_train)
    
    predicted = mlp_clf.predict(x_test.astype(np.float))
    
    print metrics.classification_report(y_test, predicted)


             precision    recall  f1-score   support

          0       0.46      0.56      0.51      3525
          1       0.58      0.49      0.53      4404

avg / total       0.53      0.52      0.52      7929

             precision    recall  f1-score   support

          0       0.65      0.01      0.02      4423
          1       0.44      0.99      0.61      3506

avg / total       0.56      0.44      0.28      7929

             precision    recall  f1-score   support

          0       0.59      0.18      0.28      3733
          1       0.55      0.89      0.68      4196

avg / total       0.57      0.55      0.49      7929

             precision    recall  f1-score   support

          0       0.50      0.99      0.66      3919
          1       0.75      0.02      0.04      4010

avg / total       0.62      0.50      0.35      7929

             precision    recall  f1-score   support

          0       0.52      0.01      0.03      4482
          1       0.43      0.98 

In [34]:
#1-2 Use 2 hidden layers with 20 nodes in each layer, tanh

for train_index, test_index in kf.split(datax):
    x_train, x_test = datax[train_index], datax[test_index]
    y_train, y_test = Y[train_index], Y[test_index]

    mlp_clf = MLPClassifier(hidden_layer_sizes=(20,20), activation = 'tanh')
    mlp_clf.fit(x_train.astype(np.float), y_train)
    
    predicted = mlp_clf.predict(x_test.astype(np.float))
    
    print metrics.classification_report(y_test, predicted)


             precision    recall  f1-score   support

          0       0.45      0.88      0.59      3525
          1       0.58      0.13      0.21      4404

avg / total       0.52      0.46      0.38      7929

             precision    recall  f1-score   support

          0       0.56      0.95      0.71      4423
          1       0.54      0.07      0.12      3506

avg / total       0.55      0.56      0.45      7929

             precision    recall  f1-score   support

          0       0.48      0.86      0.61      3733
          1       0.57      0.17      0.26      4196

avg / total       0.53      0.49      0.43      7929

             precision    recall  f1-score   support

          0       0.44      0.41      0.42      3919
          1       0.46      0.50      0.48      4010

avg / total       0.45      0.45      0.45      7929

             precision    recall  f1-score   support

          0       0.59      0.81      0.68      4482
          1       0.52      0.27 

In [35]:
#2-1 Use 2 hidden layers with 100 nodes in each layer.

for train_index, test_index in kf.split(datax):
    x_train, x_test = datax[train_index], datax[test_index]
    y_train, y_test = Y[train_index], Y[test_index]

    mlp_clf = MLPClassifier(hidden_layer_sizes=(100,100) )
    mlp_clf.fit(x_train.astype(np.float), y_train)
    
    predicted = mlp_clf.predict(x_test.astype(np.float))
    
    print metrics.classification_report(y_test, predicted)

             precision    recall  f1-score   support

          0       0.44      0.83      0.58      3525
          1       0.55      0.16      0.25      4404

avg / total       0.50      0.46      0.40      7929

             precision    recall  f1-score   support

          0       0.00      0.00      0.00      4423
          1       0.44      1.00      0.61      3506

avg / total       0.20      0.44      0.27      7929

             precision    recall  f1-score   support

          0       0.40      0.01      0.02      3733
          1       0.53      0.99      0.69      4196

avg / total       0.47      0.53      0.37      7929

             precision    recall  f1-score   support

          0       0.50      0.98      0.66      3919
          1       0.60      0.03      0.05      4010

avg / total       0.55      0.50      0.35      7929

             precision    recall  f1-score   support

          0       0.67      0.56      0.61      4482
          1       0.53      0.65 

In [36]:
#2-2 Use 2 hidden layers with 100 nodes in each layer. tanh

for train_index, test_index in kf.split(datax):
    x_train, x_test = datax[train_index], datax[test_index]
    y_train, y_test = Y[train_index], Y[test_index]

    mlp_clf = MLPClassifier(hidden_layer_sizes=(100,100), activation = 'tanh')
    mlp_clf.fit(x_train.astype(np.float), y_train)
    
    predicted = mlp_clf.predict(x_test.astype(np.float))
    
    print metrics.classification_report(y_test, predicted)

             precision    recall  f1-score   support

          0       0.43      0.19      0.27      3525
          1       0.55      0.80      0.65      4404

avg / total       0.50      0.53      0.48      7929

             precision    recall  f1-score   support

          0       0.56      0.88      0.69      4423
          1       0.48      0.13      0.21      3506

avg / total       0.52      0.55      0.48      7929

             precision    recall  f1-score   support

          0       0.47      0.96      0.64      3733
          1       0.60      0.05      0.09      4196

avg / total       0.54      0.48      0.35      7929

             precision    recall  f1-score   support

          0       0.54      0.67      0.60      3919
          1       0.58      0.45      0.50      4010

avg / total       0.56      0.56      0.55      7929

             precision    recall  f1-score   support

          0       0.58      0.92      0.71      4482
          1       0.53      0.11 

In [37]:
#3-1 Use 5 hidden layers with 20 nodes in each layer.

for train_index, test_index in kf.split(datax):
    x_train, x_test = datax[train_index], datax[test_index]
    y_train, y_test = Y[train_index], Y[test_index]

    mlp_clf = MLPClassifier(hidden_layer_sizes=(20,20,20,20,20) )
    mlp_clf.fit(x_train.astype(np.float), y_train)
    
    predicted = mlp_clf.predict(x_test.astype(np.float))
    
    print metrics.classification_report(y_test, predicted)

             precision    recall  f1-score   support

          0       0.45      0.62      0.52      3525
          1       0.56      0.39      0.46      4404

avg / total       0.51      0.49      0.48      7929

             precision    recall  f1-score   support

          0       0.66      0.52      0.58      4423
          1       0.52      0.67      0.59      3506

avg / total       0.60      0.59      0.59      7929

             precision    recall  f1-score   support

          0       0.57      0.34      0.43      3733
          1       0.57      0.77      0.65      4196

avg / total       0.57      0.57      0.55      7929

             precision    recall  f1-score   support

          0       0.50      0.98      0.66      3919
          1       0.73      0.04      0.08      4010

avg / total       0.62      0.51      0.37      7929

             precision    recall  f1-score   support

          0       0.60      0.57      0.59      4482
          1       0.47      0.50 

In [38]:
#3-2 Use 5 hidden layers with 20 nodes in each layer. tnah

for train_index, test_index in kf.split(datax):
    x_train, x_test = datax[train_index], datax[test_index]
    y_train, y_test = Y[train_index], Y[test_index]

    mlp_clf = MLPClassifier(hidden_layer_sizes=(20,20,20,20,20), activation = 'tanh')
    mlp_clf.fit(x_train.astype(np.float), y_train)
    
    predicted = mlp_clf.predict(x_test.astype(np.float))
    
    print metrics.classification_report(y_test, predicted)

             precision    recall  f1-score   support

          0       0.45      0.62      0.52      3525
          1       0.57      0.40      0.47      4404

avg / total       0.52      0.50      0.49      7929

             precision    recall  f1-score   support

          0       0.60      0.00      0.01      4423
          1       0.44      1.00      0.61      3506

avg / total       0.53      0.44      0.28      7929

             precision    recall  f1-score   support

          0       0.48      0.83      0.61      3733
          1       0.58      0.21      0.30      4196

avg / total       0.53      0.50      0.45      7929

             precision    recall  f1-score   support

          0       0.00      0.00      0.00      3919
          1       0.51      1.00      0.67      4010

avg / total       0.26      0.51      0.34      7929

             precision    recall  f1-score   support

          0       0.59      0.79      0.67      4482
          1       0.50      0.28 

In [39]:
#1-1 Use 2 hidden layers with 20 nodes in each layer with different learning rate

mlp_clf = MLPClassifier(hidden_layer_sizes=(20,20), learning_rate_init = 0.5)
mlp_clf.fit(data[:,1:].astype(np.float), Y)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(20, 20), learning_rate='constant',
       learning_rate_init=0.5, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [40]:
print("predicted:", mlp_clf.predict(data[-2:,1:].astype(float)))
print("truth", Y[-2:])

('predicted:', array([1, 1]))
('truth', array([0, 0]))


In [41]:
print("Best Loss:", mlp_clf.best_loss_)
print("Number of Layers:", mlp_clf.n_layers_)
print("Output Layer Activation:", mlp_clf.out_activation_)
print("---------------------------------------------------------------------------------------------------------------")
print("Shape of the network weights for each layer:")
print("The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)")
print("Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)")
print("Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)")
print("Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)")
print([coef.shape for coef in mlp_clf.coefs_])
print("----------------------------------------------------------------------------------------------------------------")

('Best Loss:', 0.6975631459484516)
('Number of Layers:', 4)
('Output Layer Activation:', 'logistic')
---------------------------------------------------------------------------------------------------------------
Shape of the network weights for each layer:
The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)
Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)
Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)
Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)
[(59L, 20L), (20L, 20L), (20L, 1L)]
----------------------------------------------------------------------------------------------------------------


In [70]:
#1-1 Use 2 hidden layers with 20 nodes in each layer

mlp_clf = MLPClassifier(hidden_layer_sizes=(20,20))

a = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, n_jobs=-1)
b = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'precision', n_jobs=-1)
c = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'f1', n_jobs=-1)
d = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'recall', n_jobs=-1)

print "accuracy: ", "average: ", np.mean(a), "|| std: ", np.std(a)
print "precision: ", "average: ", np.mean(b), "|| std: ", np.std(b)
print "f1 score: ", "average: ", np.mean(c), "|| std: ", np.std(c)
print "recall: ", "average: ", np.mean(d), "|| std: ", np.std(d)

accuracy:  average:  0.5034035509903166 || std:  0.05761060760928748
precision:  average:  0.5481982274398177 || std:  0.07603868664678023
f1 score:  average:  0.3841992813920827 || std:  0.2626093107166627
recall:  average:  0.754569621331229 || std:  0.35752909873385663


In [71]:
#1-2 Use 2 hidden layers with 20 nodes in each layer, tanh

mlp_clf = MLPClassifier(hidden_layer_sizes=(20,20), activation = 'tanh')

a = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, n_jobs=-1)
b = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'precision', n_jobs=-1)
c = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'f1', n_jobs=-1)
d = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'recall', n_jobs=-1)

print "accuracy: ", "average: ", np.mean(a), "|| std: ", np.std(a)
print "precision: ", "average: ", np.mean(b), "|| std: ", np.std(b)
print "f1 score: ", "average: ", np.mean(c), "|| std: ", np.std(c)
print "recall: ", "average: ", np.mean(d), "|| std: ", np.std(d)

accuracy:  average:  0.5052229874648096 || std:  0.046480106786523555
precision:  average:  0.5367523109683527 || std:  0.06456755200224737
f1 score:  average:  0.390293070498552 || std:  0.22736679839089816
recall:  average:  0.213830948982831 || std:  0.09751567959065441


In [72]:
#2-1 Use 2 hidden layers with 100 nodes in each layer.

mlp_clf = MLPClassifier(hidden_layer_sizes=(100,100))

a = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, n_jobs=-1)
b = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'precision', n_jobs=-1)
c = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'f1', n_jobs=-1)
d = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'recall', n_jobs=-1)

print "accuracy: ", "average: ", np.mean(a), "|| std: ", np.std(a)
print "precision: ", "average: ", np.mean(b), "|| std: ", np.std(b)
print "f1 score: ", "average: ", np.mean(c), "|| std: ", np.std(c)
print "recall: ", "average: ", np.mean(d), "|| std: ", np.std(d)

accuracy:  average:  0.5583697214901321 || std:  0.03409878503891674
precision:  average:  0.5513756531161295 || std:  0.10233418857008793
f1 score:  average:  0.2769966814356991 || std:  0.19717793252686186
recall:  average:  0.6964452432760675 || std:  0.38467882786460494


In [73]:
#2-2 Use 2 hidden layers with 100 nodes in each layer. tanh

mlp_clf = MLPClassifier(hidden_layer_sizes=(100,100), activation = 'tanh')

a = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, n_jobs=-1)
b = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'precision', n_jobs=-1)
c = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'f1', n_jobs=-1)
d = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'recall', n_jobs=-1)

print "accuracy: ", "average: ", np.mean(a), "|| std: ", np.std(a)
print "precision: ", "average: ", np.mean(b), "|| std: ", np.std(b)
print "f1 score: ", "average: ", np.mean(c), "|| std: ", np.std(c)
print "recall: ", "average: ", np.mean(d), "|| std: ", np.std(d)

accuracy:  average:  0.49215635256340995 || std:  0.03699744800304449
precision:  average:  0.5719304138271688 || std:  0.020260475542297315
f1 score:  average:  0.3658679941873544 || std:  0.14792935371183827
recall:  average:  0.6086499331300402 || std:  0.3339610465997258


In [74]:
#3-1 Use 5 hidden layers with 20 nodes in each layer.

mlp_clf = MLPClassifier(hidden_layer_sizes=(20,20,20,20,20))

a = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, n_jobs=-1)
b = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'precision', n_jobs=-1)
c = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'f1', n_jobs=-1)
d = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'recall', n_jobs=-1)

print "accuracy: ", "average: ", np.mean(a), "|| std: ", np.std(a)
print "precision: ", "average: ", np.mean(b), "|| std: ", np.std(b)
print "f1 score: ", "average: ", np.mean(c), "|| std: ", np.std(c)
print "recall: ", "average: ", np.mean(d), "|| std: ", np.std(d)

accuracy:  average:  0.5174038028471402 || std:  0.039377233740212535
precision:  average:  0.5159633783599658 || std:  0.07523872166145022
f1 score:  average:  0.5351989386189608 || std:  0.2627501565401155
recall:  average:  0.5186296644380894 || std:  0.40000600358582916


In [75]:
#3-2 Use 5 hidden layers with 20 nodes in each layer. tanh

mlp_clf = MLPClassifier(hidden_layer_sizes=(20,20,20,20,20), activation = 'tanh')

a = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, n_jobs=-1)
b = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'precision', n_jobs=-1)
c = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'f1', n_jobs=-1)
d = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'recall', n_jobs=-1)

print "accuracy: ", "average: ", np.mean(a), "|| std: ", np.std(a)
print "precision: ", "average: ", np.mean(b), "|| std: ", np.std(b)
print "f1 score: ", "average: ", np.mean(c), "|| std: ", np.std(c)
print "recall: ", "average: ", np.mean(d), "|| std: ", np.std(d)

accuracy:  average:  0.5185918569178349 || std:  0.033853042560212245
precision:  average:  0.5022669464672822 || std:  0.0919292117653938
f1 score:  average:  0.38406164922033736 || std:  0.29749187362783625
recall:  average:  0.5085369300747077 || std:  0.35653292877799025


In [76]:
#4-1 Use 5 hidden with 100 nodes in each layer.

mlp_clf = MLPClassifier(hidden_layer_sizes=(100,100,100,100,100))

a = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, n_jobs=-1)
b = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'precision', n_jobs=-1)
c = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'f1', n_jobs=-1)
d = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'recall', n_jobs=-1)


print "accuracy: ", "average: ", np.mean(a), "|| std: ", np.std(a)
print "precision: ", "average: ", np.mean(b), "|| std: ", np.std(b)
print "f1 score: ", "average: ", np.mean(c), "|| std: ", np.std(c)
print "recall: ", "average: ", np.mean(d), "|| std: ", np.std(d)

accuracy:  average:  0.49886533983045034 || std:  0.03138818272078205
precision:  average:  0.5129972676210063 || std:  0.08537713769113725
f1 score:  average:  0.27110405625825107 || std:  0.23370484989280613
recall:  average:  0.6768182918063971 || std:  0.40923622022696904


In [77]:
#4-2 Use 5 hidden with 100 nodes in each layer. tanh

mlp_clf = MLPClassifier(hidden_layer_sizes=(100,100,100,100,100), activation = 'tanh')

a = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, n_jobs=-1)
b = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'precision', n_jobs=-1)
c = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'f1', n_jobs=-1)
d = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'recall', n_jobs=-1)

print "accuracy: ", "average: ", np.mean(a), "|| std: ", np.std(a)
print "precision: ", "average: ", np.mean(b), "|| std: ", np.std(b)
print "f1 score: ", "average: ", np.mean(c), "|| std: ", np.std(c)
print "recall: ", "average: ", np.mean(d), "|| std: ", np.std(d)

accuracy:  average:  0.5269159222000399 || std:  0.04104319369244362
precision:  average:  0.5759873877260745 || std:  0.08297066363130894
f1 score:  average:  0.43957383811214995 || std:  0.2353072343019145
recall:  average:  0.4669482692932889 || std:  0.4364697025346039


In [78]:
#task2 Use 5 hidden layers with 100 nodes in each layer.different init raite

mlp_clf = MLPClassifier(hidden_layer_sizes=(100,100,100,100,100), learning_rate_init = 0.5)

a = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, n_jobs=-1)
b = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'precision', n_jobs=-1)
c = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'f1', n_jobs=-1)
d = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'recall', n_jobs=-1)

print "accuracy: ", "average: ", np.mean(a), "|| std: ", np.std(a)
print "precision: ", "average: ", np.mean(b), "|| std: ", np.std(b)
print "f1 score: ", "average: ", np.mean(c), "|| std: ", np.std(c)
print "recall: ", "average: ", np.mean(d), "|| std: ", np.std(d)

accuracy:  average:  0.514275057685903 || std:  0.04593861109737616
precision:  average:  0.1604348593769706 || std:  0.1982026176966755
f1 score:  average:  0.24382969035693755 || std:  0.2986379916674196
recall:  average:  0.6 || std:  0.48989794855663565
