In [1]:
import sklearn
import sklearn.tree
import numpy as np
from io import StringIO
from sklearn.neural_network import MLPClassifier
import warnings
warnings.filterwarnings('ignore')

In [2]:
file = open('OnlineNewsPopularity.csv','r')

In [3]:
filelist = list()

for line in file:
    newLine = line.rstrip()
    x = np.array(newLine.split(","))
    filelist.append(x)

file.close()

In [4]:
filelist[1]

array(['http://mashable.com/2013/01/07/amazon-instant-video-browser/',
       ' 731.0', ' 12.0', ' 219.0', ' 0.663594466988', ' 0.999999992308',
       ' 0.815384609112', ' 4.0', ' 2.0', ' 1.0', ' 0.0', ' 4.6803652968',
       ' 5.0', ' 0.0', ' 1.0', ' 0.0', ' 0.0', ' 0.0', ' 0.0', ' 0.0',
       ' 0.0', ' 0.0', ' 0.0', ' 0.0', ' 0.0', ' 0.0', ' 0.0', ' 0.0',
       ' 496.0', ' 496.0', ' 496.0', ' 1.0', ' 0.0', ' 0.0', ' 0.0',
       ' 0.0', ' 0.0', ' 0.0', ' 0.0', ' 0.500331204081',
       ' 0.378278929586', ' 0.0400046751006', ' 0.0412626477296',
       ' 0.0401225435029', ' 0.521617145481', ' 0.0925619834711',
       ' 0.0456621004566', ' 0.013698630137', ' 0.769230769231',
       ' 0.230769230769', ' 0.378636363636', ' 0.1', ' 0.7', ' -0.35',
       ' -0.6', ' -0.2', ' 0.5', ' -0.1875', ' 0.0', ' 0.1875', ' 593'],
      dtype='|S60')

In [5]:
filearray = np.array(filelist)


In [6]:
filearray.shape

(39645L, 61L)

In [7]:
data = filearray[1:,:-1]
y = filearray[1:,-1]

In [8]:
print (y)

[' 593' ' 711' ' 1500' ... ' 1900' ' 1100' ' 1300']


In [9]:
y = [x.strip(' ') for x in y]

In [10]:
y = [float(x) for x in y]

In [11]:
type(y[3])

float

In [12]:
Y = [1 if x >1400 else 0 for x in y]

In [13]:
Y = np.array(Y)

In [14]:
mlp_clf = MLPClassifier(hidden_layer_sizes=(30,30,30))
mlp_clf.fit(data[:,1:].astype(np.float), Y)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(30, 30, 30), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [15]:
print("predicted:", mlp_clf.predict(data[-2:,1:].astype(float)))
print("truth", Y[-2:])

('predicted:', array([0, 0]))
('truth', array([0, 0]))


In [16]:
#1-1 Use 2 hidden layers with 20 nodes in each layer

mlp_clf = MLPClassifier(hidden_layer_sizes=(20,20))
mlp_clf.fit(data[:,1:].astype(np.float), Y)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(20, 20), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [17]:
print("Best Loss:", mlp_clf.best_loss_)
print("Number of Layers:", mlp_clf.n_layers_)
print("Output Layer Activation:", mlp_clf.out_activation_)
print("---------------------------------------------------------------------------------------------------------------")
print("Shape of the network weights for each layer:")
print("The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)")
print("Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)")
print("Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)")
print("Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)")
print([coef.shape for coef in mlp_clf.coefs_])
print("----------------------------------------------------------------------------------------------------------------")

('Best Loss:', 10.708644951557833)
('Number of Layers:', 4)
('Output Layer Activation:', 'logistic')
---------------------------------------------------------------------------------------------------------------
Shape of the network weights for each layer:
The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)
Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)
Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)
Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)
[(59L, 20L), (20L, 20L), (20L, 1L)]
----------------------------------------------------------------------------------------------------------------


In [18]:
#1-2 Use 2 hidden layers with 20 nodes in each layer, tanh

mlp_clf = MLPClassifier(hidden_layer_sizes=(20,20), activation = 'tanh')
mlp_clf.fit(data[:,1:].astype(np.float), Y)

MLPClassifier(activation='tanh', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(20, 20), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [19]:
print("Best Loss:", mlp_clf.best_loss_)
print("Number of Layers:", mlp_clf.n_layers_)
print("Output Layer Activation:", mlp_clf.out_activation_)
print("---------------------------------------------------------------------------------------------------------------")
print("Shape of the network weights for each layer:")
print("The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)")
print("Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)")
print("Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)")
print("Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)")
print([coef.shape for coef in mlp_clf.coefs_])
print("----------------------------------------------------------------------------------------------------------------")

('Best Loss:', 0.6914463112885956)
('Number of Layers:', 4)
('Output Layer Activation:', 'logistic')
---------------------------------------------------------------------------------------------------------------
Shape of the network weights for each layer:
The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)
Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)
Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)
Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)
[(59L, 20L), (20L, 20L), (20L, 1L)]
----------------------------------------------------------------------------------------------------------------


In [20]:
#2-1 Use 2 hidden layers with 100 nodes in each layer.

mlp_clf = MLPClassifier(hidden_layer_sizes=(100,100))
mlp_clf.fit(data[:,1:].astype(np.float), Y)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100, 100), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [21]:
print("Best Loss:", mlp_clf.best_loss_)
print("Number of Layers:", mlp_clf.n_layers_)
print("Output Layer Activation:", mlp_clf.out_activation_)
print("---------------------------------------------------------------------------------------------------------------")
print("Shape of the network weights for each layer:")
print("The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)")
print("Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)")
print("Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)")
print("Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)")
print([coef.shape for coef in mlp_clf.coefs_])
print("----------------------------------------------------------------------------------------------------------------")

('Best Loss:', 10.768781244523826)
('Number of Layers:', 4)
('Output Layer Activation:', 'logistic')
---------------------------------------------------------------------------------------------------------------
Shape of the network weights for each layer:
The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)
Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)
Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)
Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)
[(59L, 100L), (100L, 100L), (100L, 1L)]
----------------------------------------------------------------------------------------------------------------


In [22]:
#2-2 Use 2 hidden layers with 100 nodes in each layer. tanh

mlp_clf = MLPClassifier(hidden_layer_sizes=(100,100), activation = 'tanh')
mlp_clf.fit(data[:,1:].astype(np.float), Y)

MLPClassifier(activation='tanh', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100, 100), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [23]:
print("Best Loss:", mlp_clf.best_loss_)
print("Number of Layers:", mlp_clf.n_layers_)
print("Output Layer Activation:", mlp_clf.out_activation_)
print("---------------------------------------------------------------------------------------------------------------")
print("Shape of the network weights for each layer:")
print("The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)")
print("Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)")
print("Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)")
print("Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)")
print([coef.shape for coef in mlp_clf.coefs_])
print("----------------------------------------------------------------------------------------------------------------")

('Best Loss:', 0.6902934092321684)
('Number of Layers:', 4)
('Output Layer Activation:', 'logistic')
---------------------------------------------------------------------------------------------------------------
Shape of the network weights for each layer:
The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)
Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)
Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)
Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)
[(59L, 100L), (100L, 100L), (100L, 1L)]
----------------------------------------------------------------------------------------------------------------


In [24]:
#3-1 Use 5 hidden layers with 20 nodes in each layer.

mlp_clf = MLPClassifier(hidden_layer_sizes=(20,20,20,20,20))
mlp_clf.fit(data[:,1:].astype(np.float), Y)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(20, 20, 20, 20, 20), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [25]:
print("Best Loss:", mlp_clf.best_loss_)
print("Number of Layers:", mlp_clf.n_layers_)
print("Output Layer Activation:", mlp_clf.out_activation_)
print("---------------------------------------------------------------------------------------------------------------")
print("Shape of the network weights for each layer:")
print("The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)")
print("Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)")
print("Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)")
print("Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)")
print([coef.shape for coef in mlp_clf.coefs_])
print("----------------------------------------------------------------------------------------------------------------")

('Best Loss:', 0.7061767349969924)
('Number of Layers:', 7)
('Output Layer Activation:', 'logistic')
---------------------------------------------------------------------------------------------------------------
Shape of the network weights for each layer:
The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)
Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)
Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)
Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)
[(59L, 20L), (20L, 20L), (20L, 20L), (20L, 20L), (20L, 20L), (20L, 1L)]
----------------------------------------------------------------------------------------------------------------


In [26]:
#3-2 Use 5 hidden layers with 20 nodes in each layer.

mlp_clf = MLPClassifier(hidden_layer_sizes=(20,20,20,20,20), activation = 'tanh')
mlp_clf.fit(data[:,1:].astype(np.float), Y)

MLPClassifier(activation='tanh', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(20, 20, 20, 20, 20), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [27]:
print("Best Loss:", mlp_clf.best_loss_)
print("Number of Layers:", mlp_clf.n_layers_)
print("Output Layer Activation:", mlp_clf.out_activation_)
print("---------------------------------------------------------------------------------------------------------------")
print("Shape of the network weights for each layer:")
print("The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)")
print("Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)")
print("Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)")
print("Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)")
print([coef.shape for coef in mlp_clf.coefs_])
print("----------------------------------------------------------------------------------------------------------------")

('Best Loss:', 0.6932473980527416)
('Number of Layers:', 7)
('Output Layer Activation:', 'logistic')
---------------------------------------------------------------------------------------------------------------
Shape of the network weights for each layer:
The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)
Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)
Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)
Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)
[(59L, 20L), (20L, 20L), (20L, 20L), (20L, 20L), (20L, 20L), (20L, 1L)]
----------------------------------------------------------------------------------------------------------------


In [28]:
#4-1 Use 5 hidden with 100 nodes in each layer.

mlp_clf = MLPClassifier(hidden_layer_sizes=(100,100,100,100,100))
mlp_clf.fit(data[:,1:].astype(np.float), Y)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100, 100, 100, 100, 100),
       learning_rate='constant', learning_rate_init=0.001, max_iter=200,
       momentum=0.9, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [29]:
print("Best Loss:", mlp_clf.best_loss_)
print("Number of Layers:", mlp_clf.n_layers_)
print("Output Layer Activation:", mlp_clf.out_activation_)
print("---------------------------------------------------------------------------------------------------------------")
print("Shape of the network weights for each layer:")
print("The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)")
print("Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)")
print("Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)")
print("Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)")
print([coef.shape for coef in mlp_clf.coefs_])
print("----------------------------------------------------------------------------------------------------------------")

('Best Loss:', 5.91001618872255)
('Number of Layers:', 7)
('Output Layer Activation:', 'logistic')
---------------------------------------------------------------------------------------------------------------
Shape of the network weights for each layer:
The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)
Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)
Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)
Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)
[(59L, 100L), (100L, 100L), (100L, 100L), (100L, 100L), (100L, 100L), (100L, 1L)]
----------------------------------------------------------------------------------------------------------------


In [30]:
#4-2 Use 5 hidden with 100 nodes in each layer. tanh

mlp_clf = MLPClassifier(hidden_layer_sizes=(100,100,100,100,100), activation = 'tanh')
mlp_clf.fit(data[:,1:].astype(np.float), Y)

MLPClassifier(activation='tanh', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100, 100, 100, 100, 100),
       learning_rate='constant', learning_rate_init=0.001, max_iter=200,
       momentum=0.9, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [31]:
print("Best Loss:", mlp_clf.best_loss_)
print("Number of Layers:", mlp_clf.n_layers_)
print("Output Layer Activation:", mlp_clf.out_activation_)
print("---------------------------------------------------------------------------------------------------------------")
print("Shape of the network weights for each layer:")
print("The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)")
print("Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)")
print("Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)")
print("Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)")
print([coef.shape for coef in mlp_clf.coefs_])
print("----------------------------------------------------------------------------------------------------------------")

('Best Loss:', 0.6920912550769159)
('Number of Layers:', 7)
('Output Layer Activation:', 'logistic')
---------------------------------------------------------------------------------------------------------------
Shape of the network weights for each layer:
The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)
Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)
Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)
Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)
[(59L, 100L), (100L, 100L), (100L, 100L), (100L, 100L), (100L, 100L), (100L, 1L)]
----------------------------------------------------------------------------------------------------------------


In [32]:
from sklearn.model_selection import KFold, cross_val_score
from sklearn import metrics

kf = KFold(n_splits=5)
kf.get_n_splits(data[:,1:])

5

In [33]:
#1-1 Use 2 hidden layers with 20 nodes in each layer
datax = data[:,1:]
Y = np.array(Y)

for train_index, test_index in kf.split(datax):
    x_train, x_test = datax[train_index], datax[test_index]
    y_train, y_test = Y[train_index], Y[test_index]

    mlp_clf = MLPClassifier(hidden_layer_sizes=(20,20))
    mlp_clf.fit(x_train.astype(np.float), y_train)
    
    predicted = mlp_clf.predict(x_test.astype(np.float))
    
    print metrics.classification_report(y_test, predicted)


             precision    recall  f1-score   support

          0       0.46      0.56      0.51      3525
          1       0.58      0.49      0.53      4404

avg / total       0.53      0.52      0.52      7929

             precision    recall  f1-score   support

          0       0.65      0.01      0.02      4423
          1       0.44      0.99      0.61      3506

avg / total       0.56      0.44      0.28      7929

             precision    recall  f1-score   support

          0       0.59      0.18      0.28      3733
          1       0.55      0.89      0.68      4196

avg / total       0.57      0.55      0.49      7929

             precision    recall  f1-score   support

          0       0.50      0.99      0.66      3919
          1       0.75      0.02      0.04      4010

avg / total       0.62      0.50      0.35      7929

             precision    recall  f1-score   support

          0       0.52      0.01      0.03      4482
          1       0.43      0.98 

In [34]:
#1-2 Use 2 hidden layers with 20 nodes in each layer, tanh

for train_index, test_index in kf.split(datax):
    x_train, x_test = datax[train_index], datax[test_index]
    y_train, y_test = Y[train_index], Y[test_index]

    mlp_clf = MLPClassifier(hidden_layer_sizes=(20,20), activation = 'tanh')
    mlp_clf.fit(x_train.astype(np.float), y_train)
    
    predicted = mlp_clf.predict(x_test.astype(np.float))
    
    print metrics.classification_report(y_test, predicted)


             precision    recall  f1-score   support

          0       0.45      0.88      0.59      3525
          1       0.58      0.13      0.21      4404

avg / total       0.52      0.46      0.38      7929

             precision    recall  f1-score   support

          0       0.56      0.95      0.71      4423
          1       0.54      0.07      0.12      3506

avg / total       0.55      0.56      0.45      7929

             precision    recall  f1-score   support

          0       0.48      0.86      0.61      3733
          1       0.57      0.17      0.26      4196

avg / total       0.53      0.49      0.43      7929

             precision    recall  f1-score   support

          0       0.44      0.41      0.42      3919
          1       0.46      0.50      0.48      4010

avg / total       0.45      0.45      0.45      7929

             precision    recall  f1-score   support

          0       0.59      0.81      0.68      4482
          1       0.52      0.27 

In [35]:
#2-1 Use 2 hidden layers with 100 nodes in each layer.

for train_index, test_index in kf.split(datax):
    x_train, x_test = datax[train_index], datax[test_index]
    y_train, y_test = Y[train_index], Y[test_index]

    mlp_clf = MLPClassifier(hidden_layer_sizes=(100,100) )
    mlp_clf.fit(x_train.astype(np.float), y_train)
    
    predicted = mlp_clf.predict(x_test.astype(np.float))
    
    print metrics.classification_report(y_test, predicted)

             precision    recall  f1-score   support

          0       0.44      0.83      0.58      3525
          1       0.55      0.16      0.25      4404

avg / total       0.50      0.46      0.40      7929

             precision    recall  f1-score   support

          0       0.00      0.00      0.00      4423
          1       0.44      1.00      0.61      3506

avg / total       0.20      0.44      0.27      7929

             precision    recall  f1-score   support

          0       0.40      0.01      0.02      3733
          1       0.53      0.99      0.69      4196

avg / total       0.47      0.53      0.37      7929

             precision    recall  f1-score   support

          0       0.50      0.98      0.66      3919
          1       0.60      0.03      0.05      4010

avg / total       0.55      0.50      0.35      7929

             precision    recall  f1-score   support

          0       0.67      0.56      0.61      4482
          1       0.53      0.65 

In [36]:
#2-2 Use 2 hidden layers with 100 nodes in each layer. tanh

for train_index, test_index in kf.split(datax):
    x_train, x_test = datax[train_index], datax[test_index]
    y_train, y_test = Y[train_index], Y[test_index]

    mlp_clf = MLPClassifier(hidden_layer_sizes=(100,100), activation = 'tanh')
    mlp_clf.fit(x_train.astype(np.float), y_train)
    
    predicted = mlp_clf.predict(x_test.astype(np.float))
    
    print metrics.classification_report(y_test, predicted)

             precision    recall  f1-score   support

          0       0.43      0.19      0.27      3525
          1       0.55      0.80      0.65      4404

avg / total       0.50      0.53      0.48      7929

             precision    recall  f1-score   support

          0       0.56      0.88      0.69      4423
          1       0.48      0.13      0.21      3506

avg / total       0.52      0.55      0.48      7929

             precision    recall  f1-score   support

          0       0.47      0.96      0.64      3733
          1       0.60      0.05      0.09      4196

avg / total       0.54      0.48      0.35      7929

             precision    recall  f1-score   support

          0       0.54      0.67      0.60      3919
          1       0.58      0.45      0.50      4010

avg / total       0.56      0.56      0.55      7929

             precision    recall  f1-score   support

          0       0.58      0.92      0.71      4482
          1       0.53      0.11 

In [37]:
#3-1 Use 5 hidden layers with 20 nodes in each layer.

for train_index, test_index in kf.split(datax):
    x_train, x_test = datax[train_index], datax[test_index]
    y_train, y_test = Y[train_index], Y[test_index]

    mlp_clf = MLPClassifier(hidden_layer_sizes=(20,20,20,20,20) )
    mlp_clf.fit(x_train.astype(np.float), y_train)
    
    predicted = mlp_clf.predict(x_test.astype(np.float))
    
    print metrics.classification_report(y_test, predicted)

             precision    recall  f1-score   support

          0       0.45      0.62      0.52      3525
          1       0.56      0.39      0.46      4404

avg / total       0.51      0.49      0.48      7929

             precision    recall  f1-score   support

          0       0.66      0.52      0.58      4423
          1       0.52      0.67      0.59      3506

avg / total       0.60      0.59      0.59      7929

             precision    recall  f1-score   support

          0       0.57      0.34      0.43      3733
          1       0.57      0.77      0.65      4196

avg / total       0.57      0.57      0.55      7929

             precision    recall  f1-score   support

          0       0.50      0.98      0.66      3919
          1       0.73      0.04      0.08      4010

avg / total       0.62      0.51      0.37      7929

             precision    recall  f1-score   support

          0       0.60      0.57      0.59      4482
          1       0.47      0.50 

In [38]:
#3-2 Use 5 hidden layers with 20 nodes in each layer. tnah

for train_index, test_index in kf.split(datax):
    x_train, x_test = datax[train_index], datax[test_index]
    y_train, y_test = Y[train_index], Y[test_index]

    mlp_clf = MLPClassifier(hidden_layer_sizes=(20,20,20,20,20), activation = 'tanh')
    mlp_clf.fit(x_train.astype(np.float), y_train)
    
    predicted = mlp_clf.predict(x_test.astype(np.float))
    
    print metrics.classification_report(y_test, predicted)

             precision    recall  f1-score   support

          0       0.45      0.62      0.52      3525
          1       0.57      0.40      0.47      4404

avg / total       0.52      0.50      0.49      7929

             precision    recall  f1-score   support

          0       0.60      0.00      0.01      4423
          1       0.44      1.00      0.61      3506

avg / total       0.53      0.44      0.28      7929

             precision    recall  f1-score   support

          0       0.48      0.83      0.61      3733
          1       0.58      0.21      0.30      4196

avg / total       0.53      0.50      0.45      7929

             precision    recall  f1-score   support

          0       0.00      0.00      0.00      3919
          1       0.51      1.00      0.67      4010

avg / total       0.26      0.51      0.34      7929

             precision    recall  f1-score   support

          0       0.59      0.79      0.67      4482
          1       0.50      0.28 

In [39]:
#1-1 Use 2 hidden layers with 20 nodes in each layer with different learning rate

mlp_clf = MLPClassifier(hidden_layer_sizes=(20,20), learning_rate_init = 0.5)
mlp_clf.fit(data[:,1:].astype(np.float), Y)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(20, 20), learning_rate='constant',
       learning_rate_init=0.5, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [40]:
print("predicted:", mlp_clf.predict(data[-2:,1:].astype(float)))
print("truth", Y[-2:])

('predicted:', array([1, 1]))
('truth', array([0, 0]))


In [41]:
print("Best Loss:", mlp_clf.best_loss_)
print("Number of Layers:", mlp_clf.n_layers_)
print("Output Layer Activation:", mlp_clf.out_activation_)
print("---------------------------------------------------------------------------------------------------------------")
print("Shape of the network weights for each layer:")
print("The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)")
print("Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)")
print("Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)")
print("Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)")
print([coef.shape for coef in mlp_clf.coefs_])
print("----------------------------------------------------------------------------------------------------------------")

('Best Loss:', 0.6975631459484516)
('Number of Layers:', 4)
('Output Layer Activation:', 'logistic')
---------------------------------------------------------------------------------------------------------------
Shape of the network weights for each layer:
The first tuple is basically (number of features,size of the first hidden layer), e.g. (5,10)
Second tuple is (size of the first hidden layer, size of the second hidden layer), e.g. (10,4)
Third tuple is (size of the second hidden layer, size of the third hidden layer), e.g. (4,5)
Lastly last layer will output the result, therefore it's size is (size of the third hidden layer, 1), e.g. (5,1)
[(59L, 20L), (20L, 20L), (20L, 1L)]
----------------------------------------------------------------------------------------------------------------


In [58]:
#1-1 Use 2 hidden layers with 20 nodes in each layer

mlp_clf = MLPClassifier(hidden_layer_sizes=(20,20))

a = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, n_jobs=-1)
b = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'precision', n_jobs=-1)
c = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'f1', n_jobs=-1)
d = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'recall', n_jobs=-1)

print "accuracy: ", a, "average: ", np.mean(a)
print "precision: ",b, "average: ", np.mean(b)
print "f1 score: ",c, "average: ", np.mean(c)
print "recall: ",d, "average: ", np.mean(d)

accuracy:  [0.53714214 0.47534368 0.52768319 0.49716232 0.44134712] average:  0.495735687908289
precision:  [0.55924442 0.45870835 0.53252861 0.64622642 0.51737452] average:  0.5428164628088
f1 score:  [0.5066336  0.61291736 0.68988337 0.67119155 0.55929332] average:  0.6079838402877199
recall:  [0.17870118 0.0427838  0.98975214 0.99725686 0.01625073] average:  0.4449489416360457


In [59]:
#1-2 Use 2 hidden layers with 20 nodes in each layer, tanh

mlp_clf = MLPClassifier(hidden_layer_sizes=(20,20), activation = 'tanh')

a = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, n_jobs=-1)
b = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'precision', n_jobs=-1)
c = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'f1', n_jobs=-1)
d = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'recall', n_jobs=-1)

print "accuracy: ", a, "average: ", np.mean(a)
print "precision: ",b, "average: ", np.mean(b)
print "f1 score: ",c, "average: ", np.mean(c)
print "recall: ",d, "average: ", np.mean(d)

accuracy:  [0.45856981 0.4448228  0.52162946 0.50573843 0.43529263] average:  0.4732106266271586
precision:  [0.53394355 0.45105954 0.63049096 0.57725948 0.51697128] average:  0.5419449602050601
f1 score:  [0.25278023 0.11835507 0.12442008 0.03617073 0.09225875] average:  0.12479696901850426
recall:  [0.07970027 0.21563035 0.17159199 0.01770574 0.78554846] average:  0.2540353620947823


In [60]:
#2-1 Use 2 hidden layers with 100 nodes in each layer.

mlp_clf = MLPClassifier(hidden_layer_sizes=(100,100))

a = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, n_jobs=-1)
b = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'precision', n_jobs=-1)
c = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'f1', n_jobs=-1)
d = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'recall', n_jobs=-1)

print "accuracy: ", a, "average: ", np.mean(a)
print "precision: ",b, "average: ", np.mean(b)
print "f1 score: ",c, "average: ", np.mean(c)
print "recall: ",d, "average: ", np.mean(d)

accuracy:  [0.55378989 0.57661748 0.52755707 0.52074663 0.63080222] average:  0.5619026561286412
precision:  [0.57464286 0.54903877 0.57701342 0.70232558 0.60200669] average:  0.601005465033087
f1 score:  [7.06413580e-01 5.69962952e-04 6.91907944e-01 6.46004555e-01
 6.10191317e-01] average:  0.5310174716625726
recall:  [0.03882834 0.9988591  0.07626311 0.45660848 0.00174115] average:  0.3144600344491399


In [61]:
#2-2 Use 2 hidden layers with 100 nodes in each layer. tanh

mlp_clf = MLPClassifier(hidden_layer_sizes=(100,100), activation = 'tanh')

a = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, n_jobs=-1)
b = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'precision', n_jobs=-1)
c = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'f1', n_jobs=-1)
d = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'recall', n_jobs=-1)

print "accuracy: ", a, "average: ", np.mean(a)
print "precision: ",b, "average: ", np.mean(b)
print "f1 score: ",c, "average: ", np.mean(c)
print "recall: ",d, "average: ", np.mean(d)

accuracy:  [0.46739816 0.5506369  0.48366755 0.4715601  0.43466196] average:  0.4815849328277871
precision:  [0.55542886 0.47148455 0.59078591 0.50593584 0.50799827] average:  0.5263266853925133
f1 score:  [0.28350948 0.57236304 0.39138069 0.26617874 0.60571931] average:  0.4238302513058049
recall:  [0.35217984 0.99971477 0.14823642 0.83391521 0.43731863] average:  0.5542729738168519


In [62]:
#3-1 Use 5 hidden layers with 20 nodes in each layer.

mlp_clf = MLPClassifier(hidden_layer_sizes=(20,20,20,20,20))

a = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, n_jobs=-1)
b = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'precision', n_jobs=-1)
c = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'f1', n_jobs=-1)
d = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'recall', n_jobs=-1)

print "accuracy: ", a, "average: ", np.mean(a)
print "precision: ",b, "average: ", np.mean(b)
print "f1 score: ",c, "average: ", np.mean(c)
print "recall: ",d, "average: ", np.mean(d)

accuracy:  [0.51885484 0.56968092 0.56804137 0.53827721 0.44210394] average:  0.5273916535234057
precision:  [0.57543103 0.442606   0.63338246 0.50547835 0.60888889] average:  0.5531573472873197
f1 score:  [0.41055046 0.60268402 0.03348059 0.01281735 0.07138965] average:  0.2261844122039113
recall:  [0.8140327  0.99515117 0.00762631 0.18354115 0.0548462 ] average:  0.4110395046733733


In [63]:
#3-2 Use 5 hidden layers with 20 nodes in each layer. tanh

mlp_clf = MLPClassifier(hidden_layer_sizes=(20,20,20,20,20), activation = 'tanh')

a = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, n_jobs=-1)
b = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'precision', n_jobs=-1)
c = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'f1', n_jobs=-1)
d = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'recall', n_jobs=-1)

print "accuracy: ", a, "average: ", np.mean(a)
print "precision: ",b, "average: ", np.mean(b)
print "f1 score: ",c, "average: ", np.mean(c)
print "recall: ",d, "average: ", np.mean(d)

accuracy:  [0.45894816 0.53916005 0.47332577 0.49854963 0.56281534] average:  0.5065597885064458
precision:  [0.5458256  0.44967682 0.57403009 0.58911954 0.5125    ] average:  0.5342304111152341
f1 score:  [0.02603707 0.21541195 0.6920855  0.47563249 0.37455566] average:  0.3567445333072411
recall:  [0.13169846 0.06446092 0.10343184 0.99975062 0.04497969] average:  0.2688643059922443


In [64]:
#4-1 Use 5 hidden with 100 nodes in each layer.

mlp_clf = MLPClassifier(hidden_layer_sizes=(100,100,100,100,100))

a = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, n_jobs=-1)
b = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'precision', n_jobs=-1)
c = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'f1', n_jobs=-1)
d = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'recall', n_jobs=-1)

print "accuracy: ", a, "average: ", np.mean(a)
print "precision: ",b, "average: ", np.mean(b)
print "f1 score: ",c, "average: ", np.mean(c)
print "recall: ",d, "average: ", np.mean(d)

accuracy:  [0.50460335 0.44204818 0.53726826 0.49489217 0.43453582] average:  0.48266955570241904
precision:  [0.56920502 0.47857948 0.52962669 0.510116   0.37681159] average:  0.49286775622635004
f1 score:  [0.47135417 0.59748712 0.32319722 0.65975104 0.53421927] average:  0.5172017624298333
recall:  [0.24137148 0.23074729 0.93755958 0.00972569 1.        ] average:  0.4838808074340252


In [65]:
#4-2 Use 5 hidden with 100 nodes in each layer. tanh

mlp_clf = MLPClassifier(hidden_layer_sizes=(100,100,100,100,100), activation = 'tanh')

a = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, n_jobs=-1)
b = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'precision', n_jobs=-1)
c = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'f1', n_jobs=-1)
d = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'recall', n_jobs=-1)

print "accuracy: ", a, "average: ", np.mean(a)
print "precision: ",b, "average: ", np.mean(b)
print "f1 score: ",c, "average: ", np.mean(c)
print "recall: ",d, "average: ", np.mean(d)

accuracy:  [0.46916383 0.55984361 0.51355783 0.50561231 0.43529263] average:  0.4966940419380427
precision:  [0.55542944 0.4421743  0.55576208 0.59250676 0.50307557] average:  0.5297896290919968
f1 score:  [0.14442013 0.61320507 0.18400964 0.02399021 0.60594338] average:  0.31431368671614585
recall:  [0.16121708 0.92641187 0.42326025 0.08104738 0.98520023] average:  0.5154273604628227


In [66]:
#task2 Use 5 hidden layers with 100 nodes in each layer.different init raite

mlp_clf = MLPClassifier(hidden_layer_sizes=(100,100,100,100,100), learning_rate_init = 0.5)

a = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, n_jobs=-1)
b = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'precision', n_jobs=-1)
c = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'f1', n_jobs=-1)
d = cross_val_score(mlp_clf, data[:,1:].astype(np.float), Y, cv=kf, scoring = 'recall', n_jobs=-1)

print "accuracy: ", a, "average: ", np.mean(a)
print "precision: ",b, "average: ", np.mean(b)
print "f1 score: ",c, "average: ", np.mean(c)
print "recall: ",d, "average: ", np.mean(d)

accuracy:  [0.55542944 0.4421743  0.47080338 0.49426157 0.43466196] average:  0.4794661284388352
precision:  [0.         0.4421743  0.         0.50573843 0.        ] average:  0.18958254508765293
f1 score:  [0.         0.61320507 0.69212371 0.         0.60594338] average:  0.38225443262497877
recall:  [1. 0. 0. 1. 0.] average:  0.4
