In [24]:
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import SGDClassifier  # i.e., logistic regression when let loss = "log"
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from cross_validation import cross_validation
import numpy as np
import pandas as pd

import data_opener

In [8]:
# Load the full dataset through the project's data_opener helper.
data = data_opener.get_data()

# Pairwise column correlations, kept around for inspection (display
# `corr_matrix` in a scratch cell to look at it). Author's finding: the
# largest correlation is about .5, so correlation is not used for feature
# selection.
corr_matrix = data.corr()

# Split into train / test / validation partitions. The names below follow the
# order in which data_opener.train_test_val_split returns its six pieces.
splits = data_opener.train_test_val_split(data)
X_train, X_test, X_val, y_train, y_test, y_val = splits


In [30]:
# Number of leading training rows to keep, so the models below train quickly.
# Sizes tried so far: 100, 1000, 10000, 20000, 42000; use len(y_train) to keep
# every row.
n_samples = 1000

# NOTE: this rebinds X_train / y_train to their truncated versions, so raising
# n_samples later only takes effect after re-running the cell that creates the
# train/test/validation split.
X_train = X_train.head(n_samples)
y_train = y_train.head(n_samples)

### Using MLP in sklearn

In [31]:
# Multi-layer perceptron with a single hidden layer of 8 ReLU units, trained
# with plain stochastic gradient descent.
clf_MLP = MLPClassifier(solver='sgd',
                        # Must be written as a 1-tuple: `(8)` is just the int 8.
                        # scikit-learn happens to accept a bare int, but the
                        # tuple makes the "one layer of 8 units" intent explicit
                        # and extends naturally to e.g. (8, 4).
                        hidden_layer_sizes=(8,),
                        random_state=1,     # fixed seed for weight init / shuffling
                        activation='relu',
                        max_iter=1000,
                        alpha=0.0001)       # L2 regularization strength

# NOTE(review): SGD-trained MLPs are sensitive to feature scale; confirm that
# data_opener standardizes the features, otherwise consider scaling X first.
clf_MLP.fit(X_train, y_train)

print('sklearn MLP training done')

sklearn MLP training done


In [32]:
# Evaluate the fitted MLP on the held-out test partition.
print("Test accuracy using sklearn.mlp: =================\n")
y_pred_mlp = clf_MLP.predict(X_test)

# print_stat comes from the project's cross_validation helper; its result is
# indexed below. Presumably entry 0 is the metric summary and entry 1 the
# confusion matrix -- confirm against cross_validation.print_stat.
cv = cross_validation()
stats_mlp = cv.print_stat(y_test, y_pred_mlp)
for part in (0, 1):
    print(stats_mlp[part])


Accuracy:	0.4862142857142857
ErrorRate:	0.5137857142857143
Precision:	0.4921451013996001
Recall:	0.48651701256529717

Confusion matrix:
			Positive	Negative	
pred_pos	3446			3556	
pred_neg	3637			3361	



In [27]:
# y_pred = clf.predict(X_train)
# print(accuracy_score(y_train, y_pred)*100)
# cm = confusion_matrix(y_train, y_pred)
# print(cm,'\n')
# classification_report(y_train, y_pred)

### Using Perceptron in sklearn

In [19]:
# Linear perceptron baseline.
clf_Perceptron = Perceptron(
    # alpha only scales the regularization term; with the default penalty=None
    # it has no effect. Set `penalty` explicitly if regularization is wanted.
    alpha=0.00001,
    fit_intercept=True,  # i.e., bias
    max_iter=1000,
    shuffle=True,
    # Seed the per-epoch shuffling so reruns reproduce the same model
    # (same seed as the MLP cell).
    random_state=1,
)
clf_Perceptron.fit(X_train, y_train)
print('sklearn Perceptron training done')

sklearn Perceptron training done


In [20]:
# Evaluate the fitted Perceptron on the held-out test partition.
print("Test accuracy using sklearn.Perceptron: =================\n")
y_pred_perceptron = clf_Perceptron.predict(X_test)

# print_stat comes from the project's cross_validation helper; its result is
# indexed below. Presumably entry 0 is the metric summary and entry 1 the
# confusion matrix -- confirm against cross_validation.print_stat.
cv = cross_validation()
stats_perceptron = cv.print_stat(y_test, y_pred_perceptron)
for part in (0, 1):
    print(stats_perceptron[part])


Accuracy:	0.5266428571428572
ErrorRate:	0.4733571428571428
Precision:	0.7645011600928074
Recall:	0.09303967245517436

Confusion matrix:
			Positive	Negative	
pred_pos	659			203	
pred_neg	6424			6714	



### Using Logistic Regression in sklearn

In [25]:
# Logistic regression fitted by stochastic gradient descent.
clf_Logistic = SGDClassifier(
    loss="log_loss",     # this makes it a logistic regression
    alpha=0.00001,       # multiplier of the regularization term (default penalty is L2)
    max_iter=1000,
    fit_intercept=True,
    # SGDClassifier shuffles the training data each epoch by default; seed it
    # so reruns reproduce the same model (same seed as the MLP cell).
    random_state=1,
)
clf_Logistic.fit(X_train, y_train)

In [33]:
# Evaluate the fitted logistic-regression model on the held-out test partition.
print("Test accuracy using sklearn.Logistic: =================\n")
y_pred_logistic = clf_Logistic.predict(X_test)

# print_stat comes from the project's cross_validation helper; its result is
# indexed below. Presumably entry 0 is the metric summary and entry 1 the
# confusion matrix -- confirm against cross_validation.print_stat.
cv = cross_validation()
stats_logistic = cv.print_stat(y_test, y_pred_logistic)
for part in (0, 1):
    print(stats_logistic[part])


Accuracy:	0.5712142857142857
ErrorRate:	0.4287857142857143
Precision:	0.7267002518891688
Recall:	0.24438797119864464

Confusion matrix:
			Positive	Negative	
pred_pos	1731			651	
pred_neg	5352			6266	

