In [1]:
import numpy as np
import pandas as pd 
import os
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix

from sklearn.model_selection import cross_val_score
from sklearn.neural_network import MLPClassifier
import warnings
warnings.filterwarnings("ignore")

In [2]:
df = pd.read_csv('https://raw.githubusercontent.com/jg-fisher/diabetesNeuralNetwork/master/prima-indians-diabetes.csv', delimiter=",")

In [3]:
# split into input (X) and output (Y) variables, splitting csv data


X = df.iloc[:,:8]
y = df.iloc[:,8]

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.3, random_state = 42)

## Single Perceptron Layers 10, 50, 100

In [4]:
mlp10 = MLPClassifier(hidden_layer_sizes=(10,), random_state=42)
mlp10.fit(X_train, y_train)
mlp10_predict=mlp10.predict(X_test)
mlp10_conf_matrix=confusion_matrix(y_test, mlp10_predict)
mlp10_acc_score = accuracy_score(y_test, mlp10_predict)

print("MLP with 10 Cross Validation Scores:", cross_val_score(mlp10, X_train, y_train, cv=5)*100)
print("MLP with 10 Accuracy Score:",mlp10_acc_score*100)


MLP with 10 Cross Validation Scores: [64.81481481 64.81481481 67.28971963 60.74766355 76.41509434]
MLP with 10 Accuracy Score: 66.23376623376623


In [5]:
mlp50 = MLPClassifier(hidden_layer_sizes=(50,), random_state=42)
mlp50.fit(X_train, y_train)
mlp50_predict=mlp50.predict(X_test)
mlp50_conf_matrix=confusion_matrix(y_test, mlp50_predict)
mlp50_acc_score = accuracy_score(y_test, mlp50_predict)

print("MLP with 50 Cross Validation Scores:", cross_val_score(mlp50, X_train, y_train, cv=5)*100)
print("MLP with 50 Accuracy Score:",mlp50_acc_score*100)


MLP with 50 Cross Validation Scores: [62.96296296 68.51851852 67.28971963 61.68224299 70.75471698]
MLP with 50 Accuracy Score: 73.16017316017316


In [6]:
mlp100 = MLPClassifier(hidden_layer_sizes=(100,), random_state=42)
mlp100.fit(X_train, y_train)
mlp100_predict=mlp100.predict(X_test)
mlp100_conf_matrix=confusion_matrix(y_test, mlp100_predict)
mlp100_acc_score = accuracy_score(y_test, mlp100_predict)

print("MLP with 100 Cross Validation Scores:", cross_val_score(mlp100, X_train, y_train, cv=5)*100)
print("MLP with 100 Accuracy Score:",mlp100_acc_score*100)



MLP with 100 Cross Validation Scores: [62.03703704 70.37037037 70.09345794 64.48598131 74.52830189]
MLP with 100 Accuracy Score: 71.42857142857143


In [7]:
print("Accuracy Scores:")
print("MLP with 10 Accuracy Score:",mlp10_acc_score*100)
print("MLP with 50 Accuracy Score:",mlp50_acc_score*100)
print("MLP with 100 Accuracy Score:",mlp100_acc_score*100)

print("\nCross Validation Scores:")
print("MLP with 10 Cross Validation Scores:", cross_val_score(mlp10, X_train, y_train, cv=5)*100)
print("MLP with 50 Cross Validation Scores:", cross_val_score(mlp50, X_train, y_train, cv=5)*100)
print("MLP with 100 Cross Validation Scores:", cross_val_score(mlp100, X_train, y_train, cv=5)*100)

Accuracy Scores:
MLP with 10 Accuracy Score: 66.23376623376623
MLP with 50 Accuracy Score: 73.16017316017316
MLP with 100 Accuracy Score: 71.42857142857143

Cross Validation Scores:
MLP with 10 Cross Validation Scores: [64.81481481 64.81481481 67.28971963 60.74766355 76.41509434]
MLP with 50 Cross Validation Scores: [62.96296296 68.51851852 67.28971963 61.68224299 70.75471698]
MLP with 100 Cross Validation Scores: [62.03703704 70.37037037 70.09345794 64.48598131 74.52830189]


In [8]:
## Double Perceptron Layers w/ 2nd layer at 10

In [9]:
mlp1010 = MLPClassifier(hidden_layer_sizes=(10,10), random_state=42)
mlp1010.fit(X_train, y_train)
mlp1010_predict=mlp1010.predict(X_test)
mlp1010_conf_matrix=confusion_matrix(y_test, mlp1010_predict)
mlp1010_acc_score = accuracy_score(y_test, mlp1010_predict)

print("MLP with 10,10 Cross Validation Scores:", cross_val_score(mlp1010, X_train, y_train, cv=5)*100)
print("MLP with 10,10 Accuracy Score:",mlp1010_acc_score*100)


MLP with 10,10 Cross Validation Scores: [67.59259259 71.2962963  68.22429907 69.1588785  70.75471698]
MLP with 10,10 Accuracy Score: 70.12987012987013


In [10]:
mlp5010 = MLPClassifier(hidden_layer_sizes=(50,10), random_state=42)
mlp5010.fit(X_train, y_train)
mlp5010_predict=mlp5010.predict(X_test)
mlp5010_conf_matrix=confusion_matrix(y_test, mlp5010_predict)
mlp5010_acc_score = accuracy_score(y_test, mlp5010_predict)

print("MLP with 50,10 Cross Validation Scores:", cross_val_score(mlp5010, X_train, y_train, cv=5)*100)
print("MLP with 50,10 Accuracy Score:",mlp5010_acc_score*100)


MLP with 50,10 Cross Validation Scores: [65.74074074 71.2962963  70.09345794 64.48598131 70.75471698]
MLP with 50,10 Accuracy Score: 74.89177489177489


In [11]:
# Establish and fit the model, with a single, 100 perceptron layer.
mlp10010 = MLPClassifier(hidden_layer_sizes=(100,10), random_state=42)
mlp10010.fit(X_train, y_train)
mlp10010_predict=mlp10010.predict(X_test)
mlp10010_conf_matrix=confusion_matrix(y_test, mlp10010_predict)
mlp10010_acc_score = accuracy_score(y_test, mlp10010_predict)

print("MLP with 100,10 Cross Validation Scores:", cross_val_score(mlp10010, X_train, y_train, cv=5)*100)
print("MLP with 100,10 Accuracy Score:",mlp10010_acc_score*100)


MLP with 100,10 Cross Validation Scores: [60.18518519 69.44444444 64.48598131 65.42056075 71.69811321]
MLP with 100,10 Accuracy Score: 69.6969696969697


In [12]:
print("Accuracy Scores:")
print("MLP with 10,10 Accuracy Score:",mlp1010_acc_score*100)
print("MLP with 50,10 Accuracy Score:",mlp5010_acc_score*100)
print("MLP with 100,10 Accuracy Score:",mlp10010_acc_score*100)

print("\nCross Validation Scores:")
print("MLP with 10,10 Cross Validation Scores:", cross_val_score(mlp1010, X_train, y_train, cv=5)*100)
print("MLP with 50,10 Cross Validation Scores:", cross_val_score(mlp5010, X_train, y_train, cv=5)*100)
print("MLP with 100,10 Cross Validation Scores:", cross_val_score(mlp10010, X_train, y_train, cv=5)*100)

Accuracy Scores:
MLP with 10,10 Accuracy Score: 70.12987012987013
MLP with 50,10 Accuracy Score: 74.89177489177489
MLP with 100,10 Accuracy Score: 69.6969696969697

Cross Validation Scores:
MLP with 10,10 Cross Validation Scores: [67.59259259 71.2962963  68.22429907 69.1588785  70.75471698]
MLP with 50,10 Cross Validation Scores: [65.74074074 71.2962963  70.09345794 64.48598131 70.75471698]
MLP with 100,10 Cross Validation Scores: [60.18518519 69.44444444 64.48598131 65.42056075 71.69811321]


## Double Perceptron Layers w/ 2nd layer at 50

In [13]:
mlp1050 = MLPClassifier(hidden_layer_sizes=(10,50), random_state=42)
mlp1050.fit(X_train, y_train)
mlp1050_predict=mlp1050.predict(X_test)
mlp1050_conf_matrix=confusion_matrix(y_test, mlp1050_predict)
mlp1050_acc_score = accuracy_score(y_test, mlp1050_predict)

print("MLP with 10,50 Cross Validation Scores:", cross_val_score(mlp1050, X_train, y_train, cv=5)*100)
print("MLP with 10,50 Accuracy Score:",mlp1050_acc_score*100)



MLP with 10,50 Cross Validation Scores: [65.74074074 66.66666667 66.35514019 60.74766355 68.86792453]
MLP with 10,50 Accuracy Score: 74.45887445887446


In [14]:
mlp5050 = MLPClassifier(hidden_layer_sizes=(50,50), random_state=42)
mlp5050.fit(X_train, y_train)
mlp5050_predict=mlp5050.predict(X_test)
mlp5050_conf_matrix=confusion_matrix(y_test, mlp5050_predict)
mlp5050_acc_score = accuracy_score(y_test, mlp5050_predict)

print("MLP with 50,50 Cross Validation Scores:", cross_val_score(mlp5050, X_train, y_train, cv=5)*100)
print("MLP with 50,50 Accuracy Score:",mlp5050_acc_score*100)



MLP with 50,50 Cross Validation Scores: [63.88888889 73.14814815 68.22429907 66.35514019 74.52830189]
MLP with 50,50 Accuracy Score: 73.59307359307358


In [15]:
mlp10050 = MLPClassifier(hidden_layer_sizes=(100,50), random_state=42)
mlp10050.fit(X_train, y_train)
mlp10050_predict=mlp10050.predict(X_test)
mlp10050_conf_matrix=confusion_matrix(y_test, mlp10050_predict)
mlp10050_acc_score = accuracy_score(y_test, mlp10050_predict)

print("MLP with 100,50 Cross Validation Scores:", cross_val_score(mlp10050, X_train, y_train, cv=5)*100)
print("MLP with 100,50 Accuracy Score:",mlp10050_acc_score*100)



MLP with 100,50 Cross Validation Scores: [67.59259259 65.74074074 64.48598131 63.55140187 72.64150943]
MLP with 100,50 Accuracy Score: 73.16017316017316


In [16]:
print("Accuracy Scores:")
print("MLP with 10,50 Accuracy Score:",mlp1050_acc_score*100)
print("MLP with 50,50 Accuracy Score:",mlp5050_acc_score*100)
print("MLP with 100,50 Accuracy Score:",mlp10050_acc_score*100)

print("\nCross Validation Scores:")
print("MLP with 10,50 Cross Validation Scores:", cross_val_score(mlp1050, X_train, y_train, cv=5)*100)
print("MLP with 50,50 Cross Validation Scores:", cross_val_score(mlp5050, X_train, y_train, cv=5)*100)
print("MLP with 100,50 Cross Validation Scores:", cross_val_score(mlp10050, X_train, y_train, cv=5)*100)

Accuracy Scores:
MLP with 10,50 Accuracy Score: 74.45887445887446
MLP with 50,50 Accuracy Score: 73.59307359307358
MLP with 100,50 Accuracy Score: 73.16017316017316

Cross Validation Scores:
MLP with 10,50 Cross Validation Scores: [65.74074074 66.66666667 66.35514019 60.74766355 68.86792453]
MLP with 50,50 Cross Validation Scores: [63.88888889 73.14814815 68.22429907 66.35514019 74.52830189]
MLP with 100,50 Cross Validation Scores: [67.59259259 65.74074074 64.48598131 63.55140187 72.64150943]


## Double Perceptron Layers w/ 2nd layer at 100

In [17]:
mlp10100 = MLPClassifier(hidden_layer_sizes=(10,100), random_state=42)
mlp10100.fit(X_train, y_train)
mlp10100_predict=mlp10100.predict(X_test)
mlp10100_conf_matrix=confusion_matrix(y_test, mlp10100_predict)
mlp10100_acc_score = accuracy_score(y_test, mlp10100_predict)

print("MLP with 10,100 Cross Validation Scores:", cross_val_score(mlp10100, X_train, y_train, cv=5)*100)
print("MLP with 10,100 Accuracy Score:",mlp10100_acc_score*100)


MLP with 10,100 Cross Validation Scores: [61.11111111 66.66666667 70.09345794 61.68224299 68.86792453]
MLP with 10,100 Accuracy Score: 72.2943722943723


In [18]:
mlp50100 = MLPClassifier(hidden_layer_sizes=(50,100), random_state=42)
mlp50100.fit(X_train, y_train)
mlp50100_predict=mlp50100.predict(X_test)
mlp50100_conf_matrix=confusion_matrix(y_test, mlp50100_predict)
mlp50100_acc_score = accuracy_score(y_test, mlp50100_predict)

print("MLP with 50,100 Cross Validation Scores:", cross_val_score(mlp50100, X_train, y_train, cv=5)*100)
print("MLP with 50,100 Accuracy Score:",mlp50100_acc_score*100)


MLP with 50,100 Cross Validation Scores: [63.88888889 65.74074074 68.22429907 67.28971963 70.75471698]
MLP with 50,100 Accuracy Score: 70.12987012987013


In [19]:
# Establish and fit the model, with a single, 100 perceptron layer.
mlp100100 = MLPClassifier(hidden_layer_sizes=(100,100), random_state=42)
mlp100100.fit(X_train, y_train)
mlp100100_predict=mlp100100.predict(X_test)
mlp100100_conf_matrix=confusion_matrix(y_test, mlp100100_predict)
mlp100100_acc_score = accuracy_score(y_test, mlp100100_predict)

print("MLP with 100,100 Cross Validation Scores:", cross_val_score(mlp100100, X_train, y_train, cv=5)*100)
print("MLP with 100,100 Accuracy Score:",mlp100100_acc_score*100)


MLP with 100,100 Cross Validation Scores: [67.59259259 64.81481481 62.61682243 59.81308411 66.03773585]
MLP with 100,100 Accuracy Score: 76.19047619047619


In [20]:
print("Accuracy Scores:")
print("MLP with 10,100 Accuracy Score:",mlp10100_acc_score*100)
print("MLP with 50,100 Accuracy Score:",mlp50100_acc_score*100)
print("MLP with 100,100 Accuracy Score:",mlp100100_acc_score*100)

print("\nCross Validation Scores:")
print("MLP with 10,100 Cross Validation Scores:", cross_val_score(mlp10100, X_train, y_train, cv=5)*100)
print("MLP with 50,100 Cross Validation Scores:", cross_val_score(mlp50100, X_train, y_train, cv=5)*100)
print("MLP with 100,100 Cross Validation Scores:", cross_val_score(mlp100100, X_train, y_train, cv=5)*100)

Accuracy Scores:
MLP with 10,100 Accuracy Score: 72.2943722943723
MLP with 50,100 Accuracy Score: 70.12987012987013
MLP with 100,100 Accuracy Score: 76.19047619047619

Cross Validation Scores:
MLP with 10,100 Cross Validation Scores: [61.11111111 66.66666667 70.09345794 61.68224299 68.86792453]
MLP with 50,100 Cross Validation Scores: [63.88888889 65.74074074 68.22429907 67.28971963 70.75471698]
MLP with 100,100 Cross Validation Scores: [67.59259259 64.81481481 62.61682243 59.81308411 66.03773585]


## Triple Perceptron Layers w/ 2nd and 3rd layers at 10

In [21]:
mlp101010 = MLPClassifier(hidden_layer_sizes=(10,10,10), random_state=42)
mlp101010.fit(X_train, y_train)
mlp101010_predict=mlp101010.predict(X_test)
mlp101010_conf_matrix=confusion_matrix(y_test, mlp101010_predict)
mlp101010_acc_score = accuracy_score(y_test, mlp101010_predict)

print("MLP with 10,10,10 Cross Validation Scores:", cross_val_score(mlp101010, X_train, y_train, cv=5)*100)
print("MLP with 10,10,10 Accuracy Score:",mlp101010_acc_score*100)


MLP with 10,10,10 Cross Validation Scores: [66.66666667 68.51851852 65.42056075 54.20560748 72.64150943]
MLP with 10,10,10 Accuracy Score: 72.2943722943723


In [22]:
mlp501010 = MLPClassifier(hidden_layer_sizes=(50,10,10), random_state=42)
mlp501010.fit(X_train, y_train)
mlp501010_predict=mlp501010.predict(X_test)
mlp501010_conf_matrix=confusion_matrix(y_test, mlp501010_predict)
mlp501010_acc_score = accuracy_score(y_test, mlp501010_predict)

print("MLP with 50,10,10 Cross Validation Scores:", cross_val_score(mlp501010, X_train, y_train, cv=5)*100)
print("MLP with 50,10,10 Accuracy Score:",mlp501010_acc_score*100)


MLP with 50,10,10 Cross Validation Scores: [62.96296296 70.37037037 64.48598131 65.42056075 73.58490566]
MLP with 50,10,10 Accuracy Score: 76.19047619047619


In [23]:
# Establish and fit the model, with a single, 100 perceptron layer.
mlp1001010 = MLPClassifier(hidden_layer_sizes=(100,10,10), random_state=42)
mlp1001010.fit(X_train, y_train)
mlp1001010_predict=mlp1001010.predict(X_test)
mlp1001010_conf_matrix=confusion_matrix(y_test, mlp1001010_predict)
mlp1001010_acc_score = accuracy_score(y_test, mlp1001010_predict)

print("MLP with 100,10,10 Cross Validation Scores:", cross_val_score(mlp1001010, X_train, y_train, cv=5)*100)
print("MLP with 100,10,10 Accuracy Score:",mlp1001010_acc_score*100)


MLP with 100,10,10 Cross Validation Scores: [62.96296296 73.14814815 67.28971963 67.28971963 72.64150943]
MLP with 100,10,10 Accuracy Score: 72.72727272727273


In [24]:
print("Accuracy Scores:")
print("MLP with 10,10,10 Accuracy Score:",mlp101010_acc_score*100)
print("MLP with 50,10,10 Accuracy Score:",mlp501010_acc_score*100)
print("MLP with 100,10,10 Accuracy Score:",mlp1001010_acc_score*100)

print("\nCross Validation Scores:")
print("MLP with 10,10,10 Cross Validation Scores:", cross_val_score(mlp101010, X_train, y_train, cv=5)*100)
print("MLP with 50,10,10 Cross Validation Scores:", cross_val_score(mlp501010, X_train, y_train, cv=5)*100)
print("MLP with 100,10,10 Cross Validation Scores:", cross_val_score(mlp1001010, X_train, y_train, cv=5)*100)

Accuracy Scores:
MLP with 10,10,10 Accuracy Score: 72.2943722943723
MLP with 50,10,10 Accuracy Score: 76.19047619047619
MLP with 100,10,10 Accuracy Score: 72.72727272727273

Cross Validation Scores:
MLP with 10,10,10 Cross Validation Scores: [66.66666667 68.51851852 65.42056075 54.20560748 72.64150943]
MLP with 50,10,10 Cross Validation Scores: [62.96296296 70.37037037 64.48598131 65.42056075 73.58490566]
MLP with 100,10,10 Cross Validation Scores: [62.96296296 73.14814815 67.28971963 67.28971963 72.64150943]


## 3 layers all even

In [25]:
mlp101010 = MLPClassifier(hidden_layer_sizes=(10,10,10), random_state=42)
mlp101010.fit(X_train, y_train)
mlp101010_predict=mlp101010.predict(X_test)
mlp101010_conf_matrix=confusion_matrix(y_test, mlp101010_predict)
mlp101010_acc_score = accuracy_score(y_test, mlp101010_predict)

print("MLP with 10,10,10 Cross Validation Scores:", cross_val_score(mlp101010, X_train, y_train, cv=5)*100)
print("MLP with 10,10,10 Accuracy Score:",mlp101010_acc_score*100)


MLP with 10,10,10 Cross Validation Scores: [66.66666667 68.51851852 65.42056075 54.20560748 72.64150943]
MLP with 10,10,10 Accuracy Score: 72.2943722943723


In [26]:
mlp505050 = MLPClassifier(hidden_layer_sizes=(50,50,50), random_state=42)
mlp505050.fit(X_train, y_train)
mlp505050_predict=mlp505050.predict(X_test)
mlp505050_conf_matrix=confusion_matrix(y_test, mlp505050_predict)
mlp505050_acc_score = accuracy_score(y_test, mlp505050_predict)

print("MLP with 50,50,50 Cross Validation Scores:", cross_val_score(mlp505050, X_train, y_train, cv=5)*100)
print("MLP with 50,50,50 Accuracy Score:",mlp505050_acc_score*100)


MLP with 50,50,50 Cross Validation Scores: [60.18518519 68.51851852 70.09345794 65.42056075 66.98113208]
MLP with 50,50,50 Accuracy Score: 67.09956709956711


In [27]:
mlp100100100 = MLPClassifier(hidden_layer_sizes=(100,100,100), random_state=42)
mlp100100100.fit(X_train, y_train)
mlp100100100_predict=mlp100100100.predict(X_test)
mlp100100100_conf_matrix=confusion_matrix(y_test, mlp100100100_predict)
mlp100100100_acc_score = accuracy_score(y_test, mlp100100100_predict)

print("MLP with 100,100,100 Cross Validation Scores:", cross_val_score(mlp100100100, X_train, y_train, cv=5)*100)
print("MLP with 100,100,100 Accuracy Score:",mlp100100100_acc_score*100)


MLP with 100,100,100 Cross Validation Scores: [62.03703704 63.88888889 70.09345794 61.68224299 72.64150943]
MLP with 100,100,100 Accuracy Score: 73.59307359307358


In [28]:
print("Accuracy Scores:")
print("MLP with 10,10,10 Accuracy Score:",mlp101010_acc_score*100)
print("MLP with 50,50,50 Accuracy Score:",mlp505050_acc_score*100)
print("MLP with 100,100,100 Accuracy Score:",mlp100100100_acc_score*100)

print("\nCross Validation Scores:")
print("MLP with 10,10,10 Cross Validation Scores:", cross_val_score(mlp101010, X_train, y_train, cv=5)*100)
print("MLP with 50,50,50 Cross Validation Scores:", cross_val_score(mlp505050, X_train, y_train, cv=5)*100)
print("MLP with 100,100,100 Cross Validation Scores:", cross_val_score(mlp100100100, X_train, y_train, cv=5)*100)

Accuracy Scores:
MLP with 10,10,10 Accuracy Score: 72.2943722943723
MLP with 50,50,50 Accuracy Score: 67.09956709956711
MLP with 100,100,100 Accuracy Score: 73.59307359307358

Cross Validation Scores:
MLP with 10,10,10 Cross Validation Scores: [66.66666667 68.51851852 65.42056075 54.20560748 72.64150943]
MLP with 50,50,50 Cross Validation Scores: [60.18518519 68.51851852 70.09345794 65.42056075 66.98113208]
MLP with 100,100,100 Cross Validation Scores: [62.03703704 63.88888889 70.09345794 61.68224299 72.64150943]


In [29]:
### Checking 100 with a bigger 2nd layer 

# Establish and fit the model, with a single, 100 perceptron layer.
mlp100150 = MLPClassifier(hidden_layer_sizes=(100,150), random_state=42)
mlp100150.fit(X_train, y_train)
mlp100150_predict=mlp100150.predict(X_test)
mlp100150_conf_matrix=confusion_matrix(y_test, mlp100150_predict)
mlp100150_acc_score = accuracy_score(y_test, mlp100150_predict)

print("MLP with 100,150 Cross Validation Scores:", cross_val_score(mlp100150, X_train, y_train, cv=5)*100)
print("MLP with 100,150 Accuracy Score:",mlp100150_acc_score*100)


MLP with 100,150 Cross Validation Scores: [67.59259259 69.44444444 71.02803738 64.48598131 67.9245283 ]
MLP with 100,150 Accuracy Score: 70.995670995671


### Summary? 

<b>A girl knows nothing. A girl has no conclusion.
    
    
ok ok do the 2nd part....

In [33]:
#Random Forest
#random forest
from sklearn.ensemble import RandomForestClassifier

rfc = RandomForestClassifier(random_state = 42)
rfc.fit(X_train, y_train)
pred_rfc = rfc.predict(X_test)
#Check accuracy

print('\n')
print("Random Forest Classifier Accuracy Score:",accuracy_score(y_test,pred_rfc )*100)
print("Random Forest Classifier F1 Score:",f1_score(y_test,pred_rfc, average="macro")*100)
print("Random Forest Classifier Precision Score:",precision_score(y_test,pred_rfc, average="macro")*100)
print("Random Forest Classifier Recall Score:",recall_score(y_test, pred_rfc, average="macro")*100) 
print("Random Forest Classifier Cross Validation Scores:", cross_val_score(rfc, X_train, y_train, cv=5)*100)
print("\n")
print("Random Forest Classifier Confusion Matrix:\n", confusion_matrix(y_test,pred_rfc))





Random Forest Classifier Accuracy Score: 73.16017316017316
Random Forest Classifier F1 Score: 69.18144258908589
Random Forest Classifier Precision Score: 70.26795380728979
Random Forest Classifier Recall Score: 68.59685430463576
Random Forest Classifier Cross Validation Scores: [75.         77.77777778 69.1588785  69.1588785  77.35849057]


Random Forest Classifier Confusion Matrix:
 [[126  25]
 [ 37  43]]


In [32]:
#Gradient Boost?
from sklearn import ensemble


gbc = ensemble.GradientBoostingClassifier(random_state = 42)
gbc.fit(X_train, y_train)
pred_gbc = gbc.predict(X_test)
#Check accuracy

print('\n')
print("Gradient Boosting Classifier Accuracy Score:",accuracy_score(y_test,pred_gbc )*100)
print("Gradient Boosting Classifier F1 Score:",f1_score(y_test, pred_gbc, average="macro")*100)
print("Gradient Boosting Classifier Precision Score:",precision_score(y_test,pred_gbc, average="macro")*100)
print("Gradient Boosting Classifier Recall Score:",recall_score(y_test, pred_gbc, average="macro")*100) 
print("Gradient Boosting Classifier Cross Validation Scores:", cross_val_score(gbc, X_train, y_train, cv=5)*100)
print("\n")
print("Gradient Boosting Classifier Confusion Matrix:\n", confusion_matrix(y_test,pred_gbc))




Gradient Boosting Classifier Accuracy Score: 75.75757575757575
Gradient Boosting Classifier F1 Score: 73.06795469686875
Gradient Boosting Classifier Precision Score: 73.21518350930117
Gradient Boosting Classifier Recall Score: 72.93460264900662
Gradient Boosting Classifier Cross Validation Scores: [75.         76.85185185 69.1588785  71.96261682 72.64150943]


Gradient Boosting Classifier Confusion Matrix:
 [[124  27]
 [ 29  51]]


OK OK OK I saw... something...

With this, accuracy scores were similar but confusion matricx score were more off. This might matter on larget datasets but in comparison to the simplest classifiers they were similar and it didn't. 

In [34]:
from sklearn.model_selection import GridSearchCV

In [37]:
#randomforestparam
param_grid = {
    'n_estimators': [100, 200, 300],
    'criterion': ['entropy', 'gini'],
    'max_depth': [10, 50, 100],
    'min_samples_split': [2, 4, 6, 8],
    'max_features': ['log2', 'sqrt','auto'],
    'random_state':[42]}

grid_rfc = GridSearchCV(rfc, param_grid = param_grid, cv = 3, verbose = 5)
grid_rfc.fit(X_train,y_train)

rfcbest_estimator = grid_rfc.best_estimator_


Fitting 3 folds for each of 216 candidates, totalling 648 fits
[CV] criterion=entropy, max_depth=10, max_features=log2, min_samples_split=2, n_estimators=100, random_state=42 
[CV]  criterion=entropy, max_depth=10, max_features=log2, min_samples_split=2, n_estimators=100, random_state=42, score=0.75, total=   0.0s
[CV] criterion=entropy, max_depth=10, max_features=log2, min_samples_split=2, n_estimators=100, random_state=42 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s


[CV]  criterion=entropy, max_depth=10, max_features=log2, min_samples_split=2, n_estimators=100, random_state=42, score=0.7865168539325843, total=   0.0s
[CV] criterion=entropy, max_depth=10, max_features=log2, min_samples_split=2, n_estimators=100, random_state=42 
[CV]  criterion=entropy, max_depth=10, max_features=log2, min_samples_split=2, n_estimators=100, random_state=42, score=0.7752808988764045, total=   0.0s
[CV] criterion=entropy, max_depth=10, max_features=log2, min_samples_split=2, n_estimators=200, random_state=42 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.3s remaining:    0.0s


[CV]  criterion=entropy, max_depth=10, max_features=log2, min_samples_split=2, n_estimators=200, random_state=42, score=0.7666666666666667, total=   0.1s
[CV] criterion=entropy, max_depth=10, max_features=log2, min_samples_split=2, n_estimators=200, random_state=42 


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.6s remaining:    0.0s


[CV]  criterion=entropy, max_depth=10, max_features=log2, min_samples_split=2, n_estimators=200, random_state=42, score=0.7696629213483146, total=   0.2s
[CV] criterion=entropy, max_depth=10, max_features=log2, min_samples_split=2, n_estimators=200, random_state=42 
[CV]  criterion=entropy, max_depth=10, max_features=log2, min_samples_split=2, n_estimators=200, random_state=42, score=0.7752808988764045, total=   0.2s
[CV] criterion=entropy, max_depth=10, max_features=log2, min_samples_split=2, n_estimators=300, random_state=42 
[CV]  criterion=entropy, max_depth=10, max_features=log2, min_samples_split=2, n_estimators=300, random_state=42, score=0.7611111111111111, total=   0.3s
[CV] criterion=entropy, max_depth=10, max_features=log2, min_samples_split=2, n_estimators=300, random_state=42 
[CV]  criterion=entropy, max_depth=10, max_features=log2, min_samples_split=2, n_estimators=300, random_state=42, score=0.7808988764044944, total=   0.4s
[CV] criterion=entropy, max_depth=10, max_fea

[CV]  criterion=entropy, max_depth=10, max_features=log2, min_samples_split=8, n_estimators=300, random_state=42, score=0.7696629213483146, total=   0.2s
[CV] criterion=entropy, max_depth=10, max_features=sqrt, min_samples_split=2, n_estimators=100, random_state=42 
[CV]  criterion=entropy, max_depth=10, max_features=sqrt, min_samples_split=2, n_estimators=100, random_state=42, score=0.7611111111111111, total=   0.0s
[CV] criterion=entropy, max_depth=10, max_features=sqrt, min_samples_split=2, n_estimators=100, random_state=42 
[CV]  criterion=entropy, max_depth=10, max_features=sqrt, min_samples_split=2, n_estimators=100, random_state=42, score=0.7752808988764045, total=   0.0s
[CV] criterion=entropy, max_depth=10, max_features=sqrt, min_samples_split=2, n_estimators=100, random_state=42 
[CV]  criterion=entropy, max_depth=10, max_features=sqrt, min_samples_split=2, n_estimators=100, random_state=42, score=0.7921348314606742, total=   0.0s
[CV] criterion=entropy, max_depth=10, max_fea

[CV]  criterion=entropy, max_depth=10, max_features=sqrt, min_samples_split=8, n_estimators=200, random_state=42, score=0.75, total=   0.1s
[CV] criterion=entropy, max_depth=10, max_features=sqrt, min_samples_split=8, n_estimators=200, random_state=42 
[CV]  criterion=entropy, max_depth=10, max_features=sqrt, min_samples_split=8, n_estimators=200, random_state=42, score=0.7696629213483146, total=   0.1s
[CV] criterion=entropy, max_depth=10, max_features=sqrt, min_samples_split=8, n_estimators=200, random_state=42 
[CV]  criterion=entropy, max_depth=10, max_features=sqrt, min_samples_split=8, n_estimators=200, random_state=42, score=0.7696629213483146, total=   0.1s
[CV] criterion=entropy, max_depth=10, max_features=sqrt, min_samples_split=8, n_estimators=300, random_state=42 
[CV]  criterion=entropy, max_depth=10, max_features=sqrt, min_samples_split=8, n_estimators=300, random_state=42, score=0.75, total=   0.2s
[CV] criterion=entropy, max_depth=10, max_features=sqrt, min_samples_spli

[CV]  criterion=entropy, max_depth=10, max_features=auto, min_samples_split=6, n_estimators=300, random_state=42, score=0.7584269662921348, total=   0.2s
[CV] criterion=entropy, max_depth=10, max_features=auto, min_samples_split=6, n_estimators=300, random_state=42 
[CV]  criterion=entropy, max_depth=10, max_features=auto, min_samples_split=6, n_estimators=300, random_state=42, score=0.7696629213483146, total=   0.2s
[CV] criterion=entropy, max_depth=10, max_features=auto, min_samples_split=8, n_estimators=100, random_state=42 
[CV]  criterion=entropy, max_depth=10, max_features=auto, min_samples_split=8, n_estimators=100, random_state=42, score=0.7388888888888889, total=   0.0s
[CV] criterion=entropy, max_depth=10, max_features=auto, min_samples_split=8, n_estimators=100, random_state=42 
[CV]  criterion=entropy, max_depth=10, max_features=auto, min_samples_split=8, n_estimators=100, random_state=42, score=0.7752808988764045, total=   0.0s
[CV] criterion=entropy, max_depth=10, max_fea

[CV]  criterion=entropy, max_depth=50, max_features=log2, min_samples_split=6, n_estimators=200, random_state=42, score=0.75, total=   0.1s
[CV] criterion=entropy, max_depth=50, max_features=log2, min_samples_split=6, n_estimators=200, random_state=42 
[CV]  criterion=entropy, max_depth=50, max_features=log2, min_samples_split=6, n_estimators=200, random_state=42, score=0.7528089887640449, total=   0.1s
[CV] criterion=entropy, max_depth=50, max_features=log2, min_samples_split=6, n_estimators=200, random_state=42 
[CV]  criterion=entropy, max_depth=50, max_features=log2, min_samples_split=6, n_estimators=200, random_state=42, score=0.7696629213483146, total=   0.2s
[CV] criterion=entropy, max_depth=50, max_features=log2, min_samples_split=6, n_estimators=300, random_state=42 
[CV]  criterion=entropy, max_depth=50, max_features=log2, min_samples_split=6, n_estimators=300, random_state=42, score=0.7611111111111111, total=   0.3s
[CV] criterion=entropy, max_depth=50, max_features=log2, mi

[CV]  criterion=entropy, max_depth=50, max_features=sqrt, min_samples_split=4, n_estimators=300, random_state=42, score=0.7752808988764045, total=   0.2s
[CV] criterion=entropy, max_depth=50, max_features=sqrt, min_samples_split=4, n_estimators=300, random_state=42 
[CV]  criterion=entropy, max_depth=50, max_features=sqrt, min_samples_split=4, n_estimators=300, random_state=42, score=0.7921348314606742, total=   0.2s
[CV] criterion=entropy, max_depth=50, max_features=sqrt, min_samples_split=6, n_estimators=100, random_state=42 
[CV]  criterion=entropy, max_depth=50, max_features=sqrt, min_samples_split=6, n_estimators=100, random_state=42, score=0.7611111111111111, total=   0.0s
[CV] criterion=entropy, max_depth=50, max_features=sqrt, min_samples_split=6, n_estimators=100, random_state=42 
[CV]  criterion=entropy, max_depth=50, max_features=sqrt, min_samples_split=6, n_estimators=100, random_state=42, score=0.7640449438202247, total=   0.0s
[CV] criterion=entropy, max_depth=50, max_fea

[CV]  criterion=entropy, max_depth=50, max_features=auto, min_samples_split=4, n_estimators=100, random_state=42, score=0.7640449438202247, total=   0.0s
[CV] criterion=entropy, max_depth=50, max_features=auto, min_samples_split=4, n_estimators=200, random_state=42 
[CV]  criterion=entropy, max_depth=50, max_features=auto, min_samples_split=4, n_estimators=200, random_state=42, score=0.7666666666666667, total=   0.2s
[CV] criterion=entropy, max_depth=50, max_features=auto, min_samples_split=4, n_estimators=200, random_state=42 
[CV]  criterion=entropy, max_depth=50, max_features=auto, min_samples_split=4, n_estimators=200, random_state=42, score=0.7752808988764045, total=   0.2s
[CV] criterion=entropy, max_depth=50, max_features=auto, min_samples_split=4, n_estimators=200, random_state=42 
[CV]  criterion=entropy, max_depth=50, max_features=auto, min_samples_split=4, n_estimators=200, random_state=42, score=0.7808988764044944, total=   0.1s
[CV] criterion=entropy, max_depth=50, max_fea

[CV]  criterion=entropy, max_depth=100, max_features=log2, min_samples_split=2, n_estimators=300, random_state=42, score=0.75, total=   0.2s
[CV] criterion=entropy, max_depth=100, max_features=log2, min_samples_split=2, n_estimators=300, random_state=42 
[CV]  criterion=entropy, max_depth=100, max_features=log2, min_samples_split=2, n_estimators=300, random_state=42, score=0.7865168539325843, total=   0.2s
[CV] criterion=entropy, max_depth=100, max_features=log2, min_samples_split=2, n_estimators=300, random_state=42 
[CV]  criterion=entropy, max_depth=100, max_features=log2, min_samples_split=2, n_estimators=300, random_state=42, score=0.7696629213483146, total=   0.3s
[CV] criterion=entropy, max_depth=100, max_features=log2, min_samples_split=4, n_estimators=100, random_state=42 
[CV]  criterion=entropy, max_depth=100, max_features=log2, min_samples_split=4, n_estimators=100, random_state=42, score=0.7722222222222223, total=   0.0s
[CV] criterion=entropy, max_depth=100, max_features=

[CV]  criterion=entropy, max_depth=100, max_features=sqrt, min_samples_split=2, n_estimators=100, random_state=42, score=0.7640449438202247, total=   0.0s
[CV] criterion=entropy, max_depth=100, max_features=sqrt, min_samples_split=2, n_estimators=100, random_state=42 
[CV]  criterion=entropy, max_depth=100, max_features=sqrt, min_samples_split=2, n_estimators=100, random_state=42, score=0.7640449438202247, total=   0.0s
[CV] criterion=entropy, max_depth=100, max_features=sqrt, min_samples_split=2, n_estimators=200, random_state=42 
[CV]  criterion=entropy, max_depth=100, max_features=sqrt, min_samples_split=2, n_estimators=200, random_state=42, score=0.7611111111111111, total=   0.1s
[CV] criterion=entropy, max_depth=100, max_features=sqrt, min_samples_split=2, n_estimators=200, random_state=42 
[CV]  criterion=entropy, max_depth=100, max_features=sqrt, min_samples_split=2, n_estimators=200, random_state=42, score=0.7640449438202247, total=   0.1s
[CV] criterion=entropy, max_depth=100,

[CV]  criterion=entropy, max_depth=100, max_features=sqrt, min_samples_split=8, n_estimators=200, random_state=42, score=0.7808988764044944, total=   0.1s
[CV] criterion=entropy, max_depth=100, max_features=sqrt, min_samples_split=8, n_estimators=300, random_state=42 
[CV]  criterion=entropy, max_depth=100, max_features=sqrt, min_samples_split=8, n_estimators=300, random_state=42, score=0.75, total=   0.2s
[CV] criterion=entropy, max_depth=100, max_features=sqrt, min_samples_split=8, n_estimators=300, random_state=42 
[CV]  criterion=entropy, max_depth=100, max_features=sqrt, min_samples_split=8, n_estimators=300, random_state=42, score=0.7696629213483146, total=   0.2s
[CV] criterion=entropy, max_depth=100, max_features=sqrt, min_samples_split=8, n_estimators=300, random_state=42 
[CV]  criterion=entropy, max_depth=100, max_features=sqrt, min_samples_split=8, n_estimators=300, random_state=42, score=0.7865168539325843, total=   0.2s
[CV] criterion=entropy, max_depth=100, max_features=

[CV]  criterion=entropy, max_depth=100, max_features=auto, min_samples_split=8, n_estimators=100, random_state=42, score=0.7752808988764045, total=   0.0s
[CV] criterion=entropy, max_depth=100, max_features=auto, min_samples_split=8, n_estimators=100, random_state=42 
[CV]  criterion=entropy, max_depth=100, max_features=auto, min_samples_split=8, n_estimators=100, random_state=42, score=0.7752808988764045, total=   0.0s
[CV] criterion=entropy, max_depth=100, max_features=auto, min_samples_split=8, n_estimators=200, random_state=42 
[CV]  criterion=entropy, max_depth=100, max_features=auto, min_samples_split=8, n_estimators=200, random_state=42, score=0.7444444444444445, total=   0.1s
[CV] criterion=entropy, max_depth=100, max_features=auto, min_samples_split=8, n_estimators=200, random_state=42 
[CV]  criterion=entropy, max_depth=100, max_features=auto, min_samples_split=8, n_estimators=200, random_state=42, score=0.7865168539325843, total=   0.1s
[CV] criterion=entropy, max_depth=100,

[CV]  criterion=gini, max_depth=10, max_features=log2, min_samples_split=6, n_estimators=300, random_state=42, score=0.7666666666666667, total=   0.2s
[CV] criterion=gini, max_depth=10, max_features=log2, min_samples_split=6, n_estimators=300, random_state=42 
[CV]  criterion=gini, max_depth=10, max_features=log2, min_samples_split=6, n_estimators=300, random_state=42, score=0.7640449438202247, total=   0.2s
[CV] criterion=gini, max_depth=10, max_features=log2, min_samples_split=6, n_estimators=300, random_state=42 
[CV]  criterion=gini, max_depth=10, max_features=log2, min_samples_split=6, n_estimators=300, random_state=42, score=0.7865168539325843, total=   0.2s
[CV] criterion=gini, max_depth=10, max_features=log2, min_samples_split=8, n_estimators=100, random_state=42 
[CV]  criterion=gini, max_depth=10, max_features=log2, min_samples_split=8, n_estimators=100, random_state=42, score=0.7611111111111111, total=   0.0s
[CV] criterion=gini, max_depth=10, max_features=log2, min_samples_

[CV]  criterion=gini, max_depth=10, max_features=sqrt, min_samples_split=6, n_estimators=100, random_state=42, score=0.7696629213483146, total=   0.0s
[CV] criterion=gini, max_depth=10, max_features=sqrt, min_samples_split=6, n_estimators=100, random_state=42 
[CV]  criterion=gini, max_depth=10, max_features=sqrt, min_samples_split=6, n_estimators=100, random_state=42, score=0.7921348314606742, total=   0.0s
[CV] criterion=gini, max_depth=10, max_features=sqrt, min_samples_split=6, n_estimators=200, random_state=42 
[CV]  criterion=gini, max_depth=10, max_features=sqrt, min_samples_split=6, n_estimators=200, random_state=42, score=0.75, total=   0.1s
[CV] criterion=gini, max_depth=10, max_features=sqrt, min_samples_split=6, n_estimators=200, random_state=42 
[CV]  criterion=gini, max_depth=10, max_features=sqrt, min_samples_split=6, n_estimators=200, random_state=42, score=0.7584269662921348, total=   0.1s
[CV] criterion=gini, max_depth=10, max_features=sqrt, min_samples_split=6, n_est

[CV]  criterion=gini, max_depth=10, max_features=auto, min_samples_split=4, n_estimators=300, random_state=42, score=0.7611111111111111, total=   0.2s
[CV] criterion=gini, max_depth=10, max_features=auto, min_samples_split=4, n_estimators=300, random_state=42 
[CV]  criterion=gini, max_depth=10, max_features=auto, min_samples_split=4, n_estimators=300, random_state=42, score=0.7640449438202247, total=   0.1s
[CV] criterion=gini, max_depth=10, max_features=auto, min_samples_split=4, n_estimators=300, random_state=42 
[CV]  criterion=gini, max_depth=10, max_features=auto, min_samples_split=4, n_estimators=300, random_state=42, score=0.7640449438202247, total=   0.2s
[CV] criterion=gini, max_depth=10, max_features=auto, min_samples_split=6, n_estimators=100, random_state=42 
[CV]  criterion=gini, max_depth=10, max_features=auto, min_samples_split=6, n_estimators=100, random_state=42, score=0.7555555555555555, total=   0.0s
[CV] criterion=gini, max_depth=10, max_features=auto, min_samples_

[CV]  criterion=gini, max_depth=50, max_features=log2, min_samples_split=4, n_estimators=200, random_state=42, score=0.7611111111111111, total=   0.1s
[CV] criterion=gini, max_depth=50, max_features=log2, min_samples_split=4, n_estimators=200, random_state=42 
[CV]  criterion=gini, max_depth=50, max_features=log2, min_samples_split=4, n_estimators=200, random_state=42, score=0.7528089887640449, total=   0.1s
[CV] criterion=gini, max_depth=50, max_features=log2, min_samples_split=4, n_estimators=200, random_state=42 
[CV]  criterion=gini, max_depth=50, max_features=log2, min_samples_split=4, n_estimators=200, random_state=42, score=0.7808988764044944, total=   0.1s
[CV] criterion=gini, max_depth=50, max_features=log2, min_samples_split=4, n_estimators=300, random_state=42 
[CV]  criterion=gini, max_depth=50, max_features=log2, min_samples_split=4, n_estimators=300, random_state=42, score=0.7666666666666667, total=   0.2s
[CV] criterion=gini, max_depth=50, max_features=log2, min_samples_

[CV]  criterion=gini, max_depth=50, max_features=sqrt, min_samples_split=2, n_estimators=300, random_state=42, score=0.7865168539325843, total=   0.2s
[CV] criterion=gini, max_depth=50, max_features=sqrt, min_samples_split=4, n_estimators=100, random_state=42 
[CV]  criterion=gini, max_depth=50, max_features=sqrt, min_samples_split=4, n_estimators=100, random_state=42, score=0.7555555555555555, total=   0.0s
[CV] criterion=gini, max_depth=50, max_features=sqrt, min_samples_split=4, n_estimators=100, random_state=42 
[CV]  criterion=gini, max_depth=50, max_features=sqrt, min_samples_split=4, n_estimators=100, random_state=42, score=0.7528089887640449, total=   0.1s
[CV] criterion=gini, max_depth=50, max_features=sqrt, min_samples_split=4, n_estimators=100, random_state=42 
[CV]  criterion=gini, max_depth=50, max_features=sqrt, min_samples_split=4, n_estimators=100, random_state=42, score=0.7752808988764045, total=   0.0s
[CV] criterion=gini, max_depth=50, max_features=sqrt, min_samples_

[CV]  criterion=gini, max_depth=50, max_features=auto, min_samples_split=2, n_estimators=200, random_state=42, score=0.7808988764044944, total=   0.1s
[CV] criterion=gini, max_depth=50, max_features=auto, min_samples_split=2, n_estimators=200, random_state=42 
[CV]  criterion=gini, max_depth=50, max_features=auto, min_samples_split=2, n_estimators=200, random_state=42, score=0.7865168539325843, total=   0.1s
[CV] criterion=gini, max_depth=50, max_features=auto, min_samples_split=2, n_estimators=300, random_state=42 
[CV]  criterion=gini, max_depth=50, max_features=auto, min_samples_split=2, n_estimators=300, random_state=42, score=0.7611111111111111, total=   0.2s
[CV] criterion=gini, max_depth=50, max_features=auto, min_samples_split=2, n_estimators=300, random_state=42 
[CV]  criterion=gini, max_depth=50, max_features=auto, min_samples_split=2, n_estimators=300, random_state=42, score=0.7752808988764045, total=   0.2s
[CV] criterion=gini, max_depth=50, max_features=auto, min_samples_

[CV]  criterion=gini, max_depth=100, max_features=log2, min_samples_split=2, n_estimators=100, random_state=42, score=0.7752808988764045, total=   0.0s
[CV] criterion=gini, max_depth=100, max_features=log2, min_samples_split=2, n_estimators=100, random_state=42 
[CV]  criterion=gini, max_depth=100, max_features=log2, min_samples_split=2, n_estimators=100, random_state=42, score=0.7640449438202247, total=   0.0s
[CV] criterion=gini, max_depth=100, max_features=log2, min_samples_split=2, n_estimators=200, random_state=42 
[CV]  criterion=gini, max_depth=100, max_features=log2, min_samples_split=2, n_estimators=200, random_state=42, score=0.7777777777777778, total=   0.1s
[CV] criterion=gini, max_depth=100, max_features=log2, min_samples_split=2, n_estimators=200, random_state=42 
[CV]  criterion=gini, max_depth=100, max_features=log2, min_samples_split=2, n_estimators=200, random_state=42, score=0.7921348314606742, total=   0.1s
[CV] criterion=gini, max_depth=100, max_features=log2, min_

[CV]  criterion=gini, max_depth=100, max_features=log2, min_samples_split=8, n_estimators=300, random_state=42, score=0.7722222222222223, total=   0.2s
[CV] criterion=gini, max_depth=100, max_features=log2, min_samples_split=8, n_estimators=300, random_state=42 
[CV]  criterion=gini, max_depth=100, max_features=log2, min_samples_split=8, n_estimators=300, random_state=42, score=0.7471910112359551, total=   0.2s
[CV] criterion=gini, max_depth=100, max_features=log2, min_samples_split=8, n_estimators=300, random_state=42 
[CV]  criterion=gini, max_depth=100, max_features=log2, min_samples_split=8, n_estimators=300, random_state=42, score=0.7808988764044944, total=   0.2s
[CV] criterion=gini, max_depth=100, max_features=sqrt, min_samples_split=2, n_estimators=100, random_state=42 
[CV]  criterion=gini, max_depth=100, max_features=sqrt, min_samples_split=2, n_estimators=100, random_state=42, score=0.7611111111111111, total=   0.0s
[CV] criterion=gini, max_depth=100, max_features=sqrt, min_

[CV]  criterion=gini, max_depth=100, max_features=sqrt, min_samples_split=8, n_estimators=200, random_state=42, score=0.7555555555555555, total=   0.1s
[CV] criterion=gini, max_depth=100, max_features=sqrt, min_samples_split=8, n_estimators=200, random_state=42 
[CV]  criterion=gini, max_depth=100, max_features=sqrt, min_samples_split=8, n_estimators=200, random_state=42, score=0.7640449438202247, total=   0.1s
[CV] criterion=gini, max_depth=100, max_features=sqrt, min_samples_split=8, n_estimators=200, random_state=42 
[CV]  criterion=gini, max_depth=100, max_features=sqrt, min_samples_split=8, n_estimators=200, random_state=42, score=0.7696629213483146, total=   0.1s
[CV] criterion=gini, max_depth=100, max_features=sqrt, min_samples_split=8, n_estimators=300, random_state=42 
[CV]  criterion=gini, max_depth=100, max_features=sqrt, min_samples_split=8, n_estimators=300, random_state=42, score=0.7555555555555555, total=   0.1s
[CV] criterion=gini, max_depth=100, max_features=sqrt, min_

[CV]  criterion=gini, max_depth=100, max_features=auto, min_samples_split=6, n_estimators=300, random_state=42, score=0.7752808988764045, total=   0.2s
[CV] criterion=gini, max_depth=100, max_features=auto, min_samples_split=8, n_estimators=100, random_state=42 
[CV]  criterion=gini, max_depth=100, max_features=auto, min_samples_split=8, n_estimators=100, random_state=42, score=0.7666666666666667, total=   0.0s
[CV] criterion=gini, max_depth=100, max_features=auto, min_samples_split=8, n_estimators=100, random_state=42 
[CV]  criterion=gini, max_depth=100, max_features=auto, min_samples_split=8, n_estimators=100, random_state=42, score=0.7696629213483146, total=   0.0s
[CV] criterion=gini, max_depth=100, max_features=auto, min_samples_split=8, n_estimators=100, random_state=42 
[CV]  criterion=gini, max_depth=100, max_features=auto, min_samples_split=8, n_estimators=100, random_state=42, score=0.7640449438202247, total=   0.0s
[CV] criterion=gini, max_depth=100, max_features=auto, min_

[Parallel(n_jobs=1)]: Done 648 out of 648 | elapsed:  2.4min finished


ok


In [36]:
#gradientboostparam
gbcparam= {'learning_rate':[0.15,0.1,0.05,0.01,0.005,0.001], 
           'n_estimators':[100,250,500,],
           'max_depth': [4, 6, 8],
           'min_samples_leaf': [20, 50,100,150],
           'max_features': [1.0, 0.3, 0.1],
          'random_state':[42]}

gbctuning = GridSearchCV(estimator = ensemble.GradientBoostingClassifier(max_depth=4, min_samples_split=2, min_samples_leaf=1, subsample=1,max_features='sqrt', random_state=10), 
            param_grid = gbcparam, scoring='accuracy',n_jobs=4,iid=False, cv=3)


gbctuning.fit(X_train,y_train)


GridSearchCV(cv=3, error_score='raise-deprecating',
       estimator=GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=4,
              max_features='sqrt', max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_sam...        subsample=1, tol=0.0001, validation_fraction=0.1, verbose=0,
              warm_start=False),
       fit_params=None, iid=False, n_jobs=4,
       param_grid={'learning_rate': [0.15, 0.1, 0.05, 0.01, 0.005, 0.001], 'n_estimators': [100, 250, 500], 'max_depth': [4, 6, 8], 'min_samples_leaf': [20, 50, 100, 150], 'max_features': [1.0, 0.3, 0.1], 'random_state': [42]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='accuracy', verbose=0)

In [38]:
print('Best Parameters:')
print('\n')
print("Random Forest:\n",grid_rfc.best_params_)
print('\n')
print("Gradient Boost:\n",gbctuning.best_params_)

Best Parameters:


Random Forest:
 {'criterion': 'gini', 'max_depth': 50, 'max_features': 'log2', 'min_samples_split': 2, 'n_estimators': 200, 'random_state': 42}


Gradient Boost:
 {'learning_rate': 0.1, 'max_depth': 8, 'max_features': 1.0, 'min_samples_leaf': 20, 'n_estimators': 100, 'random_state': 42}


In [39]:
bestrfc = RandomForestClassifier(criterion= 'gini', max_depth= 50, 
                                 max_features= 'log2', min_samples_split= 2, 
                                 n_estimators= 200, random_state= 42)
bestrfc.fit(X_train, y_train)
bestpred_rfc = bestrfc.predict(X_test)
#Check accuracy

print('\n')
print("Random Forest Classifier Accuracy Score:",accuracy_score(y_test,bestpred_rfc )*100)
print("Random Forest Classifier F1 Score:",f1_score(y_test, bestpred_rfc, average="macro")*100)
print("Random Forest Classifier Precision Score:",precision_score(y_test,bestpred_rfc, average="macro")*100)
print("Random Forest Classifier Recall Score:",recall_score(y_test, bestpred_rfc, average="macro")*100) 
print("Random Forest Classifier Cross Validation Scores:", cross_val_score(bestrfc, X_train, y_train, cv=3)*100)
print("\n")
print("Random Forest Classifier Confusion Matrix:\n", confusion_matrix(y_test,bestpred_rfc))




Random Forest Classifier Accuracy Score: 75.75757575757575
Random Forest Classifier F1 Score: 73.22847682119205
Random Forest Classifier Precision Score: 73.22847682119206
Random Forest Classifier Recall Score: 73.22847682119206
Random Forest Classifier Cross Validation Scores: [77.77777778 79.21348315 79.21348315]


Random Forest Classifier Confusion Matrix:
 [[123  28]
 [ 28  52]]


In [42]:
bestgbc = ensemble.GradientBoostingClassifier(learning_rate= 0.1, max_depth= 8, 
                                              max_features= 1.0, min_samples_leaf= 20, 
                                              n_estimators= 100, random_state= 42)
bestgbc.fit(X_train, y_train)
bestpred_gbc = bestgbc.predict(X_test)
#Check accuracy
print('\n')
print("Gradient Boosting Classifier Accuracy Score:",accuracy_score(y_test,bestpred_gbc )*100)
print("Gradient Boosting Classifier F1 Score:",f1_score(y_test, bestpred_gbc, average="macro")*100)
print("Gradient Boosting Classifier Precision Score:",precision_score(y_test,bestpred_gbc, average="macro")*100)
print("Gradient Boosting Classifier Recall Score:",recall_score(y_test, bestpred_gbc, average="macro")*100) 
print("Gradient Boosting Classifier Cross Validation Scores:", cross_val_score(bestgbc, X_train, y_train, cv=3)*100)
print("\n")
print("Gradient Boosting Classifier Confusion Matrix:\n", confusion_matrix(y_test,bestpred_gbc))





Gradient Boosting Classifier Accuracy Score: 76.19047619047619
Gradient Boosting Classifier F1 Score: 74.21090023343145
Gradient Boosting Classifier Precision Score: 73.85057471264368
Gradient Boosting Classifier Recall Score: 74.73509933774834
Gradient Boosting Classifier Cross Validation Scores: [77.77777778 78.08988764 75.28089888]


Gradient Boosting Classifier Confusion Matrix:
 [[120  31]
 [ 24  56]]


#### Scores better with the right parameters but you have to take the time to find those.