# Libraries Imported

In [20]:
import numpy as np
import pandas as pd

from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn import metrics

from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

# Training Data

In [21]:
train = pd.read_csv('dataset/train.csv')

X_train = train.copy()
y_train = X_train.pop('target')

std = StandardScaler()
std.fit(X_train)
X_train = pd.DataFrame(std.transform(X_train), columns = X_train.columns)

# Fisher's Discriminant Training Data

In [22]:
train1 = pd.read_csv('dataset/fishertrain.csv')

X_train1 = train1.copy()
y_train1 = X_train1.pop('target')

std = StandardScaler()
std.fit(X_train1)
X_train1 = pd.DataFrame(std.transform(X_train1), columns = X_train1.columns)

# Correlation Training Data

In [23]:
train2 = pd.read_csv('dataset/corrtrain.csv')

X_train2 = train2.copy()
y_train2 = X_train2.pop('target')

std = StandardScaler()
std.fit(X_train2)
X_train2 = pd.DataFrame(std.transform(X_train2), columns = X_train2.columns)

# Testing Data

In [24]:
test = pd.read_csv('dataset/test.csv')
X_test = test.copy()
y_test = X_test.pop('target')

std = StandardScaler()
std.fit(X_test)
X_test = pd.DataFrame(std.transform(X_test), columns = X_test.columns)

# Fisher's Discriminant Testing Data

In [25]:
test1 = pd.read_csv('dataset/fishertest.csv')
X_test1 = test1.copy()
y_test1 = X_test1.pop('target')

std = StandardScaler()
std.fit(X_test1)
X_test1 = pd.DataFrame(std.transform(X_test1), columns = X_test1.columns)

# Correlation Testing Data

In [26]:
test2 = pd.read_csv('dataset/corrtest.csv')
X_test2 = test2.copy()
y_test2 = X_test2.pop('target')

std = StandardScaler()
std.fit(X_test2)
X_test2 = pd.DataFrame(std.transform(X_test2), columns = X_test2.columns)

# Random Forest GridSearch (Train)

In [9]:
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(random_state=42)

param_grid = { 
    'n_estimators': [400, 500],
    'max_features': ['auto'],
    'max_depth' : [6],
    'criterion' :['entropy']
}

CV_rfc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5)
CV_rfc.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=RandomForestClassifier(random_state=42),
             param_grid={'criterion': ['entropy'], 'max_depth': [6],
                         'max_features': ['auto'], 'n_estimators': [400, 500]})

# Random Forest Best Parameters (Train)

In [10]:
CV_rfc.best_params_

{'criterion': 'entropy',
 'max_depth': 6,
 'max_features': 'auto',
 'n_estimators': 500}

# Random Forest Classifier w/ Results (Train)

In [11]:
rfc = RandomForestClassifier(random_state=42, max_features='auto', n_estimators= 500, max_depth=6, criterion='entropy')

rfc = rfc.fit(X_train,y_train)
y_pred = rfc.predict(X_test)
rfc.fit(X_train, y_train)

report = classification_report(y_test, y_pred, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.769231  0.945946  0.848485   74.000000
normal         0.826087  0.475000  0.603175   40.000000
accuracy       0.780702  0.780702  0.780702    0.780702
macro avg      0.797659  0.710473  0.725830  114.000000
weighted avg   0.789180  0.780702  0.762411  114.000000


# Random Forest GridSearch (Fisher's Discriminant)

In [15]:
clf = RandomForestClassifier(random_state=42)

param_grid = {
    'n_estimators': [300, 400, 500],
    'max_depth' : [6, 7, 8],
    'criterion' :['entropy', 'gini'],
    #'min_samples_split' :[2, 3, 4], Default Values
    #'min_samples_leaf' :[1, 2, 3, 4], Default Values
    'max_features' :['auto', 'sqrt', 'log2']
}

CV_rfc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5)
CV_rfc.fit(X_train1, y_train1)

GridSearchCV(cv=5, estimator=RandomForestClassifier(random_state=42),
             param_grid={'criterion': ['entropy', 'gini'],
                         'max_depth': [6, 7, 8],
                         'max_features': ['auto', 'sqrt', 'log2'],
                         'min_samples_leaf': [1, 2, 3, 4],
                         'min_samples_split': [2, 3, 4],
                         'n_estimators': [300, 400, 500]})

# Random Forest Best Parameters (Fisher's Discriminant)

In [17]:
CV_rfc.best_params_

{'criterion': 'gini',
 'max_depth': 8,
 'max_features': 'auto',
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'n_estimators': 500}

# Random Forest Classifier w/ Results (Fisher's Discriminant)

In [18]:
rfc = RandomForestClassifier(random_state=42, max_features='auto', n_estimators= 500, max_depth=8, criterion='gini')

rfc = rfc.fit(X_train1, y_train1)
y_pred1 = rfc.predict(X_test1)
rfc.fit(X_train1, y_train1)

report = classification_report(y_test1, y_pred1, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          1.000000  0.918919  0.957746   74.000000
normal         0.869565  1.000000  0.930233   40.000000
accuracy       0.947368  0.947368  0.947368    0.947368
macro avg      0.934783  0.959459  0.943990  114.000000
weighted avg   0.954233  0.947368  0.948092  114.000000


In [20]:
X_train1.head()

Unnamed: 0.1,Unnamed: 0,GLRLM_LongRunHighGrayLevelEmphasis,GLRLM_LowGrayLevelRunEmphasis,GLRLM_RunPercentage,GLRLM_Short owGrayLevelEmphasis,GLRLM_ShortRunEmphasis,contrast_d1_0,correlation_d1_45,correlation_d1_90,correlation_d2_0,...,energy_d3_90,homogeneity_d1_0,homogeneity_d1_135,homogeneity_d1_45,homogeneity_d1_90,homogeneity_d2_0,homogeneity_d2_135,homogeneity_d2_45,homogeneity_d3_0,homogeneity_d3_45
0,-1.728834,0.366372,-0.572715,-0.940529,-0.572715,0.857249,0.481394,0.633923,0.741315,0.482954,...,-0.675261,-0.809214,-0.897579,-0.590167,-0.869759,-0.800884,-0.897579,-0.590167,-0.662043,-0.769996
1,-1.722396,0.366372,-0.572715,-0.940529,-0.572715,0.857249,1.254414,0.395941,0.152625,-0.677103,...,-0.646354,-1.039761,-0.828036,-0.674718,-0.767044,-0.841549,-0.828036,-0.674718,-0.811528,-0.77372
2,-1.715957,0.366372,-0.572715,-0.940529,-0.572715,0.857249,1.031554,0.957659,0.509971,-0.050065,...,-0.683372,-0.913269,-1.071868,-0.160148,-0.593653,-0.920485,-1.071868,-0.160148,-1.064992,-0.333157
3,-1.709518,0.366372,-0.572715,-0.940529,-0.572715,0.857249,1.117547,-2.711753,-0.595905,-2.465356,...,-0.366298,-1.036064,0.359762,-0.874382,-0.314831,-0.917772,0.359762,-0.874382,-0.688836,-0.78707
4,-1.703079,0.366372,-0.572715,-0.940529,-0.572715,0.857249,-0.008513,-1.421406,-1.25379,-0.299646,...,-0.508088,-0.664024,-0.720264,-0.949913,-0.836817,-0.678392,-0.720264,-0.949913,-0.704523,-0.779187


# Random Forest GridSearch (Correlation)

In [None]:
clf = RandomForestClassifier(random_state=42)

param_grid = { 
    'n_estimators': [300, 400, 500],
    'max_depth' : [6, 7, 8],
    'criterion' :['entropy', 'gini'],
    'min_samples_split' :[2, 3, 4],
    'min_samples_leaf' :[1, 2, 3, 4],
    'max_features' :['auto', 'sqrt', 'log2']
}

CV_rfc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5)
CV_rfc.fit(X_train2, y_train2)

# Random Forest Best Parameters (Correlation)

In [None]:
CV_rfc.best_params_

# Random Forest Classifier w/ Results (Correlation)

In [27]:
rfc = RandomForestClassifier(random_state = 42, max_features = 'auto', min_samples_leaf = 4,
                             n_estimators = 500, max_depth = 7, criterion = 'gini')

rfc = rfc.fit(X_train2, y_train2)
y_pred2 = rfc.predict(X_test2)
rfc.fit(X_train2, y_train2)

report = classification_report(y_test2, y_pred2, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

ValueError: X has 12 features, but DecisionTreeClassifier is expecting 60 features as input.

# Decision Tree GridSearch

In [7]:
clf = DecisionTreeClassifier(random_state=42)

param_grid = { 
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth' : [0,1,2],
    'criterion' :['gini','entropy'],
    'splitter' :['best', 'random']
}

CV_dtc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5)
CV_dtc.fit(X_train, y_train)

Traceback (most recent call last):
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueError: max_depth must be greater than zero. 

Traceback (most recent call last):
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueEr

Traceback (most recent call last):
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueError: max_depth must be greater than zero. 

Traceback (most recent call last):
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueEr

 0.51813776 0.55557286 0.51813776 0.55557286 0.51074766 0.57038768
 0.56123226 0.53123918 0.56123226 0.53123918 0.5778297  0.54814815
        nan        nan        nan        nan        nan        nan
 0.51813776 0.55557286 0.51813776 0.55557286 0.54624438 0.57038768
 0.56678782 0.53123918 0.56678782 0.53123918 0.58319488 0.56851852]


GridSearchCV(cv=5, estimator=DecisionTreeClassifier(random_state=42),
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [0, 1, 2],
                         'max_features': ['auto', 'sqrt', 'log2'],
                         'splitter': ['best', 'random']})

# Decision Tree Best Parameters

In [8]:
CV_dtc.best_params_

{'criterion': 'entropy',
 'max_depth': 2,
 'max_features': 'log2',
 'splitter': 'best'}

# Decision Tree Classifier w/ Results

In [9]:
dtc = DecisionTreeClassifier(random_state = 42, criterion = 'entropy', max_depth = 2, max_features = 'log2', splitter = 'best')

dtc = dtc.fit(X_train,y_train)
y_pred = dtc.predict(X_test)
dtc.fit(X_train, y_train)

report = classification_report(y_test, y_pred, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.711864  0.567568  0.631579   74.000000
normal         0.418182  0.575000  0.484211   40.000000
accuracy       0.570175  0.570175  0.570175    0.570175
macro avg      0.565023  0.571284  0.557895  114.000000
weighted avg   0.608818  0.570175  0.579871  114.000000


# Decision Tree GridSearch (Fisher's Discriminant)

In [None]:
clf = DecisionTreeClassifier(random_state=42)

param_grid = { 
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth' : [0,1,2],
    'criterion' :['gini','entropy'],
    'splitter' :['best', 'random']
}

CV_dtc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5)
CV_dtc.fit(X_train1, y_train1)

# Decision Tree Best Parameters (Fisher's Discriminant)

In [None]:
CV_dtc.best_params_

# Decision Tree Classifier w/ Results (Fisher's Discriminant)

In [None]:
dtc = DecisionTreeClassifier(random_state = 42, criterion = 'entropy', max_depth = 2, max_features = 'log2', splitter = 'best')

dtc = dtc.fit(X_train1, y_train1)
y_pred1 = dtc.predict(X_test1)
dtc.fit(X_train1, y_train1)

report = classification_report(y_test1, y_pred1, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

# Decision Tree GridSearch (Correlation)

In [None]:
clf = DecisionTreeClassifier(random_state=42)

param_grid = { 
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth' : [0,1,2],
    'criterion' :['gini','entropy'],
    'splitter' :['best', 'random']
}

CV_dtc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5)
CV_dtc.fit(X_train2, y_train2)

# Decision Tree Best Parameters (Correlation)

In [None]:
CV_dtc.best_params_

# Decision Tree Classifier w/ Results (Correlation)

In [None]:
dtc = DecisionTreeClassifier(random_state = 42, criterion = 'entropy', max_depth = 2, max_features = 'log2', splitter = 'best')

dtc = dtc.fit(X_train2, y_train2)
y_pred2 = dtc.predict(X_test2)
dtc.fit(X_train2, y_train2)

report = classification_report(y_test2, y_pred2, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

# SVC GridSearch

In [10]:
clf = svm.SVC(random_state = 42)

param_grid = { 
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'degree': [2,3,4],
    'gamma': ['scale', 'auto'],
    'tol': [1e-2, 1e-3, 1e-4],
    'cache_size': [100,200,300],
    'coef0':[0,0.5,1,2]
}

CV_svc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5)
CV_svc.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=SVC(random_state=42),
             param_grid={'cache_size': [100, 200, 300], 'coef0': [0, 0.5, 1, 2],
                         'degree': [2, 3, 4], 'gamma': ['scale', 'auto'],
                         'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
                         'tol': [0.01, 0.001, 0.0001]})

# SVC Best Parameters

In [11]:
CV_svc.best_params_

{'cache_size': 100,
 'coef0': 2,
 'degree': 4,
 'gamma': 'scale',
 'kernel': 'poly',
 'tol': 0.01}

# SVC Classifier w/ Results

In [12]:
clf = svm.SVC(random_state = 42, coef0 = 2, cache_size = 100, degree = 3, gamma = 'auto',
                             kernel = 'poly', tol = 0.01)

clf = clf.fit(X_train,y_train)
y_pred = clf.predict(X_test)
clf.fit(X_train, y_train)

report = classification_report(y_test, y_pred, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.779412  0.716216  0.746479   74.000000
normal         0.543478  0.625000  0.581395   40.000000
accuracy       0.684211  0.684211  0.684211    0.684211
macro avg      0.661445  0.670608  0.663937  114.000000
weighted avg   0.696628  0.684211  0.688555  114.000000


# SVC GridSearch (Fisher's Discriminant)

In [None]:
clf = svm.SVC(random_state = 42)

param_grid = { 
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'degree': [2,3,4],
    'gamma': ['scale', 'auto'],
    'tol': [1e-2, 1e-3, 1e-4],
    'cache_size': [100,200,300],
    'coef0':[0,0.5,1,2]
}

CV_svc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5)
CV_svc.fit(X_train1, y_train1)

# SVC Best Parameters (Fisher's Discriminant)

In [None]:
CV_svc.best_params_

# SVC Classifier w/ Results (Fisher's Discriminant)

In [None]:
clf = svm.SVC(random_state = 42, coef0 = 2, cache_size = 100, degree = 3, gamma = 'auto',
                             kernel = 'poly', tol = 0.01)

clf = clf.fit(X_train1, y_train1)
y_pred1 = clf.predict(X_test1)
clf.fit(X_train1, y_train1)

report = classification_report(y_test1, y_pred1, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

# SVC GridSearch (Correlation)

In [None]:
clf = svm.SVC(random_state = 42)

param_grid = { 
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'degree': [2,3,4],
    'gamma': ['scale', 'auto'],
    'tol': [1e-2, 1e-3, 1e-4],
    'cache_size': [100,200,300],
    'coef0':[0,0.5,1,2]
}

CV_svc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5)
CV_svc.fit(X_train2, y_train2)

# SVC Best Parameters (Correlation)

In [None]:
CV_svc.best_params_

# SVC Classifier w/ Results (Correlation)

In [None]:
clf = svm.SVC(random_state = 42, coef0 = 2, cache_size = 100, degree = 3, gamma = 'auto',
                             kernel = 'poly', tol = 0.01)

clf = clf.fit(X_train2, y_train2)
y_pred2 = clf.predict(X_test2)
clf.fit(X_train2, y_train2)

report = classification_report(y_test2, y_pred2, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

# KNN Grid Search

In [13]:
clf = KNeighborsClassifier()

param_grid = { 
    'weights': ['uniform', 'distance'],
    'metric': ['minkowski', 'euclidean'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'n_neighbors': [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
}

CV_knnc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5, scoring='accuracy')
CV_knnc.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=KNeighborsClassifier(),
             param_grid={'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
                         'metric': ['minkowski', 'euclidean'],
                         'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
                                         13, 14, 15],
                         'weights': ['uniform', 'distance']},
             scoring='accuracy')

# KNN Best Parameters

In [14]:
CV_knnc.best_params_

{'algorithm': 'auto',
 'metric': 'minkowski',
 'n_neighbors': 2,
 'weights': 'uniform'}

# KNN Classifier w/ Results

In [15]:
knnc = KNeighborsClassifier(algorithm='kd_tree', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=10, n_neighbors=2, p=3,
                     weights='uniform')
knnc = knnc.fit(X_train,y_train)
y_pred = knnc.predict(X_test)
knnc.fit(X_train, y_train)
report = classification_report(y_test, y_pred, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.687500  0.891892  0.776471   74.000000
normal         0.555556  0.250000  0.344828   40.000000
accuracy       0.666667  0.666667  0.666667    0.666667
macro avg      0.621528  0.570946  0.560649  114.000000
weighted avg   0.641204  0.666667  0.625017  114.000000


# KNN Grid Search (Fisher's Discriminant)

In [None]:
clf = KNeighborsClassifier()

param_grid = { 
    'weights': ['uniform', 'distance'],
    'metric': ['minkowski', 'euclidean'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'n_neighbors': [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
}

CV_knnc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5, scoring='accuracy')
CV_knnc.fit(X_train1, y_train1)

# KNN Best Parameters (Fisher's Discriminant)

In [None]:
CV_knnc.best_params_

# KNN Classifier w/ Results (Fisher's Discriminant)

In [None]:
knnc = KNeighborsClassifier(algorithm='kd_tree', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=10, n_neighbors=2, p=3,
                     weights='uniform')
knnc = knnc.fit(X_train1, y_train1)
y_pred1 = knnc.predict(X_test1)
knnc.fit(X_train1, y_train1)
report = classification_report(y_test1, y_pred1, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

# KNN Grid Search (Correlation)

In [None]:
clf = KNeighborsClassifier()

param_grid = { 
    'weights': ['uniform', 'distance'],
    'metric': ['minkowski', 'euclidean'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'n_neighbors': [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
}

CV_knnc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5, scoring='accuracy')
CV_knnc.fit(X_train2, y_train2)

# KNN Best Parameters (Correlation)

In [None]:
CV_knnc.best_params_

# KNN Classifier w/ Results (Correlation)

In [None]:
knnc = KNeighborsClassifier(algorithm='kd_tree', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=10, n_neighbors=2, p=3,
                     weights='uniform')
knnc = knnc.fit(X_train2, y_train2)
y_pred2 = knnc.predict(X_test2)
knnc.fit(X_train2, y_train2)
report = classification_report(y_test2, y_pred2, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

# ANN Grid Search

In [114]:
clf = MLPClassifier()

param_grid = { 
    'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant','adaptive'],
}

CV_mlpc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5, scoring='accuracy')
CV_mlpc.fit(X_train, y_train)















GridSearchCV(cv=5, estimator=MLPClassifier(),
             param_grid={'activation': ['tanh', 'relu'],
                         'alpha': [0.0001, 0.05],
                         'hidden_layer_sizes': [(50, 50, 50), (50, 100, 50),
                                                (100,)],
                         'learning_rate': ['constant', 'adaptive'],
                         'solver': ['sgd', 'adam']},
             scoring='accuracy')

# ANN Best Parameters

In [115]:
CV_mlpc.best_params_

{'activation': 'tanh',
 'alpha': 0.05,
 'hidden_layer_sizes': (50, 50, 50),
 'learning_rate': 'constant',
 'solver': 'adam'}

# ANN Classifier w/ Results

In [194]:
mlpc = MLPClassifier(max_iter=150,
                    momentum=0.6,
                    solver='adam',
                    activation='tanh',
                    learning_rate_init=0.005,
                    alpha=0.05)
mlpc = mlpc.fit(X_train,y_train)
y_pred = mlpc.predict(X_test)
mlpc.fit(X_train, y_train)
report = classification_report(y_test, y_pred, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)



              precision    recall  f1-score     support
fatty          0.721519  0.770270  0.745098   74.000000
normal         0.514286  0.450000  0.480000   40.000000
accuracy       0.657895  0.657895  0.657895    0.657895
macro avg      0.617902  0.610135  0.612549  114.000000
weighted avg   0.648806  0.657895  0.652081  114.000000




# ANN Grid Search (Fisher's Discriminant)

In [None]:
clf = MLPClassifier()

param_grid = { 
    'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant','adaptive'],
}

CV_mlpc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5, scoring='accuracy')
CV_mlpc.fit(X_train1, y_train1)

# ANN Best Parameters (Fisher's Discriminant)

In [None]:
CV_mlpc.best_params_

# ANN Classifier w/ Results (Fisher's Discriminant)

In [None]:
mlpc = MLPClassifier(max_iter=150, momentum=0.6, solver='adam', activation='tanh', learning_rate_init=0.005, alpha=0.05)
mlpc = mlpc.fit(X_train1, y_train1)
y_pred1 = mlpc.predict(X_test1)
mlpc.fit(X_train1, y_train1)
report = classification_report(y_test1, y_pred1, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

# ANN Grid Search (Correlation)

In [None]:
from sklearn.neural_network import MLPClassifier

clf = MLPClassifier()

param_grid = { 
    'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant','adaptive'],
}

CV_mlpc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5, scoring='accuracy')
CV_mlpc.fit(X_train2, y_train2)

# ANN Best Parameters (Correlation)

In [None]:
CV_mlpc.best_params_

# ANN Classifier w/ Results (Correlation)

In [None]:
mlpc = MLPClassifier(max_iter=150, momentum=0.6, solver='adam', activation='tanh', learning_rate_init=0.005, alpha=0.05)
mlpc = mlpc.fit(X_train2, y_train2)
y_pred2 = mlpc.predict(X_test2)
mlpc.fit(X_train2, y_train2)
report = classification_report(y_test2, y_pred2, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

# Naive Bayes Grid Search

In [19]:
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
import sklearn

clf = GaussianNB()

param_grid = { 
    'var_smoothing': np.logspace(0,-9, num=100)
}

cv_method = RepeatedStratifiedKFold(n_splits=5, 
                                    n_repeats=3, 
                                    random_state=900)

CV_nbc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= cv_method, scoring='accuracy')
CV_nbc.fit(X_train, y_train)

GridSearchCV(cv=RepeatedStratifiedKFold(n_repeats=3, n_splits=5, random_state=900),
             estimator=GaussianNB(),
             param_grid={'var_smoothing': array([1.00000000e+00, 8.11130831e-01, 6.57933225e-01, 5.33669923e-01,
       4.32876128e-01, 3.51119173e-01, 2.84803587e-01, 2.31012970e-01,
       1.87381742e-01, 1.51991108e-01, 1.23284674e-01, 1.00000000e-01,
       8.11130831e-02, 6.57933225e-02, 5...
       1.23284674e-07, 1.00000000e-07, 8.11130831e-08, 6.57933225e-08,
       5.33669923e-08, 4.32876128e-08, 3.51119173e-08, 2.84803587e-08,
       2.31012970e-08, 1.87381742e-08, 1.51991108e-08, 1.23284674e-08,
       1.00000000e-08, 8.11130831e-09, 6.57933225e-09, 5.33669923e-09,
       4.32876128e-09, 3.51119173e-09, 2.84803587e-09, 2.31012970e-09,
       1.87381742e-09, 1.51991108e-09, 1.23284674e-09, 1.00000000e-09])},
             scoring='accuracy')

In [20]:
sklearn.metrics.SCORERS.keys()

dict_keys(['explained_variance', 'r2', 'max_error', 'neg_median_absolute_error', 'neg_mean_absolute_error', 'neg_mean_absolute_percentage_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_root_mean_squared_error', 'neg_mean_poisson_deviance', 'neg_mean_gamma_deviance', 'accuracy', 'top_k_accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted', 'balanced_accuracy', 'average_precision', 'neg_log_loss', 'neg_brier_score', 'adjusted_rand_score', 'rand_score', 'homogeneity_score', 'completeness_score', 'v_measure_score', 'mutual_info_score', 'adjusted_mutual_info_score', 'normalized_mutual_info_score', 'fowlkes_mallows_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'jaccard', 'jaccard_macro', 'jaccard_micro', 'jaccard_samples', 'jaccard_wei

# Naive Bayes Best Parameters

In [21]:
CV_nbc.best_params_

{'var_smoothing': 0.43287612810830584}

# Naive Bayes Classifier w/ Results

In [22]:
nbc = GaussianNB(var_smoothing=0.01873817422860384)
nbc = nbc.fit(X_train,y_train)
y_pred = nbc.predict(X_test)
nbc.fit(X_train, y_train)
report = classification_report(y_test, y_pred, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.647059  0.445946  0.528000   74.000000
normal         0.349206  0.550000  0.427184   40.000000
accuracy       0.482456  0.482456  0.482456    0.482456
macro avg      0.498133  0.497973  0.477592  114.000000
weighted avg   0.542549  0.482456  0.492626  114.000000


In [23]:
nbc = GaussianNB(var_smoothing=0.0657933224657568)
nbc = nbc.fit(X_train,y_train)
y_pred = nbc.predict(X_test)
nbc.fit(X_train, y_train)
report = classification_report(y_test, y_pred, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.641509  0.459459  0.535433   74.000000
normal         0.344262  0.525000  0.415842   40.000000
accuracy       0.482456  0.482456  0.482456    0.482456
macro avg      0.492886  0.492230  0.475637  114.000000
weighted avg   0.537212  0.482456  0.493471  114.000000


# Naive Bayes Grid Search (Fisher's Discriminant)

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
import sklearn

clf = GaussianNB()

param_grid = { 
    'var_smoothing': np.logspace(0,-9, num=100)
}

cv_method = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=900)

CV_nbc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= cv_method, scoring='accuracy')
CV_nbc.fit(X_train1, y_train1)

# Naive Bayes Best Parameters (Fisher's Discriminant)

In [None]:
CV_nbc.best_params_

# Naive Bayes Classifier w/ Results

In [None]:
nbc = GaussianNB(var_smoothing=0.01873817422860384)
nbc = nbc.fit(X_train1, y_train1)
y_pred1 = nbc.predict(X_test1)
nbc.fit(X_train1, y_train1)
report = classification_report(y_test1, y_pred1, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

# Naive Bayes Grid Search (Correlation)

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
import sklearn

clf = GaussianNB()

param_grid = { 
    'var_smoothing': np.logspace(0,-9, num=100)
}

cv_method = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=900)

CV_nbc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= cv_method, scoring='accuracy')
CV_nbc.fit(X_train2, y_train2)

# Naive Bayes Best Parameters (Correlation)

In [None]:
CV_nbc.best_params_

# Naive Bayes Classifier w/ Results (Correlation)

In [None]:
nbc = GaussianNB(var_smoothing=0.01873817422860384)
nbc = nbc.fit(X_train2, y_train2)
y_pred2 = nbc.predict(X_test2)
nbc.fit(X_train2, y_train2)
report = classification_report(y_test2, y_pred2, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)