# Model Training, Testing and Hyperparameter Tuning

## Importing Libraries

In [28]:
import numpy as np
import pandas as pd

from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn import metrics

from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

# Training Data

In [29]:
train = pd.read_csv('dataset/train.csv')

X_train = train.copy()
y_train = X_train.pop('target')

std = StandardScaler()
std.fit(X_train)
X_train = pd.DataFrame(std.transform(X_train), columns = X_train.columns)

# Fisher's Score Training Data

In [30]:
train1 = pd.read_csv('dataset/fishertrain.csv')

X_train1 = train1.copy()
y_train1 = X_train1.pop('target')

std = StandardScaler()
std.fit(X_train1)
X_train1 = pd.DataFrame(std.transform(X_train1), columns = X_train1.columns)

# Correlation Training Data

In [31]:
train2 = pd.read_csv('dataset/corrtrain.csv')

X_train2 = train2.copy()
y_train2 = X_train2.pop('target')

std = StandardScaler()
std.fit(X_train2)
X_train2 = pd.DataFrame(std.transform(X_train2), columns = X_train2.columns)

# Testing Data

In [32]:
test = pd.read_csv('dataset/test.csv')
X_test = test.copy()
y_test = X_test.pop('target')

std = StandardScaler()
std.fit(X_test)
X_test = pd.DataFrame(std.transform(X_test), columns = X_test.columns)

# Fisher's Score Testing Data

In [33]:
test1 = pd.read_csv('dataset/fishertest.csv')
X_test1 = test1.copy()
y_test1 = X_test1.pop('target')

std = StandardScaler()
std.fit(X_test1)
X_test1 = pd.DataFrame(std.transform(X_test1), columns = X_test1.columns)

# Correlation Testing Data

In [34]:
test2 = pd.read_csv('dataset/corrtest.csv')
X_test2 = test2.copy()
y_test2 = X_test2.pop('target')

std = StandardScaler()
std.fit(X_test2)
X_test2 = pd.DataFrame(std.transform(X_test2), columns = X_test2.columns)

# Random Forest GridSearch (Train)

In [35]:
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(random_state=42)

param_grid = { 
    'n_estimators': [400, 500],
    'max_features': ['auto'],
    'max_depth' : [6],
    'criterion' :['entropy']
}

CV_rfc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5)
CV_rfc.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=RandomForestClassifier(random_state=42),
             param_grid={'criterion': ['entropy'], 'max_depth': [6],
                         'max_features': ['auto'], 'n_estimators': [400, 500]})

# Random Forest Best Parameters (Train)

In [36]:
CV_rfc.best_params_

{'criterion': 'entropy',
 'max_depth': 6,
 'max_features': 'auto',
 'n_estimators': 500}

# Random Forest Classifier w/ Results (Train)

In [37]:
rfc = RandomForestClassifier(random_state=42, max_features='auto', n_estimators= 500, max_depth=6, criterion='entropy')

rfc = rfc.fit(X_train,y_train)
y_pred = rfc.predict(X_test)
rfc.fit(X_train, y_train)

report = classification_report(y_test, y_pred, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.769231  0.945946  0.848485   74.000000
normal         0.826087  0.475000  0.603175   40.000000
accuracy       0.780702  0.780702  0.780702    0.780702
macro avg      0.797659  0.710473  0.725830  114.000000
weighted avg   0.789180  0.780702  0.762411  114.000000


# Random Forest GridSearch (Fisher's Score)

In [38]:
clf = RandomForestClassifier(random_state=42)

param_grid = {
    'n_estimators': [300, 400, 500],
    'max_depth' : [6, 7, 8],
    'criterion' :['entropy', 'gini'],
    #'min_samples_split' :[2, 3, 4], Default Values
    #'min_samples_leaf' :[1, 2, 3, 4], Default Values
    'max_features' :['auto', 'sqrt', 'log2']
}

CV_rfc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5)
CV_rfc.fit(X_train1, y_train1)

GridSearchCV(cv=5, estimator=RandomForestClassifier(random_state=42),
             param_grid={'criterion': ['entropy', 'gini'],
                         'max_depth': [6, 7, 8],
                         'max_features': ['auto', 'sqrt', 'log2'],
                         'n_estimators': [300, 400, 500]})

# Random Forest Best Parameters (Fisher's Score)

In [39]:
CV_rfc.best_params_

{'criterion': 'gini',
 'max_depth': 7,
 'max_features': 'auto',
 'n_estimators': 500}

# Random Forest Classifier w/ Results (Fisher's Score)

In [40]:
rfc = RandomForestClassifier(random_state=42, max_features='auto', n_estimators= 500, max_depth=8, criterion='gini')

rfc = rfc.fit(X_train1, y_train1)
y_pred1 = rfc.predict(X_test1)
rfc.fit(X_train1, y_train1)

report = classification_report(y_test1, y_pred1, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.786885  0.648649  0.711111   74.000000
normal         0.509434  0.675000  0.580645   40.000000
accuracy       0.657895  0.657895  0.657895    0.657895
macro avg      0.648160  0.661824  0.645878  114.000000
weighted avg   0.689534  0.657895  0.665334  114.000000


In [41]:
X_train1.head()

Unnamed: 0,GLRLM_LongRunHighGrayLevelEmphasis,GLRLM_LowGrayLevelRunEmphasis,GLRLM_RunPercentage,GLRLM_Short owGrayLevelEmphasis,GLRLM_ShortRunEmphasis,contrast_d1_0,correlation_d1_45,correlation_d1_90,correlation_d2_0,correlation_d2_135,...,energy_d3_90,homogeneity_d1_0,homogeneity_d1_135,homogeneity_d1_45,homogeneity_d1_90,homogeneity_d2_0,homogeneity_d2_135,homogeneity_d2_45,homogeneity_d3_0,homogeneity_d3_45
0,0.366372,-0.572715,-0.940529,-0.572715,0.857249,0.481394,0.633923,0.741315,0.482954,0.420214,...,-0.675261,-0.809214,-0.897579,-0.590167,-0.869759,-0.800884,-0.897579,-0.590167,-0.662043,-0.769996
1,0.366372,-0.572715,-0.940529,-0.572715,0.857249,1.254414,0.395941,0.152625,-0.677103,-0.539614,...,-0.646354,-1.039761,-0.828036,-0.674718,-0.767044,-0.841549,-0.828036,-0.674718,-0.811528,-0.77372
2,0.366372,-0.572715,-0.940529,-0.572715,0.857249,1.031554,0.957659,0.509971,-0.050065,-0.260695,...,-0.683372,-0.913269,-1.071868,-0.160148,-0.593653,-0.920485,-1.071868,-0.160148,-1.064992,-0.333157
3,0.366372,-0.572715,-0.940529,-0.572715,0.857249,1.117547,-2.711753,-0.595905,-2.465356,1.261876,...,-0.366298,-1.036064,0.359762,-0.874382,-0.314831,-0.917772,0.359762,-0.874382,-0.688836,-0.78707
4,0.366372,-0.572715,-0.940529,-0.572715,0.857249,-0.008513,-1.421406,-1.25379,-0.299646,-0.436729,...,-0.508088,-0.664024,-0.720264,-0.949913,-0.836817,-0.678392,-0.720264,-0.949913,-0.704523,-0.779187


# Random Forest GridSearch (Correlation)

In [42]:
clf = RandomForestClassifier(random_state=42)

param_grid = { 
    'n_estimators': [300, 400, 500],
    'max_depth' : [6, 7, 8],
    'criterion' :['entropy', 'gini'],
    'min_samples_split' :[2, 3, 4],
    'min_samples_leaf' :[1, 2, 3, 4],
    'max_features' :['auto', 'sqrt', 'log2']
}

CV_rfc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5)
CV_rfc.fit(X_train2, y_train2)

GridSearchCV(cv=5, estimator=RandomForestClassifier(random_state=42),
             param_grid={'criterion': ['entropy', 'gini'],
                         'max_depth': [6, 7, 8],
                         'max_features': ['auto', 'sqrt', 'log2'],
                         'min_samples_leaf': [1, 2, 3, 4],
                         'min_samples_split': [2, 3, 4],
                         'n_estimators': [300, 400, 500]})

# Random Forest Best Parameters (Correlation)

In [43]:
CV_rfc.best_params_

{'criterion': 'entropy',
 'max_depth': 7,
 'max_features': 'auto',
 'min_samples_leaf': 4,
 'min_samples_split': 2,
 'n_estimators': 500}

# Random Forest Classifier w/ Results (Correlation)

In [44]:
rfc = RandomForestClassifier(random_state = 42, max_features = 'auto', min_samples_leaf = 4,
                             n_estimators = 500, max_depth = 7, criterion = 'gini')

rfc = rfc.fit(X_train2, y_train2)
y_pred2 = rfc.predict(X_test2)
rfc.fit(X_train2, y_train2)

report = classification_report(y_test2, y_pred2, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.764045  0.918919  0.834356   74.000000
normal         0.760000  0.475000  0.584615   40.000000
accuracy       0.763158  0.763158  0.763158    0.763158
macro avg      0.762022  0.696959  0.709486  114.000000
weighted avg   0.762626  0.763158  0.746728  114.000000


# Decision Tree GridSearch

In [45]:
clf = DecisionTreeClassifier(random_state=42)

param_grid = { 
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth' : [0,1,2],
    'criterion' :['gini','entropy'],
    'splitter' :['best', 'random']
}

CV_dtc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5)
CV_dtc.fit(X_train, y_train)

Traceback (most recent call last):
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueError: max_depth must be greater than zero. 

Traceback (most recent call last):
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueEr

Traceback (most recent call last):
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueError: max_depth must be greater than zero. 

Traceback (most recent call last):
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueEr

 0.51813776 0.55557286 0.51813776 0.55557286 0.51074766 0.57038768
 0.56123226 0.53123918 0.56123226 0.53123918 0.5778297  0.54814815
        nan        nan        nan        nan        nan        nan
 0.51813776 0.55557286 0.51813776 0.55557286 0.54624438 0.57038768
 0.56678782 0.53123918 0.56678782 0.53123918 0.58319488 0.56851852]


GridSearchCV(cv=5, estimator=DecisionTreeClassifier(random_state=42),
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [0, 1, 2],
                         'max_features': ['auto', 'sqrt', 'log2'],
                         'splitter': ['best', 'random']})

# Decision Tree Best Parameters

In [46]:
CV_dtc.best_params_

{'criterion': 'entropy',
 'max_depth': 2,
 'max_features': 'log2',
 'splitter': 'best'}

# Decision Tree Classifier w/ Results

In [47]:
dtc = DecisionTreeClassifier(random_state = 42, criterion = 'entropy', max_depth = 2, max_features = 'log2', splitter = 'best')

dtc = dtc.fit(X_train,y_train)
y_pred = dtc.predict(X_test)
dtc.fit(X_train, y_train)

report = classification_report(y_test, y_pred, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.711864  0.567568  0.631579   74.000000
normal         0.418182  0.575000  0.484211   40.000000
accuracy       0.570175  0.570175  0.570175    0.570175
macro avg      0.565023  0.571284  0.557895  114.000000
weighted avg   0.608818  0.570175  0.579871  114.000000


# Decision Tree GridSearch (Fisher's Score)

In [48]:
clf = DecisionTreeClassifier(random_state=42)

param_grid = { 
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth' : [0,1,2],
    'criterion' :['gini','entropy'],
    'splitter' :['best', 'random']
}

CV_dtc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5)
CV_dtc.fit(X_train1, y_train1)

Traceback (most recent call last):
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueError: max_depth must be greater than zero. 

Traceback (most recent call last):
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueEr

Traceback (most recent call last):
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueError: max_depth must be greater than zero. 

Traceback (most recent call last):
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueEr

 0.49593285 0.58736587 0.49593285 0.58736587 0.49593285 0.58736587
 0.52564901 0.55922465 0.52564901 0.55922465 0.52566632 0.54811353
        nan        nan        nan        nan        nan        nan
 0.49593285 0.58736587 0.49593285 0.58736587 0.49593285 0.58736587
 0.52564901 0.54614053 0.52564901 0.54614053 0.52566632 0.54811353]


GridSearchCV(cv=5, estimator=DecisionTreeClassifier(random_state=42),
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [0, 1, 2],
                         'max_features': ['auto', 'sqrt', 'log2'],
                         'splitter': ['best', 'random']})

# Decision Tree Best Parameters (Fisher's Score)

In [49]:
CV_dtc.best_params_

{'criterion': 'gini',
 'max_depth': 1,
 'max_features': 'auto',
 'splitter': 'random'}

# Decision Tree Classifier w/ Results (Fisher's Score)

In [50]:
dtc = DecisionTreeClassifier(random_state = 42, criterion = 'entropy', max_depth = 2, max_features = 'log2', splitter = 'best')

dtc = dtc.fit(X_train1, y_train1)
y_pred1 = dtc.predict(X_test1)
dtc.fit(X_train1, y_train1)

report = classification_report(y_test1, y_pred1, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.625000  0.472973  0.538462   74.000000
normal         0.327586  0.475000  0.387755   40.000000
accuracy       0.473684  0.473684  0.473684    0.473684
macro avg      0.476293  0.473986  0.463108  114.000000
weighted avg   0.520644  0.473684  0.485582  114.000000


# Decision Tree GridSearch (Correlation)

In [51]:
clf = DecisionTreeClassifier(random_state=42)

param_grid = { 
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth' : [0,1,2],
    'criterion' :['gini','entropy'],
    'splitter' :['best', 'random']
}

CV_dtc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5)
CV_dtc.fit(X_train2, y_train2)

Traceback (most recent call last):
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueError: max_depth must be greater than zero. 

Traceback (most recent call last):
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueEr

Traceback (most recent call last):
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueError: max_depth must be greater than zero. 

Traceback (most recent call last):
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueEr

Traceback (most recent call last):
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueError: max_depth must be greater than zero. 

Traceback (most recent call last):
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueEr

Traceback (most recent call last):
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueError: max_depth must be greater than zero. 

Traceback (most recent call last):
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\user\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 286, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueEr

GridSearchCV(cv=5, estimator=DecisionTreeClassifier(random_state=42),
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [0, 1, 2],
                         'max_features': ['auto', 'sqrt', 'log2'],
                         'splitter': ['best', 'random']})

# Decision Tree Best Parameters (Correlation)

In [52]:
CV_dtc.best_params_

{'criterion': 'entropy',
 'max_depth': 2,
 'max_features': 'auto',
 'splitter': 'best'}

# Decision Tree Classifier w/ Results (Correlation)

In [53]:
dtc = DecisionTreeClassifier(random_state = 42, criterion = 'entropy', max_depth = 2, max_features = 'log2', splitter = 'best')

dtc = dtc.fit(X_train2, y_train2)
y_pred2 = dtc.predict(X_test2)
dtc.fit(X_train2, y_train2)

report = classification_report(y_test2, y_pred2, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.794872  0.837838  0.815789   74.000000
normal         0.666667  0.600000  0.631579   40.000000
accuracy       0.754386  0.754386  0.754386    0.754386
macro avg      0.730769  0.718919  0.723684  114.000000
weighted avg   0.749888  0.754386  0.751154  114.000000


# SVC GridSearch

In [54]:
clf = svm.SVC(random_state = 42)

param_grid = { 
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'degree': [2,3,4],
    'gamma': ['scale', 'auto'],
    'tol': [1e-2, 1e-3, 1e-4],
    'cache_size': [100,200,300],
    'coef0':[0,0.5,1,2]
}

CV_svc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5)
CV_svc.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=SVC(random_state=42),
             param_grid={'cache_size': [100, 200, 300], 'coef0': [0, 0.5, 1, 2],
                         'degree': [2, 3, 4], 'gamma': ['scale', 'auto'],
                         'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
                         'tol': [0.01, 0.001, 0.0001]})

# SVC Best Parameters

In [55]:
CV_svc.best_params_

{'cache_size': 100,
 'coef0': 2,
 'degree': 4,
 'gamma': 'scale',
 'kernel': 'poly',
 'tol': 0.01}

# SVC Classifier w/ Results

In [56]:
clf = svm.SVC(random_state = 42, coef0 = 2, cache_size = 100, degree = 3, gamma = 'auto',
                             kernel = 'poly', tol = 0.01)

clf = clf.fit(X_train,y_train)
y_pred = clf.predict(X_test)
clf.fit(X_train, y_train)

report = classification_report(y_test, y_pred, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.779412  0.716216  0.746479   74.000000
normal         0.543478  0.625000  0.581395   40.000000
accuracy       0.684211  0.684211  0.684211    0.684211
macro avg      0.661445  0.670608  0.663937  114.000000
weighted avg   0.696628  0.684211  0.688555  114.000000


# SVC GridSearch (Fisher's Score)

In [57]:
clf = svm.SVC(random_state = 42)

param_grid = { 
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'degree': [2,3,4],
    'gamma': ['scale', 'auto'],
    'tol': [1e-2, 1e-3, 1e-4],
    'cache_size': [100,200,300],
    'coef0':[0,0.5,1,2]
}

CV_svc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5)
CV_svc.fit(X_train1, y_train1)

GridSearchCV(cv=5, estimator=SVC(random_state=42),
             param_grid={'cache_size': [100, 200, 300], 'coef0': [0, 0.5, 1, 2],
                         'degree': [2, 3, 4], 'gamma': ['scale', 'auto'],
                         'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
                         'tol': [0.01, 0.001, 0.0001]})

# SVC Best Parameters (Fisher's Score)

In [58]:
CV_svc.best_params_

{'cache_size': 100,
 'coef0': 2,
 'degree': 4,
 'gamma': 'auto',
 'kernel': 'poly',
 'tol': 0.01}

# SVC Classifier w/ Results (Fisher's Score)

In [59]:
clf = svm.SVC(random_state = 42, coef0 = 2, cache_size = 100, degree = 3, gamma = 'auto',
                             kernel = 'poly', tol = 0.01)

clf = clf.fit(X_train1, y_train1)
y_pred1 = clf.predict(X_test1)
clf.fit(X_train1, y_train1)

report = classification_report(y_test1, y_pred1, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.676471  0.621622  0.647887   74.000000
normal         0.391304  0.450000  0.418605   40.000000
accuracy       0.561404  0.561404  0.561404    0.561404
macro avg      0.533887  0.535811  0.533246  114.000000
weighted avg   0.576412  0.561404  0.567437  114.000000


# SVC GridSearch (Correlation)

In [60]:
clf = svm.SVC(random_state = 42)

param_grid = { 
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'degree': [2,3,4],
    'gamma': ['scale', 'auto'],
    'tol': [1e-2, 1e-3, 1e-4],
    'cache_size': [100,200,300],
    'coef0':[0,0.5,1,2]
}

CV_svc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5)
CV_svc.fit(X_train2, y_train2)

GridSearchCV(cv=5, estimator=SVC(random_state=42),
             param_grid={'cache_size': [100, 200, 300], 'coef0': [0, 0.5, 1, 2],
                         'degree': [2, 3, 4], 'gamma': ['scale', 'auto'],
                         'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
                         'tol': [0.01, 0.001, 0.0001]})

# SVC Best Parameters (Correlation)

In [61]:
CV_svc.best_params_

{'cache_size': 100,
 'coef0': 2,
 'degree': 2,
 'gamma': 'auto',
 'kernel': 'sigmoid',
 'tol': 0.01}

# SVC Classifier w/ Results (Correlation)

In [62]:
clf = svm.SVC(random_state = 42, coef0 = 2, cache_size = 100, degree = 3, gamma = 'auto',
                             kernel = 'poly', tol = 0.01)

clf = clf.fit(X_train2, y_train2)
y_pred2 = clf.predict(X_test2)
clf.fit(X_train2, y_train2)

report = classification_report(y_test2, y_pred2, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.743243  0.743243  0.743243   74.000000
normal         0.525000  0.525000  0.525000   40.000000
accuracy       0.666667  0.666667  0.666667    0.666667
macro avg      0.634122  0.634122  0.634122  114.000000
weighted avg   0.666667  0.666667  0.666667  114.000000


# KNN Grid Search

In [63]:
clf = KNeighborsClassifier()

param_grid = { 
    'weights': ['uniform', 'distance'],
    'metric': ['minkowski', 'euclidean'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'n_neighbors': [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
}

CV_knnc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5, scoring='accuracy')
CV_knnc.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=KNeighborsClassifier(),
             param_grid={'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
                         'metric': ['minkowski', 'euclidean'],
                         'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
                                         13, 14, 15],
                         'weights': ['uniform', 'distance']},
             scoring='accuracy')

# KNN Best Parameters

In [64]:
CV_knnc.best_params_

{'algorithm': 'auto',
 'metric': 'minkowski',
 'n_neighbors': 2,
 'weights': 'uniform'}

# KNN Classifier w/ Results

In [114]:
knnc = KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=10, n_neighbors=2, p=3,
                     weights='uniform')
knnc = knnc.fit(X_train,y_train)
y_pred = knnc.predict(X_test)
knnc.fit(X_train, y_train)
report = classification_report(y_test, y_pred, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.687500  0.891892  0.776471   74.000000
normal         0.555556  0.250000  0.344828   40.000000
accuracy       0.666667  0.666667  0.666667    0.666667
macro avg      0.621528  0.570946  0.560649  114.000000
weighted avg   0.641204  0.666667  0.625017  114.000000


# KNN Grid Search (Fisher's Score)

In [66]:
clf = KNeighborsClassifier()

param_grid = { 
    'weights': ['uniform', 'distance'],
    'metric': ['minkowski', 'euclidean'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'n_neighbors': [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
}

CV_knnc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5, scoring='accuracy')
CV_knnc.fit(X_train1, y_train1)

GridSearchCV(cv=5, estimator=KNeighborsClassifier(),
             param_grid={'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
                         'metric': ['minkowski', 'euclidean'],
                         'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
                                         13, 14, 15],
                         'weights': ['uniform', 'distance']},
             scoring='accuracy')

# KNN Best Parameters (Fisher's Score)

In [67]:
CV_knnc.best_params_

{'algorithm': 'auto',
 'metric': 'minkowski',
 'n_neighbors': 5,
 'weights': 'distance'}

# KNN Classifier w/ Results (Fisher's Score)

In [121]:
knnc = KNeighborsClassifier(algorithm='kd_tree', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=10, n_neighbors=2, p=3,
                     weights='uniform')
knnc = knnc.fit(X_train1, y_train1)
y_pred1 = knnc.predict(X_test1)
knnc.fit(X_train1, y_train1)
report = classification_report(y_test1, y_pred1, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.698925  0.878378  0.778443   74.000000
normal         0.571429  0.300000  0.393443   40.000000
accuracy       0.675439  0.675439  0.675439    0.675439
macro avg      0.635177  0.589189  0.585943  114.000000
weighted avg   0.654189  0.675439  0.643355  114.000000


# KNN Grid Search (Correlation)

In [69]:
clf = KNeighborsClassifier()

param_grid = { 
    'weights': ['uniform', 'distance'],
    'metric': ['minkowski', 'euclidean'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'n_neighbors': [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
}

CV_knnc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5, scoring='accuracy')
CV_knnc.fit(X_train2, y_train2)

GridSearchCV(cv=5, estimator=KNeighborsClassifier(),
             param_grid={'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
                         'metric': ['minkowski', 'euclidean'],
                         'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
                                         13, 14, 15],
                         'weights': ['uniform', 'distance']},
             scoring='accuracy')

# KNN Best Parameters (Correlation)

In [70]:
CV_knnc.best_params_

{'algorithm': 'auto',
 'metric': 'minkowski',
 'n_neighbors': 2,
 'weights': 'uniform'}

# KNN Classifier w/ Results (Correlation)

In [71]:
knnc = KNeighborsClassifier(algorithm='kd_tree', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=10, n_neighbors=2, p=3,
                     weights='uniform')
knnc = knnc.fit(X_train2, y_train2)
y_pred2 = knnc.predict(X_test2)
knnc.fit(X_train2, y_train2)
report = classification_report(y_test2, y_pred2, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.682353  0.783784  0.729560   74.000000
normal         0.448276  0.325000  0.376812   40.000000
accuracy       0.622807  0.622807  0.622807    0.622807
macro avg      0.565314  0.554392  0.553186  114.000000
weighted avg   0.600221  0.622807  0.605788  114.000000


# ANN Grid Search

In [72]:
clf = MLPClassifier()

param_grid = { 
    'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant','adaptive'],
}

CV_mlpc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5, scoring='accuracy')
CV_mlpc.fit(X_train, y_train)















GridSearchCV(cv=5, estimator=MLPClassifier(),
             param_grid={'activation': ['tanh', 'relu'],
                         'alpha': [0.0001, 0.05],
                         'hidden_layer_sizes': [(50, 50, 50), (50, 100, 50),
                                                (100,)],
                         'learning_rate': ['constant', 'adaptive'],
                         'solver': ['sgd', 'adam']},
             scoring='accuracy')

# ANN Best Parameters

In [73]:
CV_mlpc.best_params_

{'activation': 'tanh',
 'alpha': 0.0001,
 'hidden_layer_sizes': (50, 50, 50),
 'learning_rate': 'constant',
 'solver': 'adam'}

# ANN Classifier w/ Results

In [127]:
mlpc = MLPClassifier(max_iter=150,
                    momentum=0.6,
                    solver='adam',
                    activation='tanh',
                    learning_rate_init=0.005,
                    alpha=0.001)
mlpc = mlpc.fit(X_train,y_train)
y_pred = mlpc.predict(X_test)
mlpc.fit(X_train, y_train)
report = classification_report(y_test, y_pred, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)



              precision    recall  f1-score     support
fatty          0.714286  0.743243  0.728477   74.000000
normal         0.486486  0.450000  0.467532   40.000000
accuracy       0.640351  0.640351  0.640351    0.640351
macro avg      0.600386  0.596622  0.598005  114.000000
weighted avg   0.634356  0.640351  0.636917  114.000000




# ANN Grid Search (Fisher's Score)

In [75]:
clf = MLPClassifier()

param_grid = { 
    'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant','adaptive'],
}

CV_mlpc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5, scoring='accuracy')
CV_mlpc.fit(X_train1, y_train1)















GridSearchCV(cv=5, estimator=MLPClassifier(),
             param_grid={'activation': ['tanh', 'relu'],
                         'alpha': [0.0001, 0.05],
                         'hidden_layer_sizes': [(50, 50, 50), (50, 100, 50),
                                                (100,)],
                         'learning_rate': ['constant', 'adaptive'],
                         'solver': ['sgd', 'adam']},
             scoring='accuracy')

# ANN Best Parameters (Fisher's Score)

In [76]:
CV_mlpc.best_params_

{'activation': 'tanh',
 'alpha': 0.0001,
 'hidden_layer_sizes': (50, 50, 50),
 'learning_rate': 'adaptive',
 'solver': 'adam'}

# ANN Classifier w/ Results (Fisher's Score)

In [112]:
mlpc = MLPClassifier(max_iter=150, momentum=0.6, solver='adam', activation='tanh', learning_rate_init=0.005, alpha=0.0001)
mlpc = mlpc.fit(X_train1, y_train1)
y_pred1 = mlpc.predict(X_test1)
mlpc.fit(X_train1, y_train1)
report = classification_report(y_test1, y_pred1, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)



              precision    recall  f1-score     support
fatty          0.708861  0.756757  0.732026   74.000000
normal         0.485714  0.425000  0.453333   40.000000
accuracy       0.640351  0.640351  0.640351    0.640351
macro avg      0.597288  0.590878  0.592680  114.000000
weighted avg   0.630564  0.640351  0.634239  114.000000




# ANN Grid Search (Correlation)

In [78]:
from sklearn.neural_network import MLPClassifier

clf = MLPClassifier()

param_grid = { 
    'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant','adaptive'],
}

CV_mlpc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= 5, scoring='accuracy')
CV_mlpc.fit(X_train2, y_train2)















GridSearchCV(cv=5, estimator=MLPClassifier(),
             param_grid={'activation': ['tanh', 'relu'],
                         'alpha': [0.0001, 0.05],
                         'hidden_layer_sizes': [(50, 50, 50), (50, 100, 50),
                                                (100,)],
                         'learning_rate': ['constant', 'adaptive'],
                         'solver': ['sgd', 'adam']},
             scoring='accuracy')

# ANN Best Parameters (Correlation)

In [79]:
CV_mlpc.best_params_

{'activation': 'tanh',
 'alpha': 0.05,
 'hidden_layer_sizes': (50, 50, 50),
 'learning_rate': 'adaptive',
 'solver': 'adam'}

# ANN Classifier w/ Results (Correlation)

In [101]:
mlpc = MLPClassifier(max_iter=150, momentum=0.6, solver='adam', activation='tanh', learning_rate_init=0.005, alpha=0.05)
mlpc = mlpc.fit(X_train2, y_train2)
y_pred2 = mlpc.predict(X_test2)
mlpc.fit(X_train2, y_train2)
report = classification_report(y_test2, y_pred2, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)



              precision    recall  f1-score     support
fatty          0.768293  0.851351  0.807692   74.000000
normal         0.656250  0.525000  0.583333   40.000000
accuracy       0.736842  0.736842  0.736842    0.736842
macro avg      0.712271  0.688176  0.695513  114.000000
weighted avg   0.728979  0.736842  0.728970  114.000000




# Naive Bayes Grid Search

In [81]:
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
import sklearn

clf = GaussianNB()

param_grid = { 
    'var_smoothing': np.logspace(0,-9, num=100)
}

cv_method = RepeatedStratifiedKFold(n_splits=5, 
                                    n_repeats=3, 
                                    random_state=900)

CV_nbc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= cv_method, scoring='accuracy')
CV_nbc.fit(X_train, y_train)

GridSearchCV(cv=RepeatedStratifiedKFold(n_repeats=3, n_splits=5, random_state=900),
             estimator=GaussianNB(),
             param_grid={'var_smoothing': array([1.00000000e+00, 8.11130831e-01, 6.57933225e-01, 5.33669923e-01,
       4.32876128e-01, 3.51119173e-01, 2.84803587e-01, 2.31012970e-01,
       1.87381742e-01, 1.51991108e-01, 1.23284674e-01, 1.00000000e-01,
       8.11130831e-02, 6.57933225e-02, 5...
       1.23284674e-07, 1.00000000e-07, 8.11130831e-08, 6.57933225e-08,
       5.33669923e-08, 4.32876128e-08, 3.51119173e-08, 2.84803587e-08,
       2.31012970e-08, 1.87381742e-08, 1.51991108e-08, 1.23284674e-08,
       1.00000000e-08, 8.11130831e-09, 6.57933225e-09, 5.33669923e-09,
       4.32876128e-09, 3.51119173e-09, 2.84803587e-09, 2.31012970e-09,
       1.87381742e-09, 1.51991108e-09, 1.23284674e-09, 1.00000000e-09])},
             scoring='accuracy')

# Naive Bayes Best Parameters

In [83]:
CV_nbc.best_params_

{'var_smoothing': 0.43287612810830584}

# Naive Bayes Classifier w/ Results


In [132]:
nbc = GaussianNB(var_smoothing=0.01873817422860384)
nbc = nbc.fit(X_train,y_train)
y_pred = nbc.predict(X_test)
nbc.fit(X_train, y_train)
report = classification_report(y_test, y_pred, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.647059  0.445946  0.528000   74.000000
normal         0.349206  0.550000  0.427184   40.000000
accuracy       0.482456  0.482456  0.482456    0.482456
macro avg      0.498133  0.497973  0.477592  114.000000
weighted avg   0.542549  0.482456  0.492626  114.000000


# Naive Bayes Grid Search (Fisher's Score)

In [86]:
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
import sklearn

clf = GaussianNB()

param_grid = { 
    'var_smoothing': np.logspace(0,-9, num=100)
}

cv_method = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=900)

CV_nbc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= cv_method, scoring='accuracy')
CV_nbc.fit(X_train1, y_train1)

GridSearchCV(cv=RepeatedStratifiedKFold(n_repeats=3, n_splits=5, random_state=900),
             estimator=GaussianNB(),
             param_grid={'var_smoothing': array([1.00000000e+00, 8.11130831e-01, 6.57933225e-01, 5.33669923e-01,
       4.32876128e-01, 3.51119173e-01, 2.84803587e-01, 2.31012970e-01,
       1.87381742e-01, 1.51991108e-01, 1.23284674e-01, 1.00000000e-01,
       8.11130831e-02, 6.57933225e-02, 5...
       1.23284674e-07, 1.00000000e-07, 8.11130831e-08, 6.57933225e-08,
       5.33669923e-08, 4.32876128e-08, 3.51119173e-08, 2.84803587e-08,
       2.31012970e-08, 1.87381742e-08, 1.51991108e-08, 1.23284674e-08,
       1.00000000e-08, 8.11130831e-09, 6.57933225e-09, 5.33669923e-09,
       4.32876128e-09, 3.51119173e-09, 2.84803587e-09, 2.31012970e-09,
       1.87381742e-09, 1.51991108e-09, 1.23284674e-09, 1.00000000e-09])},
             scoring='accuracy')

# Naive Bayes Best Parameters (Fisher's Score)

In [87]:
CV_nbc.best_params_

{'var_smoothing': 0.8111308307896871}

# Naive Bayes Classifier w/ Results (Fisher)

In [93]:
nbc = GaussianNB(var_smoothing=0.8111308307896871)
nbc = nbc.fit(X_train1, y_train1)
y_pred1 = nbc.predict(X_test1)
nbc.fit(X_train1, y_train1)
report = classification_report(y_test1, y_pred1, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.661538  0.581081  0.618705   74.000000
normal         0.367347  0.450000  0.404494   40.000000
accuracy       0.535088  0.535088  0.535088    0.535088
macro avg      0.514443  0.515541  0.511600  114.000000
weighted avg   0.558313  0.535088  0.543543  114.000000


# Naive Bayes Grid Search (Correlation)

In [89]:
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
import sklearn

clf = GaussianNB()

param_grid = { 
    'var_smoothing': np.logspace(0,-9, num=100)
}

cv_method = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=900)

CV_nbc = GridSearchCV(estimator=clf, param_grid=param_grid, cv= cv_method, scoring='accuracy')
CV_nbc.fit(X_train2, y_train2)

GridSearchCV(cv=RepeatedStratifiedKFold(n_repeats=3, n_splits=5, random_state=900),
             estimator=GaussianNB(),
             param_grid={'var_smoothing': array([1.00000000e+00, 8.11130831e-01, 6.57933225e-01, 5.33669923e-01,
       4.32876128e-01, 3.51119173e-01, 2.84803587e-01, 2.31012970e-01,
       1.87381742e-01, 1.51991108e-01, 1.23284674e-01, 1.00000000e-01,
       8.11130831e-02, 6.57933225e-02, 5...
       1.23284674e-07, 1.00000000e-07, 8.11130831e-08, 6.57933225e-08,
       5.33669923e-08, 4.32876128e-08, 3.51119173e-08, 2.84803587e-08,
       2.31012970e-08, 1.87381742e-08, 1.51991108e-08, 1.23284674e-08,
       1.00000000e-08, 8.11130831e-09, 6.57933225e-09, 5.33669923e-09,
       4.32876128e-09, 3.51119173e-09, 2.84803587e-09, 2.31012970e-09,
       1.87381742e-09, 1.51991108e-09, 1.23284674e-09, 1.00000000e-09])},
             scoring='accuracy')

# Naive Bayes Best Parameters (Correlation)

In [90]:
CV_nbc.best_params_

{'var_smoothing': 1.0}

# Naive Bayes Classifier w/ Results (Correlation)

In [96]:
nbc = GaussianNB(var_smoothing=0.08346346346)
nbc = nbc.fit(X_train2, y_train2)
y_pred2 = nbc.predict(X_test2)
nbc.fit(X_train2, y_train2)
report = classification_report(y_test2, y_pred2, output_dict = True)
cr = pd.DataFrame(report).transpose()
print(cr)

              precision    recall  f1-score     support
fatty          0.678571  0.513514  0.584615   74.000000
normal         0.379310  0.550000  0.448980   40.000000
accuracy       0.526316  0.526316  0.526316    0.526316
macro avg      0.528941  0.531757  0.516797  114.000000
weighted avg   0.573568  0.526316  0.537024  114.000000
