In [None]:
#Import deps 
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler 
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold


from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.decomposition import PCA

from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import ClusterCentroids
from imblearn.over_sampling import SMOTENC

from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import classification_report

### Dataset

In [None]:
# Build the CSV location from the working directory: the 'all_wine_raw'
# part of the cwd is swapped for the relative path to the data file.
# os.path.join picks the right separator for the current OS (a hard-coded
# backslash only resolves on Windows).
data = os.path.join('Resources', 'Wine_data_both.csv')
path = os.getcwd().replace('all_wine_raw', data)

df = pd.read_csv(path)

### <font color='blue'>Exploratory analysis</font>

In [None]:
df.info()

In [None]:
df.isna().sum()

In [None]:
df.head()

#### Casting as category type

In [None]:
# Store both target columns with the pandas 'category' dtype.
df = df.astype({'quality': 'category', 'Wine': 'category'})

In [None]:
df.describe()

#### Feature distribution  Viz

In [None]:
# Box plot of the numeric columns; x tick labels are rotated 90 degrees.
df.boxplot(rot=90)
plt.show()

**Note:**
        The scales of the 'total sulfur dioxide' and 'free sulfur dioxide' measurements are significantly larger than those of the rest<br>
        of the features
    

#### Correlation

In [None]:
# Pairwise correlations of the feature columns (both targets removed),
# rounded to one decimal so the heatmap annotations stay compact.
X_df = df.drop(columns=['Wine', 'quality'])
X_corr = X_df.corr().round(1)

sns.heatmap(data=X_corr, cmap='flare', annot=True)
plt.show()

**Note:**
            free sulfur dioxide and total sulfur dioxide have a significant positive correlation<br>
            alcohol and density have a significant negative correlation.

#### Target : Wine type

In [None]:
# Class balance of the wine-type target, one bar per type.
# NOTE(review): palette colours are assigned in category order — confirm that
# 'white' really lands on the white-wine bar.
sns.set_style(rc={'axes.facecolor': 'darkgrey'})
wine_type_ax = sns.countplot(x=df['Wine'], hue=df['Wine'], palette=['white', 'red'])
wine_type_ax.set(title='Wine type')
plt.show()

In [None]:
df['Wine'].value_counts()

**Note:** 
    The target variable is heavily imbalanced: 3 'White' to 1 'Red'

#### Target: Quality

In [None]:
# Distribution of the raw quality grades.
quality_ax = sns.countplot(x=df['quality'])
quality_ax.set(title='Wine quality')
plt.show()

In [None]:
df['quality'].value_counts()

#### Quality count by wine type

In [None]:
# Quality grade counts, split by wine type.
sns.set_style(rc={'axes.facecolor': 'darkgrey'})
quality_by_wine_ax = sns.countplot(
    x=df['quality'],
    hue=df['Wine'],
    palette=['white', 'red'],
)
quality_by_wine_ax.set(title='Quality count by Wine Type ')
plt.show()

### Reclassifying target

In [None]:
# Collapse the numeric quality grades into three bands.
# The column was cast to 'category' earlier, and Series.replace on a
# categorical column is deprecated in recent pandas, so the grades are mapped
# as plain objects and the categorical is rebuilt explicitly.
quality_bands = {3: 'low', 4: 'low', 5: 'low',
                 6: 'medium',
                 7: 'high', 8: 'high', 9: 'high'}

# Values missing from the mapping (e.g. band labels left by a previous run of
# this cell) pass through unchanged, which keeps the cell safe to re-run.
quality_labels = df['quality'].astype(object).map(
    lambda grade: quality_bands.get(grade, grade)
)

# Fail loudly on a grade the mapping does not cover instead of silently
# turning it into NaN when the categorical is built.
unexpected_grades = set(quality_labels.dropna().unique()) - {'low', 'medium', 'high'}
if unexpected_grades:
    raise ValueError(f"Unexpected quality grades: {sorted(unexpected_grades, key=str)}")

# Explicit category order keeps plots reading low -> medium -> high.
df['quality'] = pd.Categorical(quality_labels, categories=['low', 'medium', 'high'])

### Reclassified Viz

In [None]:
# Counts per quality band after reclassification, split by wine type.
sns.set_style(rc={'axes.facecolor': 'darkgrey'})
banded_quality_ax = sns.countplot(
    x=df['quality'],
    hue=df['Wine'],
    palette=['white', 'red'],
)
banded_quality_ax.set(title='Quality count by Wine Type ')
plt.show()

---

### Splitting and encoding features

In [None]:
# Feature matrix: every column except the two targets.
X = df.drop(columns=['Wine', 'quality'])

# Encode the quality labels as integers; `le` is kept so later cells can
# translate the codes back through le.classes_.
le = LabelEncoder()
y = le.fit_transform(df['quality'])

---

### Over Sampling

In [None]:
# Oversample with SMOTE (fixed seed) on the full feature matrix and labels.
# NOTE(review): resampling happens before the train/test split, so synthetic
# points may be interpolated from rows that later land in the test set —
# consider splitting first and resampling only the training portion.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

#### Viz

In [None]:
# Class counts after resampling; the integer codes on the x axis are
# relabelled with the class names stored on the label encoder.
sns.countplot(x=y_resampled).set_xticklabels(le.classes_)
plt.show()

---

### Building Models

#### Splitting data

In [None]:
# Hold out a test set from the resampled data (train_test_split's default
# proportions; fixed seed so the split is repeatable).
X_train,X_test,y_train,y_test = train_test_split(X_resampled, y_resampled,random_state=42)

---

#### KNeighborsClassifier

In [None]:
# Sweep k = 1..9 and record the score on both the training and held-out sets.
train_scores, test_scores = [], []
for n in range(1, 10):
    knc = KNeighborsClassifier(n_neighbors=n).fit(X_train, y_train)
    train_scores += [knc.score(X_train, y_train)]
    test_scores += [knc.score(X_test, y_test)]

In [None]:
# Train vs. test score for each k. Labels, legend and title are set so the two
# curves can be told apart and the figure stands on its own.
k_values = [*range(1, 10)]
plt.plot(k_values, train_scores, label='train')
plt.plot(k_values, test_scores, label='test')
plt.xlabel('n_neighbors')
plt.ylabel('score')
plt.title('KNeighborsClassifier (unscaled features)')
plt.legend()
plt.show()

#### Model fit

In [None]:
# Final KNN model with k = 1, fitted on the training split.
# NOTE(review): with a single neighbour each training point is its own nearest
# neighbour, so the training score says little — confirm k=1 is the intended
# pick from the sweep.
knc = KNeighborsClassifier(n_neighbors = 1 )
knc.fit(X_train,y_train)

#### Confusion Matrix

In [None]:
# Confusion matrix of the KNN predictions on the held-out set.
y_pred = knc.predict(X_test)

cm = confusion_matrix(y_test, y_pred)

# Named cm_display so IPython's built-in display() is not shadowed.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm,
                                    display_labels=le.classes_)
cm_display.plot()
plt.show()

In [None]:
print(classification_report(y_test,y_pred))

---

#### SVC

In [None]:
# Grid search over the RBF-kernel SVC settings C and gamma.
param_grid_svc = dict(C=[5, 10, 15], gamma=[1, 2, 3, 4], kernel=['rbf'])

grid_svc = GridSearchCV(estimator=SVC(), param_grid=param_grid_svc, verbose=3, refit=True)
grid_svc.fit(X_train, y_train)

In [None]:
print(grid_svc.best_params_)
print(grid_svc.best_score_)

#### Model fit

In [None]:
# Rebuild the SVC with the winning settings and fit it on the training split.
C, gamma, kernel = (grid_svc.best_params_[key] for key in ('C', 'gamma', 'kernel'))

svc = SVC(kernel=kernel, gamma=gamma, C=C)
svc.fit(X_train, y_train)

#### Confusion Matrix

In [None]:
# Confusion matrix of the SVC predictions on the held-out set.
y_pred = svc.predict(X_test)

cm = confusion_matrix(y_test, y_pred)

# Named cm_display so IPython's built-in display() is not shadowed.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm,
                                    display_labels=le.classes_)
cm_display.plot()
plt.show()

#### Classification Report

In [None]:
print(classification_report(y_test,y_pred))

---

#### DecisionTreeClassifier

In [None]:
# Candidate decision-tree settings, searched exhaustively.
param_grid_dtc = dict(
    criterion=['gini', 'entropy', 'log_loss'],
    splitter=['best', 'random'],
    max_features=['sqrt', 'log2'],
    max_depth=[10, 20, 30],
    max_leaf_nodes=[900, 1000, 1100],
    min_samples_split=[4, 6, 8],
)

grid_dtc = GridSearchCV(
    estimator=DecisionTreeClassifier(),
    param_grid=param_grid_dtc,
    verbose=3,
    refit=True,
)
grid_dtc.fit(X_train, y_train)

In [None]:
print(grid_dtc.best_params_)
print(grid_dtc.best_score_)

#### Model Fit

In [None]:
# Fresh tree built from the grid-search winners. best_params_ holds exactly the
# six searched settings (criterion, splitter, max_features, max_depth,
# max_leaf_nodes, min_samples_split), so unpacking it passes the same arguments.
dtc = DecisionTreeClassifier(**grid_dtc.best_params_)
dtc.fit(X_train, y_train)

#### Confusion Matrix

In [None]:
# Confusion matrix of the decision-tree predictions on the held-out set.
y_pred = dtc.predict(X_test)

cm = confusion_matrix(y_test, y_pred)

# Named cm_display so IPython's built-in display() is not shadowed.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm,
                                    display_labels=le.classes_)
cm_display.plot()
plt.show()

#### Classification Report

In [None]:
print(classification_report(y_test,y_pred))

---

#### RandomForestClassifier

In [None]:
# Candidate random-forest settings, searched exhaustively.
param_grid_rfc = dict(
    n_estimators=[200, 215, 220],
    max_features=['sqrt', 'log2', None],
    max_depth=[10, 20, 30],
    max_leaf_nodes=[900, 1000, 1100],
    min_samples_split=[4, 6, 8],
)

grid_rfc = GridSearchCV(
    estimator=RandomForestClassifier(),
    param_grid=param_grid_rfc,
    verbose=3,
    refit=True,
)
grid_rfc.fit(X_train, y_train)

In [None]:
print(grid_rfc.best_params_)
print(grid_rfc.best_score_)

#### Model fit

In [None]:
# Rebuild the forest from ALL tuned settings. Listing the parameters by hand
# had left out min_samples_split, so the fitted model did not match the
# configuration the grid search selected; unpacking best_params_ cannot drift.
rfc = RandomForestClassifier(**grid_rfc.best_params_)
rfc.fit(X_train, y_train)

#### Confusion Matrix

In [None]:
# Confusion matrix of the random-forest predictions on the held-out set.
y_pred = rfc.predict(X_test)

cm = confusion_matrix(y_test, y_pred)

# Named cm_display so IPython's built-in display() is not shadowed.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm,
                                    display_labels=le.classes_)
cm_display.plot()
plt.show()

#### Classification Report

In [None]:
print(classification_report(y_test,y_pred))

---

#### AdaBoostClassifier (estimator = DecisionTreeClassifier)

In [None]:
# Tune AdaBoost (n_estimators, learning_rate) on top of a decision tree
# configured with the winners of the decision-tree grid search.
param_grid_abc_dt = dict(n_estimators=[210, 212, 215], learning_rate=[0.5, 0.75, 1])

dtc = DecisionTreeClassifier(**grid_dtc.best_params_)
abc_dt = AdaBoostClassifier(estimator=dtc)

grid_abc_dt = GridSearchCV(
    estimator=abc_dt,
    param_grid=param_grid_abc_dt,
    verbose=3,
    refit=True,
)
grid_abc_dt.fit(X_train, y_train)

In [None]:
print(grid_abc_dt.best_params_)
print(grid_abc_dt.best_score_)

#### Model fit

In [None]:
# Boosted decision tree: base tree from the tree search, boosting settings
# (n_estimators, learning_rate) from the AdaBoost search.
dtc = DecisionTreeClassifier(**grid_dtc.best_params_)
abc_dt = AdaBoostClassifier(estimator=dtc, **grid_abc_dt.best_params_)
abc_dt.fit(X_train, y_train)

#### Confusion Matrix

In [None]:
# Confusion matrix of the boosted decision-tree predictions on the held-out set.
y_pred = abc_dt.predict(X_test)

cm = confusion_matrix(y_test, y_pred)

# Named cm_display so IPython's built-in display() is not shadowed.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm,
                                    display_labels=le.classes_)
cm_display.plot()
plt.show()

#### Classification Report

In [None]:
print(classification_report(y_test,y_pred))

---

#### AdaBoostClassifier (estimator = RandomForestClassifier)

In [None]:
# Tune AdaBoost (n_estimators, learning_rate) on top of the tuned forest.
param_grid_abc_rfc = {
    'n_estimators': [210, 212, 215],
    'learning_rate': [0.5, 0.75, 1],
}

# Unpack every tuned setting: a hand-written argument list had omitted
# min_samples_split, so the base forest differed from the one selected
# by the random-forest grid search.
rfc = RandomForestClassifier(**grid_rfc.best_params_)

abc_rfc = AdaBoostClassifier(estimator=rfc)

grid_abc_rfc = GridSearchCV(
    abc_rfc,
    param_grid_abc_rfc,
    refit=True, verbose=3,
)

grid_abc_rfc.fit(X_train, y_train)

In [None]:
print(grid_abc_rfc.best_params_)
print(grid_abc_rfc.best_score_)

#### Model Fit

In [None]:
# Boosted forest: base forest from the forest search (all tuned settings,
# including min_samples_split, which a hand-written argument list had dropped),
# boosting settings from the AdaBoost search.
rfc = RandomForestClassifier(**grid_rfc.best_params_)

abc_rfc = AdaBoostClassifier(
    estimator=rfc,
    n_estimators=grid_abc_rfc.best_params_['n_estimators'],
    learning_rate=grid_abc_rfc.best_params_['learning_rate'],
)

abc_rfc.fit(X_train, y_train)

#### Confusion Matrix

In [None]:
# Confusion matrix of the boosted random-forest predictions on the held-out set.
y_pred = abc_rfc.predict(X_test)

cm = confusion_matrix(y_test, y_pred)

# Named cm_display so IPython's built-in display() is not shadowed.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm,
                                    display_labels=le.classes_)
cm_display.plot()
plt.show()

#### Classification Report

In [None]:
print(classification_report(y_test,y_pred))

---

---

### Scaled

#### Scaling Data

In [None]:
# Standardise the whole resampled feature matrix; X_scaled feeds the box plot
# and the PCA visualisations.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_resampled)

#### Feature distribution  Viz

In [None]:
# Box plot of the standardised features, labelled with the original column names.
cols = df.drop(columns=['Wine', 'quality']).columns
X_df = pd.DataFrame(data=X_scaled, columns=cols)

X_df.boxplot(rot=90)
plt.show()

### Scaling Data

In [None]:
# Re-fit the same scaler object on the training split only, then apply those
# training statistics to the test split (this replaces the fit made on the
# full resampled data).
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

---

#### KNeighborsClassifier

In [None]:
# Sweep k = 1..9 on the standardised features, recording train and test scores.
train_scores_scaled, test_scores_scaled = [], []
for n in range(1, 10):
    knc_scaled = KNeighborsClassifier(n_neighbors=n).fit(X_train_scaled, y_train)
    train_scores_scaled += [knc_scaled.score(X_train_scaled, y_train)]
    test_scores_scaled += [knc_scaled.score(X_test_scaled, y_test)]

In [None]:
# Train vs. test score for each k on the standardised features. Labels, legend
# and title are set so the two curves can be told apart.
k_values = [*range(1, 10)]
plt.plot(k_values, train_scores_scaled, label='train')
plt.plot(k_values, test_scores_scaled, label='test')
plt.xlabel('n_neighbors')
plt.ylabel('score')
plt.title('KNeighborsClassifier (scaled features)')
plt.legend()
plt.show()

#### Model fit

In [None]:
# Final KNN model on the standardised features, k = 1.
# NOTE(review): k is hard-coded — confirm it matches the pick from the sweep.
knc_scaled = KNeighborsClassifier(n_neighbors = 1 )
knc_scaled.fit(X_train_scaled,y_train)

#### Confusion Matrix

In [None]:
# Confusion matrix of the scaled-feature KNN predictions on the held-out set.
y_pred = knc_scaled.predict(X_test_scaled)

cm = confusion_matrix(y_test, y_pred)

# Named cm_display so IPython's built-in display() is not shadowed.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm,
                                    display_labels=le.classes_)
cm_display.plot()
plt.show()

#### Classification Report

In [None]:
print(classification_report(y_test,y_pred))

---

#### SVC

In [None]:
# Grid search over the RBF-kernel SVC settings C and gamma, on standardised features.
param_grid_svc_scaled = dict(C=[5, 10, 15], gamma=[1, 2, 3, 4], kernel=['rbf'])

grid_svc_scaled = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid_svc_scaled,
    verbose=3,
    refit=True,
)
grid_svc_scaled.fit(X_train_scaled, y_train)

In [None]:
print(grid_svc_scaled.best_params_)
print(grid_svc_scaled.best_score_)

#### Model Fit

In [None]:
# Rebuild the SVC with the winning settings and fit it on the scaled training split.
C, gamma, kernel = (grid_svc_scaled.best_params_[key] for key in ('C', 'gamma', 'kernel'))

svc_scaled = SVC(kernel=kernel, gamma=gamma, C=C)
svc_scaled.fit(X_train_scaled, y_train)

#### Confusion Matrix

In [None]:
# Confusion matrix of the scaled-feature SVC predictions on the held-out set.
y_pred = svc_scaled.predict(X_test_scaled)

cm = confusion_matrix(y_test, y_pred)

# Named cm_display so IPython's built-in display() is not shadowed.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm,
                                    display_labels=le.classes_)
cm_display.plot()
plt.show()

#### Classification Report

In [None]:
print(classification_report(y_test,y_pred))

---

#### DecisionTreeClassifier

In [None]:
# Candidate decision-tree settings, searched exhaustively on standardised features.
param_grid_dtc_scaled = dict(
    criterion=['gini', 'entropy', 'log_loss'],
    splitter=['best', 'random'],
    max_features=['sqrt', 'log2'],
    max_depth=[10, 20, 30],
    max_leaf_nodes=[900, 1000, 1100],
    min_samples_split=[4, 6, 8],
)

grid_dtc_scaled = GridSearchCV(
    estimator=DecisionTreeClassifier(),
    param_grid=param_grid_dtc_scaled,
    verbose=3,
    refit=True,
)
grid_dtc_scaled.fit(X_train_scaled, y_train)

In [None]:
print(grid_dtc_scaled.best_params_)
print(grid_dtc_scaled.best_score_)

#### Model Fit

In [None]:
# Fresh tree built from the scaled-feature grid-search winners; best_params_
# holds exactly the six searched settings, so unpacking passes the same arguments.
dtc_scaled = DecisionTreeClassifier(**grid_dtc_scaled.best_params_)
dtc_scaled.fit(X_train_scaled, y_train)

#### Confusion Matrix

In [None]:
# Confusion matrix of the scaled-feature decision-tree predictions on the held-out set.
y_pred = dtc_scaled.predict(X_test_scaled)

cm = confusion_matrix(y_test, y_pred)

# Named cm_display so IPython's built-in display() is not shadowed.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm,
                                    display_labels=le.classes_)
cm_display.plot()
plt.show()

#### Classification Report

In [None]:
print(classification_report(y_test,y_pred))

---

#### RandomForestClassifier

In [None]:
# Candidate random-forest settings, searched exhaustively on standardised features.
param_grid_rfc_scaled = dict(
    n_estimators=[200, 215, 220],
    max_features=['sqrt', 'log2', None],
    max_depth=[10, 20, 30],
    max_leaf_nodes=[900, 1000, 1100],
    min_samples_split=[4, 6, 8],
)

grid_rfc_scaled = GridSearchCV(
    estimator=RandomForestClassifier(),
    param_grid=param_grid_rfc_scaled,
    verbose=3,
    refit=True,
)
grid_rfc_scaled.fit(X_train_scaled, y_train)

In [None]:
print(grid_rfc_scaled.best_params_)
print(grid_rfc_scaled.best_score_)

#### Model Fit

In [None]:
# Rebuild the forest from ALL tuned settings. Listing the parameters by hand
# had left out min_samples_split, so the fitted model did not match the
# configuration the grid search selected.
rfc_scaled = RandomForestClassifier(**grid_rfc_scaled.best_params_)
rfc_scaled.fit(X_train_scaled, y_train)

#### Confusion Matrix

In [None]:
# Confusion matrix of the scaled-feature random-forest predictions on the held-out set.
y_pred = rfc_scaled.predict(X_test_scaled)

cm = confusion_matrix(y_test, y_pred)

# Named cm_display so IPython's built-in display() is not shadowed.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm,
                                    display_labels=le.classes_)
cm_display.plot()
plt.show()

#### Classification Report

In [None]:
print(classification_report(y_test,y_pred))

---

#### AdaBoostClassifier (estimator = DecisionTreeClassifier)

In [None]:
# Tune AdaBoost (n_estimators, learning_rate) on top of the decision tree
# tuned on the standardised features.
param_grid_abc_dt_scaled = {
    'n_estimators': [210, 212, 215],
    'learning_rate': [0.5, 0.75, 1],
}

dtc_scaled = DecisionTreeClassifier(**grid_dtc_scaled.best_params_)

abc_dt_scaled = AdaBoostClassifier(estimator=dtc_scaled)

# Search the grid defined in THIS cell. Using the unscaled section's
# param_grid_abc_dt only worked while that cell had been run in the same
# kernel session, and left the grid above unused.
grid_abc_dt_scaled = GridSearchCV(
    abc_dt_scaled,
    param_grid_abc_dt_scaled,
    refit=True, verbose=3,
)

grid_abc_dt_scaled.fit(X_train_scaled, y_train)

In [None]:
print(grid_abc_dt_scaled.best_params_)
print(grid_abc_dt_scaled.best_score_)

#### Model Fit

In [None]:
# Boosted decision tree on standardised features: base tree from the tree
# search, boosting settings (n_estimators, learning_rate) from the AdaBoost search.
dtc_scaled = DecisionTreeClassifier(**grid_dtc_scaled.best_params_)
abc_dt_scaled = AdaBoostClassifier(estimator=dtc_scaled, **grid_abc_dt_scaled.best_params_)
abc_dt_scaled.fit(X_train_scaled, y_train)

#### Confusion Matrix

In [None]:
# Confusion matrix of the scaled-feature boosted decision-tree predictions.
y_pred = abc_dt_scaled.predict(X_test_scaled)

cm = confusion_matrix(y_test, y_pred)

# Named cm_display so IPython's built-in display() is not shadowed.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm,
                                    display_labels=le.classes_)
cm_display.plot()
plt.show()

#### Classification Report

In [None]:
print(classification_report(y_test,y_pred))

---

#### AdaBoostClassifier (estimator = RandomForestClassifier)

In [None]:
# Tune AdaBoost (n_estimators, learning_rate) on top of the forest tuned on
# the standardised features.
param_grid_abc_rfc_scaled = {
    'n_estimators': [210, 212, 215],
    'learning_rate': [0.5, 0.75, 1],
}

# All tuned settings are unpacked (a hand-written list had dropped
# min_samples_split).
rfc_scaled = RandomForestClassifier(**grid_rfc_scaled.best_params_)

# Boost the scaled-feature forest built just above. Passing the unscaled
# section's `rfc` here meant rfc_scaled was never used and the search ran
# with hyper-parameters tuned on different data.
abc_rfc_scaled = AdaBoostClassifier(estimator=rfc_scaled)

grid_abc_rfc_scaled = GridSearchCV(
    abc_rfc_scaled,
    param_grid_abc_rfc_scaled,
    refit=True, verbose=3,
)

grid_abc_rfc_scaled.fit(X_train_scaled, y_train)

In [None]:
print(grid_abc_rfc_scaled.best_params_)
print(grid_abc_rfc_scaled.best_score_)

#### Model Fit

In [None]:
# Boosted forest on standardised features. The base forest takes every tuned
# setting from the scaled-feature forest search (a hand-written list had
# dropped min_samples_split).
rfc_scaled = RandomForestClassifier(**grid_rfc_scaled.best_params_)

# estimator must be rfc_scaled: passing the unscaled section's `rfc` left the
# forest built above unused and boosted a model tuned on different data.
abc_rfc_scaled = AdaBoostClassifier(
    estimator=rfc_scaled,
    n_estimators=grid_abc_rfc_scaled.best_params_['n_estimators'],
    learning_rate=grid_abc_rfc_scaled.best_params_['learning_rate'],
)

abc_rfc_scaled.fit(X_train_scaled, y_train)

#### Confusion Matrix

In [None]:
# Confusion matrix of the scaled-feature boosted random-forest predictions.
y_pred = abc_rfc_scaled.predict(X_test_scaled)

cm = confusion_matrix(y_test, y_pred)

# Named cm_display so IPython's built-in display() is not shadowed.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm,
                                    display_labels=le.classes_)
cm_display.plot()
plt.show()

#### Classification Report

In [None]:
print(classification_report(y_test,y_pred))

---

---

### PCA Analysis

#### Viz

In [None]:
# Bar chart of the variance ratio explained by each principal component.
pca = PCA().fit(X_scaled)

n = list(range(1, len(pca.explained_variance_ratio_) + 1))
plt.bar(n, pca.explained_variance_ratio_)
plt.show()

#### PCA Viz

In [None]:

# 3-D scatter of the first three principal components, coloured by class.
pca = PCA(n_components=3)
X_pca = pca.fit_transform(X_scaled)

pca_1 = X_pca[:, 0]
pca_2 = X_pca[:, 1]
pca_3 = X_pca[:, 2]

fig = plt.figure()
ax = fig.add_subplot(projection='3d')
# X_scaled was built from the resampled data, so the colours must come from
# y_resampled (one label per plotted row); the pre-resampling `y` has a
# different length. No positional colour string is passed either: the fourth
# positional argument of scatter3D is `zdir`, not a colour.
ax.scatter3D(pca_1, pca_2, pca_3, c=y_resampled)
ax.set(xlabel='PC 1', ylabel='PC 2', zlabel='PC 3')
plt.show()

#### PCA Training & Testing Data

In [None]:
# Project onto 10 principal components: the projection is fitted on the scaled
# training split and then applied unchanged to the scaled test split.
pca = PCA(n_components = 10)
X_pca_train = pca.fit_transform(X_train_scaled)
X_pca_test =  pca.transform(X_test_scaled)

---

### Models

---

#### KNeighborsClassifier

In [None]:
# Sweep k = 1..9 on the PCA-projected features, recording train and test scores.
train_scores_pca, test_scores_pca = [], []
for n in range(1, 10):
    knc_pca = KNeighborsClassifier(n_neighbors=n).fit(X_pca_train, y_train)
    train_scores_pca += [knc_pca.score(X_pca_train, y_train)]
    test_scores_pca += [knc_pca.score(X_pca_test, y_test)]

In [None]:
# Train vs. test score for each k on the PCA features. Labels, legend and title
# are set so the two curves can be told apart.
k_values = [*range(1, 10)]
plt.plot(k_values, train_scores_pca, label='train')
plt.plot(k_values, test_scores_pca, label='test')
plt.xlabel('n_neighbors')
plt.ylabel('score')
plt.title('KNeighborsClassifier (PCA features)')
plt.legend()
plt.show()

#### Model Fit

In [None]:
# Final KNN model on the PCA features, k = 1.
# NOTE(review): k is hard-coded — confirm it matches the pick from the sweep.
knc_pca = KNeighborsClassifier(n_neighbors = 1 )
knc_pca.fit(X_pca_train,y_train)

#### Confusion Matrix

In [None]:
# Confusion matrix of the PCA-feature KNN predictions on the held-out set.
y_pred = knc_pca.predict(X_pca_test)

cm = confusion_matrix(y_test, y_pred)

# Named cm_display so IPython's built-in display() is not shadowed.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm,
                                    display_labels=le.classes_)
cm_display.plot()
plt.show()

In [None]:
print(classification_report(y_test,y_pred))

---

#### SVC

In [None]:
# Grid search over the RBF-kernel SVC settings C and gamma, on the PCA features.
param_grid_svc_pca = dict(C=[5, 10, 15], gamma=[1, 2, 3, 4], kernel=['rbf'])

grid_svc_pca = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid_svc_pca,
    verbose=3,
    refit=True,
)
grid_svc_pca.fit(X_pca_train, y_train)

In [None]:
print(grid_svc_pca.best_params_)
print(grid_svc_pca.best_score_)

#### Model Fit

In [None]:
# Rebuild the SVC with the winning settings and fit it on the PCA training split.
C, gamma, kernel = (grid_svc_pca.best_params_[key] for key in ('C', 'gamma', 'kernel'))

svc_pca = SVC(kernel=kernel, gamma=gamma, C=C)
svc_pca.fit(X_pca_train, y_train)

#### Confusion Matrix

In [None]:
# Confusion matrix of the PCA-feature SVC predictions on the held-out set.
y_pred = svc_pca.predict(X_pca_test)

cm = confusion_matrix(y_test, y_pred)

# Named cm_display so IPython's built-in display() is not shadowed.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm,
                                    display_labels=le.classes_)
cm_display.plot()
plt.show()

In [None]:
print(classification_report(y_test,y_pred))

---

#### Class Weights

In [None]:
# 'balanced' class weights for the PCA tree models, keyed by encoded class label.
# The class list is taken from y_train itself so the `classes` argument always
# matches the labels passed as `y` (deriving it from the pre-resampling `y`
# only works while both arrays happen to contain the same label set).
classes = np.unique(y_train)
weights = compute_class_weight(class_weight='balanced',
                               classes=classes,
                               y=y_train)
class_weight_dict = dict(zip(classes, weights))


#### DecisionTreeClassifier

In [None]:
# Candidate decision-tree settings, searched exhaustively on the PCA features
# with the precomputed class weights applied to every candidate.
param_grid_dtc_pca = dict(
    criterion=['gini', 'entropy', 'log_loss'],
    splitter=['best', 'random'],
    max_features=['sqrt', 'log2'],
    max_depth=[10, 20, 30],
    max_leaf_nodes=[900, 1000, 1100],
    min_samples_split=[4, 6, 8],
)

grid_dtc_pca = GridSearchCV(
    estimator=DecisionTreeClassifier(class_weight=class_weight_dict),
    param_grid=param_grid_dtc_pca,
    verbose=3,
    refit=True,
)
grid_dtc_pca.fit(X_pca_train, y_train)

In [None]:
print(grid_dtc_pca.best_params_)
print(grid_dtc_pca.best_score_)

#### Model Fit

In [None]:
# Fresh class-weighted tree built from the PCA grid-search winners;
# best_params_ holds exactly the six searched settings, so unpacking passes
# the same arguments.
dtc_pca = DecisionTreeClassifier(class_weight=class_weight_dict, **grid_dtc_pca.best_params_)
dtc_pca.fit(X_pca_train, y_train)

#### Confusion Matrix

In [None]:
# Confusion matrix of the PCA-feature decision-tree predictions on the held-out set.
y_pred = dtc_pca.predict(X_pca_test)

cm = confusion_matrix(y_test, y_pred)

# Named cm_display so IPython's built-in display() is not shadowed.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm,
                                    display_labels=le.classes_)
cm_display.plot()
plt.show()

#### Classification Report

In [None]:
print(classification_report(y_test,y_pred))

---

#### RandomForestClassifier

In [None]:
# Candidate random-forest settings, searched exhaustively on the PCA features
# with the precomputed class weights applied to every candidate.
param_grid_rfc_pca = dict(
    n_estimators=[200, 215, 220],
    max_features=['sqrt', 'log2', None],
    max_depth=[10, 20, 30],
    max_leaf_nodes=[900, 1000, 1100],
    min_samples_split=[4, 6, 8],
)

grid_rfc_pca = GridSearchCV(
    estimator=RandomForestClassifier(class_weight=class_weight_dict),
    param_grid=param_grid_rfc_pca,
    verbose=3,
    refit=True,
)
grid_rfc_pca.fit(X_pca_train, y_train)

In [None]:
print(grid_rfc_pca.best_params_)
print(grid_rfc_pca.best_score_)

#### Model Fit

In [None]:
# Rebuild the class-weighted forest from ALL tuned settings. Listing the
# parameters by hand had left out min_samples_split, so the fitted model did
# not match the configuration the grid search selected.
rfc_pca = RandomForestClassifier(class_weight=class_weight_dict,
                                 **grid_rfc_pca.best_params_)
rfc_pca.fit(X_pca_train, y_train)

#### Confusion Matrix

In [None]:
# Confusion matrix of the PCA-feature random-forest predictions on the held-out set.
y_pred = rfc_pca.predict(X_pca_test)

cm = confusion_matrix(y_test, y_pred)

# Named cm_display so IPython's built-in display() is not shadowed.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm,
                                    display_labels=le.classes_)
cm_display.plot()
plt.show()

#### Classification Report

In [None]:
print(classification_report(y_test,y_pred))

---

#### AdaBoostClassifier(estimator = DecisionTreeClassifier )

In [None]:
# Tune AdaBoost (n_estimators, learning_rate) on top of the class-weighted
# decision tree tuned on the PCA features.
param_grid_abc_dtc_pca = dict(n_estimators=[210, 212, 215], learning_rate=[0.5, 0.75, 1])

dtc_pca = DecisionTreeClassifier(class_weight=class_weight_dict, **grid_dtc_pca.best_params_)
abc_dtc_pca = AdaBoostClassifier(estimator=dtc_pca)

grid_abc_dtc_pca = GridSearchCV(
    estimator=abc_dtc_pca,
    param_grid=param_grid_abc_dtc_pca,
    verbose=3,
    refit=True,
)
grid_abc_dtc_pca.fit(X_pca_train, y_train)

In [None]:
print(grid_abc_dtc_pca.best_params_)
print(grid_abc_dtc_pca.best_score_)

#### Model Fit

In [None]:
# Boosted class-weighted tree on the PCA features: base tree from the tree
# search, boosting settings (n_estimators, learning_rate) from the AdaBoost search.
dtc_pca = DecisionTreeClassifier(class_weight=class_weight_dict, **grid_dtc_pca.best_params_)
abc_dtc_pca = AdaBoostClassifier(estimator=dtc_pca, **grid_abc_dtc_pca.best_params_)
abc_dtc_pca.fit(X_pca_train, y_train)

#### Confusion Matrix

In [None]:
# Confusion matrix of the PCA-feature boosted decision-tree predictions.
y_pred = abc_dtc_pca.predict(X_pca_test)

cm = confusion_matrix(y_test, y_pred)

# Named cm_display so IPython's built-in display() is not shadowed.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm,
                                    display_labels=le.classes_)
cm_display.plot()
plt.show()

#### Classification Report

In [None]:
print(classification_report(y_test,y_pred))

---

#### AdaBoostClassifier(estimator = RandomForestClassifier )

In [None]:
# Tune AdaBoost (n_estimators, learning_rate) on top of the class-weighted
# forest tuned on the PCA features.
param_grid_abc_rfc_pca = {
    'n_estimators': [210, 212, 215],
    'learning_rate': [0.5, 0.75, 1],
}

# Unpack every tuned setting: a hand-written argument list had omitted
# min_samples_split, so the base forest differed from the one selected
# by the random-forest grid search.
rfc_pca = RandomForestClassifier(class_weight=class_weight_dict,
                                 **grid_rfc_pca.best_params_)

abc_rfc_pca = AdaBoostClassifier(estimator=rfc_pca)

grid_abc_rfc_pca = GridSearchCV(
    abc_rfc_pca,
    param_grid_abc_rfc_pca,
    refit=True, verbose=3,
)

grid_abc_rfc_pca.fit(X_pca_train, y_train)

In [None]:
print(grid_abc_rfc_pca.best_params_)
print(grid_abc_rfc_pca.best_score_)

#### Model Fit

In [None]:
# Boosted class-weighted forest on the PCA features. The base forest takes
# every tuned setting from the forest search (a hand-written list had dropped
# min_samples_split); boosting settings come from the AdaBoost search.
rfc_pca = RandomForestClassifier(class_weight=class_weight_dict,
                                 **grid_rfc_pca.best_params_)

abc_rfc_pca = AdaBoostClassifier(
    estimator=rfc_pca,
    n_estimators=grid_abc_rfc_pca.best_params_['n_estimators'],
    learning_rate=grid_abc_rfc_pca.best_params_['learning_rate'],
)

abc_rfc_pca.fit(X_pca_train, y_train)

#### Confusion Matrix

In [None]:
# Confusion matrix of the PCA-feature boosted random-forest predictions.
y_pred = abc_rfc_pca.predict(X_pca_test)

cm = confusion_matrix(y_test, y_pred)

# Named cm_display so IPython's built-in display() is not shadowed.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm,
                                    display_labels=le.classes_)
cm_display.plot()
plt.show()

#### Classification Report

In [None]:
print(classification_report(y_test,y_pred))