# Importing the libraries

In [192]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report,accuracy_score,precision_score,recall_score
from yellowbrick import ROCAUC
from imblearn.over_sampling import SMOTE
from warnings import filterwarnings
filterwarnings('ignore')

# Importing the data 

In [193]:
data = pd.read_csv('Data/cleaned_data.csv')
data.head()

Unnamed: 0,Imaginary Part: Min,Imaginary Part: Avg,Real Part: Min,Real Part: Avg,Gender,Age,Smoking,Diagnosis
0,-320.61,-300.563531,-495.26,-464.171991,1.0,77.0,2.0,1
1,-325.39,-314.75036,-473.73,-469.26314,0.0,72.0,2.0,1
2,-323.0,-317.436056,-476.12,-471.897667,1.0,73.0,3.0,1
3,-327.78,-317.39967,-473.73,-468.856388,1.0,76.0,2.0,1
4,-325.39,-316.155785,-478.52,-472.869783,0.0,65.0,2.0,1


# Feature Scaling

In [194]:
features = ['Imaginary Part: Min', 'Imaginary Part: Avg', 'Real Part: Min', 'Real Part: Avg', 'Gender', 'Age', 'Smoking']
target = 'Diagnosis'
X = data[features]
y = data[target]
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X = pd.DataFrame(X_scaled, columns=features)
X.head()


Unnamed: 0,Imaginary Part: Min,Imaginary Part: Avg,Real Part: Min,Real Part: Avg,Gender,Age,Smoking
0,-0.200429,0.164022,-0.456438,-0.125737,0.79959,1.239927,0.373718
1,-0.369452,-0.387892,-0.014925,-0.242758,-1.250641,0.991345,0.373718
2,-0.284941,-0.492374,-0.063936,-0.303313,0.79959,1.041062,1.868588
3,-0.453963,-0.490958,-0.014925,-0.233409,0.79959,1.190211,0.373718
4,-0.369452,-0.442567,-0.113153,-0.325658,-1.250641,0.64333,0.373718


# Splling the data

In [195]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Balancing the classes using smote


In [196]:
smoter = SMOTE()
X_smoted, y_smoted = smoter.fit_resample(X_train,y_train)
X_smoted.head()

Unnamed: 0,Imaginary Part: Min,Imaginary Part: Avg,Real Part: Min,Real Part: Avg,Gender,Age,Smoking
0,-0.031053,0.408866,0.034087,-0.103244,-1.250641,0.195883,0.373718
1,-0.453963,-0.622354,-0.113153,-0.288215,-1.250641,-0.699013,-1.121153
2,-0.200429,-0.28175,-0.014925,-0.193448,-1.250641,-0.400714,-1.121153
3,2.968209,2.870266,0.635758,0.383892,0.79959,1.289644,0.373718
4,-0.200429,-0.267213,-0.063936,-0.311025,0.79959,-1.494475,-1.121153


# Modelling

In [197]:
# Helper Functions
def data_modelling(model):
    model_name = model.__class__.__name__
    # Print the name of the model
    print("Model Name:", model_name)

    print('Cleaned Orignal Data:')
    train_model(model,X_train, X_test, y_train, y_test)
    print('Cleaned Smoted Data:')
    train_model(model,X_smoted, X_test, y_smoted, y_test)

def train_model(model,X_train, X_test, y_train, y_test):
    model.fit(X_train,y_train)
    y_pred = model.predict(X_test)
    evaluate(model,y_test,y_pred)

def evaluate(model,y_test,y_pred):
    print(classification_report(y_test,y_pred))

# Logistic Regression

In [198]:
from sklearn.linear_model import LogisticRegression
data_modelling(LogisticRegression(C=10, penalty= 'l2', solver='newton-cg'))

Model Name: LogisticRegression
Cleaned Orignal Data:
              precision    recall  f1-score   support

           0       1.00      0.33      0.50         3
           1       0.86      0.92      0.89        13
           2       0.71      0.83      0.77        12
           3       1.00      0.50      0.67         2

    accuracy                           0.80        30
   macro avg       0.89      0.65      0.71        30
weighted avg       0.82      0.80      0.79        30

Cleaned Smoted Data:
              precision    recall  f1-score   support

           0       0.40      0.67      0.50         3
           1       0.92      0.85      0.88        13
           2       0.86      0.50      0.63        12
           3       0.33      1.00      0.50         2

    accuracy                           0.70        30
   macro avg       0.63      0.75      0.63        30
weighted avg       0.80      0.70      0.72        30



# SVC

In [199]:
from sklearn.svm import SVC
data_modelling(SVC(C = 1 ,gamma = 'scale', kernel='rbf'))

Model Name: SVC
Cleaned Orignal Data:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.86      0.92      0.89        13
           2       0.62      0.83      0.71        12
           3       0.00      0.00      0.00         2

    accuracy                           0.73        30
   macro avg       0.37      0.44      0.40        30
weighted avg       0.62      0.73      0.67        30

Cleaned Smoted Data:
              precision    recall  f1-score   support

           0       0.25      0.67      0.36         3
           1       0.91      0.77      0.83        13
           2       1.00      0.42      0.59        12
           3       0.33      1.00      0.50         2

    accuracy                           0.63        30
   macro avg       0.62      0.71      0.57        30
weighted avg       0.84      0.63      0.67        30



# KNN

In [200]:
from sklearn.neighbors import KNeighborsClassifier
# define models and parameters
model = KNeighborsClassifier()
data_modelling(KNeighborsClassifier(metric='manhattan',n_neighbors=17,weights='distance'))
# {'metric': 'manhattan', 'n_neighbors': 17, 'weights': 'distance'}

Model Name: KNeighborsClassifier
Cleaned Orignal Data:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.80      0.92      0.86        13
           2       0.69      0.75      0.72        12
           3       1.00      1.00      1.00         2

    accuracy                           0.77        30
   macro avg       0.62      0.67      0.64        30
weighted avg       0.69      0.77      0.73        30

Cleaned Smoted Data:
              precision    recall  f1-score   support

           0       0.22      0.67      0.33         3
           1       0.77      0.77      0.77        13
           2       1.00      0.17      0.29        12
           3       0.17      0.50      0.25         2

    accuracy                           0.50        30
   macro avg       0.54      0.53      0.41        30
weighted avg       0.77      0.50      0.50        30



# Bagged Dtree

In [201]:
from sklearn.ensemble import BaggingClassifier
data_modelling(BaggingClassifier(n_estimators=1000))

Model Name: BaggingClassifier
Cleaned Orignal Data:


              precision    recall  f1-score   support

           0       1.00      0.33      0.50         3
           1       1.00      0.92      0.96        13
           2       0.79      0.92      0.85        12
           3       0.33      0.50      0.40         2

    accuracy                           0.83        30
   macro avg       0.78      0.67      0.68        30
weighted avg       0.87      0.83      0.83        30

Cleaned Smoted Data:
              precision    recall  f1-score   support

           0       0.20      0.33      0.25         3
           1       1.00      0.85      0.92        13
           2       0.90      0.75      0.82        12
           3       0.25      0.50      0.33         2

    accuracy                           0.73        30
   macro avg       0.59      0.61      0.58        30
weighted avg       0.83      0.73      0.77        30



# Random Forest

In [202]:
from sklearn.ensemble import RandomForestClassifier
data_modelling(RandomForestClassifier(max_features='sqrt',n_estimators=100))

Model Name: RandomForestClassifier
Cleaned Orignal Data:
              precision    recall  f1-score   support

           0       0.50      0.33      0.40         3
           1       1.00      0.92      0.96        13
           2       0.85      0.92      0.88        12
           3       0.33      0.50      0.40         2

    accuracy                           0.83        30
   macro avg       0.67      0.67      0.66        30
weighted avg       0.84      0.83      0.83        30

Cleaned Smoted Data:
              precision    recall  f1-score   support

           0       0.33      0.33      0.33         3
           1       1.00      0.85      0.92        13
           2       0.91      0.83      0.87        12
           3       0.40      1.00      0.57         2

    accuracy                           0.80        30
   macro avg       0.66      0.75      0.67        30
weighted avg       0.86      0.80      0.82        30



# Ridge Classifier

In [203]:
from sklearn.linear_model import RidgeClassifier
data_modelling(RidgeClassifier(alpha=0.6))

Model Name: RidgeClassifier
Cleaned Orignal Data:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.75      0.92      0.83        13
           2       0.71      0.83      0.77        12
           3       0.00      0.00      0.00         2

    accuracy                           0.73        30
   macro avg       0.37      0.44      0.40        30
weighted avg       0.61      0.73      0.67        30

Cleaned Smoted Data:
              precision    recall  f1-score   support

           0       0.40      0.67      0.50         3
           1       0.73      0.85      0.79        13
           2       1.00      0.42      0.59        12
           3       0.40      1.00      0.57         2

    accuracy                           0.67        30
   macro avg       0.63      0.73      0.61        30
weighted avg       0.78      0.67      0.66        30

