In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## <span style = 'background :lightblue'>STEP 1 : Import Transformed Dataset</span>

In [2]:
# Relative paths of the transformed train / test CSV files loaded in this step.
filepath_train = "S2_part2_FE_pipeline_Train_DS.csv"
filepath_test = "S2_part2_FE_pipeline_Test_DS.csv"

In [3]:
# Read both splits identically: row 0 supplies the column names and
# column 0 is used as the row index.
d_train = pd.read_csv(filepath_train, header=0, index_col=0)
d_test = pd.read_csv(filepath_test, header=0, index_col=0)

In [4]:
# Preview the first three rows of the training frame.
d_train.head(3)

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalWidthCm,Species
0,0.616198,0.278721,1.427632,2
1,-1.204387,-1.611736,-0.287556,1
2,0.009336,-0.193893,0.767944,2


In [5]:
# Preview the first three rows of the test frame.
d_test.head(3)

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalWidthCm,Species
0,-0.597525,-0.193893,0.372132,1
1,-0.354781,-0.193893,0.108256,1
2,0.494826,-0.666508,0.372132,2


## <span style = 'background :lightblue'>STEP 2 : Feature - Target Split (Train & Test)</span>

In [6]:
# Separate the 'Species' target from the remaining feature columns,
# independently for the training and the test frame.
y_train = d_train['Species']
X_train = d_train.drop(columns='Species')

y_test = d_test['Species']
X_test = d_test.drop(columns='Species')

In [7]:
# Show the shapes of the training features and target; the row counts should match.
X_train.shape , y_train.shape

((117, 3), (117,))

In [8]:
# Show the shapes of the test features and target; the row counts should match.
X_test.shape , y_test.shape

((30, 3), (30,))

In [9]:
# Number of training samples per Species label (class balance check).
y_train.value_counts()

1    40
2    39
0    38
Name: Species, dtype: int64

In [10]:
# Number of test samples per Species label (class balance check).
y_test.value_counts()

1    10
2    10
0    10
Name: Species, dtype: int64

## <span style = 'background :lightblue'>STEP 3 : Training models</span>

In [11]:
import warnings
# NOTE(review): with no category/module argument this filter hides *every*
# warning raised from here on (convergence, deprecation, undefined metrics, ...).
# Consider restricting it to the specific warning category that is noisy.
warnings.filterwarnings('ignore')

In [12]:
from sklearn.metrics import classification_report

In [13]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [14]:
# Candidate classifiers to compare, all with default hyper-parameters except
# where noted. The tree-based estimators are randomized, so their seed is
# pinned (42 is arbitrary) to make a Restart & Run All reproduce the same
# classification reports.
models = [
    LogisticRegression(max_iter=1000),
    SVC(),
    GaussianNB(),
    DecisionTreeClassifier(random_state=42),
    RandomForestClassifier(random_state=42),
]

In [15]:
def ModelTraining(ML_models, X_train, y_train, X_test, y_test):
    """Fit every estimator and print its training and testing classification reports.

    Parameters
    ----------
    ML_models : iterable
        Estimator objects exposing ``fit(X, y)`` and ``predict(X)``. ``fit`` is
        called on the passed-in objects themselves; no copies are made.
    X_train, X_test : objects with a ``.values`` attribute
        Feature containers; only ``.values`` is handed to the estimators.
    y_train, y_test : array-like
        Target labels matching ``X_train`` / ``X_test``.

    Returns
    -------
    None
        All results are written to stdout via ``print``.
    """
    # ANSI escape sequences that switch bold text on and off.
    bold_on, bold_off = "\33[1m", "\33[0m"

    for estimator in ML_models:
        estimator.fit(X_train.values, y_train)

        # Both predictions are computed before anything is printed, so a
        # failing predict() never leaves a half-written report behind.
        evaluations = (
            ("Training", y_train, estimator.predict(X_train.values)),
            ("Testing", y_test, estimator.predict(X_test.values)),
        )

        print(f"******************* {bold_on}{estimator!s}{bold_off} *******************\n")

        for split_name, y_true, y_pred in evaluations:
            print(f"{bold_on}{split_name} classification Report :{bold_off} \n")
            print(classification_report(y_true, y_pred, zero_division=1))
            print()
        

In [16]:
# Fit every candidate model and print its train / test classification reports.
ModelTraining(
    ML_models=models,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)

******************* [1mLogisticRegression(max_iter=1000)[0m *******************

[1mTraining classification Report :[0m 

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        38
           1       0.97      0.95      0.96        40
           2       0.95      0.97      0.96        39

    accuracy                           0.97       117
   macro avg       0.97      0.97      0.97       117
weighted avg       0.97      0.97      0.97       117


[1mTesting classification Report :[0m 

              precision    recall  f1-score   support

           0       1.00      0.90      0.95        10
           1       0.71      1.00      0.83        10
           2       1.00      0.70      0.82        10

    accuracy                           0.87        30
   macro avg       0.90      0.87      0.87        30
weighted avg       0.90      0.87      0.87        30


******************* [1mSVC()[0m *******************

[1mTraining 