## KNN MODELS

In [3]:
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report


## OverSampling Method

In [2]:
def over_sampling(X_train,y_train):
    oversampling = SMOTE()
    X_train, y_train = oversampling.fit_resample(X_train, y_train)
    return X_train,y_train

## Model Method

In [19]:
def model(X_train, y_train, X_val, y_val, oversampling=True, n_neighbors=1):
    if oversampling:
        X_train, y_train = over_sampling(X_train, y_train)
    knn = KNeighborsClassifier(n_neighbors=n_neighbors)
    knn.fit(X_train, y_train)
    print(y_train.value_counts())
    preds_knn = knn.predict(X_val)
    print(confusion_matrix(y_val, preds_knn))
    print(classification_report(y_val, preds_knn))


## KNN Without Anything

In [4]:
train_data = pd.read_csv(
    'F:\\clustering-classification-dashboard\\data\\train_data_without_anything.csv')
val_data=pd.read_csv(
    'F:\\clustering-classification-dashboard\\data\\val_data_without_anything.csv')


In [5]:
X_train = train_data.drop(columns=['species'])
y_train = train_data['species']
X_val = val_data.drop(columns=['species'])
y_val = val_data['species']

In [20]:
model(X_train,y_train,X_val,y_val)

0    121
1    121
2    121
Name: species, dtype: int64
[[30  0  0]
 [ 0 24  1]
 [ 3  0 11]]
              precision    recall  f1-score   support

           0       0.91      1.00      0.95        30
           1       1.00      0.96      0.98        25
           2       0.92      0.79      0.85        14

    accuracy                           0.94        69
   macro avg       0.94      0.92      0.93        69
weighted avg       0.94      0.94      0.94        69



In [21]:
model(X_train, y_train, X_val, y_val,oversampling=False)


0    121
1     98
2     54
Name: species, dtype: int64
[[30  0  0]
 [ 0 24  1]
 [ 3  0 11]]
              precision    recall  f1-score   support

           0       0.91      1.00      0.95        30
           1       1.00      0.96      0.98        25
           2       0.92      0.79      0.85        14

    accuracy                           0.94        69
   macro avg       0.94      0.92      0.93        69
weighted avg       0.94      0.94      0.94        69



## KNN with Min_Max Scaler

In [23]:
train_data = pd.read_csv(
    'F:\\clustering-classification-dashboard\\data\\train_data_with_min_max_scaler.csv')
val_data = pd.read_csv(
    'F:\\clustering-classification-dashboard\\data\\val_data_with_min_max_scaler.csv')


In [24]:
X_train = train_data.drop(columns=['species'])
y_train = train_data['species']
X_val = val_data.drop(columns=['species'])
y_val = val_data['species']


In [25]:
model(X_train, y_train, X_val, y_val)


0    121
1    121
2    121
Name: species, dtype: int64
[[29  0  1]
 [ 0 25  0]
 [ 0  0 14]]
              precision    recall  f1-score   support

           0       1.00      0.97      0.98        30
           1       1.00      1.00      1.00        25
           2       0.93      1.00      0.97        14

    accuracy                           0.99        69
   macro avg       0.98      0.99      0.98        69
weighted avg       0.99      0.99      0.99        69



In [26]:
model(X_train, y_train, X_val, y_val, oversampling=False)


0    121
1     98
2     54
Name: species, dtype: int64
[[29  0  1]
 [ 0 25  0]
 [ 0  0 14]]
              precision    recall  f1-score   support

           0       1.00      0.97      0.98        30
           1       1.00      1.00      1.00        25
           2       0.93      1.00      0.97        14

    accuracy                           0.99        69
   macro avg       0.98      0.99      0.98        69
weighted avg       0.99      0.99      0.99        69



## KNN with Std_Scaler

In [27]:
train_data_with_min_max = pd.read_csv(
    'F:\\clustering-classification-dashboard\\data\\train_data_with_std_scaler.csv')
val_data_with_min_max = pd.read_csv(
    'F:\\clustering-classification-dashboard\\data\\val_data_with_std_scaler.csv')


In [28]:
X_train = train_data.drop(columns=['species'])
y_train = train_data['species']
X_val = val_data.drop(columns=['species'])
y_val = val_data['species']


In [29]:
model(X_train, y_train, X_val, y_val)


0    121
1    121
2    121
Name: species, dtype: int64
[[30  0  0]
 [ 0 24  1]
 [ 3  0 11]]
              precision    recall  f1-score   support

           0       0.91      1.00      0.95        30
           1       1.00      0.96      0.98        25
           2       0.92      0.79      0.85        14

    accuracy                           0.94        69
   macro avg       0.94      0.92      0.93        69
weighted avg       0.94      0.94      0.94        69



In [30]:
model(X_train, y_train, X_val, y_val, oversampling=False)


0    121
1     98
2     54
Name: species, dtype: int64
[[30  0  0]
 [ 0 24  1]
 [ 3  0 11]]
              precision    recall  f1-score   support

           0       0.91      1.00      0.95        30
           1       1.00      0.96      0.98        25
           2       0.92      0.79      0.85        14

    accuracy                           0.94        69
   macro avg       0.94      0.92      0.93        69
weighted avg       0.94      0.94      0.94        69



## KNN with Robust_Scaler

In [31]:
train_data_with_min_max = pd.read_csv(
    'F:\\clustering-classification-dashboard\\data\\train_data_with_robust_scaler.csv')
val_data_with_min_max = pd.read_csv(
    'F:\\clustering-classification-dashboard\\data\\val_data_with_robust_scaler.csv')


In [32]:
X_train = train_data.drop(columns=['species'])
y_train = train_data['species']
X_val = val_data.drop(columns=['species'])
y_val = val_data['species']


In [33]:
model(X_train, y_train, X_val, y_val)


0    121
1    121
2    121
Name: species, dtype: int64
[[30  0  0]
 [ 0 24  1]
 [ 3  0 11]]
              precision    recall  f1-score   support

           0       0.91      1.00      0.95        30
           1       1.00      0.96      0.98        25
           2       0.92      0.79      0.85        14

    accuracy                           0.94        69
   macro avg       0.94      0.92      0.93        69
weighted avg       0.94      0.94      0.94        69



In [34]:
model(X_train, y_train, X_val, y_val, oversampling=False)


0    121
1     98
2     54
Name: species, dtype: int64
[[30  0  0]
 [ 0 24  1]
 [ 3  0 11]]
              precision    recall  f1-score   support

           0       0.91      1.00      0.95        30
           1       1.00      0.96      0.98        25
           2       0.92      0.79      0.85        14

    accuracy                           0.94        69
   macro avg       0.94      0.92      0.93        69
weighted avg       0.94      0.94      0.94        69

