In [7]:
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import classification_report, confusion_matrix

%matplotlib inline
pd.set_option("display.max_columns",None)
%config Completer.use_jedi = False

## Train Test Split

In [None]:
# Separate the "species" target column from the remaining feature columns.
y = df2["species"]
X = df2.drop(columns="species")

print("Shape of X: ", X.shape)
print("Shape of y: ", y.shape)

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2020,
)

print("Shape of X_train: ", X_train.shape)
print("Shape of X_test: ", X_test.shape)
print("Shape of y_train: ", y_train.shape)
print("Shape of y_test: ", y_test.shape)

## Feature Scaling

In [None]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so no test-set information leaks into it.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

## Modelling

In [6]:
# Display names for the classifiers returned by `model`, keyed by their
# 1-based position in the returned tuple.
model_dict = {1: "Decision Tree", 2: "Random Forest", 3: "KNN"}
def model(X_train, y_train, X_test, y_test, random_state=2020):
    """Fit a decision tree, a random forest and a KNN classifier and print their accuracy.

    Each classifier is fitted on ``(X_train, y_train)`` and scored with
    ``.score(X_test, y_test)``; the score is printed multiplied by 100.

    Parameters
    ----------
    X_train, y_train : training features and labels.
    X_test, y_test : held-out features and labels used for scoring.
    random_state : seed passed to the decision tree and the random forest so
        repeated runs give the same models and accuracies. Pass ``None`` to
        get the previous unseeded (run-to-run varying) behaviour.

    Returns
    -------
    tuple
        ``(dt_classifier, rf_classifier, knn_classifier)`` — the fitted
        estimators, in the same order as the keys 1, 2, 3 of ``model_dict``.
    """

    ## Decision Tree
    dt_classifier = DecisionTreeClassifier(criterion="gini", random_state=random_state)
    dt_classifier.fit(X_train,y_train)
    dt_classifier_score = dt_classifier.score(X_test, y_test)
    print("Decision Tree Accuracy: ", dt_classifier_score * 100)

    ## Random Forest
    rf_classifier = RandomForestClassifier(
        n_estimators=100, criterion="gini", random_state=random_state
    )
    rf_classifier.fit(X_train,y_train)
    rf_classifier_score = rf_classifier.score(X_test, y_test)
    print("Random Forest Accuracy: ", rf_classifier_score * 100)

    ## KNN (takes no random_state parameter, so nothing to seed here)
    knn_classifier = KNeighborsClassifier(n_neighbors=5)
    knn_classifier.fit(X_train,y_train)
    knn_classifier_score = knn_classifier.score(X_test, y_test)
    print("KNN Accuracy: ", knn_classifier_score * 100)

    return dt_classifier, rf_classifier, knn_classifier

In [None]:
# Fit all three classifiers; the returned tuple is shown as the cell output.
models = model(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)
models

### Choosing K Value for K-NN

In [None]:
def choose_k_value(X_train, y_train, X_test, y_test, start_k_value = 1, end_k_value = 40):
    """Return the KNN misclassification rate for each k in a range.

    For every ``k`` in ``range(start_k_value, end_k_value)`` (the end value is
    excluded) a ``KNeighborsClassifier(n_neighbors=k)`` is fitted on
    ``(X_train, y_train)`` and the fraction of ``X_test`` predictions that
    differ from ``y_test`` is recorded.

    Returns
    -------
    list
        One error rate per k, in increasing-k order; empty when the range is empty.
    """
    def _misclassification_rate(n_neighbors):
        # Fit a fresh classifier for this k and measure its share of wrong predictions.
        clf = KNeighborsClassifier(n_neighbors=n_neighbors)
        clf.fit(X_train, y_train)
        return np.mean(clf.predict(X_test) != y_test)

    return [_misclassification_rate(k) for k in range(start_k_value, end_k_value)]

In [None]:
# Sweep k over the default range and show the resulting error rates.
error_rate = choose_k_value(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)
error_rate

In [None]:
## Plotting Error rate

# Derive the x positions from the results instead of hard-coding range(1,40),
# so the plot cannot fall out of sync with the number of k values evaluated.
# Assumes the sweep started at k=1 (the default start_k_value of choose_k_value).
k_values = range(1, len(error_rate) + 1)

plt.figure(figsize=(10,6))
plt.plot(k_values,error_rate,color='blue', linestyle='dashed', marker='o',
         markerfacecolor='red', markersize=10)
plt.title('Error Rate vs. K Value')
plt.xlabel('K')
plt.ylabel('Error Rate')
# Render the figure explicitly so the cell does not echo the Text(...) repr.
plt.show()

In [None]:
## Final Model

# Refit a standalone 5-neighbour KNN and print its test-set accuracy times 100.
knn_classifier = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
knn_classifier_score = knn_classifier.score(X_test, y_test)
print("KNN Accuracy: ", knn_classifier_score * 100)

## Prediction

In [None]:
def prediciton(X_test ,models, model_dict):
    """Print each model's prediction for a single sample.

    Parameters
    ----------
    X_test : one sample's feature values (a 1-D sequence); it is wrapped in a
        list so each model receives a batch containing exactly one row.
    models : iterable of fitted estimators exposing ``predict``.
    model_dict : mapping from 1-based position in ``models`` to a display name.

    Returns
    -------
    None
        Results are only printed.
    """
    print("Models Predictions:\n")
    # model_dict keys are 1-based, so number the models starting from 1.
    for position, estimator in enumerate(models, start=1):
        predicted = estimator.predict([X_test])
        print(f"{model_dict[position]} Prediction: {predicted[0]}")

In [None]:
# Show the label of the test sample at position 1.
y_test.iloc[1]

In [None]:
## Testing

index = 1
# X_test was overwritten with the output of sc.transform(...) in the scaling
# cell, which is a NumPy array by default and has no .iloc. Going through
# np.asarray selects the row positionally whether X_test is an array or a
# DataFrame.
test = np.asarray(X_test)[index]

prediciton(test, models, model_dict)
print("\nActual Value: ", y_test.iloc[index])

## Cross Validation

In [None]:
# Evaluate the final KNN model. The original referenced `knn`, which is never
# defined in this notebook; the fitted estimator from the "Final Model" cell
# is `knn_classifier`.
pred = knn_classifier.predict(X_test)

# Report the k actually used by the evaluated model rather than a hard-coded value.
print(f'WITH K={knn_classifier.n_neighbors}')
print('\n')
print(confusion_matrix(y_test,pred))
print('\n')
print(classification_report(y_test,pred))