In [None]:
import joblib
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

# (step-1) Fetch seaborn's 'penguins' sample dataset, then discard every row containing a NaN.
penguins_raw = sns.load_dataset('penguins')
df = penguins_raw.dropna()
print("Dataset has been loaded: \n", df)

In [None]:
# One seed shared by the split and by both randomized estimators, so that a
# Restart & Run All reproduces the same scores and the same "best" model.
SEED = 15

# Four numeric measurements are the features; the species column is the target.
X = df[['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']]
y = df['species']

# (step-2) 80% of the rows are used for training and 20% for testing;
# stratify=y keeps the species proportions the same in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED, stratify=y
)

# --- Model 1: decision tree ---------------------------------------------
# random_state is set because the tree permutes features at each split, so
# an unseeded tree can differ from run to run.
model_1 = DecisionTreeClassifier(random_state=SEED)
model_1.fit(X_train, y_train)           # (step-3) train on the 80% part

pred_1 = model_1.predict(X_test)        # (step-4) predict on the 20% test part
print(pred_1)
score_1 = accuracy_score(y_test, pred_1)
print(f"First predicton is: {score_1:.4f}")      # (step-5) accuracy: predictions compared with the true y_test labels

# --- Model 2: random forest ---------------------------------------------
# Seeded for the same reason: bootstrap samples and feature subsets are random.
model_2 = RandomForestClassifier(random_state=SEED)
model_2.fit(X_train, y_train)

pred_2 = model_2.predict(X_test)
print(pred_2)
score_2 = accuracy_score(y_test, pred_2)
print(f"Second prediction is: {score_2:.4f}")

# --- Model 3: k-nearest neighbours on standardized features --------------
# The scaler is fitted on the training part only and then applied to both
# parts, so no test-set statistics leak into training.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# NOTE: model_3 only ever sees scaled inputs; any later use of it on raw
# measurements must apply `scaler` first.
model_3 = KNeighborsClassifier()
model_3.fit(X_train_scaled, y_train)

pred_3 = model_3.predict(X_test_scaled)
print(pred_3)
score_3 = accuracy_score(y_test, pred_3)
print(f"Third prediction is: {score_3:.4f}")

In [None]:
# Bar chart comparing the test accuracy of the three trained models.
s = plt.figure(figsize=(12, 8))
plot = plt.subplot(2, 2, 1)

scores = [score_1, score_2, score_3]
models = ['D TREE', 'RF', 'KNN']
colors = ['Green', 'yellow', 'skyblue']

# bar(x, height): the model names are the categories on the x-axis and the
# accuracies are the bar heights. Passing them the other way round plots the
# label strings as heights at float x-positions.
bars = plot.bar(models, scores, color=colors, linewidth=1.2, edgecolor='black')

plot.set_xlabel('Model')
plot.set_ylabel('Accuracy')
plot.set_title('Test accuracy by model')
plot.set_ylim(0, 1)     # accuracy_score is a fraction between 0 and 1

plt.tight_layout()
plt.show()


In [None]:
# Choose the model with the highest test accuracy and persist it with joblib.
#
# Every candidate must accept RAW measurements, because that is what is passed
# to the reloaded model later. model_3 (KNN) was trained on standardized
# features, so it is bundled with the already-fitted `scaler` in a pipeline;
# saving it bare would make it classify raw values in the wrong feature space.
# make_pipeline does not refit or clone its steps, so the fitted objects are
# reused as-is.
candidates = [
    (score_1, model_1),
    (score_2, model_2),
    (score_3, make_pipeline(scaler, model_3)),
]

# max() returns the first maximal entry, so ties resolve in list order
# (model_1, then model_2, then model_3).
best_score, save_model = max(candidates, key=lambda pair: pair[0])

# joblib.dump returns the list of file names it wrote.
s_model = joblib.dump(save_model, 'Penguin_Day2_Best.pkl')
print("Best Model has been saved as PKL!")

In [None]:
def _read_positive_float(prompt):
    """Prompt repeatedly until the user enters a finite number greater than zero.

    Parameters
    ----------
    prompt : str
        Text shown to the user by ``input``.

    Returns
    -------
    float
        The validated measurement.
    """
    while True:
        raw = input(prompt)
        try:
            value = float(raw)
        except ValueError:
            # A typo should not abort the cell with a traceback; ask again.
            print(f"'{raw}' is not a number, please try again.")
            continue
        # The chained comparison also rejects nan (all comparisons False) and inf.
        if 0 < value < float('inf'):
            return value
        print("Please enter a finite value greater than zero.")


a = _read_positive_float('Enter bill_length (mm): ')
b = _read_positive_float('Enter bill_depth (mm): ')
c = _read_positive_float('Enter flipper_length (mm): ')
d = _read_positive_float('Enter body_mass (g): ')

# One row, with the values in the same order as the training feature columns.
user_val = [[a, b, c, d]]

# NOTE: joblib.load unpickles the file and can execute arbitrary code; only
# load artifacts produced by this notebook or another trusted source.
loaded_model = joblib.load('Penguin_Day2_Best.pkl')

# Estimators fitted on a DataFrame record their column names and warn when
# given unnamed input, so rebuild a matching frame when those names exist.
# Estimators fitted on plain arrays have no such attribute and keep the list.
if hasattr(loaded_model, 'feature_names_in_'):
    user_val = pd.DataFrame(user_val, columns=list(loaded_model.feature_names_in_))

print("Best predicted specie of penguin with the help of best model is: ", loaded_model.predict(user_val)[0])
print(loaded_model)