In [None]:
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

In [None]:
# Pull the penguins dataset through seaborn's loader; `d_frame` keeps the
# untouched frame, `df` is the copy with every incomplete row removed.
d_frame = sns.load_dataset("penguins")
df = d_frame.dropna()
print(
    "Dataset has been loaded successfully: \n",
    df,
)

In [None]:
# Features are the four numeric measurements; the target is the species label.
X = df[
    [
        'bill_length_mm',
        'bill_depth_mm',
        'flipper_length_mm',
        'body_mass_g',
    ]
]
y = df['species']

# Hold out 20% of the rows for testing. The split is seeded and stratified on
# the target so the class proportions are the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# One pipeline per candidate model. Only the KNN pipeline has a
# StandardScaler step in front of its classifier.
tree_pipe = Pipeline(steps=[
    ('DecisinTree', DecisionTreeClassifier(random_state=42)),
])
rf_pipe = Pipeline(steps=[
    ('rf', RandomForestClassifier(random_state=42)),
])
knn_pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('KNN', KNeighborsClassifier()),
])

In [None]:
# print("df shape after dropna:",df.shape)
# print("x shape", X.shape)
# print("any NaN in X?", X.isnull().sum().sum())

In [None]:
def _fit_and_score(pipe, label):
    """Fit `pipe` on the training split and score it on the test split.

    Reads the notebook-level `X_train`, `y_train`, `X_test` and `y_test`
    produced by the train/test split cell.

    Parameters
    ----------
    pipe : estimator exposing `fit` and `predict` (here, a Pipeline)
    label : str
        Human-readable model name used in the printed report.

    Returns
    -------
    (predictions, accuracy) : predictions for `X_test` and the accuracy of
        those predictions against `y_test`.
    """
    pipe.fit(X_train, y_train)
    predictions = pipe.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    # The reported number is an accuracy score, not a prediction.
    print(f"Accuracy of {label} on the test set: {accuracy:.4f}")
    return predictions, accuracy


# Train and evaluate each candidate. The pred_N / score_N names are kept
# because the model-selection cell reads score_1..score_3.
pred_1, score_1 = _fit_and_score(tree_pipe, "Decision Tree")
pred_2, score_2 = _fit_and_score(rf_pipe, "Random Forest")
pred_3, score_3 = _fit_and_score(knn_pipe, "KNN")

In [None]:
import joblib

# Test-set accuracy of each candidate, keyed by display name.
scores = {'Decision Tree': score_1,
          'Random Forest': score_2,
          'KNN': score_3}

# Fitted pipelines keyed by the SAME names as `scores`. The lookup used to be
# keyed 'Random Forest Classifier', which raised KeyError whenever the random
# forest had the highest score.
models = {'Decision Tree': tree_pipe,
          'Random Forest': rf_pipe,
          'KNN': knn_pipe}

# max() over the dict iterates its keys; on a tie the first key in insertion
# order wins.
best_name = max(scores, key=scores.get)
best_model = models[best_name]
print(f"Best model is {best_name} with accuracy score {scores[best_name]:.4f}")

# Persist the winning fitted pipeline so the prediction cell can reload it.
joblib.dump(best_model, "Best_Penguin_model.pkl")

In [None]:
# Reload the pipeline written by the model-selection cell. The path must match
# the one passed to joblib.dump there ("Best_Penguin_model.pkl"); the previous
# name "Penguin_Day2_Best.pkl" is never written by this notebook.
# NOTE(review): joblib.load unpickles the file, so only load artifacts that
# this notebook (or another trusted source) produced.
load_model = joblib.load("Best_Penguin_model.pkl")

# Read one measurement per feature from the user; float() raises ValueError
# on non-numeric input.
a = float(input("Enter bill length (mm): "))
b = float(input("Enter bill depth (mm): "))
c = float(input("Enter flipper length (mm): "))
d = float(input("Enter body mass (g): "))

# Build a single-row frame whose columns have the same names and order as the
# training features, so the pipeline receives input shaped like what it was
# fitted on (a bare nested list carries no feature names).
user_val = pd.DataFrame(
    [[a, b, c, d]],
    columns=['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g'],
)
u_pred = load_model.predict(user_val)[0]
print(f"Prediction by the best model {load_model} is {u_pred}")