In [None]:
from sklearn.model_selection import GridSearchCV
import pandas as pd
import seaborn as sns
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [None]:
# Load the penguins example dataset through seaborn.
# The dataset key must be lowercase: seaborn validates the name against its
# list of example datasets and rejects 'Penguins' when it is not already cached.
frame = sns.load_dataset('penguins')

# Drop every row that has at least one missing value; the raw frame is kept
# in `frame` so this cell can be re-run safely.
df = frame.dropna()
print("Dataset has been loaded successfully: \n")

# Features: the four numeric measurements. Target: the species label.
X = df[['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']]
y = df['species']

In [None]:
# Fixed seed so the stochastic estimators (decision tree, random forest)
# yield the same cross-validation scores on every Restart & Run All.
SEED = 42

# One sub-grid per candidate model. The 'estimator' key swaps the final
# pipeline step; the 'estimator__*' keys tune that step's hyper-parameters.
parameters = [
    {
        'estimator' : [DecisionTreeClassifier(random_state=SEED)],
        'estimator__max_depth' : [3, 5, 10, None],
        'estimator__min_samples_split' : [2, 5, 10, 20]
    },
    {
        'estimator' : [RandomForestClassifier(random_state=SEED)],
        'estimator__n_estimators' : [50, 100, 200],
        'estimator__max_features' : ['sqrt', None]
    },
    {
        'estimator' : [KNeighborsClassifier()],
        'estimator__n_neighbors' : [3, 5, 7, 9],
        'estimator__weights' : ['uniform', 'distance']
    }
]

# The scaler lives inside the pipeline so it is re-fitted on each training
# fold only. KNN is distance-based and needs it; the tree models are kept in
# the same pipeline for a uniform search interface.
pipe = Pipeline([
    ('scaler', StandardScaler()),
    # Placeholder final step; the grid replaces it with each candidate model.
    ('estimator', DecisionTreeClassifier(random_state=SEED))
])

# 5-fold cross-validated accuracy; refit=True retrains the best configuration
# on all of X, y so grid.best_estimator_ is ready for prediction.
grid = GridSearchCV(
    estimator = pipe,
    param_grid = parameters,
    cv = 5,
    scoring = 'accuracy',
    refit = True
)
grid.fit(X, y)
print("Data has been trained!")

In [None]:
# Summarise the outcome of the search: mean CV accuracy of the winner,
# the parameter combination that produced it, and the refit pipeline.
search_summary = (
    ("Best score is : ", grid.best_score_),
    ("Best parameter is :", grid.best_params_),
    ("Best model is: ", grid.best_estimator_),
)
for caption, value in search_summary:
    print(caption, value)

In [None]:
import joblib

# Persist the refit winning pipeline (scaler + model) to disk so it can be
# reloaded later without repeating the grid search.
best_pipeline = grid.best_estimator_
joblib.dump(best_pipeline, 'Best_model_penguin.pkl')

In [None]:
# Read the four measurements from the user, in the same order as the
# feature columns the pipeline was trained on.
a = float(input("Enter bill length (mm): "))
b = float(input("Enter bill depth (mm): "))
c = float(input("Enter flipper length (mm): "))
d = float(input("Enter body mass (g): "))

# Build a one-row DataFrame with the training column names. The pipeline was
# fitted on a named DataFrame, so passing a bare list would trigger
# scikit-learn's "X does not have valid feature names" warning and depend on
# silent positional matching.
u_val = pd.DataFrame(
    [[a, b, c, d]],
    columns=['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g'],
)

# Load the file written by the save cell. The name must match the
# joblib.dump call exactly ('Best_model_penguin.pkl').
# NOTE: joblib files are pickles; only load files you created yourself.
load_model = joblib.load('Best_model_penguin.pkl')

# Report the model that actually produces the prediction (the loaded one).
print(f"Predicted answer by best model {load_model} is: ", load_model.predict(u_val)[0])
