In [2]:
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# -----------------------------
# Load Dataset
# -----------------------------
# Fetch the "penguins" example dataset through seaborn's dataset loader.
penguins = sns.load_dataset("penguins")

# Remove missing rows
# The drop happens in place and the row index is not renumbered, which is
# why the displayed index has gaps (e.g. 3 is absent).
penguins.dropna(inplace=True)
penguins

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,Male
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,Female
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,Female
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,Female
5,Adelie,Torgersen,39.3,20.6,190.0,3650.0,Male
...,...,...,...,...,...,...,...
338,Gentoo,Biscoe,47.2,13.7,214.0,4925.0,Female
340,Gentoo,Biscoe,46.8,14.3,215.0,4850.0,Female
341,Gentoo,Biscoe,50.4,15.7,222.0,5750.0,Male
342,Gentoo,Biscoe,45.2,14.8,212.0,5200.0,Female


In [3]:
from sklearn.preprocessing import LabelEncoder

# Encode Categorical Columns
# One encoder per column, kept in its own variable so the fitted mapping can
# be reapplied to new inputs and saved later in the notebook.
le_sex, le_island = LabelEncoder(), LabelEncoder()

sex_codes = le_sex.fit_transform(penguins["sex"])
island_codes = le_island.fit_transform(penguins["island"])

# Overwrite the string columns with their integer codes.
penguins["sex"] = sex_codes
penguins["island"] = island_codes

penguins

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,2,39.1,18.7,181.0,3750.0,1
1,Adelie,2,39.5,17.4,186.0,3800.0,0
2,Adelie,2,40.3,18.0,195.0,3250.0,0
4,Adelie,2,36.7,19.3,193.0,3450.0,0
5,Adelie,2,39.3,20.6,190.0,3650.0,1
...,...,...,...,...,...,...,...
338,Gentoo,0,47.2,13.7,214.0,4925.0,0
340,Gentoo,0,46.8,14.3,215.0,4850.0,0
341,Gentoo,0,50.4,15.7,222.0,5750.0,1
342,Gentoo,0,45.2,14.8,212.0,5200.0,0


In [4]:
# Features & Target
# "species" is the label to predict; every remaining column is a predictor.
y = penguins["species"]
X = penguins.drop(columns="species")


# Train-Test Split
# A quarter of the rows are held out for evaluation; the fixed seed makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [5]:
# Inspect the training features produced by the split.
X_train

Unnamed: 0,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
321,0,55.9,17.0,228.0,5600.0,1
265,0,43.6,13.9,217.0,4900.0,0
36,1,38.8,20.0,190.0,3950.0,1
308,0,47.5,14.0,212.0,4875.0,0
191,1,53.5,19.9,205.0,4500.0,1
...,...,...,...,...,...,...
194,1,50.9,19.1,196.0,3550.0,1
77,2,37.2,19.4,184.0,3900.0,1
112,0,39.7,17.7,193.0,3200.0,0
277,0,45.5,15.0,220.0,5000.0,1


In [6]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits.
scaler = StandardScaler().fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [7]:
# Model Training
# Fit a decision tree on the scaled training split (seeded for repeatability).
model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Evaluation
# Compare predictions on the held-out split with the true labels.
pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, pred))


Accuracy: 0.9761904761904762


## User Input for Prediction

In [8]:
# Taking inputs
# Numeric measurements are parsed with float(), which raises ValueError on
# non-numeric text.
bill_length = float(input("Bill Length (mm): "))
bill_depth  = float(input("Bill Depth (mm): "))
flipper_len = float(input("Flipper Length (mm): "))
body_mass   = float(input("Body Mass (g): "))

# The label encoders were fitted on capitalized labels ("Male"/"Female" and
# "Biscoe"/"Dream"/"Torgersen"). Normalize whitespace and letter case here so
# that answers such as "male" or " biscoe " still match; without this the
# later encoder.transform call fails on an unseen label.
sex_input   = input("Sex (male/female): ").strip().capitalize()
island_input = input("Island (Dream/Biscoe/Torgersen): ").strip().capitalize()

In [9]:
# Create dataframe for prediction
# A single-row frame; the keys are listed in the same column order as the
# training features.
input_data = pd.DataFrame(
    {
        "island": [island_input],
        "bill_length_mm": [bill_length],
        "bill_depth_mm": [bill_depth],
        "flipper_length_mm": [flipper_len],
        "body_mass_g": [body_mass],
        "sex": [sex_input],
    }
)


input_data

Unnamed: 0,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Biscoe,12.0,12.0,12.0,12.0,Male


In [10]:
# Encode user categorical inputs using same encoders
# Only transform() is called, so the mappings learned during training are
# reused rather than refitted.
for column, encoder in (("sex", le_sex), ("island", le_island)):
    input_data[column] = encoder.transform(input_data[column])

input_data

Unnamed: 0,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,0,12.0,12.0,12.0,12.0,1


In [11]:
# Apply the already-fitted scaler to the encoded input row.
# NOTE: this rebinds input_data from a DataFrame to a NumPy array, so
# re-running this cell would scale values that are already scaled.
input_data = scaler.transform(input_data)

input_data

array([[ -0.91502381,  -5.98284313,  -2.63162461, -13.66562746,
         -5.1723424 ,   0.96448564]])

In [12]:
# Prediction
# Only one row was supplied, so the first element of the result is the
# predicted species for the user's input.
result = model.predict(input_data)
print("\nPredicted Species:", result[0])


Predicted Species: Adelie


## Save the Model and Preprocessors


In [13]:
import pickle

# Persist the trained classifier. The context manager closes the file handle
# (flushing buffered bytes) even if pickling raises, instead of leaving an
# open handle for the garbage collector.
with open("final_model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

In [14]:
# Alternative (disabled): persist the model with joblib instead of pickle.
# import joblib

# joblib.dump(model, "model.pkl")

In [15]:
# Persist the fitted preprocessing objects next to the model so the same
# scaling and label mappings can be reapplied at prediction time.
preprocessing_artifacts = {
    "scaler.pkl": scaler,
    "sex_encoder.pkl": le_sex,
    "island_encoder.pkl": le_island,
}

for artifact_path, artifact in preprocessing_artifacts.items():
    # The context manager closes each file handle even if pickling raises.
    with open(artifact_path, "wb") as artifact_file:
        pickle.dump(artifact, artifact_file)

In [16]:
pip show scikit-learn

Name: scikit-learn
Version: 1.4.0
Summary: A set of python modules for machine learning and data mining
Home-page: https://scikit-learn.org
Author: 
Author-email: 
License: new BSD
Location: c:\Users\Admin\anaconda3\Lib\site-packages
Requires: joblib, numpy, scipy, threadpoolctl
Required-by: daal4py, imbalanced-learn, librosa, lime, pmdarima, scikit-learn-intelex, torch_geometric
Note: you may need to restart the kernel to use updated packages.
