In [149]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation
import pickle
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix

# Read the CSV file
penguins = pd.read_csv("data/penguins.csv")
penguins_original = pd.read_csv("data/penguins.csv")

# Select columns with float64 data type
penguins = penguins.select_dtypes(include='float64')

# Remove specific columns & Empty Rows
columns_to_remove = ["Delta 15 N (o/oo)", "Delta 13 C (o/oo)"]
penguins = penguins.drop(columns=columns_to_remove)

# Include the "Species" column from the original sheet
species_column = penguins_original['Species']
penguins = pd.concat([penguins, species_column], axis=1)

penguins = penguins.dropna()
print(penguins)

     Culmen Length (mm)  Culmen Depth (mm)  Flipper Length (mm)  \
0                  39.1               18.7                181.0   
1                  39.5               17.4                186.0   
2                  40.3               18.0                195.0   
4                  36.7               19.3                193.0   
5                  39.3               20.6                190.0   
..                  ...                ...                  ...   
339                55.8               19.8                207.0   
340                43.5               18.1                202.0   
341                49.6               18.2                193.0   
342                50.8               19.0                210.0   
343                50.2               18.7                198.0   

     Body Mass (g)                                    Species  
0           3750.0        Adelie Penguin (Pygoscelis adeliae)  
1           3800.0        Adelie Penguin (Pygoscelis adeliae)  
2  

In [150]:
d = {'Adelie Penguin (Pygoscelis adeliae)': 0, 'Chinstrap penguin (Pygoscelis antarctica)': 1, 'Gentoo penguin (Pygoscelis papua)': 2}
penguins['Species'] = penguins['Species'].map(d)


penguins.head()

Unnamed: 0,Culmen Length (mm),Culmen Depth (mm),Flipper Length (mm),Body Mass (g),Species
0,39.1,18.7,181.0,3750.0,0
1,39.5,17.4,186.0,3800.0,0
2,40.3,18.0,195.0,3250.0,0
4,36.7,19.3,193.0,3450.0,0
5,39.3,20.6,190.0,3650.0,0


In [151]:
features = ['Culmen Length (mm)', 'Culmen Depth (mm)', 'Flipper Length (mm)', 'Body Mass (g)']

X = penguins[features]
y = penguins['Species']

# Split dataset into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1) # 70% training and 30% test

scaler = StandardScaler()
scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

print(X)
print(y)

     Culmen Length (mm)  Culmen Depth (mm)  Flipper Length (mm)  Body Mass (g)
0                  39.1               18.7                181.0         3750.0
1                  39.5               17.4                186.0         3800.0
2                  40.3               18.0                195.0         3250.0
4                  36.7               19.3                193.0         3450.0
5                  39.3               20.6                190.0         3650.0
..                  ...                ...                  ...            ...
339                55.8               19.8                207.0         4000.0
340                43.5               18.1                202.0         3400.0
341                49.6               18.2                193.0         3775.0
342                50.8               19.0                210.0         4100.0
343                50.2               18.7                198.0         3775.0

[342 rows x 4 columns]
0      0
1      0
2      0
4

In [152]:
mlp = MLPClassifier(hidden_layer_sizes=(10, 10, 10), max_iter=1000)
mlp.fit(X_train, y_train.values.ravel())

	39.5	17.4	186.0	3800.0	0 -> Adelie Penguin

In [153]:
print(mlp.predict([[39.5, 17.4, 186.0, 3800.0]]))

[2]


In [154]:
#Predict the response for test dataset
y_pred = mlp.predict(X_test)

# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.970873786407767


In [155]:
predictions = mlp.predict(X_test)
print(confusion_matrix(y_test,predictions))
print(classification_report(y_test,predictions))

[[45  0  0]
 [ 3 19  0]
 [ 0  0 36]]
              precision    recall  f1-score   support

           0       0.94      1.00      0.97        45
           1       1.00      0.86      0.93        22
           2       1.00      1.00      1.00        36

    accuracy                           0.97       103
   macro avg       0.98      0.95      0.96       103
weighted avg       0.97      0.97      0.97       103



In [156]:
pickle.dump(mlp , open('penguin_model' , 'wb'))