In [170]:
#Importing required libraries
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.metrics import accuracy_score

In [171]:
# Load the fish measurements from the CSV file into a DataFrame and show them
df = pd.read_csv(filepath_or_buffer='Fish.csv')
print(df)

    Species  Weight  Length1  Length2  Length3   Height   Width
0     Bream   242.0     23.2     25.4     30.0  11.5200  4.0200
1     Bream   290.0     24.0     26.3     31.2  12.4800  4.3056
2     Bream   340.0     23.9     26.5     31.1  12.3778  4.6961
3     Bream   363.0     26.3     29.0     33.5  12.7300  4.4555
4     Bream   430.0     26.5     29.0     34.0  12.4440  5.1340
..      ...     ...      ...      ...      ...      ...     ...
154   Smelt    12.2     11.5     12.2     13.4   2.0904  1.3936
155   Smelt    13.4     11.7     12.4     13.5   2.4300  1.2690
156   Smelt    12.2     12.1     13.0     13.8   2.2770  1.2558
157   Smelt    19.7     13.2     14.3     15.2   2.8728  2.0672
158   Smelt    19.9     13.8     15.0     16.2   2.9322  1.8792

[159 rows x 7 columns]


In [172]:
# Data preprocessing and cleaning
#
# Report, for every column, whether it holds at least one missing value.
df.isna().any()


Species    False
Weight     False
Length1    False
Length2    False
Length3    False
Height     False
Width      False
dtype: bool

In [173]:
# The classification target is the species, so replace the species names in
# that column with integer class codes.
# NOTE: this overwrites df['Species'] in place. Re-running the cell without
# reloading df would fit the encoder on the integer codes instead of the names.
encoder = LabelEncoder()
df['Species'] = encoder.fit_transform(df['Species'])

# Map each integer code back to its species name. Enumerating encoder.classes_
# yields the (code, name) pairs directly, and avoids a loop variable named
# `type` that would hide the built-in.
species = dict(enumerate(encoder.classes_))
print(species)

{0: 'Bream', 1: 'Parkki', 2: 'Perch', 3: 'Pike', 4: 'Roach', 5: 'Smelt', 6: 'Whitefish'}


In [174]:
# Separate the frame into the feature matrix and the target vector

# x: every column other than the encoded species label
x = df.drop(columns='Species')
# y: the encoded species label
y = df['Species']

In [175]:
# Hold out 30% of the rows for testing and train on the remaining 70%.
# random_state is fixed so the same split is produced on every run.
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.3, random_state=0)
# Optional feature standardisation, currently disabled:
# sc = StandardScaler()
# x_train = sc.fit_transform(x_train)
# x_test = sc.transform(x_test)
# print(x_train, x_test)

In [176]:
# Build, train and evaluate the logistic regression classifier

# An earlier, fully spelled-out configuration, kept for reference only:
# model = LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,intercept_scaling=1, l1_ratio=None, 
#                            max_iter=1500,multi_class='auto', n_jobs=None, penalty='l2',random_state=123, solver='lbfgs', 
#                            tol=0.0001, verbose=0,warm_start=False)

# Train on the training split; fit() returns the estimator itself.
# The iteration cap is very generous -- presumably so the solver converges on
# the unscaled features (TODO confirm).
model = LogisticRegression(max_iter=50000).fit(x_train, y_train)

# Predict the held-out rows and score them against the true labels
prediction = model.predict(x_test)
accuracy = metrics.accuracy_score(y_test, prediction)

print("Model Prediction:", prediction, "\n")
print("Model Accuracy(%):", accuracy * 100)

Model Prediction: [0 5 2 4 2 2 2 3 0 2 3 3 2 2 1 4 4 2 2 0 1 6 2 3 1 0 0 4 2 5 0 2 2 3 0 2 2
 0 2 2 2 2 2 0 5 2 4 2] 

Model Accuracy(%): 85.41666666666666


In [177]:
# Sanity check: classify one hand-entered fish (the same measurements as row 0
# of the dataset, a Bream). The values are wrapped in a DataFrame that carries
# the training column names, so the feature order is explicit and scikit-learn
# does not warn about missing feature names at predict time.
sample = pd.DataFrame([[242, 23.2, 25.4, 30, 11.52, 4.02]], columns=x.columns, dtype=float)
test = model.predict(sample)
print(test)
# Translate the predicted class code back to its species name
print(species.get(test[0]))

[0]
Bream


In [178]:
# Serialise the trained model to disk with pickle
import pickle

# The context manager closes the file even if pickle.dump raises.
with open("fish_classification.pkl", "wb") as pickle_out:
    pickle.dump(model, pickle_out)
# Shows the (now closed) file object, confirming the target file name
print(pickle_out)

<_io.BufferedWriter name='fish_classification.pkl'>
