# Importing Python Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Reading Dataset

In [2]:
# Location of the fish measurements CSV, relative to the notebook's working directory.
DATASET_PATH = 'Fish_dataset.csv'

# Load the raw measurements into a DataFrame.
data = pd.read_csv(DATASET_PATH)

# Bare last expression so the notebook renders the frame.
data

Unnamed: 0,ID,Species,Weight,Length1,Length2,Length3,Height,Width
0,1,Bream,242.0,23.2,25.4,30.0,11.5200,4.0200
1,2,Bream,290.0,24.0,26.3,31.2,12.4800,4.3056
2,3,Bream,340.0,23.9,26.5,31.1,12.3778,4.6961
3,4,Bream,363.0,26.3,29.0,33.5,12.7300,4.4555
4,5,Bream,430.0,26.5,29.0,34.0,12.4440,5.1340
...,...,...,...,...,...,...,...,...
154,155,Smelt,12.2,11.5,12.2,13.4,2.0904,1.3936
155,156,Smelt,13.4,11.7,12.4,13.5,2.4300,1.2690
156,157,Smelt,12.2,12.1,13.0,13.8,2.2770,1.2558
157,158,Smelt,19.7,13.2,14.3,15.2,2.8728,2.0672


# Exploring Dataset

In [3]:
# Print each column's dtype and non-null count, plus the frame's memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 159 entries, 0 to 158
Data columns (total 8 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   ID       159 non-null    int64  
 1   Species  159 non-null    object 
 2   Weight   159 non-null    float64
 3   Length1  159 non-null    float64
 4   Length2  159 non-null    float64
 5   Length3  159 non-null    float64
 6   Height   159 non-null    float64
 7   Width    159 non-null    float64
dtypes: float64(6), int64(1), object(1)
memory usage: 10.1+ KB


In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
# NOTE(review): the reported Weight minimum is 0.0, which is not a plausible
# fish weight — worth confirming whether that row is a data-entry error.
data.describe()

Unnamed: 0,ID,Weight,Length1,Length2,Length3,Height,Width
count,159.0,159.0,159.0,159.0,159.0,159.0,159.0
mean,80.0,398.326415,26.24717,28.415723,31.227044,8.970994,4.417486
std,46.043458,357.978317,9.996441,10.716328,11.610246,4.286208,1.685804
min,1.0,0.0,7.5,8.4,8.8,1.7284,1.0476
25%,40.5,120.0,19.05,21.0,23.15,5.9448,3.38565
50%,80.0,273.0,25.2,27.3,29.4,7.786,4.2485
75%,119.5,650.0,32.7,35.5,39.65,12.3659,5.5845
max,159.0,1650.0,59.0,63.4,68.0,18.957,8.142


# Checking Null Values

In [5]:
# isnull() yields a boolean mask; value_counts() on that mask counts each distinct
# row pattern of flags. A single all-False pattern means no value is missing.
data.isnull().value_counts()

ID     Species  Weight  Length1  Length2  Length3  Height  Width
False  False    False   False    False    False    False   False    159
dtype: int64

In [6]:
# NOTE(review): DataFrame.isna() and DataFrame.isnull() are aliases in pandas,
# so this cell is expected to report exactly what an isnull() check reports.
data.isna().value_counts()

ID     Species  Weight  Length1  Length2  Length3  Height  Width
False  False    False   False    False    False    False   False    159
dtype: int64

# Checking Duplicates

In [7]:
# duplicated() flags each row that fully repeats an earlier row (True = duplicate);
# counting the flags shows how many rows are unique versus repeated.
data.duplicated().value_counts()

False    159
dtype: int64

# Conversion of Categorical Feature into Numeric

In [8]:
# Count rows per species to list the labels that need encoding and see how
# unevenly the classes are represented.
data['Species'].value_counts()

Perch        56
Bream        35
Roach        20
Pike         17
Smelt        14
Parkki       11
Whitefish     6
Name: Species, dtype: int64

* Perch        0
* Bream        1
* Roach        2
* Pike         3
* Smelt        4
* Parkki       5
* Whitefish    6

In [10]:
# Integer code assigned to each species label (ordered by descending frequency).
SPECIES_CODES = {
    'Perch': 0,
    'Bream': 1,
    'Roach': 2,
    'Pike': 3,
    'Smelt': 4,
    'Parkki': 5,
    'Whitefish': 6,
}

# Apply the whole mapping in one pass instead of one replace() call per label.
# replace() leaves values that are not keys untouched, so re-running this cell
# on already-encoded data is a no-op. The explicit int64 cast guarantees a
# numeric column and raises here if any label is missing from SPECIES_CODES,
# rather than failing later inside the scaler.
data['Species'] = data['Species'].replace(SPECIES_CODES).astype('int64')

In [11]:
# Show the frame again to confirm Species now holds integer codes.
data

Unnamed: 0,ID,Species,Weight,Length1,Length2,Length3,Height,Width
0,1,1,242.0,23.2,25.4,30.0,11.5200,4.0200
1,2,1,290.0,24.0,26.3,31.2,12.4800,4.3056
2,3,1,340.0,23.9,26.5,31.1,12.3778,4.6961
3,4,1,363.0,26.3,29.0,33.5,12.7300,4.4555
4,5,1,430.0,26.5,29.0,34.0,12.4440,5.1340
...,...,...,...,...,...,...,...,...
154,155,4,12.2,11.5,12.2,13.4,2.0904,1.3936
155,156,4,13.4,11.7,12.4,13.5,2.4300,1.2690
156,157,4,12.2,12.1,13.0,13.8,2.2770,1.2558
157,158,4,19.7,13.2,14.3,15.2,2.8728,2.0672


# Feature Scaling - Standardization

In [12]:
# Take the labels from the frame itself so they always match the column order
# of the array returned by the scaler (a hand-typed list could silently
# mislabel columns if the schema changed).
column_headers = list(data.columns)

# Standardise each column: subtract its mean and divide by its standard deviation.
std_scaler = StandardScaler()

# NOTE(review): every column is scaled, including ID (a row identifier) and
# Species (an arbitrary integer code). Confirm that is intended before
# using these two columns as model inputs.
data = pd.DataFrame(
    std_scaler.fit_transform(data),
    columns=column_headers,
    index=data.index,  # keep the original row labels
)

In [13]:
# Show the standardised frame.
data

Unnamed: 0,ID,Species,Weight,Length1,Length2,Length3,Height,Width
0,-1.721191,-0.398670,-0.438072,-0.305789,-0.282303,-0.106020,0.596579,-0.236529
1,-1.699404,-0.398670,-0.303562,-0.225507,-0.198054,-0.002337,0.821261,-0.066579
2,-1.677617,-0.398670,-0.163448,-0.235542,-0.179332,-0.010977,0.797341,0.165793
3,-1.655830,-0.398670,-0.098995,0.005302,0.054694,0.196390,0.879771,0.022621
4,-1.634043,-0.398670,0.088759,0.025372,0.054694,0.239592,0.812835,0.426371
...,...,...,...,...,...,...,...,...
154,1.634043,1.269448,-1.082039,-1.479903,-1.517960,-1.540309,-1.610359,-1.799403
155,1.655830,1.269448,-1.078676,-1.459833,-1.499238,-1.531669,-1.530878,-1.873547
156,1.677617,1.269448,-1.082039,-1.419692,-1.443072,-1.505748,-1.566687,-1.881402
157,1.699404,1.269448,-1.061022,-1.309305,-1.321378,-1.384784,-1.427243,-1.398568
