# Abalone Age Prediction

## Import the libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import quantile_transform
from sklearn.preprocessing import minmax_scale
from sklearn.preprocessing import OneHotEncoder  ##. better to use dummy from pandas 
from sklearn.preprocessing import PowerTransformer
from scipy.stats import boxcox
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import statsmodels.api as sm
from scipy.stats import boxcox
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
pd.options.display.max_rows = 50

  from pandas import Int64Index as NumericIndex


In [2]:
# Read abalone.csv (path is relative to the notebook's working directory)
# into a DataFrame, then display it as the cell output.
df_abalone = pd.read_csv("abalone.csv")
df_abalone

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,M,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,15
1,M,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7
2,F,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9
3,M,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10
4,I,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7
...,...,...,...,...,...,...,...,...,...
4172,F,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11
4173,M,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10
4174,M,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9
4175,F,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10


## Lowercase all the column names

In [3]:
def lower_case_column_names(df_abalone):
    """Lower-case every column label of ``df_abalone`` in place.

    Parameters
    ----------
    df_abalone : pandas.DataFrame
        Frame whose column labels are strings; ``.lower()`` is called on
        each label.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with its ``columns`` replaced
        by the lower-cased labels.
    """
    lowered_labels = []
    for label in df_abalone.columns:
        lowered_labels.append(label.lower())
    # Assigning to .columns relabels the caller's frame; no copy is made.
    df_abalone.columns = lowered_labels
    return df_abalone

In [4]:
# The helper relabels the frame's columns in place and returns the same
# object, so the reassignment just keeps the name bound to it.
df_abalone = lower_case_column_names(df_abalone)
df_abalone

Unnamed: 0,sex,length,diameter,height,whole weight,shucked weight,viscera weight,shell weight,rings
0,M,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,15
1,M,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7
2,F,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9
3,M,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10
4,I,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7
...,...,...,...,...,...,...,...,...,...
4172,F,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11
4173,M,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10
4174,M,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9
4175,F,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10


## Checking the DF

In [5]:
# Print each column's dtype and non-null count, plus memory usage.
df_abalone.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4177 entries, 0 to 4176
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   sex             4177 non-null   object 
 1   length          4177 non-null   float64
 2   diameter        4177 non-null   float64
 3   height          4177 non-null   float64
 4   whole weight    4177 non-null   float64
 5   shucked weight  4177 non-null   float64
 6   viscera weight  4177 non-null   float64
 7   shell weight    4177 non-null   float64
 8   rings           4177 non-null   int64  
dtypes: float64(7), int64(1), object(1)
memory usage: 293.8+ KB


In [6]:
# Number of rows for each distinct value of the sex column.
df_abalone["sex"].value_counts()

M    1528
I    1342
F    1307
Name: sex, dtype: int64

In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
# NOTE(review): the recorded output shows a minimum height of 0.0 — worth
# checking whether those rows are valid measurements.
df_abalone.describe()

Unnamed: 0,length,diameter,height,whole weight,shucked weight,viscera weight,shell weight,rings
count,4177.0,4177.0,4177.0,4177.0,4177.0,4177.0,4177.0,4177.0
mean,0.523992,0.407881,0.139516,0.828742,0.359367,0.180594,0.238831,9.933684
std,0.120093,0.09924,0.041827,0.490389,0.221963,0.109614,0.139203,3.224169
min,0.075,0.055,0.0,0.002,0.001,0.0005,0.0015,1.0
25%,0.45,0.35,0.115,0.4415,0.186,0.0935,0.13,8.0
50%,0.545,0.425,0.14,0.7995,0.336,0.171,0.234,9.0
75%,0.615,0.48,0.165,1.153,0.502,0.253,0.329,11.0
max,0.815,0.65,1.13,2.8255,1.488,0.76,1.005,29.0


In [8]:
# Count missing values per column.
df_abalone.isna().sum()

sex               0
length            0
diameter          0
height            0
whole weight      0
shucked weight    0
viscera weight    0
shell weight      0
rings             0
dtype: int64

## Adding the age column

In [9]:
# Derive age as the ring count plus 1.5; the integer rings column becomes a
# float age column.
# NOTE(review): presumably the 1.5 offset converts rings to years — confirm
# against the dataset description.
df_abalone["age"] = df_abalone["rings"].add(1.5)
df_abalone

Unnamed: 0,sex,length,diameter,height,whole weight,shucked weight,viscera weight,shell weight,rings,age
0,M,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,15,16.5
1,M,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7,8.5
2,F,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9,10.5
3,M,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10,11.5
4,I,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7,8.5
...,...,...,...,...,...,...,...,...,...,...
4172,F,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11,12.5
4173,M,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10,11.5
4174,M,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9,10.5
4175,F,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10,11.5


## Changing Column names

In [10]:
# Rename both weight columns in one in-place call.
df_abalone.rename(
    columns={
        "shucked weight": "shucked/meat weight",
        "viscera weight": "gut weight",
    },
    inplace=True,
)

In [11]:
# Display the frame to check the renamed columns.
df_abalone

Unnamed: 0,sex,length,diameter,height,whole weight,shucked/meat weight,gut weight,shell weight,rings,age
0,M,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,15,16.5
1,M,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7,8.5
2,F,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9,10.5
3,M,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10,11.5
4,I,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7,8.5
...,...,...,...,...,...,...,...,...,...,...
4172,F,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11,12.5
4173,M,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10,11.5
4174,M,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9,10.5
4175,F,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10,11.5


## Dropping Duplicates

In [12]:
# drop_duplicates() returns a new DataFrame and leaves the caller untouched,
# so the result must be assigned back; calling it bare removes nothing.
# reset_index(drop=True) renumbers the surviving rows 0..n-1 so the index
# stays contiguous if any rows are removed.
df_abalone = df_abalone.drop_duplicates().reset_index(drop=True)
df_abalone.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4177 entries, 0 to 4176
Data columns (total 10 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   sex                  4177 non-null   object 
 1   length               4177 non-null   float64
 2   diameter             4177 non-null   float64
 3   height               4177 non-null   float64
 4   whole weight         4177 non-null   float64
 5   shucked/meat weight  4177 non-null   float64
 6   gut weight           4177 non-null   float64
 7   shell weight         4177 non-null   float64
 8   rings                4177 non-null   int64  
 9   age                  4177 non-null   float64
dtypes: float64(8), int64(1), object(1)
memory usage: 326.5+ KB


## Transform columns from decimeters to cm

In [13]:
# Multiply the three size columns by 10 (decimetres -> centimetres according
# to the section heading). The cell reads and overwrites the same columns,
# so running it a second time would scale the values again.
df_abalone[["length", "diameter", "height"]] = (
    df_abalone[["length", "diameter", "height"]].mul(10)
)

In [14]:
# Display the frame to check the rescaled length, diameter and height.
df_abalone

Unnamed: 0,sex,length,diameter,height,whole weight,shucked/meat weight,gut weight,shell weight,rings,age
0,M,4.55,3.65,0.95,0.5140,0.2245,0.1010,0.1500,15,16.5
1,M,3.50,2.65,0.90,0.2255,0.0995,0.0485,0.0700,7,8.5
2,F,5.30,4.20,1.35,0.6770,0.2565,0.1415,0.2100,9,10.5
3,M,4.40,3.65,1.25,0.5160,0.2155,0.1140,0.1550,10,11.5
4,I,3.30,2.55,0.80,0.2050,0.0895,0.0395,0.0550,7,8.5
...,...,...,...,...,...,...,...,...,...,...
4172,F,5.65,4.50,1.65,0.8870,0.3700,0.2390,0.2490,11,12.5
4173,M,5.90,4.40,1.35,0.9660,0.4390,0.2145,0.2605,10,11.5
4174,M,6.00,4.75,2.05,1.1760,0.5255,0.2875,0.3080,9,10.5
4175,F,6.25,4.85,1.50,1.0945,0.5310,0.2610,0.2960,10,11.5


## Transform kg to g

In [15]:
# Multiply the four weight columns by 1000 (kg -> g according to the section
# heading). The cell reads and overwrites the same columns, so running it a
# second time would scale the values again.
df_abalone[["whole weight", "shucked/meat weight", "gut weight", "shell weight"]] = (
    df_abalone[["whole weight", "shucked/meat weight", "gut weight", "shell weight"]]
    .mul(1000)
)

In [16]:
# Display the frame to check the rescaled weight columns.
df_abalone

Unnamed: 0,sex,length,diameter,height,whole weight,shucked/meat weight,gut weight,shell weight,rings,age
0,M,4.55,3.65,0.95,514.0,224.5,101.0,150.0,15,16.5
1,M,3.50,2.65,0.90,225.5,99.5,48.5,70.0,7,8.5
2,F,5.30,4.20,1.35,677.0,256.5,141.5,210.0,9,10.5
3,M,4.40,3.65,1.25,516.0,215.5,114.0,155.0,10,11.5
4,I,3.30,2.55,0.80,205.0,89.5,39.5,55.0,7,8.5
...,...,...,...,...,...,...,...,...,...,...
4172,F,5.65,4.50,1.65,887.0,370.0,239.0,249.0,11,12.5
4173,M,5.90,4.40,1.35,966.0,439.0,214.5,260.5,10,11.5
4174,M,6.00,4.75,2.05,1176.0,525.5,287.5,308.0,9,10.5
4175,F,6.25,4.85,1.50,1094.5,531.0,261.0,296.0,10,11.5
