In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [4]:
# Load the abalone table from the CSV stored in the sibling data directory.
data = pd.read_csv(filepath_or_buffer='../data/abalone_2.csv')

In [5]:
# Preview the first five rows to sanity-check the loaded columns.
data.head(n=5)

Unnamed: 0,Length,Diameter,Height,Whole_weight,Viscera_weight,Shell_weight,Ages
0,0.455,0.365,0.095,0.514,0.101,0.15,16.5
1,0.35,0.265,0.09,0.2255,0.0485,0.07,8.5
2,0.53,0.42,0.135,0.677,0.1415,0.21,10.5
3,0.44,0.365,0.125,0.516,0.114,0.155,11.5
4,0.33,0.255,0.08,0.205,0.0395,0.055,8.5


In [6]:
# Keep only the rows whose Height is strictly below 0.4.
# NOTE(review): presumably this removes extreme Height outliers — confirm.
data_abalone = data.loc[data['Height'].lt(0.4)]

In [7]:
# Show (rows, columns) of the frame that remains after the Height filter.
data_abalone.shape

(4175, 7)

In [8]:
# Feature matrix: every column except the 'Ages' target.
data_x = data_abalone.drop(columns='Ages')

In [9]:
# Target kept as a single-column DataFrame (2-D), not a Series.
data_y = data_abalone.loc[:, ['Ages']]

In [11]:
from sklearn.model_selection import train_test_split

In [12]:
# Hold out 25% of the rows for testing; the fixed seed keeps the split
# identical between runs.
train_x, test_x, train_y, test_y = train_test_split(
    data_x,
    data_y,
    random_state=42,
    test_size=0.25,
)

In [13]:
# Report the shape of each split. The captions are in Indonesian
# (train/test feature size, train/test label size) and are printed unchanged.
for caption, split in (
    ('Ukuran Atribut Pelatihan : ', train_x),
    ('Ukuran Atribut Pengujian : ', test_x),
    ('Ukuran Label Pelatihan   : ', train_y),
    ('Ukuran Label Pengujian   : ', test_y),
):
    print(caption, split.shape)

Ukuran Atribut Pelatihan :  (3131, 6)
Ukuran Atribut Pengujian :  (1044, 6)
Ukuran Label Pelatihan   :  (3131, 1)
Ukuran Label Pengujian   :  (1044, 1)


In [14]:
from sklearn.preprocessing import StandardScaler

In [15]:
# Scaler with default settings; it is fitted on the training features in a later cell.
scaler = StandardScaler()

In [16]:
# Learn the scaling statistics from the training features only, then apply
# them to that same training set.
train_x_scl = scaler.fit(train_x).transform(train_x)

In [17]:
# Transform only (no re-fit): the test features reuse the statistics that
# were fitted on the training split.
test_x_scl = scaler.transform(X=test_x)

In [18]:
from sklearn.metrics import mean_squared_error

In [19]:
def evaluation(model, model_name):
    """Print train and test MSE / RMSE for an already-fitted regressor.

    Predictions are made on the notebook-level ``train_x_scl`` and
    ``test_x_scl`` arrays and scored against ``train_y`` and ``test_y``,
    so those four names must exist before this function is called.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict``.
    model_name : str
        Label printed in the report header.

    Returns
    -------
    None
        The metrics are printed, not returned.
    """
    train_pred = model.predict(train_x_scl)
    test_pred = model.predict(test_x_scl)

    train_mse = mean_squared_error(train_y, train_pred)
    test_mse = mean_squared_error(test_y, test_pred)

    # Derive RMSE from the MSE computed above instead of passing
    # `squared=False`: that keyword was deprecated in scikit-learn 1.4 and
    # removed in 1.6, where it raises TypeError. For a single target column
    # the square root of the MSE is exactly the same value.
    train_rmse = np.sqrt(train_mse)
    test_rmse = np.sqrt(test_mse)

    print(f'Model: {model_name}')
    print(f'Train MSE    : {train_mse}')
    print(f'Train RMSE   : {train_rmse}')
    print(f'Test MSE     : {test_mse}')
    print(f'Test RMSE    : {test_rmse}')

In [20]:
from sklearn.linear_model import LinearRegression

In [21]:
# Linear regression with default settings; serves as the first model to evaluate.
lin_reg = LinearRegression()

In [22]:
# Fit on the scaled training features; the fitted estimator is the cell output.
lin_reg.fit(X=train_x_scl, y=train_y)

LinearRegression()

In [23]:
# Report train/test error for the linear model.
evaluation(model=lin_reg, model_name='Linear Regression')

Model: Linear Regression
Train MSE    : 5.62350982179778
Train RMSE   : 2.3713940671676186
Test MSE     : 5.209488501414955
Test RMSE    : 2.282430393553099


In [24]:
from sklearn.ensemble import RandomForestRegressor

In [25]:
# Seed the forest so its bootstrap sampling, and therefore the reported
# metrics, are reproducible across kernel restarts. 42 matches the seed
# already used for the train/test split.
rf_reg = RandomForestRegressor(n_estimators=500, random_state=42)
# Pass the target as a flat 1-D array rather than a single-column frame.
rf_reg.fit(train_x_scl, train_y.values.ravel())

RandomForestRegressor(n_estimators=500)

In [26]:
# Report train/test error for the 500-tree random forest.
evaluation(model=rf_reg, model_name='Random Forest Regressor 500')

Model: Random Forest Regressor 500
Train MSE    : 0.756253318428617
Train RMSE   : 0.8696282645065172
Test MSE     : 4.982556475095786
Test RMSE    : 2.232164078892003
