In [133]:
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures
from math import sqrt
from category_encoders.binary import BinaryEncoder


# Load the raw crab measurements from disk.
raw_crabs = pd.read_csv('../data/CrabAgePrediction.csv')

# Do not truncate the column list when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

# Binary-encode every object-dtype (text) column and cast the encoded
# columns to int; all remaining columns are carried over untouched.
bn = BinaryEncoder()
data_category = bn.fit_transform(raw_crabs.select_dtypes(include='object')).astype(int)
data_num = raw_crabs.select_dtypes(exclude='object')

# Recombine side by side: non-object columns first, encoded columns last.
df = pd.concat([data_num, data_category], axis='columns')

# 'Age' is the regression target; every other column is a feature.
x_reg = df.drop(columns='Age')
y_reg = df['Age']

In [134]:
# Count missing values per column of the encoded frame.
df.isna().sum(axis=0)

Length            0
Diameter          0
Height            0
Weight            0
Shucked Weight    0
Viscera Weight    0
Shell Weight      0
Age               0
Sex_0             0
Sex_1             0
dtype: int64

In [135]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for evaluation. shuffle=False keeps the rows in
# their original order, so the split is deterministic without a seed.
# NOTE(review): if the CSV is sorted in any meaningful way, an unshuffled
# split may not be representative — confirm against the data source.
x_train, x_test, y_train, y_test = train_test_split(
    x_reg,
    y_reg,
    test_size=0.33,
    shuffle=False,
)

In [136]:
# Chain preprocessing and the regressor into a single estimator so the same
# transformations are applied at fit time and at predict time.
pipeline = Pipeline(
    steps=[
        # Rescale each feature using the min/max seen during fit.
        ("scaler", MinMaxScaler()),
        # Polynomial expansion of degree 1 (no higher-order terms).
        ("polynomial", PolynomialFeatures(degree=1)),
        # Ordinary least-squares linear regression.
        ("model", LinearRegression()),
    ]
)

In [137]:
# Fit the pipeline on the training split and predict the held-out split.
pipeline.fit(x_train, y_train)
y_pred = pipeline.predict(x_test)

# Compute the MSE once and reuse it for RMSE instead of evaluating
# mean_squared_error twice on the same inputs.
mse = mean_squared_error(y_test, y_pred)

print(f'MAE: {mean_absolute_error(y_test, y_pred)}')
print(f'MSE: {mse}')
print(f'RMSE: {sqrt(mse)}')
# scikit-learn returns MAPE as a fraction, not a percentage.
print(f'MAPE: {mean_absolute_percentage_error(y_test, y_pred)}')
# R^2 is taken from the pipeline's own score() on the test split.
print(f'R^2: {pipeline.score(x_test, y_test)}')


MAE: 1.6142414565764933
MSE: 4.7959773949177205
RMSE: 2.1899720077931866
MAPE: 0.16422918096333727
R^2: 0.5536257155542584
