# Metrics & Model Evaluation for Regression
Pembahasan ini lebih dominan ke matematika mean absolute error, root mean squared error, mean squared error, r2 score

In [1]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, root_mean_squared_error, mean_squared_error, r2_score

In [2]:
df = pd.read_csv('insurance.csv')

In [3]:
df.sample(5)

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
1128,34,male,32.8,1,no,southwest,14358.36437
334,43,female,35.72,2,no,northeast,19144.57652
1064,29,female,25.6,4,no,southwest,5708.867
274,25,male,27.55,0,no,northwest,2523.1695
1145,52,male,32.775,3,no,northwest,11289.10925


In [4]:
df = pd.get_dummies(df, columns=['sex', 'smoker', 'region'], drop_first=True)
# get_dumies() itu hanya one hot encode aja. convert categorical variable to dummy variable

In [5]:
df.sample(4)

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes,region_northwest,region_southeast,region_southwest
784,31,29.26,1,4350.5144,False,False,False,True,False
1291,19,34.9,0,34828.654,True,True,False,False,True
1176,52,24.13,1,23887.6627,False,True,True,False,False
851,61,32.3,2,14119.62,True,False,True,False,False


In [6]:
x = df.drop('charges', axis=1)
y = df['charges']

In [7]:
X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [8]:
model = Pipeline([
    ('scaler', StandardScaler()),
    ('poly_feature', PolynomialFeatures()),
    ('regressor', LinearRegression())
])

In [9]:
model.fit(X_train, Y_train)

0,1,2
,steps,"[('scaler', ...), ('poly_feature', ...), ...]"
,transform_input,
,memory,
,verbose,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,degree,2
,interaction_only,False
,include_bias,True
,order,'C'

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [None]:
y_pred = model.predict(X_test)

In [None]:
mae = mean_absolute_error(Y_test, y_pred)
mse = mean_squared_error(Y_test, y_pred)
rmse = root_mean_squared_error(Y_test, y_pred)
r2 = r2_score(Y_test, y_pred)

print(f'MAE : {mae:.2f}')
print(f'MSE : {mse:.2f}')
print(f'RMSE : {rmse:.2f}')
print(f'R2 : {r2:.2f}')


MAE : 2729.50
MSE : 20712805.99
RMSE : 4551.13
R2 : 0.87


In [None]:
n = x.shape[0]
p = x.shape[1]

adjusted_r_squared = 1 - (1 - r2) / ((n - 1) / (n - p - 1))

print(f'Adjusted R^2 = {adjusted_r_squared:.2f}')


Adjusted R^2 = 0.87
