In [283]:
import pandas as pd
import numpy as np
import plotly.express as ex
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures

data = pd.read_csv('..\dataset\melon.csv')
data.head(50)


invalid escape sequence '\d'


invalid escape sequence '\d'


invalid escape sequence '\d'



Unnamed: 0,Weight,Price
0,1,50
1,2,100
2,3,140
3,4,180
4,5,220
5,6,250
6,7,300
7,8,340
8,9,380
9,10,390


In [284]:
X = data.drop('Price', axis=1)
y = data['Price']

In [285]:
poly = PolynomialFeatures(degree=3, include_bias=False)
data_converted = poly.fit_transform(data)

In [286]:
print(f'Shape before conversion: {data.shape}')
print(f'Shape after conversion: {data_converted.shape}')

Shape before conversion: (10, 2)
Shape after conversion: (10, 9)


In [287]:
X_train, X_test, y_train, y_test = train_test_split(data_converted, y, test_size=0.3, random_state=101)

In [288]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

In [289]:
scaler.fit(X_train)

0,1,2
,copy,True
,with_mean,True
,with_std,True


In [290]:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [291]:
from sklearn.linear_model import Ridge

ridge_ml = Ridge(alpha=10)

In [292]:
ridge_ml.fit(X_train, y_train)

0,1,2
,alpha,10
,fit_intercept,True
,copy_X,True
,max_iter,
,tol,0.0001
,solver,'auto'
,positive,False
,random_state,


In [293]:
y_pred = ridge_ml.predict(X_test)

In [294]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

In [295]:
print(f'MAE: {mae:.5f}')
print(f'R2: {r2:.5f}')
print(f'RMSE: {rmse:.5f}')

MAE: 43.87502
R2: 0.85380
RMSE: 53.25697


0.85 is a good value, but it's still possible to get a better r² value

In [296]:
from sklearn.linear_model import RidgeCV

ridge_cv_model = RidgeCV(alphas=(0.1, 1.0, 10.0))
ridge_cv_model.fit(X_train, y_train)

0,1,2
,alphas,"(0.1, ...)"
,fit_intercept,True
,scoring,
,cv,
,gcv_mode,
,store_cv_results,False
,alpha_per_target,False


In [297]:
ridge_cv_model.alpha_

np.float64(0.1)

In [298]:
y_pred = ridge_cv_model.predict(X_test)

In [299]:
MAE = mean_absolute_error(y_test, y_pred)
r2_cv = r2_score(y_test, y_pred)
RMSE_cv = np.sqrt(mean_squared_error(y_test, y_pred))

In [300]:
print(f'MAE: {MAE:.5f}')
print(f'R2: {r2_cv:.5f}')
print(f'RMSE: {RMSE_cv:.5f}')

MAE: 5.28080
R2: 0.99667
RMSE: 8.04294


In [301]:
results = pd.DataFrame({
    'Atual': y_test,
    'Previsto': y_pred
})

results.head()

Unnamed: 0,Atual,Previsto
8,380,378.506788
2,140,140.507972
0,50,63.841205


In [302]:
fig = ex.scatter(
    results, 
    x='Atual', 
    y='Previsto', 
    title='Valores reais vs previstos'
)

fig.add_shape(
    type="line", line=dict(dash='dash'),
    x0=results['Atual'].min(), y0=results['Atual'].min(),
    x1=results['Atual'].max(), y1=results['Atual'].max()
)
fig.show()

## Resíduos do modelo

In [322]:
err = y_test - y_pred

fig = ex.histogram(err,
                   nbins=50,
                   title='Distribuição dos erros (resíduos)'
                )

fig.update_layout(
    xaxis_title = 'Resíduo',
    legend_title_text = 'Legenda',
    title_x = 0.5
)

fig.show()

In [304]:
err

8     1.493212
2    -0.507972
0   -13.841205
Name: Price, dtype: float64

In [307]:
metrics = pd.DataFrame({
    'Modelo': ['Ridge (α=10)', 'RidgeCV'],
    'MAE': [mae, MAE],
    'R2': [r2, r2_cv],
    'RMSE': [rmse, RMSE_cv]
})

metrics

Unnamed: 0,Modelo,MAE,R2,RMSE
0,Ridge (α=10),43.875016,0.853799,53.256967
1,RidgeCV,5.280796,0.996666,8.04294


In [320]:
fig = ex.bar(metrics, 
             x='Modelo', 
             y=['R2', 'MAE', 'RMSE'],
             barmode='group', 
             text_auto='.3f',
             title='Comparação de Métricas dos Modelos',
             color_discrete_sequence=["#63FACD", "#4D7ED8", "#F85A91"])

fig.update_layout(
    legend_title_text='Métricas',
    title_font_color='darkblue',
    title_x=0.5,
    xaxis_title='Modelos',
    yaxis_title='Valores'
)

fig.show(config={'displayModeBar': False})

In [326]:
print(f'''Depois de treinar o modelo, utilizando scalonamento das features e regularização do modelo com Ridge (L1), conclui-se que o resultado obtido foi satisfatório.\n 
Métricas:
R2 antigo: {r2:.5f} -> Melhor R2: {r2_cv:.5f}
MAE antigo: {mae:.5f} -> Melhor MAE: {MAE:.5f}
RMSE antigo: {rmse:.5f} -> Melhor RMSE: {RMSE_cv:.5f}
      
Conclui-se, assim, que o melhor alpha foi o de 0.1.
     '''
      )

Depois de treinar o modelo, utilizando scalonamento das features e regularização do modelo com Ridge (L1), conclui-se que o resultado obtido foi satisfatório.
 
Métricas:
R2 antigo: 0.85380 -> Melhor R2: 0.99667
MAE antigo: 43.87502 -> Melhor MAE: 5.28080
RMSE antigo: 53.25697 -> Melhor RMSE: 8.04294
      
Conclui-se, assim, que o melhor alpha foi o de 0.1.
     
