In [1]:
import pandas as pd  
import plotly.express as px  
import numpy as np 

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import root_mean_squared_error, r2_score

### Carregar Dados

In [2]:
df_costs = pd.read_csv(r'.\datasets\healthcosts_cleaned.csv')

In [3]:
df_costs.head(10)

Unnamed: 0,age,sex,bmi,children,smoker,region,medical charges
0,19,female,27.9,0,1,southwest,16884.924
1,18,male,33.77,1,0,southeast,1725.5523
2,28,male,33.0,3,0,southeast,4449.462
3,33,male,22.705,0,0,northwest,21984.47061
4,32,male,28.88,0,0,northwest,3866.8552
5,31,female,25.74,0,0,southeast,3756.6216
6,46,female,33.44,1,0,southeast,8240.5896
7,37,female,27.74,3,0,northwest,7281.5056
8,37,male,29.83,2,0,northeast,6406.4107
9,60,female,25.84,0,0,northwest,28923.13692


### Preparação dos Dados

In [4]:
X = df_costs.drop(columns=['medical charges'], axis=1)
y = df_costs['medical charges']

In [5]:
import joblib

preprocessor = joblib.load('preprocessor_dataset_healthcosts.pkl')

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=51)

In [7]:
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

In [8]:
print(X_train.shape)
print(X_test.shape)

(1070, 10)
(268, 10)


### Treinando o modelo

In [9]:
boosting_model = AdaBoostRegressor(
    estimator=LinearRegression(),
    n_estimators=50,
    learning_rate=1.0,
    random_state=51
)

In [10]:
boosting_model.fit(X_train, y_train)

0,1,2
,estimator,LinearRegression()
,n_estimators,50
,learning_rate,1.0
,loss,'linear'
,random_state,51

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


### Análise de Resultados

In [11]:
y_pred = boosting_model.predict(X_test)

In [12]:
rmse = root_mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

In [13]:
print(f'RMSE: {rmse}\nR2_Score: {r2}')

RMSE: 6926.93661541702
R2_Score: 0.724052551316343


In [14]:
coefs = np.array([estimator.coef_ for estimator in boosting_model.estimators_])

In [15]:
# Calcular importancias das features
importances = np.mean(np.abs(coefs), axis=0)

In [16]:
importances = importances / np.sum(importances)

In [None]:
# Obter os nomes das features
features_names = preprocessor.get_feature_names_out()

In [18]:
# Criar um DF com as importancias e os nomes das features
importance_df = pd.DataFrame({'feature': features_names, 'importance': importances})

In [19]:
# Ordenar o DF pela importancia
importance_df = importance_df.sort_values(by='importance', ascending=True)

In [21]:
# Criar um gráfico de barras com a importancia das features
fig = px.bar(importance_df, 
             x='importance',
             y='feature',
             title='Importancia das Features',
             orientation='h')

fig.update_xaxes(tickangle=45)
fig.show()

### Propriedades do Modelo

In [None]:
# Erros dos estimadores
boosting_model.estimator_errors_

array([0.1325525 , 0.20039444, 0.26369109, 0.35272733, 0.42748131,
       0.41440901, 0.4657024 , 0.48312978, 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ])

In [23]:
# Pesos dos estimadores
boosting_model.estimator_weights_

array([1.87857623, 1.3838309 , 1.02687142, 0.60707232, 0.2921348 ,
       0.34576816, 0.1374062 , 0.06750651, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ])