In [14]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import make_scorer, mean_absolute_error, mean_absolute_percentage_error, r2_score, root_mean_squared_error
from sklearn.model_selection import cross_validate
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from definitions import DATASET_URL

# Prefix the imported location with '.' before it is handed to pd.read_csv
# (presumably turns a root-relative path into one relative to this notebook — confirm).
DATASET_URL = f'.{DATASET_URL}'

## Importando dados e conversões

In [15]:
# Read the CSV, replacing its own header row (header=0) with explicit column names
dados = pd.read_csv(
    DATASET_URL,
    header=0,
    names=['altura_pai', 'altura_filho'],
)

# Convert inches to metres and keep two decimal places
dados = dados.mul(0.0254).round(2)

In [16]:
# Split the frame into the target and the predictor
# (double brackets keep the predictor two-dimensional)
y = dados['altura_filho']
X = dados[['altura_pai']]

# Standardise the predictor: fit the scaler on X, then transform X with it
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

## Treinando o modelo

In [17]:
# Fit a linear regression on the standardised predictor
modelo = LinearRegression()
modelo.fit(X_scaled, y)

# Display the fitted intercept and coefficient
[modelo.intercept_, modelo.coef_]

[np.float64(1.7444805194805197), array([0.03544524])]

## Avaliando o modelo

In [19]:

# Scorers computed on every cross-validation fold. make_scorer is left at its
# default sign convention, so the error metrics are reported as positive values.
metricas = {
    'RMSE': make_scorer(root_mean_squared_error),
    'MAE': make_scorer(mean_absolute_error),
    'MAPE': make_scorer(mean_absolute_percentage_error),
    'R2': make_scorer(r2_score),
}

# Evaluate the same preprocessing + regression that was used for training.
# The model above was fitted on X_scaled, so cross-validating it on the raw X
# would skip the scaling step. Wrapping both steps in a pipeline lets
# cross_validate refit the scaler on each training fold, which keeps the
# evaluation consistent with training and avoids leaking test-fold statistics.
modelo_cv = make_pipeline(StandardScaler(), LinearRegression())
pontuacao = cross_validate(modelo_cv, X, y, cv=5, scoring=metricas)

# Average each metric over the folds; cross_validate stores them under 'test_<name>'
avaliacao = {
    nome: np.mean(pontuacao[f'test_{nome}']).item()
    for nome in metricas
}

avaliacao

{'RMSE': 0.06210971123988276,
 'MAE': 0.04827594587137074,
 'MAPE': 0.027766760022793695,
 'R2': 0.24450021977922637}