In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np


def linear_regression(df, x, y):
    """Fit and evaluate a one-feature linear regression of ``y`` on ``x``.

    Side effects (in this order): sets the global matplotlib figure size to
    ``(15, 6)``, draws a scatter plot of ``y`` against ``x``, displays the
    correlation matrix of the numeric columns and the train/test sizes,
    prints MAE / MSE / RMSE measured on the held-out split, and overlays the
    fitted line on the scatter plot.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the columns named by ``x`` and ``y``.
    x : str
        Name of the feature (independent variable) column.
    y : str
        Name of the target (dependent variable) column.

    Returns
    -------
    sklearn.linear_model.LinearRegression
        The regressor fitted on the training split (80% of the rows,
        ``random_state=40``).
    """
    plt.rcParams["figure.figsize"] = (15, 6)

    # Correlate numeric columns only: since pandas 2.0 ``DataFrame.corr``
    # defaults to ``numeric_only=False`` and raises on e.g. string columns.
    correlation_r = df.select_dtypes(include="number").corr()

    # Keep the Axes so the regression line is drawn on this exact plot rather
    # than on whatever happens to be the "current" axes later on.
    ax = sns.scatterplot(data=df, x=x, y=y)

    # scikit-learn expects a 2-D feature matrix; the target is kept 2-D as
    # well, so ``predict`` returns an (n, 1) array.
    X = df[x].values.reshape(-1, 1)
    Y = df[y].values.reshape(-1, 1)
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=0.2, random_state=40
    )

    display(correlation_r)
    display(
        f"Quantidade de Dados de Teste: {len(X_test)}",
        f"Quantidade de Dados de Treinamento: {len(X_train)}",
    )

    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    y_pred = regressor.predict(X_test)

    # Error metrics on the held-out test split.
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    print(f"(MAE) Mean Absolute Error: {mae:.2f}")
    print(f"(MSE) Mean Squared Error: {mse:.2f}")
    print(f"(RMSE) Root Mean Squared Error: {rmse:.2f}")

    # Overlay the fitted line (predictions for the test inputs) on the scatter.
    df_tests = pd.DataFrame({"X_test": X_test[:, 0], "y_pred": y_pred[:, 0]})
    sns.lineplot(
        data=df_tests, x="X_test", y="y_pred", color="r", linewidth=2.5, ax=ax
    )
    return regressor


def predict_by_list(model, values_to_predict, x, y):
    """Predict the target for a list of feature values and tabulate the result.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict(X)``; it is called with a 2-D
        array holding a single feature column.
    values_to_predict : sequence of numbers
        Feature values to predict for.
    x : str
        Label of the output column holding the input values.
    y : str
        Target name; the prediction column is labelled ``"Previsões de {y}"``.

    Returns
    -------
    pandas.DataFrame
        One row per input value, with the input and its prediction.
    """
    # Use float64: a float32 cast rounds inputs such as 1.55 before the model
    # sees them, so the prediction would not match the value shown in the table.
    features = np.asarray(values_to_predict, dtype=np.float64).reshape(-1, 1)
    predictions = np.asarray(model.predict(features))

    # ``predict`` may return shape (n, 1) or (n,); accept both and keep the
    # first output column in the 2-D case.
    if predictions.ndim > 1:
        predictions = predictions[:, 0]

    return pd.DataFrame({x: values_to_predict, f"Previsões de {y}": predictions})

In [None]:
# Load the weight/height dataset from a CSV file; the path is relative, so it
# is resolved against the kernel's working directory.
path = "dse-weight-height-data.csv"
df = pd.read_csv(filepath_or_buffer=path)

# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Unit-conversion factors applied to the renamed columns.
# NOTE(review): presumably the raw Height is in inches and the raw Weight in
# pounds (0.0254 m per inch, 0.453592 kg per pound) — confirm against the
# dataset's documentation.
INCHES_TO_METERS = 0.0254
POUNDS_TO_KILOGRAMS = 0.453592

# Drop the unused "Gender" column, rename the remaining columns and convert
# them with vectorised multiplication instead of a per-element ``apply``.
# This cell is not re-runnable on its own: a second run raises KeyError on the
# already-dropped "Gender" column, so re-run the loading cell first.
df = (
    df.drop(columns="Gender")
    .rename(columns={"Height": "Altura", "Weight": "Peso"})
    .assign(
        Altura=lambda frame: frame["Altura"] * INCHES_TO_METERS,
        Peso=lambda frame: frame["Peso"] * POUNDS_TO_KILOGRAMS,
    )
)
df.head()

In [None]:
# Feature / target column names; later cells reuse these globals.
x, y = "Altura", "Peso"

# One figure on a 2x2 grid; only the two top slots are used, one histogram
# per variable.
plt.rcParams["figure.figsize"] = (20, 15)
fig = plt.figure()
fig.subplots_adjust(hspace=0.5, wspace=0.5)

# Distribution of the feature (top-left slot).
ax = fig.add_subplot(2, 2, 1)
sns.histplot(df[x], ax=ax)

# Distribution of the target (top-right slot).
ax = fig.add_subplot(2, 2, 2)
sns.histplot(df[y], ax=ax)

plt.show()

In [None]:
# Fit the regression of `y` on `x`; the call also displays the correlation
# table, prints the error metrics and draws the scatter plot with the fitted line.
model = linear_regression(df=df, x=x, y=y)

In [None]:
# Feature values (same unit as column `x`) to run through the fitted model.
values_to_predict = [
    1.50,
    1.55,
    1.80,
    2,
    2.10,
]

# Table with one row per input value and its predicted `y`.
df_predicts = predict_by_list(
    model=model,
    values_to_predict=values_to_predict,
    x=x,
    y=y,
)
df_predicts