In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error,
    r2_score
)

# Load the train / validation / test splits, one CSV per split, in that order.
train_reg, val_reg, test_reg = map(
    pd.read_csv,
    (
        "/content/sample_data/train_Sleep_health_and_lifestyle_dataset.csv",
        "/content/sample_data/val_Sleep_health_and_lifestyle_dataset.csv",
        "/content/sample_data/test_Sleep_health_and_lifestyle_dataset.csv",
    ),
)

In [None]:
# Quick sanity check of the training split.
# A bare expression is rendered only when it is the last statement of a cell,
# so the preview needs an explicit display() call; otherwise its result is
# silently discarded and only the .info() summary shows up.
display(train_reg.head())
train_reg.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 594 entries, 0 to 593
Data columns (total 25 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Age                              594 non-null    float64
 1   Sleep Duration                   594 non-null    float64
 2   Physical Activity Level          594 non-null    float64
 3   Stress Level                     594 non-null    float64
 4   Heart Rate                       594 non-null    float64
 5   Daily Steps                      594 non-null    float64
 6   BMI Category                     594 non-null    float64
 7   BP_Category                      594 non-null    float64
 8   Gender_Female                    594 non-null    float64
 9   Gender_Male                      594 non-null    float64
 10  Occupation_Accountant            594 non-null    float64
 11  Occupation_Doctor                594 non-null    float64
 12  Occupation_Engineer   

In [None]:
# Name of the column the regressor predicts; all remaining columns are features.
target_col = "Quality of Sleep"

# For each split: X is the frame without the target column, y is the target column.
X_train, y_train = train_reg.drop(columns=[target_col]), train_reg[target_col]
X_val, y_val = val_reg.drop(columns=[target_col]), val_reg[target_col]
X_test, y_test = test_reg.drop(columns=[target_col]), test_reg[target_col]

# Quick check: print the shapes of every X / y pair side by side.
for x_caption, y_caption, x_part, y_part in (
    ("X_train:", "| y_train:", X_train, y_train),
    ("X_val:  ", "| y_val:  ", X_val, y_val),
    ("X_test: ", "| y_test: ", X_test, y_test),
):
    print(x_caption, x_part.shape, y_caption, y_part.shape)

X_train: (594, 24) | y_train: (594,)
X_val:   (56, 24) | y_val:   (56,)
X_test:  (57, 24) | y_test:  (57,)


In [None]:
# fit() returns the estimator itself, so construction and training are chained.
ridge = Ridge(alpha=1.0, random_state=42).fit(X_train, y_train)

# First real check of the fitted model: input width, intercept, coefficient shape.
print("Testagem do modelo")
for caption, value in (
    ("n_features_:", ridge.n_features_in_),
    ("Intercept:", ridge.intercept_),
    ("Coef shape:", ridge.coef_.shape),
):
    print(caption, value)

Testagem do modelo
n_features_: 24
Intercept: 6.944743103012478
Coef shape: (24,)


In [None]:
# Predict on every split with the fitted model.
yhat_train, yhat_val, yhat_test = (
    ridge.predict(features) for features in (X_train, X_val, X_test)
)

# Check that each prediction vector has the expected length.
print("Shapes dos predicts:")
for caption, predictions in (
    ("train:", yhat_train),
    ("val:  ", yhat_val),
    ("test: ", yhat_test),
):
    print(caption, predictions.shape)

# Side-by-side table of actual vs. predicted values on the test split;
# "erro" is the absolute difference between the two.
pred_view_test = pd.DataFrame(
    {"y_true": y_test.values, "y_pred": yhat_test}
).assign(erro=lambda frame: (frame["y_true"] - frame["y_pred"]).abs())

print("\nAmostra de previsões absolutas (TESTE):")
display(pred_view_test.head(15))

Shapes dos predicts:
train: (594,)
val:   (56,)
test:  (57,)

Amostra de previsões absolutas (TESTE):


Unnamed: 0,y_true,y_pred,erro
0,6,5.969721,0.030279
1,6,5.798605,0.201395
2,6,5.969721,0.030279
3,6,5.934712,0.065288
4,7,6.830325,0.169675
5,8,8.299712,0.299712
6,8,8.33878,0.33878
7,6,5.952934,0.047066
8,9,9.006597,0.006597
9,8,7.885776,0.114224


In [None]:
def eval_reg(y_true, y_pred, label=""):
    """Print MAE, RMSE and R² for one set of predictions on a single line.

    Args:
        y_true: Observed target values.
        y_pred: Predicted target values, aligned with ``y_true``.
        label: Text shown at the start of the line, padded to 10 characters.

    Returns:
        None. The metrics are only printed.
    """
    scores = {
        "MAE": mean_absolute_error(y_true, y_pred),
        # RMSE is the square root of the mean squared error.
        "RMSE": np.sqrt(mean_squared_error(y_true, y_pred)),
        "R²": r2_score(y_true, y_pred),
    }
    summary = " | ".join(f"{name}={value:.3f}" for name, value in scores.items())
    print(f"{label:10s} | {summary}")

# Report the same three metrics for every split, one line each.
print("Métricas Ridge")
for split_label, observed, predicted in (
    ("Treino", y_train, yhat_train),
    ("Val", y_val, yhat_val),
    ("Teste", y_test, yhat_test),
):
    eval_reg(observed, predicted, split_label)

Métricas Ridge
Treino     | MAE=0.174 | RMSE=0.252 | R²=0.971
Val        | MAE=0.166 | RMSE=0.225 | R²=0.966
Teste      | MAE=0.172 | RMSE=0.225 | R²=0.965


In [None]:
# Signed residuals on the test split (actual minus predicted): a positive value
# means the model predicted below the observed target.
res_test = y_test.values - yhat_test

print("Resíduos (TESTE)")
print("média:", np.mean(res_test).round(4))
print("desvio:", np.std(res_test).round(4))
print("min:", np.min(res_test).round(4))
print("max:", np.max(res_test).round(4))

# Rows with the largest errors. The "erro" column of pred_view_test already
# stores the absolute error, so it is ranked directly; no temporary helper
# column is written into the shared frame and nothing has to be dropped again
# before displaying.
worst = pred_view_test.sort_values("erro", ascending=False).head(10)

print("\nTop 10 maiores erros absolutos (TESTE):")
display(worst)

Resíduos (TESTE)
média: -0.054
desvio: 0.2187
min: -0.7673
max: 0.7455

Top 10 maiores erros absolutos (TESTE):


Unnamed: 0,y_true,y_pred,erro
20,5,5.767305,0.767305
37,9,8.254515,0.745485
6,8,8.33878,0.33878
35,8,8.33878,0.33878
19,8,7.689055,0.310945
49,7,6.691934,0.308066
5,8,8.299712,0.299712
16,7,7.293226,0.293226
24,7,7.258923,0.258923
48,7,7.258923,0.258923


In [None]:
# Pair every feature with its fitted Ridge coefficient.
# The signed value is kept in "coef_signed" because the sign tells whether the
# feature pushes the prediction up or down; overwriting it with its absolute
# value would make opposite effects look identical. "coef" holds the magnitude
# and is what the ranking below is based on.
# NOTE(review): comparing magnitudes across features assumes the inputs are on
# comparable scales — confirm against the upstream preprocessing.
coef_df = pd.DataFrame({
    "feature": X_train.columns,
    "coef_signed": ridge.coef_,
})
coef_df["coef"] = coef_df["coef_signed"].abs()
coef_df = coef_df.sort_values("coef", ascending=False)

print("Top 20 coeficientes (maior impacto):")
display(coef_df.head(20))

print("\nBottom 10 coeficientes (quase sem impacto):")
display(coef_df.tail(10))

Top 20 coeficientes (maior impacto):


Unnamed: 0,feature,coef
3,Stress Level,0.632806
10,Occupation_Accountant,0.421623
0,Age,0.412483
16,Occupation_Sales Representative,0.28989
17,Occupation_Salesperson,0.2891
6,BMI Category,0.265497
1,Sleep Duration,0.258392
8,Gender_Female,0.220978
9,Gender_Male,0.220978
22,Sleep Disorder_Normal,0.217721



Bottom 10 coeficientes (quase sem impacto):


Unnamed: 0,feature,coef
14,Occupation_Manager,0.127851
19,Occupation_Software Engineer,0.122624
12,Occupation_Engineer,0.095773
23,Sleep Disorder_Sleep Apnea,0.085454
7,BP_Category,0.081915
4,Heart Rate,0.081847
5,Daily Steps,0.06717
20,Occupation_Teacher,0.049086
11,Occupation_Doctor,0.045196
2,Physical Activity Level,0.024662
