In [13]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

import pandas as pd
import numpy as np

In [5]:
# Column to predict. Kept as a one-element list so it can be passed directly
# as a column selection.
target_column = ["PRECIPITAÇÃO TOTAL, HORÁRIO (mm)"]

# Predictor columns, named exactly as they appear in the CSV header.
features_columns = [
    "PRESSAO ATMOSFERICA AO NIVEL DA ESTACAO, HORARIA (mB)",
    "RADIACAO GLOBAL (KJ/m²)",
    "TEMPERATURA DO AR - BULBO SECO, HORARIA (°C)",
    "TEMPERATURA DO PONTO DE ORVALHO (°C)",
    "UMIDADE RELATIVA DO AR, HORARIA (%)",
    "VENTO, VELOCIDADE HORARIA (m/s)",
]

In [6]:
# Parse the CSV a single time, loading only the columns the model needs,
# instead of reading the same file once for the features and once for the target.
dataset = pd.read_csv(
    "/kaggle/input/training-dataset-random-forest/step_5_df.csv",
    usecols=features_columns + target_column,
)

# `usecols` keeps the file's own column order, so dropping the target leaves
# the feature columns in the same order a features-only read would produce.
X = dataset.drop(columns=target_column)

# Selecting with a list keeps y as a single-column DataFrame.
y = dataset[target_column]

# The combined frame is no longer needed once X and y exist.
del dataset

In [7]:
# Standardise the features. The scaler is fitted on every row of X; no
# train/test separation exists at this point in the notebook.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [8]:
# Split the raw features first, then learn the scaling statistics from the
# training rows only. Fitting the scaler on all rows before splitting lets the
# test rows influence the mean/variance used in preprocessing (data leakage),
# so the `X_scaled` array from the previous cell is intentionally not used here.
# Same test_size and random_state as before, so the row assignment is unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Re-bind `scaler` to one fitted on the training partition only.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [11]:
# Hyper-parameters for the gradient boosting model, collected in one place.
gbr_params = dict(
    loss="squared_error",
    n_estimators=100,    # number of trees (boosting iterations)
    learning_rate=0.1,   # learning rate
    max_depth=3,         # maximum depth of each tree
    random_state=42,
    verbose=2,           # print training progress while fitting
)
gbr = GradientBoostingRegressor(**gbr_params)

# y_train is a single-column frame; flatten it to the 1-D array passed to fit.
gbr.fit(X_train, y_train.to_numpy().ravel())

      Iter       Train Loss   Remaining Time 
         1           1.7020          147.17m
         2           1.6884          141.93m
         3           1.6773          139.12m
         4           1.6678          137.22m
         5           1.6604          135.09m
         6           1.6533          133.51m
         7           1.6475          132.02m
         8           1.6423          130.50m
         9           1.6379          129.10m
        10           1.6341          127.61m
        11           1.6309          126.09m
        12           1.6279          124.69m
        13           1.6254          123.12m
        14           1.6232          121.77m
        15           1.6212          120.42m
        16           1.6194          118.99m
        17           1.6177          117.52m
        18           1.6163          116.07m
        19           1.6149          114.64m
        20           1.6138          113.21m
        21           1.6126          111.75m
        2

In [15]:
# Score the fitted model on the held-out rows.
y_pred = gbr.predict(X_test)

mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

# One metric per line; RMSE is the square root of the MSE computed above.
print(
    f"MSE: {mse:.2f}",
    f"R²: {r2:.2f}",
    f"RMSE: {np.sqrt(mse):.2f}",
    sep="\n",
)

MSE: 1.59
R²: 0.08
RMSE: 1.26
