In [1]:
from typing import Tuple, List
import pandas as pd
import numpy as np
import seaborn as sns
import xgboost as xgb

import matplotlib.pyplot as plt
import plotly.graph_objects as go 
import plotly.express as px

from sklearn.model_selection import train_test_split, cross_val_score, RandomizedSearchCV, GridSearchCV
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import normalize, LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor

from xgboost import XGBRegressor

In [None]:
#from google.colab import files

# For google colab
#uploaded = files.upload()


Data

In [2]:
# Training inputs: sensor coordinates plus one table per measured quantity.
# The three measurement tables are melted later in this notebook with 'M.Time[d]' as the id column.
df_sensors = pd.read_csv('nuclear-waste/Coordinates_Training.csv')
hum = pd.read_csv('nuclear-waste/Training_data_humidity.csv')
pre = pd.read_csv('nuclear-waste/Training_data_pressure.csv')
df_tem = pd.read_csv('nuclear-waste/Training_data_temperature.csv')  

# Example submission file; its 'id' column is reused when the final result table is built
ex = pd.read_csv('nuclear-waste/example_of_submission.csv')

# Test inputs: coordinates, humidity and pressure of the sensors to predict (no temperature table is loaded)
df_test = pd.read_csv('nuclear-waste/Coordinates_Test.csv')
humtest = pd.read_csv('nuclear-waste/Test_Time_humidity.csv')
pretest = pd.read_csv('nuclear-waste/Test_Time_pressure.csv')
               



In [None]:
# Visualise the positions of the training sensors in space with plotly's scatter_3d.
# Hovering over a point shows its 'Sensor ID'.

fig = px.scatter_3d(
    df_sensors,
    x='Coor X [m]',
    y='Coor Y [m]',
    z='Coor Z [m]',
    width = 800,
    height = 600,
    hover_name='Sensor ID' 
)

# Small markers keep the individual sensors distinguishable
fig.update_traces(marker=dict(size=3))

fig.show()


The sensors fill a cylinder whose axis runs along Y; its cross-section is a disc in the XZ plane.

In [None]:
# Defining functions

In [3]:
# Replace with value

def replacewithvalue(
    df : pd.DataFrame,
    header : str,
    val = np.nan,
    min_val = -float('inf') ,
    max_val = float('inf') ,
) -> pd.DataFrame:
    """
    Replace the values of one column that lie strictly inside an interval
    with a single value.

    Only the column `header` is touched; every other column is left as is.
    The DataFrame is modified in place and also returned.

    df : DataFrame to modify
    header : Header of the column to process
    val : Value used as replacement, NaN by default
    min_val : lower bound of the interval (exclusive)
    max_val : upper bound of the interval (exclusive)

    Returns the same DataFrame object that was passed in.
    """
    # Entries that cannot be read as numbers become NaN here and therefore
    # never fall inside the interval (NaN fails both comparisons).
    numeric = pd.to_numeric(df[header], errors='coerce')
    in_interval = (numeric > min_val) & (numeric < max_val)

    # Skip the assignment when nothing matches so the column dtype stays untouched.
    if in_interval.any():
        df[header] = df[header].mask(in_interval, val)
    return df



In [4]:
# Here we will replace NaNs with another value. For example, the arithmetric average

def avgcolumn(
    df : pd.DataFrame,
    header : str,
    min_val = -float('inf') ,
    max_val = float('inf') ,
) -> float:
    """
    Returns the arithmetic mean of the numeric values of a DataFrame column
    that lie strictly between min_val and max_val.

    NaNs never satisfy the interval test and non-numeric entries are skipped,
    so neither contributes to the mean.

    df : The target DataFrame
    header : Header of column
    min_val : lower bound of the interval (exclusive)
    max_val : upper bound of the interval (exclusive)

    Returns 0.0 when no value qualifies.
    """
    # The type check must come first: comparing a str/None with a number raises TypeError.
    selected = [
        value
        for value in df[header]
        if isinstance(value, (int, float)) and min_val < value < max_val
    ]
    if not selected:
        return 0.0
    return sum(selected) / len(selected)


In [5]:
# Make it so it removes sensors

def removesensor(
    df: pd.DataFrame, 
    sensor: str
)-> pd.DataFrame:
    """
    Returns the rows of the DataFrame that do not belong to the given sensor.

    The input DataFrame is not modified and the original row index is kept.

    df : The target DataFrame (must contain a 'Sensor ID' column)
    sensor : ID of the sensor whose rows are dropped
    """
    keep_rows = df['Sensor ID'].ne(sensor)
    return df.loc[keep_rows]

In [6]:
def Xy(
    fulldf : pd.DataFrame
):
    """
    Splits a DataFrame into the feature table X and, when available, the target y.

    The features are the time, coordinate, humidity and pressure columns plus
    one material column: 'Material' if present, otherwise 'Material_encoded'.

    Returns:
    - X, y : if the 'Temperature' column is present (y is a one-column DataFrame)
    - X : otherwise

    Raises ValueError when neither material column exists.
    """
    base_features = ['M.Time[d]', 'Coor X [m]', 'Coor Y [m]', 'Coor Z [m]', 'R [m]', 'Humidity', 'Pressure']

    # 'Material' takes precedence over 'Material_encoded' when both are present
    material_column = next(
        (name for name in ('Material', 'Material_encoded') if name in fulldf.columns),
        None,
    )
    if material_column is None:
        raise ValueError("Missing 'Material' or 'Material_encoded' column in DataFrame.")

    X = fulldf[base_features + [material_column]]

    if 'Temperature' not in fulldf.columns:
        return X
    return X, fulldf[['Temperature']]

In [7]:
def eval(
    y_val : pd.DataFrame,
    y_pred : pd.DataFrame,
    model = None
):
    """
    Prints the Mean Squared Error (MSE) and the coefficient of determination (R²)
    of a prediction and, when the model exposes them, its feature importances.

    y_val : DataFrame of validation data
    y_pred : DataFrame of prediction data
    model : fitted model (optional); its `feature_importances_` attribute is
            printed if it exists, otherwise that line is skipped

    Note: this function shadows Python's built-in `eval` for the rest of the
    notebook; the name is kept because other cells call it.
    """
    mse = mean_squared_error(y_val, y_pred)
    r2 = r2_score(y_val, y_pred)

    print(f"MSE : {mse:.4f}")
    print(f"R² : {r2:.4f}")

    # Not every estimator has feature importances, and `model` may be omitted.
    importances = getattr(model, 'feature_importances_', None)
    if importances is not None:
        print(f"Features importances : {importances}")

In [8]:
# Preprocessing

# Gather everything in one long table: one row per (time, sensor) with the
# temperature reading and the sensor's static attributes next to it.

sensor_columns = ['Sensor ID', 'Index', 'Material', 'Coor X [m]', 'Coor Y [m]', 'Coor Z [m]', 'R [m]']

df_long = df_tem.melt(
    id_vars='M.Time[d]',
    var_name='Sensor ID',
    value_name='Temperature',
)
df_merged = df_long.merge(
    df_sensors[sensor_columns],
    on='Sensor ID',
    how='left',
)


In [9]:
# Let's apply these functions

# Scrolling through the training data showed that sensor N_442 has no temperature value at all
sensor_without_data = 'N_442'
display(df_merged[df_merged['Sensor ID'] == sensor_without_data])

# So we remove that sensor entirely (the reason is given further down, where missing values are filled)
df_merged = removesensor(df_merged, sensor_without_data)

Unnamed: 0,M.Time[d],Sensor ID,Temperature,Index,Material,Coor X [m],Coor Y [m],Coor Z [m],R [m]
14112,1554,N_442,,442,VOID,-0.458902,6.606527,-0.309136,0.553314
14113,1556,N_442,,442,VOID,-0.458902,6.606527,-0.309136,0.553314
14114,1558,N_442,,442,VOID,-0.458902,6.606527,-0.309136,0.553314
14115,1560,N_442,,442,VOID,-0.458902,6.606527,-0.309136,0.553314
14116,1563,N_442,,442,VOID,-0.458902,6.606527,-0.309136,0.553314
14117,1567,N_442,,442,VOID,-0.458902,6.606527,-0.309136,0.553314
14118,1572,N_442,,442,VOID,-0.458902,6.606527,-0.309136,0.553314
14119,1578,N_442,,442,VOID,-0.458902,6.606527,-0.309136,0.553314
14120,1585,N_442,,442,VOID,-0.458902,6.606527,-0.309136,0.553314
14121,1595,N_442,,442,VOID,-0.458902,6.606527,-0.309136,0.553314


In [10]:
# Adding humidity as a feature: reshape the humidity table to one row per (time, sensor)
df_hum = hum.melt(id_vars='M.Time[d]', var_name='Sensor ID', value_name='Humidity')

# Average of the humidity values that are present (avgcolumn skips NaNs)
averageh = avgcolumn(df_hum,'Humidity')

# Filling NaNs with this average (fillna is applied to every column of df_hum)
df_hum = df_hum.fillna(averageh)

# Then, add a column in the main DataFrame.
# NOTE(review): this assignment aligns on the row index, not on (time, sensor). It assumes
# that `hum` melts to the same row order as `df_tem` did — TODO confirm both CSVs share
# the same sensor column order and time rows.
df_merged['Humidity'] = df_hum['Humidity']


In [11]:
# Adding pressure as a feature: reshape the pressure table to one row per (time, sensor)
df_pre = pre.melt(id_vars='M.Time[d]', var_name='Sensor ID', value_name='Pressure')

# Average of the pressure values that are present (avgcolumn skips NaNs)
averagep = avgcolumn(df_pre,'Pressure')

# Filling NaNs with this average (fillna is applied to every column of df_pre)
df_pre = df_pre.fillna(averagep)

# Then, add a column in the main DataFrame.
# NOTE(review): aligned on the row index, not on (time, sensor). It assumes that `pre`
# melts to the same row order as `df_tem` did — TODO confirm.
df_merged['Pressure'] = df_pre['Pressure']

# We didn't remove potential outliers from either humidity or pressure, as we didn't find a reliable way to detect them.
# They didn't end up causing any major issues.

In [12]:
# There is a huge gap in the temperature data; print the two values on either side of it
print(df_merged[df_merged['Temperature']<1000]['Temperature'].max())
print(df_merged[df_merged['Temperature']>1000]['Temperature'].min())

# We can safely assume that the values above the gap are outliers, so they are blanked to NaN (in place)
replacewithvalue(df_merged,'Temperature', min_val = 1000.0)

# Average temperature of the readings that are present (without outliers!)
average = avgcolumn(df_merged,'Temperature')

# Fill the missing temperatures with this average.
# Only the Temperature column is filled: a frame-wide fillna would also write the
# temperature mean into any NaN of the other columns (material, coordinates, humidity, pressure).
df_merged = df_merged.assign(Temperature=df_merged['Temperature'].fillna(average))

# Since an average isn't a very realistic value for missing data, we want to minimize its presence
# in our dataset, hence the removal of N_442 (which had no temperature values at all)

141.2467449
2518.947148359651


In [13]:
# Extract the feature table and the target variable from the merged dataset
X, y = Xy(df_merged)

# Shorter column names for simplicity and clarity
column_aliases = {
    'M.Time[d]': 'Time',
    'Coor X [m]': 'X',
    'Coor Y [m]': 'Y',
    'Coor Z [m]': 'Z',
    'R [m]': 'R',
}
X = X.rename(columns=column_aliases)

# Hold out 30 % of the rows as a validation set; the seed is fixed so the split is reproducible
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=6,
)

In [14]:
# Material encoder

# Label encoder, numeralize the material to make it a feature
# This is the first encoder we used, we ended up changing it

#le = LabelEncoder()
#df_merged['Material_encoded'] = le.fit_transform(df_merged['Material'])

# Target (mean) encoding: each material is replaced by the mean training temperature of that material.
# material_target_mean will be re-used for test data
material_target_mean = X_train.assign(temp=y_train).groupby('Material')['temp'].mean()

# Fallback for materials that never occur in the training split.
# y_train is a one-column DataFrame, so y_train.mean() is a Series; passing a Series to
# fillna aligns on its index and fills nothing. A plain scalar is required here.
global_target_mean = float(y_train['Temperature'].mean())

X_train = (
    X_train
    .assign(Material_encoded=X_train['Material'].map(material_target_mean))
    .drop('Material', axis=1)
)
X_val = (
    X_val
    .assign(Material_encoded=X_val['Material'].map(material_target_mean).fillna(global_target_mean))
    .drop('Material', axis=1)
)

In [15]:
X_train

Unnamed: 0,Time,X,Y,Z,R,Humidity,Pressure,Material_encoded
19626,1606,-0.035962,32.119083,-3.393802,3.393993,100.000000,610.750281,21.427508
7442,1895,-0.909658,43.462893,-7.054084,7.112494,100.000000,1439.949735,21.427508
20998,1572,-1.648518,15.042693,-2.251196,2.790250,99.954874,-109.128552,21.427508
18307,1560,0.657925,40.670607,-2.874547,2.948879,100.000000,661.052421,21.427508
8571,4133,-3.868273,23.470900,-3.129738,4.975822,100.000000,663.889383,21.427508
...,...,...,...,...,...,...,...,...
8527,1726,7.059769,12.444337,13.374795,15.123673,100.000000,1481.812486,21.427508
4714,1606,2.624371,34.454435,-14.775903,15.007153,100.000000,1575.313804,21.427508
10196,2090,-8.076927,37.847372,12.613456,14.977851,100.000000,1474.120678,21.427508
8419,1560,17.696309,37.374955,-8.553091,19.654890,100.000000,1587.216234,21.427508


In [16]:
X_val

Unnamed: 0,Time,X,Y,Z,R,Humidity,Pressure,Material_encoded
563,1982,16.968222,33.114548,9.603087,19.497175,100.000000,1547.243977,21.427508
1259,1621,-1.075008,35.115563,6.027518,6.122631,100.000000,1255.884382,21.427508
27100,4779,-2.211542,32.881195,2.876762,3.628591,100.000000,792.217394,21.427508
5594,3616,10.920055,6.810644,-5.796728,12.363238,100.000000,1588.007175,21.427508
21223,1578,1.867994,15.712474,-0.473972,1.927188,99.561104,-1063.607571,21.427508
...,...,...,...,...,...,...,...,...
27138,1558,-1.328886,37.648055,0.412752,1.391511,76.191193,-66012.684210,52.483605
9315,1560,0.113259,27.873806,0.598437,0.609060,75.057030,-75676.798240,52.483605
15242,1606,2.979139,45.792281,-11.000491,11.396757,100.000000,1552.724630,21.427508
397,1662,-10.308507,45.319484,-15.832324,18.892533,100.000000,1632.562777,21.427508


In [None]:
# We also tested linear regression with L1 (Lasso) and L2 (Ridge) regularization and a KNN regressor; none of them performed well.

In [None]:
# Random Forest Regressor 
# Fit on the training split, then predict the validation split.
# y_train is a one-column DataFrame (see Xy); .values.ravel() turns it into a 1-D array.

rf = RandomForestRegressor(n_estimators= 85, max_depth= 6, random_state= 13)
a = rf.fit(X_train, y_train.values.ravel())  # NOTE(review): `a` is not used by any later cell shown here
y_predrf = rf.predict(X_val)


In [None]:
eval(y_val, y_predrf,rf)

In [17]:
# XGBoost
# Fit on the training split. eval_set makes the fit report its metric per boosting round
# for the training split (validation_0) and the validation split (validation_1).
# This `xgbm` is the model used further down to predict the test data.

xgbm = XGBRegressor(n_estimators= 85, max_depth= 6, learning_rate= 0.09, random_state= 13)
xgbm.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_val, y_val)], verbose=True)
y_predxgbm = xgbm.predict(X_val)


[0]	validation_0-rmse:14.15308	validation_1-rmse:13.83967
[1]	validation_0-rmse:12.99239	validation_1-rmse:12.73524
[2]	validation_0-rmse:11.92203	validation_1-rmse:11.70303
[3]	validation_0-rmse:10.95414	validation_1-rmse:10.76329
[4]	validation_0-rmse:10.07367	validation_1-rmse:9.91778
[5]	validation_0-rmse:9.27568	validation_1-rmse:9.14770
[6]	validation_0-rmse:8.55039	validation_1-rmse:8.44120
[7]	validation_0-rmse:7.89216	validation_1-rmse:7.80841
[8]	validation_0-rmse:7.29111	validation_1-rmse:7.23012
[9]	validation_0-rmse:6.75691	validation_1-rmse:6.70661
[10]	validation_0-rmse:6.26457	validation_1-rmse:6.23259
[11]	validation_0-rmse:5.82494	validation_1-rmse:5.81065
[12]	validation_0-rmse:5.43030	validation_1-rmse:5.42987
[13]	validation_0-rmse:5.07028	validation_1-rmse:5.09590
[14]	validation_0-rmse:4.75194	validation_1-rmse:4.78514
[15]	validation_0-rmse:4.45897	validation_1-rmse:4.51292
[16]	validation_0-rmse:4.19832	validation_1-rmse:4.25813
[17]	validation_0-rmse:3.95731	v

In [18]:
eval(y_val, y_predxgbm, xgbm)
X_train

MSE : 5.0041
R² : 0.9779
Features importances : [0.10515049 0.00494711 0.02111005 0.00783627 0.5451717  0.21660084
 0.08414583 0.01503769]


Unnamed: 0,Time,X,Y,Z,R,Humidity,Pressure,Material_encoded
19626,1606,-0.035962,32.119083,-3.393802,3.393993,100.000000,610.750281,21.427508
7442,1895,-0.909658,43.462893,-7.054084,7.112494,100.000000,1439.949735,21.427508
20998,1572,-1.648518,15.042693,-2.251196,2.790250,99.954874,-109.128552,21.427508
18307,1560,0.657925,40.670607,-2.874547,2.948879,100.000000,661.052421,21.427508
8571,4133,-3.868273,23.470900,-3.129738,4.975822,100.000000,663.889383,21.427508
...,...,...,...,...,...,...,...,...
8527,1726,7.059769,12.444337,13.374795,15.123673,100.000000,1481.812486,21.427508
4714,1606,2.624371,34.454435,-14.775903,15.007153,100.000000,1575.313804,21.427508
10196,2090,-8.076927,37.847372,12.613456,14.977851,100.000000,1474.120678,21.427508
8419,1560,17.696309,37.374955,-8.553091,19.654890,100.000000,1587.216234,21.427508


In [None]:
# Gradient Boosting Regression
# Fit on the training split, then predict the validation split.
# random_state is fixed (same seed as the other two models) so that re-running the cell
# reproduces the same model and the same scores.

gbr = GradientBoostingRegressor(n_estimators=100, learning_rate=0.08, random_state=13)
gbr.fit(X_train, y_train.values.ravel())
y_predgbr = gbr.predict(X_val)


In [None]:
# Pass the fitted model as third argument, like the evaluation cells of the other models do
eval(y_val, y_predgbr, gbr)

In [None]:
# Comparisons

# Random Forest Regressor
#
#XGBoost
#
#Gradient Boosting Regression

In [None]:
cv_scores = cross_val_score(rf, X_train, y_train.values.ravel(), cv=5, scoring='neg_mean_squared_error')
print("MSE CV RF :", -cv_scores.mean())

In [None]:
cv_scores = cross_val_score(xgbm, X_train, y_train.values.ravel(), cv=5, scoring='neg_mean_squared_error')
print("MSE CV XGB :", -cv_scores.mean())

In [None]:
cv_scores = cross_val_score(gbr, X_train, y_train.values.ravel(), cv=5, scoring='neg_mean_squared_error')
print("MSE CV GBR :", -cv_scores.mean())

In [None]:
# Hyperparameters search

In [None]:
# Grid of hyperparameters to test
param_grid = {
    'n_estimators': [75, 80, 85, 90],
    'max_depth': [6, 7, 8],
    'learning_rate': [0.08,0.085,0.09,0.095]
}

# Configure the cross-validated search.
# The estimator must be a model instance: `xgb` is the xgboost *module*, not an estimator.
grid_search = GridSearchCV(
    estimator=XGBRegressor(random_state=13),
    param_grid=param_grid,
    cv=5,  # number of cross-validation folds
    scoring='neg_mean_squared_error',  # or 'r2', 'neg_mean_absolute_error', etc.
    verbose=1,
    n_jobs=-1  # use all available cores
)

# Run the search
grid_search.fit(X_train, y_train)

# Show the best parameters found
print("Meilleurs hyperparamètres :", grid_search.best_params_)

# Use the best model to predict the validation split
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_val)

In [None]:
# Neural Network

# The network is trained on standardised features and a standardised target.
# Both scalers are fitted on the training split only and then applied to the validation
# split. They are defined here because no other cell of the notebook creates them.
feature_scaler = StandardScaler()
X_train_s = feature_scaler.fit_transform(X_train)
X_val_s = feature_scaler.transform(X_val)

target_scaler = StandardScaler()
y_train_s = target_scaler.fit_transform(y_train)
y_val_s = target_scaler.transform(y_val)

# Initialize the neural network
nn = MLPRegressor(
    hidden_layer_sizes=(100, 50),  # you can experiment with these
    activation='relu',
    solver='adam',
    max_iter=300,
    random_state=1
)

# Fit the model (the scaled target is an (n, 1) array; ravel() makes it 1-D)
nn.fit(X_train_s, y_train_s.ravel())

# Predict (scaled)
y_pred_scaled = nn.predict(X_val_s)

# Inverse transform predictions and targets back to temperature units
y_pred = target_scaler.inverse_transform(y_pred_scaled.reshape(-1, 1))
y_val_real = target_scaler.inverse_transform(y_val_s)

# Evaluate
mse = mean_squared_error(y_val_real, y_pred)
r2 = r2_score(y_val_real, y_pred)

print(f"MSE : {mse:.4f}")
print(f"R²  : {r2:.4f}")

In [None]:
# Final Test 

In [19]:
#le2 = LabelEncoder()
#df_test['Material_encoded'] = le2.fit_transform(df_test['Material'])

# Build one row per (test sensor, measurement time): every test sensor is paired with every
# time of the training temperature table. A cross join does this directly, without adding
# a temporary "key" column to df_test, so the cell can be re-run safely.
# Row order: all times of the first sensor, then all times of the second sensor, and so on.
df_times = pd.DataFrame({"M.Time[d]": df_tem['M.Time[d]']})
df_test_prepared = df_test.merge(df_times, how="cross")

In [20]:
# Test humidity: reshape to one row per (time, sensor) and attach it as a feature.
# NOTE(review): the values are matched by row position (index), not by (time, sensor);
# presumably humtest lists the sensors in the same order as df_test — TODO confirm.
df_humtest = humtest.melt(
    id_vars='M.Time[d]',
    var_name='Sensor ID',
    value_name='Humidity',
)
df_test_prepared = df_test_prepared.assign(Humidity=df_humtest['Humidity'])

In [21]:
# Test pressure: reshape to one row per (time, sensor) and attach it as a feature.
# NOTE(review): the values are matched by row position (index), not by (time, sensor);
# presumably pretest lists the sensors in the same order as df_test — TODO confirm.
df_pretest = pretest.melt(
    id_vars='M.Time[d]',
    var_name='Sensor ID',
    value_name='Pressure',
)
df_test_prepared = df_test_prepared.assign(Pressure=df_pretest['Pressure'])

In [22]:
df_test_prepared

Unnamed: 0.1,Unnamed: 0,Sensor ID,Index,Material,Coor X [m],Coor Y [m],Coor Z [m],R [m],M.Time[d],Humidity,Pressure
0,0,N_901,901,OPA,0.295918,30.011150,1.776254,1.800735,1554,99.449325,-1333.500175
1,0,N_901,901,OPA,0.295918,30.011150,1.776254,1.800735,1556,99.448936,-1334.513483
2,0,N_901,901,OPA,0.295918,30.011150,1.776254,1.800735,1558,99.448741,-1335.179754
3,0,N_901,901,OPA,0.295918,30.011150,1.776254,1.800735,1560,99.448776,-1335.433509
4,0,N_901,901,OPA,0.295918,30.011150,1.776254,1.800735,1563,99.449343,-1334.889994
...,...,...,...,...,...,...,...,...,...,...,...
4635,145,N_1046,1046,OPA,1.117368,21.868866,-2.503031,2.741109,4133,100.000000,202.170497
4636,145,N_1046,1046,OPA,1.117368,21.868866,-2.503031,2.741109,4779,100.000000,100.066792
4637,145,N_1046,1046,OPA,1.117368,21.868866,-2.503031,2.741109,5587,99.998624,-3.608475
4638,145,N_1046,1046,OPA,1.117368,21.868866,-2.503031,2.741109,6597,99.959043,-107.650117


In [23]:
# Material encoder: reuse the per-material mean temperature learned on the training split.
# Materials unseen in training fall back to the overall training mean. That fallback must be
# a scalar: y_train.mean() on the one-column DataFrame is a Series, and fillna with a Series
# aligns on its index and fills nothing.
global_target_mean = float(y_train['Temperature'].mean())
df_test_prepared['Material_encoded'] = df_test_prepared['Material'].map(material_target_mean).fillna(global_target_mean)
df_test_prepared = df_test_prepared.drop('Material', axis=1)

In [24]:
# Feature table of the test set, with the same short column names as the training features.
# (df_test_prepared has no 'Temperature' column, so Xy returns X only.)
test_column_aliases = {
    'M.Time[d]': 'Time',
    'Coor X [m]': 'X',
    'Coor Y [m]': 'Y',
    'Coor Z [m]': 'Z',
    'R [m]': 'R',
}
X_test = Xy(df_test_prepared).rename(columns=test_column_aliases)




In [25]:
X_test

Unnamed: 0,Time,X,Y,Z,R,Humidity,Pressure,Material_encoded
0,1554,0.295918,30.011150,1.776254,1.800735,99.449325,-1333.500175,21.427508
1,1556,0.295918,30.011150,1.776254,1.800735,99.448936,-1334.513483,21.427508
2,1558,0.295918,30.011150,1.776254,1.800735,99.448741,-1335.179754,21.427508
3,1560,0.295918,30.011150,1.776254,1.800735,99.448776,-1335.433509,21.427508
4,1563,0.295918,30.011150,1.776254,1.800735,99.449343,-1334.889994,21.427508
...,...,...,...,...,...,...,...,...
4635,4133,1.117368,21.868866,-2.503031,2.741109,100.000000,202.170497,21.427508
4636,4779,1.117368,21.868866,-2.503031,2.741109,100.000000,100.066792,21.427508
4637,5587,1.117368,21.868866,-2.503031,2.741109,99.998624,-3.608475,21.427508
4638,6597,1.117368,21.868866,-2.503031,2.741109,99.959043,-107.650117,21.427508


In [26]:
# To produce a submission with another model, replace `xgbm` on the next line.
y_pred_t = xgbm.predict(X_test)

# X_test holds all time steps of the first sensor, then all of the second one, etc.,
# so the flat prediction vector is folded into one row per sensor and one column per time step.
# The number of time steps is read from the data instead of being hard-coded.
n_times = len(df_tem['M.Time[d]'])
y_pred_t = y_pred_t.reshape(-1, n_times)

In [27]:
# Column labels of the submission: the measurement times as strings ('0' is renamed to 'id')
time_labels = df_tem['M.Time[d]'].to_numpy().astype(str)
header = list(np.where(time_labels == '0', 'id', time_labels))

In [28]:
ex['id']
ids = ex['id'].to_numpy()

In [29]:
final = pd.DataFrame(y_pred_t, columns=header)
final.insert(0, "id", ids)


In [30]:
final.to_csv("Results/val.csv", index=False)

In [31]:
# Regression check: compare the freshly written result with a reference result, line by line
with open("Results/final47.csv", "r", encoding="utf-8") as f1:
    lignes1 = f1.readlines()
with open("Results/val.csv", "r", encoding="utf-8") as f2:
    lignes2 = f2.readlines()

print("Les fichiers sont identiques." if lignes1 == lignes2 else "Les fichiers sont différents.")


Les fichiers sont identiques.


In [32]:

# Second check, independent of row and column order: compare the two files as DataFrames

def _canonical_order(frame):
    # Sort the columns by label, then the rows by value (using the frame's original column order as sort keys)
    return (
        frame
        .sort_index(axis=1)
        .sort_values(by=frame.columns.tolist())
        .reset_index(drop=True)
    )

# Load the two CSV files
df1 = pd.read_csv("Results/final47.csv")
df2 = pd.read_csv("Results/val.csv")

# Sort columns and rows
df1_sorted = _canonical_order(df1)
df2_sorted = _canonical_order(df2)

# Compare
identiques = df1_sorted.equals(df2_sorted)
if identiques:
    print("Les fichiers sont identiques.")
else:
    print("Les fichiers sont différents.")


Les fichiers sont identiques.


In [None]:
# Learning curves: train a longer XGBoost run and track MAE, MSE and R² after every boosting round.
# NOTE(review): this cell rebinds `xgbm`, the name the test-prediction cell uses; re-running
# that cell afterwards would predict with this 300-round model.

# Initialize the model
xgbm = XGBRegressor(
    n_estimators=300,
    max_depth=6,
    learning_rate=0.09,
    objective='reg:squarederror',  # 'reg:absoluteerror' for a native MAE objective
    random_state=6
)

xgbm.set_params(eval_metric="mae")
xgbm.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_val, y_val)], verbose=True)

# Intermediate predictions: entry k uses only the first k+1 boosting rounds
n_rounds = len(xgbm.evals_result()['validation_0']['mae'])
train_preds = [xgbm.predict(X_train, iteration_range=(0, stop)) for stop in range(1, n_rounds + 1)]
val_preds = [xgbm.predict(X_val, iteration_range=(0, stop)) for stop in range(1, n_rounds + 1)]


def _metric_curve(metric, y_true, predictions):
    # One metric value per boosting round
    return [metric(y_true, p) for p in predictions]


# Compute the metrics at every step
train_mae = _metric_curve(mean_absolute_error, y_train, train_preds)
val_mae = _metric_curve(mean_absolute_error, y_val, val_preds)

train_mse = _metric_curve(mean_squared_error, y_train, train_preds)
val_mse = _metric_curve(mean_squared_error, y_val, val_preds)

train_r2 = _metric_curve(r2_score, y_train, train_preds)
val_r2 = _metric_curve(r2_score, y_val, val_preds)

# Plot the curves: one panel per metric, train and validation in the same panel
curves = [
    ('MAE', train_mae, val_mae),
    ('MSE', train_mse, val_mse),
    ('R²', train_r2, val_r2),
]

plt.figure(figsize=(15, 12))

for position, (metric_name, train_curve, val_curve) in enumerate(curves, start=1):
    plt.subplot(3, 1, position)
    plt.plot(train_curve, label=f'Train {metric_name}')
    plt.plot(val_curve, label=f'Val {metric_name}')
    if position == len(curves):
        # Only the bottom panel carries the x-axis label
        plt.xlabel('Boosting Rounds')
    plt.ylabel(metric_name)
    plt.legend()
    plt.grid(True)

plt.tight_layout()
plt.show()
