In [1]:
import cudf
import cupy as cp
import xgboost as xgb

print("Training the XGBoost model...")

# Load the training features and labels with cuDF.
# Only the first column of the label file is kept, as a Series.
X_train = cudf.read_csv("../data/train_test/X_train.csv")
y_train = cudf.read_csv("../data/train_test/y_train.csv").iloc[:, 0]

# Convert cuDF -> cupy.ndarray so the estimator is fed device arrays.
X_train_cp = X_train.to_cupy()
y_train_cp = y_train.to_cupy()

# NOTE: `use_label_encoder` is intentionally not passed. It is a deprecated,
# ignored parameter in the XGBoost releases that accept `device='cuda'` (2.0+),
# where it only produces an "unused parameter" warning.
# NOTE(review): no eval_set is passed to fit(), so `eval_metric` is configured
# but never reported during training -- confirm whether that is intended.
model = xgb.XGBClassifier(
    n_estimators=100,
    max_depth=6,
    learning_rate=0.1,
    eval_metric='mlogloss',
    tree_method='hist',
    device='cuda',
    # Pin the estimator's RNG so results stay repeatable across re-runs,
    # in particular if row/column subsampling is enabled later.
    random_state=42,
)

# Train on the GPU using the cupy arrays.
model.fit(X_train_cp, y_train_cp)

print("Model trained successfully!")

Training the XGBoost model...
Model trained successfully!


In [9]:
import json
import warnings
from pathlib import Path

import numpy as np
from cuml.preprocessing import LabelEncoder

# Make sure the output directory exists so none of the save calls below fail
# on a fresh checkout.
Path("../models").mkdir(parents=True, exist_ok=True)

# Read only the label column from the cleaned file to recover the original class names.
y_labels = cudf.read_csv("../data/processed_file_cleaned.csv", usecols=['mapped_label'])['mapped_label']

# Fit only: this cell needs the encoder's class list, not the encoded labels.
# NOTE(review): this assumes the training labels were produced by the same
# encoding of 'mapped_label' -- confirm against the preprocessing step.
le = LabelEncoder()
le.fit(y_labels)

# Sanity check: a differing class count means the saved index -> name mapping
# may not line up with the model's outputs. Warn rather than fail, because the
# mapping can still be usable for the classes the model did see.
n_model_classes = getattr(model, "n_classes_", None)
if n_model_classes is not None and len(le.classes_) != n_model_classes:
    warnings.warn(
        f"Label encoder has {len(le.classes_)} classes but the model was "
        f"trained on {n_model_classes}; the saved class names may not match "
        "the model's class indices."
    )

# Save the model
model.save_model("../models/xgb_model_v1.json")
print("Model saved as xgb_model_v1.json")

# Convert cudf column -> pandas -> numpy array of strings (object dtype).
classes_np = le.classes_.to_pandas().astype(str).to_numpy()

# np.save(..., allow_pickle=False) cannot store object arrays, so convert to a
# fixed-width Unicode array. astype(np.str_) sizes the width to the longest
# class name on its own and, unlike a manual max(), also accepts an empty array.
classes_np_fixed = classes_np.astype(np.str_)

# Save as .npy: compact and loads directly back into NumPy.
np.save("../models/label_encoder_classes_v1.npy", classes_np_fixed, allow_pickle=False)
print("Label Encoder classes saved as label_encoder_classes_v1.npy")

# Save as .json: human-readable and easy to inspect or share.
with open("../models/label_encoder_classes_v1.json", "w", encoding="utf-8") as f:
    json.dump(classes_np_fixed.tolist(), f, ensure_ascii=False)
print("Label Encoder classes saved as label_encoder_classes_v1.json")

Model saved as xgb_model_v1.json
Label Encoder classes saved as label_encoder_classes_v1.npy
Label Encoder classes saved as label_encoder_classes_v1.json
