In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize

In [2]:
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, KFold, RandomizedSearchCV
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score, make_scorer, mean_squared_error, root_mean_squared_error

In [6]:
from ordinalgbt.lgb import LGBMOrdinal

In [3]:
# Custom RMSE scoring with rounding
def rounded_rmse(y_true, y_pred):
    # Round predictions to nearest integer
    y_pred_rounded = np.round(y_pred)
    return np.sqrt(mean_squared_error(y_true, y_pred_rounded))

# Custom scorer for GridSearchCV
rmse_scorer = make_scorer(rounded_rmse, greater_is_better=False)

In [12]:
df = pd.read_csv("datas/df_1124v4_train.csv")
df_submission = pd.read_csv("datas/df_1124v4_test.csv")
# df = pd.read_csv("datas/df_1122.csv")
# df_pca = pd.read_csv("datas/df_1122_pca.csv")

In [13]:
X, y = df.drop(columns=["price"]), df["price"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=23
)

In [24]:
y = y.astype('int64')
y_train = y_train.astype('int64')

In [28]:
model = LGBMOrdinal()
param_grid = {
    'n_estimators': [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000],
    'num_leaves': [255, 511, 1023, 2047, None],
    'max_depth': [None, 3, 4, 5, 7, 8, 9, 10],
    'learning_rate': [.08, 0.05, 0.03, .01],
    'reg_alpha': [3, 2.5, 2, 1, .05, .03, .01]
}

# Train the model with GridSearchCV
grid_search = RandomizedSearchCV(
    model,
    param_grid,
    n_iter=100,
    cv=5,
    n_jobs=-1,
    verbose=2,
    error_score='raise'
)
grid_search.fit(X_train, y_train)

# Use the best model
best_model = grid_search.best_estimator_
best_params = grid_search.best_params_
print(f"Best XGB parameters: {best_params}")

Fitting 5 folds for each of 100 candidates, totalling 500 fits
Best XGB parameters: {'reg_alpha': 0.03, 'num_leaves': 1023, 'n_estimators': 300, 'max_depth': 9, 'learning_rate': 0.08}


In [29]:
# Predict on the validation set
y_pred = best_model.predict(X_test)
y_pred_rounded = np.clip(np.round(y_pred), 0, 5)

# Calculate final RMSE
final_rmse = np.sqrt(mean_squared_error(y_test, y_pred_rounded))
print(f"Final RMSE: {final_rmse}")

Final RMSE: 0.9603608663573153


In [None]:
# Predict on the validation set
y_pred = model1.predict(X_test)
y_pred_rounded = np.clip(np.round(y_pred), 0, 5)

# Calculate final RMSE
final_rmse = np.sqrt(mean_squared_error(y_test, y_pred_rounded))
print(f"Final RMSE: {final_rmse}")