In [1]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
import numpy as np

In [2]:
import pandas as pd

In [3]:
# Read the quote dataset from "final.csv" (path is relative to the working directory).
df_clean = pd.read_csv(filepath_or_buffer="final.csv")

In [4]:
# Normalise column labels: every space becomes an underscore
# (e.g. "Order Quantity" -> "Order_Quantity"). Re-running this cell is a no-op.
df_clean.columns = [label.replace(" ", "_") for label in df_clean.columns]

In [5]:
# Preview the first five rows (head() defaults to n=5).
df_clean.head()

Unnamed: 0,Alloy,Finish,Length,Weight,Tolerances,GD&T,Order_Quantity,LME_price,Customer_Category,Lead_Time_(weeks),Quote_Price_(SEK),Quote_Date,Profile_Name,Profile_Complexity,Manufacturing_Difficulty,LME_MA_7,LME_Lag_1
0,0.0,4.0,17.6,1.718,0.147,0.0,50862,2.75,2.0,11.0,3.83,2025-01-01,2.993724,1,0.147,2.75,2.75
1,1.0,3.0,27.0,1.245,0.067,2.0,116471,2.96,1.0,9.0,2.67,2025-01-01,3.096523,5,0.201,2.855,2.75
2,2.0,4.0,24.1,0.885,0.099,2.0,63929,3.97,2.0,7.0,3.6,2025-01-02,3.041944,4,0.297,3.226667,2.96
3,2.0,4.0,26.1,1.042,0.152,1.0,120204,2.83,3.0,10.0,2.61,2025-01-02,3.146656,5,0.304,3.1275,3.97
4,2.0,3.0,34.8,1.368,0.118,0.0,94419,3.59,1.0,2.0,3.06,2025-01-03,3.014675,3,0.118,3.22,2.83


In [6]:
# Build the feature matrix and target, fit three tree-ensemble regressors on
# the same random train/test split, and collect their hold-out metrics in
# `results_df` (one row per model: MAE, RMSE, R2 Score).

SEED = 42                      # shared seed for the split and every model
TEST_SIZE = 0.2                # fraction of rows held out for evaluation
TARGET = "Quote_Price_(SEK)"   # column the models learn to predict

# Features are every column except the target and the raw quote date.
X = df_clean.drop(columns=[TARGET, "Quote_Date"])
y = df_clean[TARGET]

# NOTE(review): the preview rows appear to be ordered by Quote_Date and the
# features include lag/rolling LME columns (LME_Lag_1, LME_MA_7). A random
# split can then place later quotes in the training set and earlier ones in
# the test set. If the scores look too good to be true, re-check with a
# chronological split and audit the features for target leakage — TODO confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

# Candidate models, all seeded identically so reruns are reproducible.
models = {
    "Random Forest": RandomForestRegressor(random_state=SEED),
    "XGBoost": XGBRegressor(random_state=SEED, verbosity=0),
    # verbosity=-1 silences LightGBM's "[Info] ..." training log so the cell
    # output stays clean, consistent with the quiet XGBoost setting above.
    "LightGBM": LGBMRegressor(random_state=SEED, verbosity=-1),
}

# Fit each model on the training split and score it on the held-out split.
results = []

for name, model in models.items():
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    mae = mean_absolute_error(y_test, preds)
    # RMSE is the square root of MSE, so it is in the same units as the target.
    rmse = np.sqrt(mean_squared_error(y_test, preds))
    r2 = r2_score(y_test, preds)

    # Metrics are rounded to 4 decimals for the summary table.
    results.append({
        "Model": name,
        "MAE": round(mae, 4),
        "RMSE": round(rmse, 4),
        "R2 Score": round(r2, 4)
    })

# One row per model; displayed in the next cell.
results_df = pd.DataFrame(results)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000229 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1697
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 15
[LightGBM] [Info] Start training from score 3.016937


In [7]:
# Display the per-model metric table (MAE, RMSE, R2 Score) as the cell output.
results_df

Unnamed: 0,Model,MAE,RMSE,R2 Score
0,Random Forest,0.0027,0.004,1.0
1,XGBoost,0.0075,0.0099,0.9998
2,LightGBM,0.0048,0.0063,0.9999
