In [21]:
# 1️⃣ Libraries
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score
import xgboost as xgb
import lightgbm as lgb

In [22]:
df=pd.read_csv("C:\\dataset\\Laptop-Price-Prediction\\data\\raw\\Laptop_price.csv")
df.head()

Unnamed: 0,Brand,Processor_Speed,RAM_Size,Storage_Capacity,Screen_Size,Weight,Price
0,Asus,3.830296,16,512,11.185147,2.641094,17395.093065
1,Acer,2.912833,4,1000,11.311372,3.260012,31607.605919
2,Lenovo,3.241627,4,256,11.853023,2.029061,9291.023542
3,Acer,3.806248,16,512,12.28036,4.573865,17436.728334
4,Acer,3.268097,32,1000,14.990877,4.193472,32917.990718


In [23]:
# 3️⃣ Split Features & Target
# ----------------------------
X = df.drop('Price', axis=1)
y = df['Price']


In [24]:
# 4️⃣ Encode Categorical Feature
# ----------------------------
cat_feature = 'Brand'  #  categorical feature
le = LabelEncoder()
X[cat_feature] = le.fit_transform(X[cat_feature])


In [25]:
# 5️⃣ Train-Test Split
# ----------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [27]:
# 6️⃣ XGBoost Regression
# ----------------------------
model_xgb = xgb.XGBRegressor(
    n_estimators=100,
    max_depth=3,
    learning_rate=0.1,
    objective='reg:squarederror',
    random_state=42
)

model_xgb.fit(X_train, y_train)
y_pred_xgb = model_xgb.predict(X_test)

In [None]:
# RMSE compatible for all scikit-learn versions
import os
import numpy as np
import joblib
rmse_xgb = np.sqrt(mean_squared_error(y_test, y_pred_xgb))

print("----- XGBoost Regression Results -----")
print("RMSE:", rmse_xgb)
print("R2 Score:", r2_score(y_test, y_pred_xgb))

# Optional: 5-fold Cross-validation
cv_scores_xgb = cross_val_score(model_xgb, X, y, cv=5, scoring='r2')
print("CV Mean R2:", cv_scores_xgb.mean())
# 8️⃣ Save Model
# ----------------------------
# if 'models' folder has no → create it
os.makedirs('models', exist_ok=True)

# Save XGBoost model
joblib.dump(model_xgb, 'models/xgboost_laptop_price.pkl')
print("✅ XGBoost model saved at 'models/xgboost_laptop_price.pkl'")


----- XGBoost Regression Results -----
RMSE: 196.92789189290392
R2 Score: 0.9995728847164996
CV Mean R2: 0.9995078753386011
✅ XGBoost model saved at 'models/xgboost_laptop_price.pkl'


In [None]:
# 7️⃣ LightGBM Regression
# ----------------------------
model_lgb = lgb.LGBMRegressor(
    n_estimators=100,
    max_depth=3,
    learning_rate=0.1,
    random_state=42
)

model_lgb.fit(X_train, y_train)
y_pred_lgb = model_lgb.predict(X_test)

rmse_lgb = np.sqrt(mean_squared_error(y_test, y_pred_lgb))

print("\n----- LightGBM Regression Results -----")
print("RMSE:", rmse_lgb)
print("R2 Score:", r2_score(y_test, y_pred_lgb))

# Optional: 5-fold Cross-validation
cv_scores_lgb = cross_val_score(model_lgb, X, y, cv=5, scoring='r2')
print("CV Mean R2:", cv_scores_lgb.mean())

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000054 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 779
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 6
[LightGBM] [Info] Start training from score 19650.767446

----- LightGBM Regression Results -----
RMSE: 194.00668543661243
R2 Score: 0.999585462293502
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000030 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 779
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 6
[LightGBM] [Info] Start training from score 19542.217690
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000057