In [1]:
#import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.pipeline import Pipeline
import xgboost as xgb
import lightgbm as lgb
import warnings
warnings.filterwarnings('ignore')

In [2]:
df_train = pd.read_csv("data/final_train.csv")
df_test = pd.read_csv("data/final_test.csv")

In [3]:
df_train.head()

Unnamed: 0,manufacturer_Acer,manufacturer_Apple,manufacturer_Asus,manufacturer_Chuwi,manufacturer_Dell,manufacturer_Fujitsu,manufacturer_Google,manufacturer_HP,manufacturer_Huawei,manufacturer_LG,...,gpu_provider_Nvidia,screen_size,total_pixels,ram,ssd,hdd,hybrid,clock_speed,weight_kg,price
0,0,1,0,0,0,0,0,0,0,0,...,0,13.3,4096000.0,8.0,128.0,0.0,0.0,2.3,1.37,11912523.48
1,0,1,0,0,0,0,0,0,0,0,...,0,13.3,1296000.0,8.0,0.0,0.0,128.0,1.8,1.34,7993374.48
2,0,0,0,0,0,0,0,1,0,0,...,0,15.6,2073600.0,8.0,256.0,0.0,0.0,2.5,1.86,5112900.0
3,0,1,0,0,0,0,0,0,0,0,...,0,15.4,5184000.0,16.0,512.0,0.0,0.0,2.7,1.83,22563005.4
4,0,1,0,0,0,0,0,0,0,0,...,0,13.3,4096000.0,8.0,256.0,0.0,0.0,3.1,1.37,16037611.2


In [4]:
df_test.head()

Unnamed: 0,manufacturer_Acer,manufacturer_Apple,manufacturer_Asus,manufacturer_Chuwi,manufacturer_Dell,manufacturer_Fujitsu,manufacturer_Google,manufacturer_HP,manufacturer_Huawei,manufacturer_LG,...,gpu_provider_Nvidia,screen_size,total_pixels,ram,ssd,hdd,hybrid,clock_speed,weight_kg,price
0,0,0,0,0,0,0,0,1,0,0,...,0,15.6,1049088.0,6.0,0.0,1024.0,0.0,2.7,2.04,5148468.0
1,0,0,1,0,0,0,0,0,0,0,...,1,17.3,2073600.0,16.0,256.0,1024.0,0.0,2.8,2.99,15552108.0
2,0,0,0,0,1,0,0,0,0,0,...,0,15.6,2073600.0,12.0,512.0,0.0,0.0,2.7,2.19,11550708.0
3,0,0,0,0,0,0,0,0,0,0,...,0,13.3,2073600.0,4.0,128.0,0.0,0.0,2.3,1.2,10625940.0
4,0,0,0,0,0,0,0,0,0,0,...,0,15.6,2073600.0,6.0,256.0,0.0,0.0,3.6,2.2,4881708.0


In [5]:
#train-val-test split
x = df_train.drop("price", axis = 1)
y = df_train["price"]

#price bins for stratified sampling
y_bins = pd.qcut(y, q = 5, labels = False)

x_train, x_val, y_train, y_val = train_test_split(x, y, test_size = 0.2, random_state = 42,
                                                  stratify = y_bins)

x_test = df_test.drop("price", axis = 1)
y_test = df_test["price"]

print(f"Training set: {x_train.shape}")
print(f"Validation set: {x_val.shape}")
print(f"Testing set: {x_test.shape}")

Training set: (781, 44)
Validation set: (196, 44)
Testing set: (325, 44)


In [6]:
#function for evaluation
def calculate_metrics(y_true, y_pred):
    return {
        'RMSE': np.sqrt(mean_squared_error(y_true, y_pred)),
        'MAE': mean_absolute_error(y_true, y_pred),
        'R2': r2_score(y_true, y_pred),
        'MAPE': np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    }


# final result dictionary
results = {}

In [7]:
#Linear Regression

#pipeline
linear_pipeline = Pipeline([
    ("scaler", RobustScaler()),
    ("regressor", LinearRegression())
])

#fit model
linear_pipeline.fit(x_train, y_train)

#prediction for train val test data
linear_y_train_hat = linear_pipeline.predict(x_train)
linear_y_val_hat = linear_pipeline.predict(x_val)
linear_y_test_hat = linear_pipeline.predict(x_test)

#evaluation metrics for train val test
linear_train_metrics = calculate_metrics(y_train, linear_y_train_hat)
linear_val_metrics = calculate_metrics(y_val, linear_y_val_hat)
linear_test_metrics = calculate_metrics(y_test, linear_y_test_hat)

#cv rmse score
linear_cv_scores = cross_val_score(linear_pipeline, x_train, y_train, cv = 5, scoring='neg_mean_squared_error')
linear_cv_rmse = np.sqrt(-linear_cv_scores.mean())

#store result
results["Linear_Regression"] = {
    "train_metrics" : linear_train_metrics,
    "val_metrics" : linear_val_metrics,
    "test_metrics" : linear_test_metrics,
    "cv_rmse" : linear_cv_rmse,
    "model" : linear_pipeline
}

print(f"Linear Regression:\nTest R²: {linear_test_metrics['R2']:.4f} \nTest RMSE: {linear_test_metrics['RMSE']:,.0f}")

Linear Regression:
Test R²: 0.6692 
Test RMSE: 3,412,918


In [8]:
#Ridge Regression

#pipeline
ridge_pipeline = Pipeline([
    ('scaler', RobustScaler()),
    ('regressor', Ridge(random_state=42))
])

#parameters grid
ridge_params = {
    'regressor__alpha': [0.1, 1.0, 10.0, 100.0, 1000.0],
    'regressor__solver': ['auto', 'svd', 'saga']
}

#Grid Search CV
ridge_grid = GridSearchCV(ridge_pipeline, 
                          ridge_params,
                          cv = 5,
                          scoring = "neg_mean_squared_error",
                          n_jobs = -1,
                          verbose = 0
                         )

#fit model 
ridge_grid.fit(x_train, y_train)

#best model
ridge_best_model = ridge_grid.best_estimator_
print(f"Best Ridge params: {ridge_grid.best_params_}")

#prediction for train val test data
ridge_y_train_hat = ridge_best_model.predict(x_train)
ridge_y_val_hat = ridge_best_model.predict(x_val)
ridge_y_test_hat = ridge_best_model.predict(x_test)

#evaluation metrics for train val test
ridge_train_metrics = calculate_metrics(y_train, ridge_y_train_hat)
ridge_val_metrics = calculate_metrics(y_val, ridge_y_val_hat)
ridge_test_metrics = calculate_metrics(y_test, ridge_y_test_hat)

#cv rmse score
ridge_cv_scores = cross_val_score(ridge_best_model, x_train, y_train, 
                                 cv=5, scoring='neg_mean_squared_error')
ridge_cv_rmse = np.sqrt(-ridge_cv_scores.mean())

#store result
results['Ridge_Regression'] = {
    "train_metrics": ridge_train_metrics,
    "val_metrics": ridge_val_metrics,
    "test_metrics": ridge_test_metrics,
    "cv_rmse": ridge_cv_rmse,
    "model": ridge_best_model
}

print(f"Ridge Regression:\nR²: {ridge_test_metrics['R2']:.4f}\nTest RMSE: {ridge_test_metrics['RMSE']:,.0f}")

Best Ridge params: {'regressor__alpha': 1.0, 'regressor__solver': 'svd'}
Ridge Regression:
R²: 0.6800
Test RMSE: 3,356,861


In [9]:
#Lasso Regression

#pipeline
lasso_pipeline = Pipeline([
    ('scaler', RobustScaler()),
    ('regressor', Lasso(random_state=42, max_iter=2000))
])

# Parameter grid
lasso_params = {
    'regressor__alpha': [0.01, 0.1, 1.0, 10.0, 100.0],
    'regressor__selection': ['cyclic', 'random']
}

# Grid search
lasso_grid = GridSearchCV(lasso_pipeline, lasso_params, cv=5, 
                         scoring='neg_mean_squared_error', n_jobs=-1, verbose=0)

#fit model
lasso_grid.fit(x_train, y_train)

#Best model
lasso_best_model = lasso_grid.best_estimator_
print(f"Best Lasso params: {lasso_grid.best_params_}")

#prediction for train val test data
lasso_y_train_hat = lasso_best_model.predict(x_train)
lasso_y_val_hat = lasso_best_model.predict(x_val)
lasso_y_test_hat = lasso_best_model.predict(x_test)

#evaluation metrics for train val test
lasso_train_metrics = calculate_metrics(y_train, lasso_y_train_hat)
lasso_val_metrics = calculate_metrics(y_val, lasso_y_val_hat)
lasso_test_metrics = calculate_metrics(y_test, lasso_y_test_hat)

#cv rmse score
lasso_cv_scores = cross_val_score(lasso_best_model, x_train, y_train, 
                                 cv=5, scoring='neg_mean_squared_error')
lasso_cv_rmse = np.sqrt(-lasso_cv_scores.mean())

# Store results
results["Lasso_Regression"] = {
    "train_metrics": lasso_train_metrics,
    "val_metrics": lasso_val_metrics,
    "test_metrics": lasso_test_metrics,
    "cv_rmse": lasso_cv_rmse,
    "model": lasso_best_model
}

print(f"Lasso Regression:\nTest R²: {lasso_test_metrics['R2']:.4f}\nTest RMSE: ${lasso_test_metrics['RMSE']:,.0f}")

Best Lasso params: {'regressor__alpha': 0.01, 'regressor__selection': 'cyclic'}
Lasso Regression:
Test R²: 0.6687
Test RMSE: $3,415,583


In [10]:
# ElasticNet Regression

# Pipeline
elastic_pipeline = Pipeline([
    ('scaler', RobustScaler()),
    ('regressor', ElasticNet(random_state=42, max_iter=2000))
])

# Parameter grid
elastic_params = {
    'regressor__alpha': [0.01, 0.1, 1.0, 10.0],
    'regressor__l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9]
}

# Grid search
elastic_grid = GridSearchCV(elastic_pipeline, elastic_params, cv=5, 
                           scoring='neg_mean_squared_error', n_jobs=-1, verbose=0)

# Fit model
elastic_grid.fit(x_train, y_train)

# Best model
elastic_best_model = elastic_grid.best_estimator_
print(f"Best ElasticNet params: {elastic_grid.best_params_}")

# Prediction for train val test data
elastic_y_train_hat = elastic_best_model.predict(x_train)
elastic_y_val_hat = elastic_best_model.predict(x_val)
elastic_y_test_hat = elastic_best_model.predict(x_test)

# Evaluation metrics for train val test
elastic_train_metrics = calculate_metrics(y_train, elastic_y_train_hat)
elastic_val_metrics = calculate_metrics(y_val, elastic_y_val_hat)
elastic_test_metrics = calculate_metrics(y_test, elastic_y_test_hat)

# CV RMSE score
elastic_cv_scores = cross_val_score(elastic_best_model, x_train, y_train, 
                                   cv=5, scoring='neg_mean_squared_error')
elastic_cv_rmse = np.sqrt(-elastic_cv_scores.mean())

# Store results
results["ElasticNet_Regression"] = {
    "train_metrics": elastic_train_metrics,
    "val_metrics": elastic_val_metrics,
    "test_metrics": elastic_test_metrics,
    "cv_rmse": elastic_cv_rmse,
    "model": elastic_best_model
}

print(f"ElasticNet Regression:\nTest R²: {elastic_test_metrics['R2']:.4f}\nTest RMSE: ${elastic_test_metrics['RMSE']:,.0f}")

Best ElasticNet params: {'regressor__alpha': 0.01, 'regressor__l1_ratio': 0.9}
ElasticNet Regression:
Test R²: 0.6784
Test RMSE: $3,365,442


In [11]:
# Random Forest

# Pipeline
rf_pipeline = Pipeline([
    ('regressor', RandomForestRegressor(random_state=42, n_jobs=-1))
])

# Parameter grid
rf_params = {
    'regressor__n_estimators': [100, 200, 300],
    'regressor__max_depth': [10, 20, None],
    'regressor__min_samples_split': [2, 5, 10],
    'regressor__min_samples_leaf': [1, 2, 4],
    'regressor__max_features': ['sqrt', 'log2']
}

# Grid search
rf_grid = GridSearchCV(rf_pipeline, rf_params, cv=5, 
                      scoring='neg_mean_squared_error', n_jobs=-1, verbose=0)

# Fit model
rf_grid.fit(x_train, y_train)

# Best model
rf_best_model = rf_grid.best_estimator_
print(f"Best Random Forest params: {rf_grid.best_params_}")

# Prediction for train val test data
rf_y_train_hat = rf_best_model.predict(x_train)
rf_y_val_hat = rf_best_model.predict(x_val)
rf_y_test_hat = rf_best_model.predict(x_test)

# Evaluation metrics for train val test
rf_train_metrics = calculate_metrics(y_train, rf_y_train_hat)
rf_val_metrics = calculate_metrics(y_val, rf_y_val_hat)
rf_test_metrics = calculate_metrics(y_test, rf_y_test_hat)

# CV RMSE score
rf_cv_scores = cross_val_score(rf_best_model, x_train, y_train, 
                              cv=5, scoring='neg_mean_squared_error')
rf_cv_rmse = np.sqrt(-rf_cv_scores.mean())

# Store results
results["Random_Forest"] = {
    "train_metrics": rf_train_metrics,
    "val_metrics": rf_val_metrics,
    "test_metrics": rf_test_metrics,
    "cv_rmse": rf_cv_rmse,
    "model": rf_best_model
}

print(f"Random Forest:\nTest R²: {rf_test_metrics['R2']:.4f}\nTest RMSE: ${rf_test_metrics['RMSE']:,.0f}")

Best Random Forest params: {'regressor__max_depth': 20, 'regressor__max_features': 'log2', 'regressor__min_samples_leaf': 1, 'regressor__min_samples_split': 2, 'regressor__n_estimators': 300}
Random Forest:
Test R²: 0.7707
Test RMSE: $2,841,306


In [12]:
# Gradient Boosting

# Pipeline
gb_pipeline = Pipeline([
    ('regressor', GradientBoostingRegressor(random_state=42))
])

# Parameter grid
gb_params = {
    'regressor__n_estimators': [100, 200, 300],
    'regressor__max_depth': [3, 5, 7],
    'regressor__learning_rate': [0.01, 0.1, 0.2],
    'regressor__subsample': [0.8, 0.9, 1.0],
    'regressor__min_samples_split': [2, 5, 10]
}

# Grid search
gb_grid = GridSearchCV(gb_pipeline, gb_params, cv=5, 
                      scoring='neg_mean_squared_error', n_jobs=-1, verbose=0)

# Fit model
gb_grid.fit(x_train, y_train)

# Best model
gb_best_model = gb_grid.best_estimator_
print(f"Best Gradient Boosting params: {gb_grid.best_params_}")

# Prediction for train val test data
gb_y_train_hat = gb_best_model.predict(x_train)
gb_y_val_hat = gb_best_model.predict(x_val)
gb_y_test_hat = gb_best_model.predict(x_test)

# Evaluation metrics for train val test
gb_train_metrics = calculate_metrics(y_train, gb_y_train_hat)
gb_val_metrics = calculate_metrics(y_val, gb_y_val_hat)
gb_test_metrics = calculate_metrics(y_test, gb_y_test_hat)

# CV RMSE score
gb_cv_scores = cross_val_score(gb_best_model, x_train, y_train, 
                              cv=5, scoring='neg_mean_squared_error')
gb_cv_rmse = np.sqrt(-gb_cv_scores.mean())

# Store results
results["Gradient_Boosting"] = {
    "train_metrics": gb_train_metrics,
    "val_metrics": gb_val_metrics,
    "test_metrics": gb_test_metrics,
    "cv_rmse": gb_cv_rmse,
    "model": gb_best_model
}

print(f"Gradient Boosting:\nTest R²: {gb_test_metrics['R2']:.4f}\nTest RMSE: ${gb_test_metrics['RMSE']:,.0f}")

Best Gradient Boosting params: {'regressor__learning_rate': 0.1, 'regressor__max_depth': 7, 'regressor__min_samples_split': 10, 'regressor__n_estimators': 100, 'regressor__subsample': 0.8}
Gradient Boosting:
Test R²: 0.7613
Test RMSE: $2,899,352


In [13]:
# XGBoost

# Pipeline
xgb_pipeline = Pipeline([
    ('regressor', xgb.XGBRegressor(random_state=42, n_jobs=-1))
])

# Parameter grid
xgb_params = {
    'regressor__n_estimators': [100, 200, 300],
    'regressor__max_depth': [3, 5, 7],
    'regressor__learning_rate': [0.01, 0.1, 0.2],
    'regressor__subsample': [0.8, 0.9, 1.0],
    'regressor__colsample_bytree': [0.8, 0.9, 1.0],
    'regressor__reg_alpha': [0, 0.1, 1],
    'regressor__reg_lambda': [1, 1.5, 2]
}

# Grid search
xgb_grid = GridSearchCV(xgb_pipeline, xgb_params, cv=5, 
                       scoring='neg_mean_squared_error', n_jobs=-1, verbose=0)

# Fit model
xgb_grid.fit(x_train, y_train)

# Best model
xgb_best_model = xgb_grid.best_estimator_
print(f"Best XGBoost params: {xgb_grid.best_params_}")

# Prediction for train val test data
xgb_y_train_hat = xgb_best_model.predict(x_train)
xgb_y_val_hat = xgb_best_model.predict(x_val)
xgb_y_test_hat = xgb_best_model.predict(x_test)

# Evaluation metrics for train val test
xgb_train_metrics = calculate_metrics(y_train, xgb_y_train_hat)
xgb_val_metrics = calculate_metrics(y_val, xgb_y_val_hat)
xgb_test_metrics = calculate_metrics(y_test, xgb_y_test_hat)

# CV RMSE score
xgb_cv_scores = cross_val_score(xgb_best_model, x_train, y_train, 
                               cv=5, scoring='neg_mean_squared_error')
xgb_cv_rmse = np.sqrt(-xgb_cv_scores.mean())

# Store results
results["XGBoost"] = {
    "train_metrics": xgb_train_metrics,
    "val_metrics": xgb_val_metrics,
    "test_metrics": xgb_test_metrics,
    "cv_rmse": xgb_cv_rmse,
    "model": xgb_best_model
}

print(f"XGBoost:\nTest R²: {xgb_test_metrics['R2']:.4f}\nTest RMSE: ${xgb_test_metrics['RMSE']:,.0f}")

Best XGBoost params: {'regressor__colsample_bytree': 0.8, 'regressor__learning_rate': 0.2, 'regressor__max_depth': 7, 'regressor__n_estimators': 100, 'regressor__reg_alpha': 0.1, 'regressor__reg_lambda': 1, 'regressor__subsample': 0.9}
XGBoost:
Test R²: 0.7654
Test RMSE: $2,874,335


In [14]:
# LightGBM

# Pipeline
lgb_pipeline = Pipeline([
    ('regressor', lgb.LGBMRegressor(random_state=42, n_jobs=-1, verbose=-1))
])

# Parameter grid
lgb_params = {
    'regressor__n_estimators': [100, 200, 300],
    'regressor__max_depth': [3, 5, 7],
    'regressor__learning_rate': [0.01, 0.1, 0.2],
    'regressor__subsample': [0.8, 0.9, 1.0],
    'regressor__colsample_bytree': [0.8, 0.9, 1.0],
    'regressor__reg_alpha': [0, 0.1, 1],
    'regressor__reg_lambda': [1, 1.5, 2]
}

# Grid search
lgb_grid = GridSearchCV(lgb_pipeline, lgb_params, cv=5, 
                       scoring='neg_mean_squared_error', n_jobs=-1, verbose=0)

# Fit model
lgb_grid.fit(x_train, y_train)

# Best model
lgb_best_model = lgb_grid.best_estimator_
print(f"Best LightGBM params: {lgb_grid.best_params_}")

# Prediction for train val test data
lgb_y_train_hat = lgb_best_model.predict(x_train)
lgb_y_val_hat = lgb_best_model.predict(x_val)
lgb_y_test_hat = lgb_best_model.predict(x_test)

# Evaluation metrics for train val test
lgb_train_metrics = calculate_metrics(y_train, lgb_y_train_hat)
lgb_val_metrics = calculate_metrics(y_val, lgb_y_val_hat)
lgb_test_metrics = calculate_metrics(y_test, lgb_y_test_hat)

# CV RMSE score
lgb_cv_scores = cross_val_score(lgb_best_model, x_train, y_train, 
                               cv=5, scoring='neg_mean_squared_error')
lgb_cv_rmse = np.sqrt(-lgb_cv_scores.mean())

# Store results
results["LightGBM"] = {
    "train_metrics": lgb_train_metrics,
    "val_metrics": lgb_val_metrics,
    "test_metrics": lgb_test_metrics,
    "cv_rmse": lgb_cv_rmse,
    "model": lgb_best_model
}

print(f"LightGBM:\nTest R²: {lgb_test_metrics['R2']:.4f}\nTest RMSE: ${lgb_test_metrics['RMSE']:,.0f}")

Best LightGBM params: {'regressor__colsample_bytree': 0.8, 'regressor__learning_rate': 0.2, 'regressor__max_depth': 5, 'regressor__n_estimators': 300, 'regressor__reg_alpha': 0.1, 'regressor__reg_lambda': 1, 'regressor__subsample': 0.8}
LightGBM:
Test R²: 0.7182
Test RMSE: $3,150,033


In [17]:
# RESULTS SUMMARY


print("FINAL RESULTS SUMMARY\n")

print("="*95)
print(f"{'Model':<20} {'Train R²':<9} {'Val R²':<9} {'Test R²':<9} {'Train RMSE':<11} {'Val RMSE':<11} {'Test RMSE':<11} {'CV RMSE':<10}")
print("="*95)

# Sort models by test R²
sorted_results = sorted(results.items(), key=lambda x: x[1]['test_metrics']['R2'], reverse=True)

for model_name, metrics in sorted_results:
    train_r2 = metrics['train_metrics']['R2']
    val_r2 = metrics['val_metrics']['R2']
    test_r2 = metrics['test_metrics']['R2']
    train_rmse = metrics['train_metrics']['RMSE']
    val_rmse = metrics['val_metrics']['RMSE']
    test_rmse = metrics['test_metrics']['RMSE']
    cv_rmse = metrics['cv_rmse']
    
    print(f"{model_name:<20} {train_r2:<9.4f} {val_r2:<9.4f} {test_r2:<9.4f} {train_rmse:<11.0f} "
          f"{val_rmse:<11.0f} {test_rmse:<11.0f} {cv_rmse:<10.0f}")

print("="*95)

FINAL RESULTS SUMMARY

Model                Train R²  Val R²    Test R²   Train RMSE  Val RMSE    Test RMSE   CV RMSE   
Random_Forest        0.9757    0.8727    0.7707    1001517     2063782     2841306     2670264   
XGBoost              0.9949    0.8281    0.7654    459787      2398648     2874335     2540570   
Gradient_Boosting    0.9844    0.8474    0.7613    801496      2259810     2899352     2481873   
LightGBM             0.9542    0.8394    0.7182    1375579     2318078     3150033     2657594   
Ridge_Regression     0.8078    0.7629    0.6800    2817297     2817132     3356861     2998211   
ElasticNet_Regression 0.8083    0.7597    0.6784    2813799     2835823     3365442     2996216   
Linear_Regression    0.8093    0.7435    0.6692    2806194     2929694     3412918     3003509   
Lasso_Regression     0.8093    0.7435    0.6687    2806194     2929694     3415583     3003442   


In [18]:
# Best model information
best_model_name, best_metrics = sorted_results[0]

print(f"\nBEST MODEL: {best_model_name}")
print(f"Test R²: {best_metrics['test_metrics']['R2']:.4f}")
print(f"Test RMSE: ${best_metrics['test_metrics']['RMSE']:,.2f}")
print(f"Cross-validation RMSE: ${best_metrics['cv_rmse']:,.2f}")

# Get best model for predictions
best_model = results[best_model_name]['model']

# Feature importance for tree-based models
if hasattr(best_model.named_steps['regressor'], 'feature_importances_'):
    print(f"\n=== Feature Importance ({best_model_name}) ===")
    importances = best_model.named_steps['regressor'].feature_importances_
    feature_names = x_train.columns
    
    importance_df = pd.DataFrame({
        'feature': feature_names,
        'importance': importances
    }).sort_values('importance', ascending=False)
    
    print(importance_df.head(10).to_string(index=False))


BEST MODEL: Random_Forest
Test R²: 0.7707
Test RMSE: $2,841,306.17
Cross-validation RMSE: $2,670,263.52

=== Feature Importance (Random_Forest) ===
           feature  importance
               ram    0.166967
         weight_kg    0.135221
               ssd    0.126712
       clock_speed    0.112841
      total_pixels    0.086560
 category_Notebook    0.085610
       screen_size    0.044767
   category_Gaming    0.025603
manufacturer_Razer    0.023241
               hdd    0.019679
