In [1]:
import numpy as np 
import pandas as pd 
import seaborn as sns 
from matplotlib import pyplot as plt 
import warnings
# NOTE(review): this silences every warning for the rest of the session, so
# deprecation and fit-failure warnings raised by later cells are not shown.
warnings.filterwarnings("ignore")
# Lift the column cap so wide DataFrames are displayed with all their columns.
pd.set_option('display.max_columns', None)

In [58]:
# Read the laptop dataset from the CSV file and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="laptop_data_featured.csv")
df.head(n=5)

Unnamed: 0,Company,TypeName,Ram,OpSys,Weight,Price,ppi,is_ips,is_touchscreen,Cpu_name,CPU_Speed_GHz,SSD_GB,HDD_GB,gpu_brand
0,Apple,Ultrabook,8,mac,1.37,71378.6832,226.983005,1,0,Intel Core i5,2.3,128.0,0.0,Intel
1,Apple,Ultrabook,8,mac,1.34,47895.5232,127.67794,0,0,Intel Core i5,1.8,0.0,0.0,Intel
2,HP,Notebook,8,other,1.86,30636.0,141.211998,0,0,Intel Core i5,2.5,256.0,0.0,Intel
3,Apple,Ultrabook,16,mac,1.83,135195.336,220.534624,1,0,Intel Core i7,2.7,512.0,0.0,AMD
4,Apple,Ultrabook,8,mac,1.37,96095.808,226.983005,1,0,Intel Core i5,3.1,256.0,0.0,Intel


In [59]:
# Separate the regression target (Price) from the predictor columns.
y = df['Price']
X = df.drop(columns=['Price'])

In [60]:
# Model the natural log of the price instead of the raw price.
# The value is derived from df['Price'] rather than from the current `y`, so
# re-running this cell cannot apply the log a second time.
# Everything fitted on `y` from here on predicts log-price, and the R² values
# reported later are measured on that log scale; use np.exp on a prediction to
# get back to the original price units.
y = np.log(df['Price'])

In [61]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test split; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Encode the Company with target encoding

In [62]:
# Average (log) price of each company, learned from the training rows only so
# that no test-set prices enter the encoding.
mean_price_train = y_train.groupby(by=X_train['Company']).mean()

In [63]:
# Target-encode the training rows: each company is replaced by its
# training-set mean of the target.
# NOTE(review): every row's own target contributes to the mean it receives
# (no smoothing or out-of-fold scheme is applied).
X_train = X_train.assign(Company_encoded=X_train['Company'].map(mean_price_train))

In [64]:
# Encode the test split with the per-company means learned on the training split.
# A company that never occurs in training maps to NaN, which is then replaced
# by the overall training-set mean of the target.
overall_mean = y_train.mean()
X_test = X_test.assign(
    Company_encoded=X_test['Company'].map(mean_price_train).fillna(overall_mean)
)

In [65]:
# Check that Company_encoded was added to the training split.
X_train.head(n=5)

Unnamed: 0,Company,TypeName,Ram,OpSys,Weight,ppi,is_ips,is_touchscreen,Cpu_name,CPU_Speed_GHz,SSD_GB,HDD_GB,gpu_brand,Company_encoded
147,Asus,Notebook,4,windows,2.0,141.211998,0,0,other intel,1.1,0.0,1024.0,Intel,10.735512
342,HP,Notebook,8,windows,2.1,141.211998,1,0,Intel Core i3,2.4,0.0,1024.0,Nvidia,10.795093
1003,HP,Notebook,4,windows,1.64,111.935204,0,0,Intel Core i5,2.5,0.0,500.0,Intel,10.795093
814,Dell,Notebook,8,windows,2.18,141.211998,0,0,Intel Core i5,2.5,256.0,0.0,AMD,10.925298
344,Dell,Ultrabook,8,windows,1.2,165.632118,0,0,Intel Core i7,1.8,256.0,0.0,Intel,10.925298


In [66]:
# Check that Company_encoded was added to the test split.
X_test.head(n=5)

Unnamed: 0,Company,TypeName,Ram,OpSys,Weight,ppi,is_ips,is_touchscreen,Cpu_name,CPU_Speed_GHz,SSD_GB,HDD_GB,gpu_brand,Company_encoded
478,Dell,Notebook,8,windows,2.2,141.211998,0,0,Intel Core i5,1.6,0.0,1024.0,AMD,10.925298
724,MSI,Gaming,8,windows,2.7,127.335675,0,0,Intel Core i7,2.8,128.0,1024.0,Nvidia,11.389193
312,MSI,Gaming,16,windows,2.8,141.211998,1,0,Intel Core i7,2.8,256.0,1024.0,Nvidia,11.389193
851,Asus,Gaming,16,windows,2.1,141.211998,0,0,Intel Core i7,2.8,128.0,1024.0,Nvidia,10.735512
1275,Dell,Notebook,6,windows,2.3,100.45467,0,0,Intel Core i3,2.4,0.0,1024.0,Intel,10.925298


## Encode the TypeName with target encoding

In [67]:
# Average (log) price of each laptop type, computed on the training split only.
train_type_name_mean_price = y_train.groupby(by=X_train['TypeName']).mean()

In [68]:
# Replace each training row's type name with that type's training-set mean target.
X_train = X_train.assign(
    TypeName_encoded=X_train['TypeName'].map(train_type_name_mean_price)
)

In [69]:
# Encode the test split with the per-type means learned on the training split.
# A type name that never occurs in training maps to NaN and falls back to the
# overall training-set mean of the target.
overall_mean_typename = y_train.mean()
X_test = X_test.assign(
    TypeName_encoded=X_test['TypeName']
    .map(train_type_name_mean_price)
    .fillna(overall_mean_typename)
)

In [70]:
# Check that TypeName_encoded was added to the test split.
X_test.head(n=5)

Unnamed: 0,Company,TypeName,Ram,OpSys,Weight,ppi,is_ips,is_touchscreen,Cpu_name,CPU_Speed_GHz,SSD_GB,HDD_GB,gpu_brand,Company_encoded,TypeName_encoded
478,Dell,Notebook,8,windows,2.2,141.211998,0,0,Intel Core i5,1.6,0.0,1024.0,AMD,10.925298,10.506674
724,MSI,Gaming,8,windows,2.7,127.335675,0,0,Intel Core i7,2.8,128.0,1024.0,Nvidia,11.389193,11.35009
312,MSI,Gaming,16,windows,2.8,141.211998,1,0,Intel Core i7,2.8,256.0,1024.0,Nvidia,11.389193,11.35009
851,Asus,Gaming,16,windows,2.1,141.211998,0,0,Intel Core i7,2.8,128.0,1024.0,Nvidia,10.735512,11.35009
1275,Dell,Notebook,6,windows,2.3,100.45467,0,0,Intel Core i3,2.4,0.0,1024.0,Intel,10.925298,10.506674


In [71]:
# Target-encode the operating system.
# The per-OS means come from the training split only.
train_os_mean_price = y_train.groupby(by=X_train['OpSys']).mean()
X_train = X_train.assign(OpSys_encoded=X_train['OpSys'].map(train_os_mean_price))

# Test split: reuse the training means; an OS value unseen in training falls
# back to the overall training-set mean of the target.
overall_mean_os = y_train.mean()
X_test = X_test.assign(
    OpSys_encoded=X_test['OpSys'].map(train_os_mean_price).fillna(overall_mean_os)
)

In [72]:
# Check that OpSys_encoded was added to the training split.
X_train.head(n=5)

Unnamed: 0,Company,TypeName,Ram,OpSys,Weight,ppi,is_ips,is_touchscreen,Cpu_name,CPU_Speed_GHz,SSD_GB,HDD_GB,gpu_brand,Company_encoded,TypeName_encoded,OpSys_encoded
147,Asus,Notebook,4,windows,2.0,141.211998,0,0,other intel,1.1,0.0,1024.0,Intel,10.735512,10.506674,10.890741
342,HP,Notebook,8,windows,2.1,141.211998,1,0,Intel Core i3,2.4,0.0,1024.0,Nvidia,10.795093,10.506674,10.890741
1003,HP,Notebook,4,windows,1.64,111.935204,0,0,Intel Core i5,2.5,0.0,500.0,Intel,10.795093,10.506674,10.890741
814,Dell,Notebook,8,windows,2.18,141.211998,0,0,Intel Core i5,2.5,256.0,0.0,AMD,10.925298,10.506674,10.890741
344,Dell,Ultrabook,8,windows,1.2,165.632118,0,0,Intel Core i7,1.8,256.0,0.0,Intel,10.925298,11.283332,10.890741


In [73]:
# Target-encode the CPU name.
# The per-CPU means come from the training split only.
train_Cpu_name_mean_price = y_train.groupby(by=X_train['Cpu_name']).mean()
X_train = X_train.assign(
    Cpu_name_encoded=X_train['Cpu_name'].map(train_Cpu_name_mean_price)
)

# Test split: reuse the training means; a CPU name unseen in training falls
# back to the overall training-set mean of the target.
overall_mean_Cpu_name = y_train.mean()
X_test = X_test.assign(
    Cpu_name_encoded=X_test['Cpu_name']
    .map(train_Cpu_name_mean_price)
    .fillna(overall_mean_Cpu_name)
)

In [74]:
# Check that Cpu_name_encoded was added to the training split.
X_train.head(n=5)

Unnamed: 0,Company,TypeName,Ram,OpSys,Weight,ppi,is_ips,is_touchscreen,Cpu_name,CPU_Speed_GHz,SSD_GB,HDD_GB,gpu_brand,Company_encoded,TypeName_encoded,OpSys_encoded,Cpu_name_encoded
147,Asus,Notebook,4,windows,2.0,141.211998,0,0,other intel,1.1,0.0,1024.0,Intel,10.735512,10.506674,10.890741,10.004135
342,HP,Notebook,8,windows,2.1,141.211998,1,0,Intel Core i3,2.4,0.0,1024.0,Nvidia,10.795093,10.506674,10.890741,10.246165
1003,HP,Notebook,4,windows,1.64,111.935204,0,0,Intel Core i5,2.5,0.0,500.0,Intel,10.795093,10.506674,10.890741,10.834763
814,Dell,Notebook,8,windows,2.18,141.211998,0,0,Intel Core i5,2.5,256.0,0.0,AMD,10.925298,10.506674,10.890741,10.834763
344,Dell,Ultrabook,8,windows,1.2,165.632118,0,0,Intel Core i7,1.8,256.0,0.0,Intel,10.925298,11.283332,10.890741,11.265122


In [75]:
# The raw categorical columns are removed now that each has an encoded
# counterpart in both splits.
drop_cols = ['Company', 'TypeName', 'OpSys', 'Cpu_name']
X_train = X_train.drop(columns=drop_cols)
X_test = X_test.drop(columns=drop_cols)

In [76]:
# Training split after removing the raw categorical columns.
X_train.head(n=5)

Unnamed: 0,Ram,Weight,ppi,is_ips,is_touchscreen,CPU_Speed_GHz,SSD_GB,HDD_GB,gpu_brand,Company_encoded,TypeName_encoded,OpSys_encoded,Cpu_name_encoded
147,4,2.0,141.211998,0,0,1.1,0.0,1024.0,Intel,10.735512,10.506674,10.890741,10.004135
342,8,2.1,141.211998,1,0,2.4,0.0,1024.0,Nvidia,10.795093,10.506674,10.890741,10.246165
1003,4,1.64,111.935204,0,0,2.5,0.0,500.0,Intel,10.795093,10.506674,10.890741,10.834763
814,8,2.18,141.211998,0,0,2.5,256.0,0.0,AMD,10.925298,10.506674,10.890741,10.834763
344,8,1.2,165.632118,0,0,1.8,256.0,0.0,Intel,10.925298,11.283332,10.890741,11.265122


In [77]:
# Test split after removing the raw categorical columns.
X_test.head(n=5)

Unnamed: 0,Ram,Weight,ppi,is_ips,is_touchscreen,CPU_Speed_GHz,SSD_GB,HDD_GB,gpu_brand,Company_encoded,TypeName_encoded,OpSys_encoded,Cpu_name_encoded
478,8,2.2,141.211998,0,0,1.6,0.0,1024.0,AMD,10.925298,10.506674,10.890741,10.834763
724,8,2.7,127.335675,0,0,2.8,128.0,1024.0,Nvidia,11.389193,11.35009,10.890741,11.265122
312,16,2.8,141.211998,1,0,2.8,256.0,1024.0,Nvidia,11.389193,11.35009,10.890741,11.265122
851,16,2.1,141.211998,0,0,2.8,128.0,1024.0,Nvidia,10.735512,11.35009,10.890741,11.265122
1275,6,2.3,100.45467,0,0,2.4,0.0,1024.0,Intel,10.925298,10.506674,10.890741,10.246165


In [78]:
# Target-encode the GPU brand.
# The per-brand means come from the training split only.
train_Gpu_name_mean_price = y_train.groupby(by=X_train['gpu_brand']).mean()
X_train = X_train.assign(
    gpu_brand_encoded=X_train['gpu_brand'].map(train_Gpu_name_mean_price)
)

# Test split: reuse the training means; a GPU brand unseen in training falls
# back to the overall training-set mean of the target.
overall_mean_gpu_brand = y_train.mean()
X_test = X_test.assign(
    gpu_brand_encoded=X_test['gpu_brand']
    .map(train_Gpu_name_mean_price)
    .fillna(overall_mean_gpu_brand)
)

In [79]:
# Check that gpu_brand_encoded was added to the training split.
X_train.head(n=5)

Unnamed: 0,Ram,Weight,ppi,is_ips,is_touchscreen,CPU_Speed_GHz,SSD_GB,HDD_GB,gpu_brand,Company_encoded,TypeName_encoded,OpSys_encoded,Cpu_name_encoded,gpu_brand_encoded
147,4,2.0,141.211998,0,0,1.1,0.0,1024.0,Intel,10.735512,10.506674,10.890741,10.004135,10.70757
342,8,2.1,141.211998,1,0,2.4,0.0,1024.0,Nvidia,10.795093,10.506674,10.890741,10.246165,11.163393
1003,4,1.64,111.935204,0,0,2.5,0.0,500.0,Intel,10.795093,10.506674,10.890741,10.834763,10.70757
814,8,2.18,141.211998,0,0,2.5,256.0,0.0,AMD,10.925298,10.506674,10.890741,10.834763,10.543367
344,8,1.2,165.632118,0,0,1.8,256.0,0.0,Intel,10.925298,11.283332,10.890741,11.265122,10.70757


In [80]:
# Remove the raw gpu_brand column from both splits; gpu_brand_encoded replaces it.
X_train = X_train.drop(columns=['gpu_brand'])
X_test = X_test.drop(columns=['gpu_brand'])

In [81]:
# Training split after gpu_brand has been removed.
X_train.head(n=5)

Unnamed: 0,Ram,Weight,ppi,is_ips,is_touchscreen,CPU_Speed_GHz,SSD_GB,HDD_GB,Company_encoded,TypeName_encoded,OpSys_encoded,Cpu_name_encoded,gpu_brand_encoded
147,4,2.0,141.211998,0,0,1.1,0.0,1024.0,10.735512,10.506674,10.890741,10.004135,10.70757
342,8,2.1,141.211998,1,0,2.4,0.0,1024.0,10.795093,10.506674,10.890741,10.246165,11.163393
1003,4,1.64,111.935204,0,0,2.5,0.0,500.0,10.795093,10.506674,10.890741,10.834763,10.70757
814,8,2.18,141.211998,0,0,2.5,256.0,0.0,10.925298,10.506674,10.890741,10.834763,10.543367
344,8,1.2,165.632118,0,0,1.8,256.0,0.0,10.925298,11.283332,10.890741,11.265122,10.70757


In [82]:
# Snapshot both splits as they are at this point; the scaling cell that
# follows overwrites several columns of X_train / X_test.
X_train_copy, X_test_copy = X_train.copy(), X_test.copy()

In [83]:
# Preview the unscaled snapshot of the training split.
X_train_copy.head(n=5)

Unnamed: 0,Ram,Weight,ppi,is_ips,is_touchscreen,CPU_Speed_GHz,SSD_GB,HDD_GB,Company_encoded,TypeName_encoded,OpSys_encoded,Cpu_name_encoded,gpu_brand_encoded
147,4,2.0,141.211998,0,0,1.1,0.0,1024.0,10.735512,10.506674,10.890741,10.004135,10.70757
342,8,2.1,141.211998,1,0,2.4,0.0,1024.0,10.795093,10.506674,10.890741,10.246165,11.163393
1003,4,1.64,111.935204,0,0,2.5,0.0,500.0,10.795093,10.506674,10.890741,10.834763,10.70757
814,8,2.18,141.211998,0,0,2.5,256.0,0.0,10.925298,10.506674,10.890741,10.834763,10.543367
344,8,1.2,165.632118,0,0,1.8,256.0,0.0,10.925298,11.283332,10.890741,11.265122,10.70757


In [84]:
# Standardise selected feature columns.
from sklearn.preprocessing import StandardScaler

# Columns that get standardised: the target-encoded features plus CPU speed.
# NOTE(review): any other numeric column of X_train is not in this list and
# therefore stays on its raw scale - confirm that is intended.
cols_to_scale = [
    'Company_encoded',
    'TypeName_encoded',
    'OpSys_encoded',
    'CPU_Speed_GHz',
    'Cpu_name_encoded',
    'gpu_brand_encoded',
]

# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits.
scaler = StandardScaler().fit(X_train[cols_to_scale])
X_train[cols_to_scale] = scaler.transform(X_train[cols_to_scale])
X_test[cols_to_scale] = scaler.transform(X_test[cols_to_scale])

## Baseline model training

In [85]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

from sklearn.linear_model import (
    LinearRegression,
    Ridge,
    Lasso,
    ElasticNet,
    BayesianRidge,
    SGDRegressor,
    HuberRegressor,
    PassiveAggressiveRegressor
)

from sklearn.tree import DecisionTreeRegressor

from sklearn.ensemble import (
    RandomForestRegressor,
    ExtraTreesRegressor,
    GradientBoostingRegressor,
    AdaBoostRegressor,
    BaggingRegressor,
    StackingRegressor,
    VotingRegressor
)

from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, DotProduct, WhiteKernel

In [86]:
# Baseline candidates, each left at its library-default hyper-parameters.
# Every estimator that draws random numbers is given the same fixed seed (the
# value also used for the tuned models in the hyper-parameter section), so the
# baseline scores are repeatable and differences between runs are not seed noise.
models = {
    "Linear": LinearRegression(),
    "Ridge": Ridge(),
    "Lasso": Lasso(),
    "ElasticNet": ElasticNet(),
    "DecisionTree": DecisionTreeRegressor(random_state = 42),
    "RandomForest": RandomForestRegressor(random_state = 42),
    "ExtraTrees": ExtraTreesRegressor(random_state = 42),
    "GradientBoosting": GradientBoostingRegressor(random_state = 42),
    "AdaBoost": AdaBoostRegressor(random_state = 42),
    "Bagging": BaggingRegressor(random_state = 42),
    "XGB": XGBRegressor(random_state = 42 , verbosity = 0),
    "LGBM": LGBMRegressor(random_state = 42),
    "CatBoost": CatBoostRegressor(random_state = 42 , verbose = 0),
}

In [87]:
# Fit every baseline model on the training split and report its R² on the
# test split. `y` was log-transformed earlier, so the score is on the
# log-price scale.
for name in models:
    model = models[name]
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    print(f"{name}: R2 = {r2_score(y_test, preds):.4f}")

Linear: R2 = 0.8001
Ridge: R2 = 0.8001
Lasso: R2 = 0.4765
ElasticNet: R2 = 0.5182
DecisionTree: R2 = 0.8185
RandomForest: R2 = 0.8966
ExtraTrees: R2 = 0.8968
GradientBoosting: R2 = 0.8878
AdaBoost: R2 = 0.8037
Bagging: R2 = 0.8789
XGB: R2 = 0.9037
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000312 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 228
[LightGBM] [Info] Number of data points in the train set: 1041, number of used features: 13
[LightGBM] [Info] Start training from score 10.820841
LGBM: R2 = 0.8968
CatBoost: R2 = 0.9110


In [88]:
# Baseline voting ensemble: combines a random forest, XGBoost, LightGBM and a
# depth-limited decision tree.
# Each member gets a fixed seed so the ensemble's score is repeatable across runs.
voting_reg = VotingRegressor([
    ('rf', RandomForestRegressor(random_state = 42)),
    ('xgb', XGBRegressor(random_state = 42 , verbosity = 0)),
    ('lgb', LGBMRegressor(random_state = 42)),
    ('dt' , DecisionTreeRegressor(max_depth = 8 , random_state = 42))
])
voting_reg.fit(X_train , y_train)
pred = voting_reg.predict(X_test)
# Test-split R² (on the log-price scale, since y was log-transformed).
r2_score(y_test , pred)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000154 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 228
[LightGBM] [Info] Number of data points in the train set: 1041, number of used features: 13
[LightGBM] [Info] Start training from score 10.820841


0.8989873740023774

In [89]:
# Baseline stacking ensemble: random forest, XGBoost and LightGBM as base
# learners, with a linear regression as the final estimator.
# Each base learner gets a fixed seed so the ensemble's score is repeatable
# across runs.
stacking_reg = StackingRegressor(
    estimators=[
        ('rf', RandomForestRegressor(random_state=42)),
        ('xgb', XGBRegressor(random_state=42, verbosity=0)),
        ('lgb', LGBMRegressor(random_state=42))
    ],
    final_estimator=LinearRegression()
)
stacking_reg.fit(X_train , y_train)
pred = stacking_reg.predict(X_test)
# Test-split R² (on the log-price scale, since y was log-transformed).
r2_score(y_test , pred)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001624 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 228
[LightGBM] [Info] Number of data points in the train set: 1041, number of used features: 13
[LightGBM] [Info] Start training from score 10.820841
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000138 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 202
[LightGBM] [Info] Number of data points in the train set: 832, number of used features: 13
[LightGBM] [Info] Start training from score 10.817933
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000116 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, 

0.9089836928984565

## Hyper-parameter tuning

In [90]:
from sklearn.model_selection import GridSearchCV

In [91]:
# Hyper-parameter search space for each tunable model, keyed by the same names
# used in the `models` dict that the grid-search loop iterates over.
#
# max_features: the string 'auto' was deprecated in scikit-learn 1.1 and
# removed in 1.3, where it is rejected as an invalid parameter (with warnings
# suppressed, those grid-search candidates would fail silently). For the forest
# regressors 'auto' meant "use all features", which is what the float 1.0
# selects on every scikit-learn version, so the grid keeps the same size.
param_grids = {
    "RandomForest": {
        'n_estimators': [100, 200, 300],
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'max_features': [1.0, 'sqrt', 'log2']
    },

    "ExtraTrees": {
        'n_estimators': [100, 200, 300],
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'max_features': [1.0, 'sqrt', 'log2']
    },

    "GradientBoosting": {
        'n_estimators': [100, 200, 300],
        'learning_rate': [0.05, 0.1, 0.2],
        'max_depth': [3, 4, 5],
        'subsample': [0.8, 1.0]
    },

    "XGB": {
        'n_estimators': [200, 400, 600],
        'learning_rate': [0.05, 0.1, 0.2],
        'max_depth': [3, 5, 7],
        'colsample_bytree': [0.8, 1.0],
        'subsample': [0.8, 1.0]
    },

    "LGBM": {
        'n_estimators': [200, 400, 600],
        'learning_rate': [0.05, 0.1, 0.2],
        'num_leaves': [31, 50, 100],
        'subsample': [0.8, 1.0],
        'colsample_bytree': [0.8, 1.0]
    },

    "CatBoost": {
        'iterations': [300, 500, 700],
        'depth': [4, 6, 8],
        'learning_rate': [0.03, 0.05, 0.1],
        'l2_leaf_reg': [3, 5, 7]
    }
}

In [92]:
# Estimators to tune, keyed by the names used in `param_grids`.
# This rebinds `models`, replacing the baseline dictionary defined earlier.
# Every estimator shares the same fixed seed so the search is repeatable.
models = dict(
    RandomForest=RandomForestRegressor(random_state=42),
    ExtraTrees=ExtraTreesRegressor(random_state=42),
    GradientBoosting=GradientBoostingRegressor(random_state=42),
    XGB=XGBRegressor(random_state=42, verbosity=0),
    LGBM=LGBMRegressor(random_state=42),
    CatBoost=CatBoostRegressor(random_state=42, verbose=0),
)

In [93]:
# Exhaustive 5-fold grid search per model; the best estimator found for each
# model name is collected in `best_models`.
# NOTE(review): the *_encoded columns of X_train were computed from y_train
# over the whole training split, so each validation fold's targets have
# already influenced its own features; the cross-validated R² reported here
# may be optimistic.
best_models = {}

for name in models:
    model = models[name]
    print(f"Performing Grid Search for {name} ...")

    grid_search = GridSearchCV(
        model,
        param_grids[name],
        scoring='r2',
        cv=5,
        n_jobs=-1,
        verbose=2,
    )
    grid_search.fit(X_train, y_train)

    print(f"Best {name} Cross-Validated R²: {grid_search.best_score_:.4f}")
    print(f"Best Parameters: {grid_search.best_params_}\n")
    best_models[name] = grid_search.best_estimator_

Performing Grid Search for RandomForest ...
Fitting 5 folds for each of 324 candidates, totalling 1620 fits
Best RandomForest Cross-Validated R²: 0.8851
Best Parameters: {'max_depth': None, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}

Performing Grid Search for ExtraTrees ...
Fitting 5 folds for each of 324 candidates, totalling 1620 fits
Best ExtraTrees Cross-Validated R²: 0.8833
Best Parameters: {'max_depth': 30, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 300}

Performing Grid Search for GradientBoosting ...
Fitting 5 folds for each of 54 candidates, totalling 270 fits
Best GradientBoosting Cross-Validated R²: 0.9005
Best Parameters: {'learning_rate': 0.05, 'max_depth': 4, 'n_estimators': 300, 'subsample': 0.8}

Performing Grid Search for XGB ...
Fitting 5 folds for each of 108 candidates, totalling 540 fits
Best XGB Cross-Validated R²: 0.9006
Best Parameters: {'colsample_bytree': 0.8, 'learni

In [94]:
# Score each tuned model on the held-out test split (R² on the log-price scale).
print("Test Set Evaluation:")

for name in best_models:
    model = best_models[name]
    preds = model.predict(X_test)
    score = r2_score(y_true=y_test, y_pred=preds)
    print(f"{name}: R² = {score:.4f}")

Test Set Evaluation:
RandomForest: R² = 0.9010
ExtraTrees: R² = 0.9006
GradientBoosting: R² = 0.8992
XGB: R² = 0.9071
LGBM: R² = 0.9114
CatBoost: R² = 0.9034


In [95]:
# Ensembles built from three of the tuned models (XGBoost, CatBoost and random
# forest). Both names rebind the baseline ensembles defined earlier.

# Voting ensemble over the three tuned models.
voting_reg = VotingRegressor(
    estimators=[
        (alias, best_models[key])
        for alias, key in (('xgb', "XGB"), ('cat', "CatBoost"), ('rf', "RandomForest"))
    ]
)

# Stacking ensemble over the same three models, with a linear regression as
# the final estimator.
stacking_reg = StackingRegressor(
    estimators=[
        (alias, best_models[key])
        for alias, key in (('xgb', "XGB"), ('cat', "CatBoost"), ('rf', "RandomForest"))
    ],
    final_estimator=LinearRegression(),
)

In [96]:
# Fit the tuned stacking ensemble and report its test-split R²
# (log-price scale).
pred = stacking_reg.fit(X_train, y_train).predict(X_test)
r2_score(y_true=y_test, y_pred=pred)

0.9087163960050153

In [97]:
# Fit the tuned voting ensemble and report its test-split R²
# (log-price scale).
pred = voting_reg.fit(X_train, y_train).predict(X_test)
r2_score(y_true=y_test, y_pred=pred)

0.910076450511467