In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter("ignore")

In [5]:
df = pd.read_csv("Laptop_analyzed.csv")
df.drop(["Unnamed: 0"], axis = 1, inplace = True)
df.head()

Unnamed: 0,Company,TypeName,Inches,Ram,OpSys,Weight,Price,ppi,IPS,TouchScreen,FullHD,CPU_processor,Ghz,SSD,HDD,GPU
0,Apple,Ultrabook,13.3,2.079442,MAC,0.314811,11.175755,5.424875,1,0,0,Intel Core i5,2.3,4.859812,0,Intel
1,Apple,Ultrabook,13.3,2.079442,MAC,0.29267,10.776777,4.849511,0,0,0,Intel Core i5,1.8,0.0,0,Intel
2,HP,Notebook,15.6,2.079442,Others,0.620576,10.329931,4.950262,0,0,1,Intel Core i5,2.5,5.549076,0,Intel
3,Apple,Ultrabook,15.4,2.772589,MAC,0.604316,11.814476,5.396055,1,0,0,Intel Core i7,2.7,6.240276,0,AMD
4,Apple,Ultrabook,13.3,2.079442,MAC,0.314811,11.473101,5.424875,1,0,0,Intel Core i5,3.1,5.549076,0,Intel


# ***Hyper-Parameter-Tunning***

In [38]:
X = df.drop("Price", axis = 1)
X = pd.get_dummies(X, drop_first = True, dtype = int)
y = df["Price"]

In [7]:
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score

***Best Random State***

In [39]:
sc = StandardScaler()

In [40]:
train_score = []
test_score = []
cv_score = []
MAE = []

for i in range(100):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.15, random_state = i)
    X_train.loc[:, ["Inches", "Ram", "Weight", "ppi", "Ghz", "SSD", "HDD"]] = sc.fit_transform(X_train.loc[:, ["Inches", "Ram", "Weight", "ppi", "Ghz", "SSD", "HDD"]])
    X_test.loc[:, ["Inches", "Ram", "Weight", "ppi", "Ghz", "SSD", "HDD"]] = sc.transform(X_test.loc[:, ["Inches", "Ram", "Weight", "ppi", "Ghz", "SSD", "HDD"]])
    model = LinearRegression().fit(X_train, y_train)
    
    pred_train = model.predict(X_train)
    pred_test = model.predict(X_test)
    
    train_accuracy = model.score(X_train, y_train)
    test_accuracy = model.score(X_test, y_test)
    score = cross_val_score(model, X_train, y_train, cv = 6).mean()
    mae = mean_absolute_error(y_test, pred_test)
    
    train_score.append(train_accuracy)
    test_score.append(test_accuracy)
    cv_score.append(score)
    MAE.append(mae)

random_state = pd.DataFrame({
    "Train": train_score,
    "Test": test_score,
    "CV": cv_score,
    "MAE": mae
})

In [41]:
random_state.sort_values(by = "MAE", ascending = True)

Unnamed: 0,Train,Test,CV,MAE
0,0.844442,0.810700,0.828146,0.202285
72,0.838212,0.848992,0.822364,0.202285
71,0.837348,0.850090,0.821661,0.202285
70,0.844203,0.810357,0.830131,0.202285
69,0.848530,0.779553,0.834022,0.202285
...,...,...,...,...
28,0.844005,0.811755,0.828289,0.202285
27,0.846689,0.792430,0.829553,0.202285
26,0.844088,0.816177,0.826066,0.202285
36,0.840065,0.837708,0.818195,0.202285


## ***GridSearchCV For - PolynomialFeatures***

In [42]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 72)
scaler = StandardScaler()
X_train.loc[:, ["Inches", "Ram", "Weight", "ppi", "Ghz", "SSD", "HDD"]] = scaler.fit_transform(X_train.loc[:, ["Inches", "Ram", "Weight", "ppi", "Ghz", "SSD", "HDD"]])
X_test.loc[:, ["Inches", "Ram", "Weight", "ppi", "Ghz", "SSD", "HDD"]] = scaler.transform(X_test.loc[:, ["Inches", "Ram", "Weight", "ppi", "Ghz", "SSD", "HDD"]])

In [43]:
estimator = PolynomialFeatures()

param_grid = {"degree": list(range(1, 10))}

grid_model = GridSearchCV(estimator, param_grid, cv = 5, scoring = "r2")

grid_model.fit(X_train, y_train)

grid_model.best_params_

{'degree': 1}

# ***GridSearchCV - Ridge Regression***

In [44]:
estimator = Ridge()

param_grid = {"alpha": [0.001, 0.1, 1, 2, 3, 4, 10, 12]}

grid_model = GridSearchCV(estimator, param_grid, cv = 5, scoring = "r2")

grid_model.fit(X_train, y_train)

grid_model.best_params_

{'alpha': 1}

# ***GridSearchCV - Lasso Regression***

In [45]:
estimator = Lasso()

param_grid = {"alpha": [0.001, 0.1, 1, 2, 3, 4]}

grid_model = GridSearchCV(estimator, param_grid, cv = 5, scoring = "r2")

grid_model.fit(X_train, y_train)

grid_model.best_params_

{'alpha': 0.001}

# ***GridSearchCV - ElasticNet***

In [46]:
estimator = ElasticNet()

param_grid = {"alpha": list(range(1, 15)), "l1_ratio": [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]}

grid_model = GridSearchCV(estimator, param_grid, cv = 5, scoring = "r2")

grid_model.fit(X_train, y_train)

grid_model.best_params_

{'alpha': 1, 'l1_ratio': 0}

# ***GridSearchCV - SVR***

In [58]:
estimator = SVR()

param_grid = {"C": [0.001, 0.1, 1, 10], "kernel": ["linear", "poly", "sigmoid", "rbf"]}

grid_model = GridSearchCV(estimator, param_grid, cv = 5, scoring = "r2")

grid_model.fit(X_train, y_train)

grid_model.best_params_

{'C': 1, 'kernel': 'rbf'}

# ***GridSearchCV - KNearestNeighbors - KNeighborsRegressor***

In [48]:
estimator = KNeighborsRegressor()

param_grid = {"n_neighbors": list(range(1, 20))}

grid_model = GridSearchCV(estimator, param_grid, cv = 5, scoring = "r2")

grid_model.fit(X_train, y_train)

grid_model.best_params_

{'n_neighbors': 7}

# ***GridSearchCV - Decision Tree - DecisionTreeRegressor***

In [49]:
estimator = DecisionTreeRegressor(random_state = 0)

param_grid = {"max_depth": list(range(9, 20)), "criterion": ["squared_error", "friedman_mse", "absolute_error","poisson"]}

grid_model = GridSearchCV(estimator, param_grid, cv = 5, scoring = "r2")

grid_model.fit(X_train, y_train)

print(grid_model.best_params_)

grid_model.best_estimator_.feature_importances_

{'criterion': 'poisson', 'max_depth': 10}


array([3.77649542e-02, 6.26030193e-01, 3.96495074e-02, 2.96514529e-02,
       2.62772195e-03, 8.84676349e-04, 3.70774136e-03, 9.78475427e-02,
       1.71528011e-02, 1.50218118e-03, 3.47743707e-05, 4.00156177e-03,
       5.41762881e-04, 5.71685104e-04, 0.00000000e+00, 0.00000000e+00,
       3.88743910e-03, 0.00000000e+00, 0.00000000e+00, 8.45821173e-04,
       1.78252175e-03, 9.97665247e-04, 1.14266744e-09, 1.34134210e-03,
       0.00000000e+00, 4.91501624e-04, 7.08236014e-06, 0.00000000e+00,
       0.00000000e+00, 2.79746981e-04, 5.98761967e-02, 2.40636794e-04,
       9.88078630e-03, 2.23416552e-04, 1.08619206e-02, 1.25391456e-02,
       2.14594509e-02, 3.12231353e-03, 2.62206210e-03, 4.08038022e-03,
       3.49201321e-03])

# ***GridSearchCV - RandomForestRegressor***

In [50]:
estimator = RandomForestRegressor(random_state = 0)

param_grid = {"n_estimators": list(range(1, 40))}

grid_model = GridSearchCV(estimator, param_grid, cv = 5, scoring = "r2")

grid_model.fit(X_train, y_train)

print("Best Params :", grid_model.best_params_)

grid_model.best_estimator_.feature_importances_

Best Params : {'n_estimators': 38}


array([2.71484083e-02, 5.71136717e-01, 5.85932496e-02, 5.49927232e-02,
       5.43314983e-03, 5.58808530e-03, 2.92985475e-03, 6.90952886e-02,
       2.35755971e-02, 5.42859399e-03, 1.38283501e-04, 4.64415671e-03,
       1.23923709e-04, 4.19090103e-03, 1.37502220e-04, 1.89438514e-05,
       5.61431537e-03, 1.40339260e-05, 1.52800316e-05, 4.70272559e-03,
       7.87838203e-04, 4.67186283e-04, 7.38303841e-05, 1.27481776e-03,
       8.44990727e-05, 9.32668426e-04, 2.30685304e-04, 3.56261035e-04,
       9.90149783e-04, 3.11273742e-04, 5.97218575e-02, 1.29494499e-03,
       1.25017854e-02, 5.53274995e-03, 5.50666987e-03, 5.44002193e-03,
       2.51111957e-02, 5.95587673e-03, 2.32358533e-02, 3.13307466e-03,
       3.53502665e-03])

# ***AdaBoostRegressor***

In [52]:
estimator = AdaBoostRegressor(random_state = 0)

param_grid = {"n_estimators": list(range(39, 50))}

grid_model = GridSearchCV(estimator, param_grid, cv = 5, scoring = "r2")

grid_model.fit(X_train, y_train)

print("Best Params :", grid_model.best_params_)

grid_model.best_estimator_.feature_importances_

Best Params : {'n_estimators': 46}


array([0.02177273, 0.33284456, 0.09318523, 0.07831608, 0.        ,
       0.00138796, 0.        , 0.10892404, 0.12206648, 0.00560483,
       0.        , 0.00077075, 0.        , 0.0016968 , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.00045837,
       0.        , 0.        , 0.        , 0.0021348 , 0.        ,
       0.        , 0.00114645, 0.        , 0.        , 0.        ,
       0.03403099, 0.0120999 , 0.02086393, 0.02019112, 0.        ,
       0.00149245, 0.03446215, 0.04881012, 0.00627687, 0.0034661 ,
       0.04799729])

# ***GradientBoostingRegressor***

In [53]:
estimator = GradientBoostingRegressor(random_state = 0)

param_grid = {"n_estimators": list(range(29, 40)), "learning_rate": [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]}

grid_model = GridSearchCV(estimator, param_grid, cv = 5, scoring = "r2")

grid_model.fit(X_train, y_train)

print("Best Params :", grid_model.best_params_)

grid_model.best_estimator_.feature_importances_

Best Params : {'learning_rate': 0.4, 'n_estimators': 38}


array([3.10091983e-02, 4.42894715e-01, 3.98310575e-02, 4.48368751e-02,
       2.46452049e-03, 3.61979893e-03, 1.49893008e-04, 7.37734244e-02,
       1.51922439e-01, 2.32018602e-03, 0.00000000e+00, 2.02807501e-03,
       2.99764458e-04, 2.60118587e-03, 0.00000000e+00, 0.00000000e+00,
       9.81074522e-04, 0.00000000e+00, 1.75117840e-04, 2.18277279e-03,
       0.00000000e+00, 3.42838883e-04, 0.00000000e+00, 2.24359859e-03,
       0.00000000e+00, 1.31986438e-03, 7.63039537e-04, 4.36807248e-04,
       1.04456040e-03, 2.65146332e-04, 8.29625146e-02, 1.39539241e-03,
       1.25822630e-02, 8.45225678e-03, 3.71423685e-03, 4.70464664e-04,
       2.75985779e-02, 2.52268833e-02, 2.76137082e-02, 1.52219474e-03,
       9.55554264e-04])

# ***XGBRegressor***

In [56]:
estimator = XGBRegressor(random_state = 0)

param_grid = {"n_estimators": list(range(49, 60)), "learning_rate": [0, 0.1, 0.2, 0.3, 0.4, 0.5],
             "gamma":[0, 0.1, 0.2, 0.3, 0.4, 0.5] }

grid_model = GridSearchCV(estimator, param_grid, cv = 5, scoring = "r2")

grid_model.fit(X_train, y_train)

print("Best Params :", grid_model.best_params_)

grid_model.best_estimator_.feature_importances_

Best Params : {'gamma': 0, 'learning_rate': 0.2, 'n_estimators': 58}


array([0.0179959 , 0.3540014 , 0.00966325, 0.02646129, 0.00604147,
       0.00972715, 0.0030644 , 0.03794548, 0.04675371, 0.00676282,
       0.        , 0.00967124, 0.00132074, 0.00919413, 0.00359572,
       0.        , 0.00870921, 0.        , 0.01275516, 0.004646  ,
       0.00402268, 0.00151713, 0.00324109, 0.01481077, 0.0007999 ,
       0.00434524, 0.00406085, 0.00575307, 0.01143412, 0.00297909,
       0.1255264 , 0.00833534, 0.04534537, 0.02395263, 0.00275351,
       0.02812136, 0.06433311, 0.05001335, 0.01331957, 0.0040275 ,
       0.01299888], dtype=float32)