In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter("ignore")

In [2]:
df = pd.read_csv("Laptop_analyzed.csv")
df.drop("Unnamed: 0", axis = 1, inplace = True)

In [3]:
df.head()

Unnamed: 0,Company,TypeName,Inches,Ram,OpSys,Weight,Price,ppi,IPS,TouchScreen,FullHD,CPU_processor,Ghz,SSD,HDD,GPU
0,Apple,Ultrabook,13.3,2.079442,MAC,0.314811,11.175755,5.424875,1,0,0,Intel Core i5,2.3,4.859812,0,Intel
1,Apple,Ultrabook,13.3,2.079442,MAC,0.29267,10.776777,4.849511,0,0,0,Intel Core i5,1.8,0.0,0,Intel
2,HP,Notebook,15.6,2.079442,Others,0.620576,10.329931,4.950262,0,0,1,Intel Core i5,2.5,5.549076,0,Intel
3,Apple,Ultrabook,15.4,2.772589,MAC,0.604316,11.814476,5.396055,1,0,0,Intel Core i7,2.7,6.240276,0,AMD
4,Apple,Ultrabook,13.3,2.079442,MAC,0.314811,11.473101,5.424875,1,0,0,Intel Core i5,3.1,5.549076,0,Intel


In [4]:
X = df.drop("Price", axis = 1)
y = df["Price"]

## ***Libraries***

In [15]:
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [9]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor

In [148]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.15, random_state = 72)

# ***LinearRegression***

In [150]:
process = ColumnTransformer(transformers = [
    ("encode", OneHotEncoder(sparse = False, drop = "first"), [0, 1, 4, 10, 14]),
    ("scale", StandardScaler(), [2, 3, 5, 6, 11, 12, 13])
], remainder = "passthrough")

model = LinearRegression()

pipe = Pipeline([
    ("process", process),
    ("model", model)
])

pipe.fit(X_train, y_train)

pred_train = pipe.predict(X_train)
pred_test = pipe.predict(X_test)
score = cross_val_score(pipe, X_train, y_train, cv = 5).mean()

print("Training Accuracy :", pipe.score(X_train, y_train))
print("Testing Accuracy :", pipe.score(X_test, y_test))
print("Cross Validation Score :", score)
print()
print("MAE :", mean_absolute_error(y_test, pred_test))
print("MSE :", mean_squared_error(y_test, pred_test))

Training Accuracy : 0.8382120210180102
Testing Accuracy : 0.8489921506312583
Cross Validation Score : 0.8191621924866823

MAE : 0.18991953833042358
MSE : 0.05969003541674716


# ***Polynomial Regression***

In [151]:
# After Hyperparameter tuning it provides degree = 1

# ***Ridge***

In [152]:
process = ColumnTransformer(transformers = [
    ("encode", OneHotEncoder(sparse = False, drop = "first"), [0, 1, 4, 10, 14]),
    ("scale", StandardScaler(), [2, 3, 5, 6, 11, 12, 13])
], remainder = "passthrough")

model = Ridge(alpha = 1)

pipe = Pipeline([
    ("process", process),
    ("model", model)
])

pipe.fit(X_train, y_train)

pred_train = pipe.predict(X_train)
pred_test = pipe.predict(X_test)
score = cross_val_score(pipe, X_train, y_train, cv = 5).mean()

print("Training Accuracy :", pipe.score(X_train, y_train))
print("Testing Accuracy :", pipe.score(X_test, y_test))
print("Cross Validation Score :", score)
print()
print("MAE :", mean_absolute_error(y_test, pred_test))
print("MSE :", mean_squared_error(y_test, pred_test))

Training Accuracy : 0.8373758256166443
Testing Accuracy : 0.8484888567381426
Cross Validation Score : 0.8194374460197593

MAE : 0.1898113333479545
MSE : 0.05988897626936306


# ***Lasso***

In [153]:
process = ColumnTransformer(transformers = [
    ("encode", OneHotEncoder(sparse = False, drop = "first"), [0, 1, 4, 10, 14]),
    ("scale", StandardScaler(), [2, 3, 5, 6, 11, 12, 13])
], remainder = "passthrough")

model = Lasso(alpha = 0.001)

pipe = Pipeline([
    ("process", process),
    ("model", model)
])

pipe.fit(X_train, y_train)

pred_train = pipe.predict(X_train)
pred_test = pipe.predict(X_test)
score = cross_val_score(pipe, X_train, y_train, cv = 5).mean()

print("Training Accuracy :", pipe.score(X_train, y_train))
print("Testing Accuracy :", pipe.score(X_test, y_test))
print("Cross Validation Score :", score)
print()
print("MAE :", mean_absolute_error(y_test, pred_test))
print("MSE :", mean_squared_error(y_test, pred_test))

Training Accuracy : 0.8295163257096495
Testing Accuracy : 0.8414443032635757
Cross Validation Score : 0.8140970052412753

MAE : 0.19344869021022842
MSE : 0.06267353116601143


# ***ElasticNet***

In [154]:
process = ColumnTransformer(transformers = [
    ("encode", OneHotEncoder(sparse = False, drop = "first"), [0, 1, 4, 10, 14]),
    ("scale", StandardScaler(), [2, 3, 5, 6, 11, 12, 13])
], remainder = "passthrough")

model = ElasticNet(alpha = 1, l1_ratio = 0)

pipe = Pipeline([
    ("process", process),
    ("model", model)
])

pipe.fit(X_train, y_train)

pred_train = pipe.predict(X_train)
pred_test = pipe.predict(X_test)
score = cross_val_score(pipe, X_train, y_train, cv = 5).mean()

print("Training Accuracy :", pipe.score(X_train, y_train))
print("Testing Accuracy :", pipe.score(X_test, y_test))
print("Cross Validation Score :", score)
print()
print("MAE :", mean_absolute_error(y_test, pred_test))
print("MSE :", mean_squared_error(y_test, pred_test))

Training Accuracy : 0.6964550598638373
Testing Accuracy : 0.713950574369474
Cross Validation Score : 0.6935172038967972

MAE : 0.274521891966438
MSE : 0.1130689591183637


# ***SVR***

In [196]:
process = ColumnTransformer(transformers = [
    ("encode", OneHotEncoder(sparse = False, drop = "first"), [0, 1, 4, 10, 14]),
    ("scale", StandardScaler(), [2, 3, 5, 6, 11, 12, 13])
], remainder = "passthrough")

model = SVR(C = 0.4, kernel = "rbf")

pipe = Pipeline([
    ("process", process),
    ("model", model)
])

pipe.fit(X_train, y_train)

pred_train = pipe.predict(X_train)
pred_test = pipe.predict(X_test)
score = cross_val_score(pipe, X_train, y_train, cv = 5).mean()

print("Training Accuracy :", pipe.score(X_train, y_train))
print("Testing Accuracy :", pipe.score(X_test, y_test))
print("Cross Validation Score :", score)
print()
print("MAE :", mean_absolute_error(y_test, pred_test))
print("MSE :", mean_squared_error(y_test, pred_test))

Training Accuracy : 0.9208355052320516
Testing Accuracy : 0.8887489333826298
Cross Validation Score : 0.8724443795461486

MAE : 0.16217382838564498
MSE : 0.04397506576182196


# ***KNeighborsRegressor***

In [197]:
process = ColumnTransformer(transformers = [
    ("encode", OneHotEncoder(sparse = False, drop = "first"), [0, 1, 4, 10, 14]),
    ("scale", StandardScaler(), [2, 3, 5, 6, 11, 12, 13])
], remainder = "passthrough")

model = KNeighborsRegressor(n_neighbors = 7)

pipe = Pipeline([
    ("process", process),
    ("model", model)
])

pipe.fit(X_train, y_train)

pred_train = pipe.predict(X_train)
pred_test = pipe.predict(X_test)
score = cross_val_score(pipe, X_train, y_train, cv = 5).mean()

print("Training Accuracy :", pipe.score(X_train, y_train))
print("Testing Accuracy :", pipe.score(X_test, y_test))
print("Cross Validation Score :", score)
print()
print("MAE :", mean_absolute_error(y_test, pred_test))
print("MSE :", mean_squared_error(y_test, pred_test))

Training Accuracy : 0.8923124946401574
Testing Accuracy : 0.8624309550112023
Cross Validation Score : 0.8385556898328611

MAE : 0.18068024793475024
MSE : 0.054377975727460316


# ***DecisionTreeRegressor***

In [160]:
process = ColumnTransformer(transformers = [
    ("encode", OneHotEncoder(sparse = False, drop = "first"), [0, 1, 4, 10, 14]),
    ("scale", StandardScaler(), [2, 3, 5, 6, 11, 12, 13])
], remainder = "passthrough")

model = DecisionTreeRegressor(max_depth = 10, criterion = "poisson")

pipe = Pipeline([
    ("process", process),
    ("model", model)
])

pipe.fit(X_train, y_train)

pred_train = pipe.predict(X_train)
pred_test = pipe.predict(X_test)
score = cross_val_score(pipe, X_train, y_train, cv = 5).mean()

print("Training Accuracy :", pipe.score(X_train, y_train))
print("Testing Accuracy :", pipe.score(X_test, y_test))
print("Cross Validation Score :", score)
print()
print("MAE :", mean_absolute_error(y_test, pred_test))
print("MSE :", mean_squared_error(y_test, pred_test))

Training Accuracy : 0.9440052683204715
Testing Accuracy : 0.7964943309416481
Cross Validation Score : 0.8174952499997905

MAE : 0.2004723214770564
MSE : 0.08044125285129917


# ***RandomForestRegressor***

In [162]:
process = ColumnTransformer(transformers = [
    ("encode", OneHotEncoder(sparse = False, drop = "first"), [0, 1, 4, 10, 14]),
    ("scale", StandardScaler(), [2, 3, 5, 6, 11, 12, 13])
], remainder = "passthrough")

model = RandomForestRegressor(n_estimators = 38)

pipe = Pipeline([
    ("process", process),
    ("model", model)
])

pipe.fit(X_train, y_train)

pred_train = pipe.predict(X_train)
pred_test = pipe.predict(X_test)
score = cross_val_score(pipe, X_train, y_train, cv = 5).mean()

print("Training Accuracy :", pipe.score(X_train, y_train))
print("Testing Accuracy :", pipe.score(X_test, y_test))
print("Cross Validation Score :", score)
print()
print("MAE :", mean_absolute_error(y_test, pred_test))
print("MSE :", mean_squared_error(y_test, pred_test))

Training Accuracy : 0.980954116538298
Testing Accuracy : 0.8665456447038009
Cross Validation Score : 0.8789553587228729

MAE : 0.1694036647436481
MSE : 0.05275153064856645


# ***AdaBoostRegressor***

In [163]:
process = ColumnTransformer(transformers = [
    ("encode", OneHotEncoder(sparse = False, drop = "first"), [0, 1, 4, 10, 14]),
    ("scale", StandardScaler(), [2, 3, 5, 6, 11, 12, 13])
], remainder = "passthrough")

model = AdaBoostRegressor(n_estimators = 46)

pipe = Pipeline([
    ("process", process),
    ("model", model)
])

pipe.fit(X_train, y_train)

pred_train = pipe.predict(X_train)
pred_test = pipe.predict(X_test)
score = cross_val_score(pipe, X_train, y_train, cv = 5).mean()

print("Training Accuracy :", pipe.score(X_train, y_train))
print("Testing Accuracy :", pipe.score(X_test, y_test))
print("Cross Validation Score :", score)
print()
print("MAE :", mean_absolute_error(y_test, pred_test))
print("MSE :", mean_squared_error(y_test, pred_test))

Training Accuracy : 0.8345126233311092
Testing Accuracy : 0.8088516985784552
Cross Validation Score : 0.8005660195980877

MAE : 0.22284669194154677
MSE : 0.07555666099079512


# ***GradientBoostingRegressor***

In [166]:
process = ColumnTransformer(transformers = [
    ("encode", OneHotEncoder(sparse = False, drop = "first"), [0, 1, 4, 10, 14]),
    ("scale", StandardScaler(), [2, 3, 5, 6, 11, 12, 13])
], remainder = "passthrough")

model = GradientBoostingRegressor(n_estimators = 38, learning_rate = 0.4)

pipe = Pipeline([
    ("process", process),
    ("model", model)
])

pipe.fit(X_train, y_train)

pred_train = pipe.predict(X_train)
pred_test = pipe.predict(X_test)
score = cross_val_score(pipe, X_train, y_train, cv = 5).mean()

print("Training Accuracy :", pipe.score(X_train, y_train))
print("Testing Accuracy :", pipe.score(X_test, y_test))
print("Cross Validation Score :", score)
print()
print("MAE :", mean_absolute_error(y_test, pred_test))
print("MSE :", mean_squared_error(y_test, pred_test))

Training Accuracy : 0.929244649015775
Testing Accuracy : 0.872115406040796
Cross Validation Score : 0.8668021055502179

MAE : 0.17128959093551752
MSE : 0.0505499281963903


# ***XGBRegressor***

In [168]:
process = ColumnTransformer(transformers = [
    ("encode", OneHotEncoder(sparse = False, drop = "first"), [0, 1, 4, 10, 14]),
    ("scale", StandardScaler(), [2, 3, 5, 6, 11, 12, 13])
], remainder = "passthrough")

model = XGBRegressor(n_estimators = 58, gamma = 0, learning_rate = 0.2)

pipe = Pipeline([
    ("process", process),
    ("model", model)
])

pipe.fit(X_train, y_train)

pred_train = pipe.predict(X_train)
pred_test = pipe.predict(X_test)
score = cross_val_score(pipe, X_train, y_train, cv = 5).mean()

print("Training Accuracy :", pipe.score(X_train, y_train))
print("Testing Accuracy :", pipe.score(X_test, y_test))
print("Cross Validation Score :", score)
print()
print("MAE :", mean_absolute_error(y_test, pred_test))
print("MSE :", mean_squared_error(y_test, pred_test))

Training Accuracy : 0.9762084935067746
Testing Accuracy : 0.8948036113291163
Cross Validation Score : 0.8862651238479382

MAE : 0.15685425388614366
MSE : 0.04158178658743763


#### ***SVR is considered best model with***
- Train Accuracy -> 92
- Test Accuracy -> 89
- CV -> 87
- MAE - > 1621

# ***FINAL MODEL***

In [198]:
process = ColumnTransformer(transformers = [
    ("encode", OneHotEncoder(sparse = False, drop = "first"), [0, 1, 4, 10, 14]),
    ("scale", StandardScaler(), [2, 3, 5, 6, 11, 12, 13])
], remainder = "passthrough")

model = SVR(C = 0.4, kernel = "rbf")

pipe = Pipeline([
    ("process", process),
    ("model", model)
])

pipe.fit(X_train, y_train)

pred_train = pipe.predict(X_train)
pred_test = pipe.predict(X_test)
score = cross_val_score(pipe, X_train, y_train, cv = 5).mean()

print("Training Accuracy :", pipe.score(X_train, y_train))
print("Testing Accuracy :", pipe.score(X_test, y_test))
print("Cross Validation Score :", score)
print()
print("MAE :", mean_absolute_error(y_test, pred_test))
print("MSE :", mean_squared_error(y_test, pred_test))

Training Accuracy : 0.9208355052320516
Testing Accuracy : 0.8887489333826298
Cross Validation Score : 0.8724443795461486

MAE : 0.16217382838564498
MSE : 0.04397506576182196


# ***Predicting on New Data***

In [223]:
new_data = pd.DataFrame({
    'Company':"Apple",
    'TypeName':"Ultrabook", 
    'Inches':13.3, 
    'Ram':8, 
    'OpSys':"MAC", 
    'Weight':1.37, 
    'ppi':5678, 
    'IPS':1,
    'TouchScreen':0, 
    'FullHD':0, 
    'CPU_processor':"Intel Core i5", 
    'Ghz':2.3, 
    'SSD':128, 
    'HDD':1000, 
    'GPU':"Intel"
}, index = [0])

new_data["Weight"] = np.log(new_data["Weight"])
new_data["ppi"] = np.log(new_data["ppi"])
new_data["SSD"] = np.log(new_data["SSD"] + 1)
new_data["Ram"] = np.log(new_data["Ram"])
# new_data["Price"] = np.log(new_data["Price"])

new_data

Unnamed: 0,Company,TypeName,Inches,Ram,OpSys,Weight,ppi,IPS,TouchScreen,FullHD,CPU_processor,Ghz,SSD,HDD,GPU
0,Apple,Ultrabook,13.3,2.079442,MAC,0.314811,8.644354,1,0,0,Intel Core i5,2.3,4.859812,1000,Intel


In [224]:
pred = pipe.predict(new_data)[0]
print("Price of the laptop for this specifaction :", round(np.exp(pred)))

Price of the laptop for this specifaction : 66973


In [225]:
from joblib import dump
dump(pipe, "SVR_model.joblib")

['SVR_model.joblib']