# Importing libraries and Loading data

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize']=18,5
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression, Lasso,Ridge
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, ExtraTreesRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor

In [2]:
# Read the prepared laptop dataset from the working directory and preview its first rows.
df = pd.read_csv(filepath_or_buffer="new_laptop_data.csv")
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,Company,TypeName,Ram,Weight,Price,Touchscreen,IPS,ppi,Cpu_brand,HDD,SSD,Gpu_brand,os
0,0,Apple,Ultrabook,8,1.37,71378.6832,0,1,226.983005,Intel Core i5,0,128,Intel,Mac
1,1,Apple,Ultrabook,8,1.34,47895.5232,0,0,127.67794,Intel Core i5,0,0,Intel,Mac
2,2,HP,Notebook,8,1.86,30636.0,0,0,141.211998,Intel Core i5,0,256,Intel,Others/No OS/Linux
3,3,Apple,Ultrabook,16,1.83,135195.336,0,1,220.534624,Intel Core i7,0,512,AMD,Mac
4,4,Apple,Ultrabook,8,1.37,96095.808,0,1,226.983005,Intel Core i5,0,256,Intel,Mac


In [3]:
# Remove the "Unnamed: 0" column (presumably a row index that was written into the
# CSV -- confirm against the file).
# The frame is reassigned instead of mutated in place, and errors="ignore" makes the
# cell safe to re-run: the original in-place drop raised KeyError on a second
# execution because the column was already gone.
df = df.drop(columns=["Unnamed: 0"], errors="ignore")

In [4]:
# Preview the frame again to check which columns remain after the drop.
df.head(n=5)

Unnamed: 0,Company,TypeName,Ram,Weight,Price,Touchscreen,IPS,ppi,Cpu_brand,HDD,SSD,Gpu_brand,os
0,Apple,Ultrabook,8,1.37,71378.6832,0,1,226.983005,Intel Core i5,0,128,Intel,Mac
1,Apple,Ultrabook,8,1.34,47895.5232,0,0,127.67794,Intel Core i5,0,0,Intel,Mac
2,HP,Notebook,8,1.86,30636.0,0,0,141.211998,Intel Core i5,0,256,Intel,Others/No OS/Linux
3,Apple,Ultrabook,16,1.83,135195.336,0,1,220.534624,Intel Core i7,0,512,AMD,Mac
4,Apple,Ultrabook,8,1.37,96095.808,0,1,226.983005,Intel Core i5,0,256,Intel,Mac


In [5]:
# Build the target and the feature matrix.
# The target is the natural log of Price, so every metric reported further down is
# computed on the log scale rather than in currency units.
y = np.log(df["Price"])
# Everything except the Price column is used as a predictor.
X = df.drop(columns=["Price"])

#### Splitting data into train and test sets

In [6]:
# Hold out 15% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=2,
)

#### Transforming categorical columns into Numeric

In [7]:
# One-hot encode the categorical columns (dropping the first level of each) and pass
# every remaining column through unchanged.
# The columns are addressed by position in X; judging by the df.head() preview these
# are Company, TypeName, Cpu_brand, Gpu_brand and os -- confirm with X.columns.
# Positions (not names) are kept so the fitted pipeline also accepts plain arrays.
#
# Dense output: the encoder's `sparse=False` flag was renamed to `sparse_output` in
# scikit-learn 1.2 and removed in 1.4, where it raises TypeError. Requesting dense
# output through `sparse_threshold=0` on the ColumnTransformer gives the same dense
# matrix and works on both old and new releases.
transformers = ColumnTransformer(
    transformers=[
        ("col_tnf", OneHotEncoder(drop="first"), [0, 1, 7, 10, 11]),
    ],
    remainder="passthrough",
    sparse_threshold=0,
)

In [8]:
# Linear Regression
# Baseline model: ordinary least squares on the encoded features.
linear_regressor = LinearRegression()
pipe = Pipeline(steps=[
    ("transformers", transformers),
    ("linear_regressor", linear_regressor),
])

# fit() returns the pipeline itself, so fitting and predicting can be chained.
y_pred = pipe.fit(X_train, y_train).predict(X_test)

# Both metrics are computed on the log-transformed target.
print("R2 Score: ", r2_score(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))

R2 Score:  0.801022031806466
MAE:  0.21201335119224593


In [9]:
# Decision Tree
# A single regression tree limited to depth 8.
# NOTE(review): no random_state is set, so scores may vary slightly between runs.
Decision_Tree = DecisionTreeRegressor(max_depth=8)
pipe = Pipeline(steps=[
    ("transformers", transformers),
    ("Decision_Tree", Decision_Tree),
])
y_pred = pipe.fit(X_train, y_train).predict(X_test)

# Held-out performance on the log-price target.
print("R2 Score: ", r2_score(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))

R2 Score:  0.829858567883363
MAE:  0.1856748499328913


In [10]:
# SVM
# Support vector regression with an RBF kernel.
# NOTE(review): the pipeline contains no scaling step, so the passthrough numeric
# columns reach the kernel on their raw scales.
SVM = SVR(kernel="rbf", C=10000, epsilon=0.1)
pipe = Pipeline(steps=[
    ("transformers", transformers),
    ("SVM", SVM),
])
y_pred = pipe.fit(X_train, y_train).predict(X_test)

# Held-out performance on the log-price target.
print("R2 Score: ", r2_score(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))

R2 Score:  0.8148307723301368
MAE:  0.20503200002677208


In [11]:
# AdaBoostRegressor
# Boosted ensemble of 15 estimators with the learning rate left at 1.0.
# NOTE(review): no random_state is set, so scores may vary between runs.
AdaBoost = AdaBoostRegressor(
    n_estimators=15,
    learning_rate=1.0,
)
pipe = Pipeline(steps=[
    ("transformers", transformers),
    ("AdaBoost", AdaBoost),
])
y_pred = pipe.fit(X_train, y_train).predict(X_test)

# Held-out performance on the log-price target.
print("R2 Score: ", r2_score(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))

R2 Score:  0.7893852237521604
MAE:  0.23548669457602722


In [12]:
# Gradient_Boost
# Gradient-boosted trees with 500 boosting stages; all other settings are defaults.
Gradient_Boost = GradientBoostingRegressor(n_estimators=500)
pipe = Pipeline(steps=[
    ("transformers", transformers),
    ("Gradient_Boost", Gradient_Boost),
])
y_pred = pipe.fit(X_train, y_train).predict(X_test)

# Held-out performance on the log-price target.
print("R2 Score: ", r2_score(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))

R2 Score:  0.8820316320882238
MAE:  0.15746230916906967


In [13]:
# xgboost
# XGBoost regressor: 45 trees of depth at most 5, learning rate 0.5.
xgboost = XGBRegressor(
    n_estimators=45,
    max_depth=5,
    learning_rate=0.5,
)
pipe = Pipeline(steps=[
    ("transformers", transformers),
    ("xgboost", xgboost),
])
y_pred = pipe.fit(X_train, y_train).predict(X_test)

# Held-out performance on the log-price target.
print("R2 Score: ", r2_score(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))

R2 Score:  0.881699977185086
MAE:  0.16128569822229855


In [14]:
# RandomForestRegressor
# 350 trees capped at depth 15; max_samples=0.5 and max_features=0.75 restrict the
# share of training rows and of features each tree works with. Seeded for repeatability.
Random_Forest = RandomForestRegressor(
    n_estimators=350,
    max_depth=15,
    max_features=0.75,
    max_samples=0.5,
    random_state=3,
)
pipe = Pipeline(steps=[
    ("transformers", transformers),
    ("Random_Forest", Random_Forest),
])
y_pred = pipe.fit(X_train, y_train).predict(X_test)

# Held-out performance on the log-price target.
print("R2 Score: ", r2_score(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))

R2 Score:  0.8826962402444314
MAE:  0.1598839417305015


In [15]:
# KNeighbors
# k-nearest-neighbours regression averaging over the 30 closest training rows.
# NOTE(review): the pipeline contains no scaling step, so distances are computed on
# the raw scales of the passthrough numeric columns.
KNN = KNeighborsRegressor(n_neighbors=30)
pipe = Pipeline(steps=[
    ("transformers", transformers),
    ("KNN", KNN),
])
y_pred = pipe.fit(X_train, y_train).predict(X_test)

# Held-out performance on the log-price target.
print("R2 Score: ", r2_score(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))

R2 Score:  0.6602021003913789
MAE:  0.27130125670568767


In [16]:
# ExtraTrees
# Extremely randomised trees: 100 trees capped at depth 15, 75% of the features
# considered per split, seeded for repeatability.
#
# `max_samples=0.5` has been removed. ExtraTreesRegressor defaults to
# bootstrap=False; older scikit-learn releases silently ignored max_samples in that
# case, while current releases raise ValueError ("`max_sample` cannot be set if
# `bootstrap=False`"). Dropping the argument keeps the fitted model unchanged and
# lets the cell run on current versions. To really subsample rows, pass
# bootstrap=True together with max_samples -- note that this changes the model.
ExtraTrees = ExtraTreesRegressor(
    n_estimators=100,
    random_state=3,
    max_features=0.75,
    max_depth=15,
)
pipe = Pipeline([
    ("transformers", transformers),
    ("ExtraTrees", ExtraTrees),
])
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)

# Held-out performance on the log-price target.
print("R2 Score: ", r2_score(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))

R2 Score:  0.8699387227323925
MAE:  0.1615303906801573


In [17]:
# Lasso Regressor
# L1-regularised linear regression with alpha=0.001.
#
# The estimator is bound to `lasso_regressor` rather than `Lasso`: the original
# `Lasso=Lasso(...)` replaced the imported class with an instance, so running the
# cell a second time failed with "TypeError: 'Lasso' object is not callable".
# The pipeline step keeps its original name "Lasso".
lasso_regressor = Lasso(alpha=0.001)
pipe = Pipeline([
    ("transformers", transformers),
    ("Lasso", lasso_regressor),
])
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)

# Held-out performance on the log-price target.
print("R2 Score: ", r2_score(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))

R2 Score:  0.800780398628431
MAE:  0.21283282196103567


In [18]:
# Ridge Regressor
# L2-regularised linear regression with alpha=10.
#
# The estimator is bound to `ridge_regressor` rather than `Ridge`: the original
# `Ridge=Ridge(...)` replaced the imported class with an instance, so running the
# cell a second time failed with "TypeError: 'Ridge' object is not callable".
# The pipeline step keeps its original name "Ridge".
ridge_regressor = Ridge(alpha=10)
pipe = Pipeline([
    ("transformers", transformers),
    ("Ridge", ridge_regressor),
])
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)

# Held-out performance on the log-price target.
print("R2 Score: ", r2_score(y_test, y_pred))
print("MAE: ", mean_absolute_error(y_test, y_pred))

R2 Score:  0.8062995196138022
MAE:  0.21101950479191925


In [19]:
from sklearn.ensemble import VotingRegressor,StackingRegressor

# Final model: a weighted average of four tree ensembles behind the same one-hot
# preprocessing that the individual models use.

# Preprocessing: one-hot encode the categorical columns (by position in X) and pass
# the rest through. Dense output is requested with `sparse_threshold=0` because the
# encoder's `sparse=False` flag was renamed in scikit-learn 1.2 and removed in 1.4;
# this form yields the same dense matrix on old and new releases.
step1 = ColumnTransformer(
    transformers=[
        ('col_tnf', OneHotEncoder(drop='first'), [0, 1, 7, 10, 11]),
    ],
    remainder='passthrough',
    sparse_threshold=0,
)

# Base learners.
rf = RandomForestRegressor(n_estimators=350,random_state=3,max_samples=0.5,max_features=0.75,max_depth=15)
# NOTE(review): gbdt subsamples features (max_features=0.5) without a random_state,
# so the exported model is not exactly reproducible between runs.
gbdt = GradientBoostingRegressor(n_estimators=100,max_features=0.5)
xgb = XGBRegressor(n_estimators=25,learning_rate=0.3,max_depth=5)
# `max_samples` is omitted for the extra-trees model: with the default
# bootstrap=False older scikit-learn ignored it and current releases raise
# ValueError, so leaving it out preserves the fitted model and keeps the cell runnable.
et = ExtraTreesRegressor(n_estimators=100,random_state=3,max_features=0.75,max_depth=10)

# The random forest gets five times the weight of each other learner in the average.
step2 = VotingRegressor([('rf', rf), ('gbdt', gbdt), ('xgb',xgb), ('et',et)],weights=[5,1,1,1])

pipe = Pipeline([
    ('step1',step1),
    ('step2',step2)
])

pipe.fit(X_train,y_train)

y_pred = pipe.predict(X_test)

# Held-out performance on the log-price target.
print('R2 score',r2_score(y_test,y_pred))
print('MAE',mean_absolute_error(y_test,y_pred))

R2 score 0.8833634537017204
MAE 0.15969502328585625


# Exporting The Model

In [20]:
import pickle

# Persist the data frame and `pipe` (whatever it was last assigned to -- the voting
# ensemble when the cells are run in order).
# `with` blocks guarantee each file is flushed and closed; the original passed
# `open(...)` straight to pickle.dump and never closed the handles.
with open("df.pkl", "wb") as df_file:
    pickle.dump(df, df_file)

with open("pipe.pkl", "wb") as pipe_file:
    pickle.dump(pipe, pipe_file)