In [2]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

df1 = pd.read_csv('df.csv')
df1.head()

Unnamed: 0,Company,TypeName,Ram,Weight,Price,Touchscreen,Ips,ppi,Cpu_brand,HDD,SSD,Gpu_brand,os
0,Apple,Ultrabook,8,1.37,71378.6832,0,1,226.983005,Intel Core i5,0,128,Intel,Mac
1,Apple,Ultrabook,8,1.34,47895.5232,0,0,127.67794,Intel Core i5,0,0,Intel,Mac
2,HP,Notebook,8,1.86,30636.0,0,0,141.211998,Intel Core i5,0,256,Intel,Others/No OS/Linux
3,Apple,Ultrabook,16,1.83,135195.336,0,1,220.534624,Intel Core i7,0,512,AMD,Mac
4,Apple,Ultrabook,8,1.37,96095.808,0,1,226.983005,Intel Core i5,0,256,Intel,Mac


In [5]:
df1['os'].unique()

array(['Mac', 'Others/No OS/Linux', 'Windows'], dtype=object)

In [4]:
# Display the table info
df1.columns

Index(['Company', 'TypeName', 'Ram', 'Weight', 'Price', 'Touchscreen', 'Ips',
       'ppi', 'Cpu_brand', 'HDD', 'SSD', 'Gpu_brand', 'os'],
      dtype='object')

In [5]:
# Initialize the label encoder
cat_cols =  ['Company', 'TypeName', 'Cpu_brand', 'Gpu_brand', 'os']

In [6]:
#One hot encoding
print('Dataframe encoded by OHE dimension : ', pd.get_dummies(df1, columns = cat_cols, drop_first = True).shape)

Dataframe encoded by OHE dimension :  (1302, 39)


In [7]:
#Label encoding
en = LabelEncoder()

for cols in cat_cols:
    df1[cols] = en.fit_transform(df1[cols])


In [8]:
x_train = df1.drop('Price', axis=1)
y_train = df1['Price']

In [9]:
from sklearn.model_selection import train_test_split
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size = 0.3, random_state = 0)

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

# Create an instance of LinearRegression
model = LinearRegression()

# Fit the model on the training data
model.fit(x_train, y_train)

#Make predictions on the validation data
y_pred = model.predict(x_val)

# Evaluate the model using mean squared error
mse = mean_squared_error(y_val, y_pred)
print("Mean Squared Error:", mse)

# The coefficients
print("Coefficients: \n", model.coef_)
# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(y_val, y_pred))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y_val, y_pred))

Mean Squared Error: 458309643.94307107
Coefficients: 
 [ 6.22938610e+02  4.12826498e+03  3.02766623e+03  1.00392593e+04
  8.15206008e+03  1.79848850e+03  1.94422102e+02  1.36378026e+03
 -7.22809249e+00  3.81786355e+01  5.94749376e+03  6.08642491e+03]
Mean squared error: 458309643.94
Coefficient of determination: 0.69


In [10]:
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import r2_score,mean_absolute_error
from sklearn.linear_model import LinearRegression,Ridge,Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor,AdaBoostRegressor,ExtraTreesRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor

Linear Regression

In [11]:
step1 = ColumnTransformer(transformers=[
    ('col_tnf',OneHotEncoder(sparse=False,drop='first'),[0,1,7,10,11])
],remainder='passthrough')

step2 = LinearRegression()

pipe = Pipeline([
    ('step1',step1),
    ('step2',step2)
])

pipe.fit(x_train,y_train)

y_pred = pipe.predict(x_val)

# The coefficients
print("Coefficients: \n", model.coef_)
# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(y_val, y_pred))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y_val, y_pred))

Coefficients: 
 [ 6.22938610e+02  4.12826498e+03  3.02766623e+03  1.00392593e+04
  8.15206008e+03  1.79848850e+03  1.94422102e+02  1.36378026e+03
 -7.22809249e+00  3.81786355e+01  5.94749376e+03  6.08642491e+03]
Mean squared error: 399512203.27
Coefficient of determination: 0.73




Random Forest

In [12]:
step1 = ColumnTransformer(transformers=[
    ('col_tnf',OneHotEncoder(sparse=False,drop='first'),[0,1,7,10,11])
],remainder='passthrough')
step2 = RandomForestRegressor(n_estimators=100,
                              random_state=3,
                              max_samples=0.5,
                              max_features=0.75,
                              max_depth=15)
pipe = Pipeline([
    ('step1',step1),
    ('step2',step2)
])
pipe.fit(x_train,y_train)
y_pred = pipe.predict(x_val)

# The coefficients
print("Coefficients: \n", model.coef_)
# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(y_val, y_pred))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y_val, y_pred))



Coefficients: 
 [ 6.22938610e+02  4.12826498e+03  3.02766623e+03  1.00392593e+04
  8.15206008e+03  1.79848850e+03  1.94422102e+02  1.36378026e+03
 -7.22809249e+00  3.81786355e+01  5.94749376e+03  6.08642491e+03]
Mean squared error: 380720922.86
Coefficient of determination: 0.75


In [13]:
import pickle
pickle.dump(pipe,open('laptop_price_model.pkl','wb'))