In [122]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split, cross_val_score

In [123]:
df = pd.read_csv("./../laptop_price.csv", encoding="latin-1")
df.head()

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Gpu,OpSys,Weight,Price_euros
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,Intel HD Graphics 6000,macOS,1.34kg,898.94
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,Intel HD Graphics 620,No OS,1.86kg,575.0
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,AMD Radeon Pro 455,macOS,1.83kg,2537.45
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.6


In [124]:
df.shape

(1303, 12)

In [125]:
df.isnull().sum()

laptop_ID           0
Company             0
Product             0
TypeName            0
Inches              0
ScreenResolution    0
Cpu                 0
Ram                 0
Gpu                 0
OpSys               0
Weight              0
Price_euros         0
dtype: int64

In [126]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   laptop_ID         1303 non-null   int64  
 1   Company           1303 non-null   object 
 2   Product           1303 non-null   object 
 3   TypeName          1303 non-null   object 
 4   Inches            1303 non-null   float64
 5   ScreenResolution  1303 non-null   object 
 6   Cpu               1303 non-null   object 
 7   Ram               1303 non-null   object 
 8   Gpu               1303 non-null   object 
 9   OpSys             1303 non-null   object 
 10  Weight            1303 non-null   object 
 11  Price_euros       1303 non-null   float64
dtypes: float64(2), int64(1), object(9)
memory usage: 122.3+ KB


In [127]:
df["Ram"] = df["Ram"].str.replace("GB", "").astype('int32')
df["Weight"] = df["Weight"].str.replace("kg", "").astype('float32')

In [128]:
df.corr(numeric_only=True)["Price_euros"]

laptop_ID      0.067830
Inches         0.068197
Ram            0.743007
Weight         0.210370
Price_euros    1.000000
Name: Price_euros, dtype: float64

In [129]:
def change_company(name):
    if name in ("Samsung", "Razer", "Mediacom", "Microsoft", "Xiaomi", "Vero", "Chuwi", "Google", "Fujitsu", "LG", "Huawei"):
        return "Other"
    return name
df["Company"] = df["Company"].apply(change_company)

In [130]:
df.head()

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Gpu,OpSys,Weight,Price_euros
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8,Intel Iris Plus Graphics 640,macOS,1.37,1339.69
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8,Intel HD Graphics 6000,macOS,1.34,898.94
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8,Intel HD Graphics 620,No OS,1.86,575.0
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16,AMD Radeon Pro 455,macOS,1.83,2537.45
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8,Intel Iris Plus Graphics 650,macOS,1.37,1803.6


In [131]:
df["TypeName"].value_counts()

TypeName
Notebook              727
Gaming                205
Ultrabook             196
2 in 1 Convertible    121
Workstation            29
Netbook                25
Name: count, dtype: int64

In [132]:
df["Touch"] = df["ScreenResolution"].apply(lambda x: 1 if "Touchscreen" in x else 0)
df["IPS"] = df["ScreenResolution"].apply(lambda x: 1 if "IPS" in x else 0)

In [133]:
df.head()

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Gpu,OpSys,Weight,Price_euros,Touch,IPS
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8,Intel Iris Plus Graphics 640,macOS,1.37,1339.69,0,1
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8,Intel HD Graphics 6000,macOS,1.34,898.94,0,0
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8,Intel HD Graphics 620,No OS,1.86,575.0,0,0
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16,AMD Radeon Pro 455,macOS,1.83,2537.45,0,1
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8,Intel Iris Plus Graphics 650,macOS,1.37,1803.6,0,1


In [134]:
df["Cpu"] = df["Cpu"].apply(lambda x: " ".join(x.split()[:3]))

In [135]:
def change_cpu(name):
    if "Intel Core i" in name:
        return name
    if name.split()[0] == "AMD":
        return "AMD"
    return "Other"
df["Processor"] = df["Cpu"].apply(change_cpu)

In [136]:
df["Gpu"].value_counts()

Gpu
Intel HD Graphics 620      281
Intel HD Graphics 520      185
Intel UHD Graphics 620      68
Nvidia GeForce GTX 1050     66
Nvidia GeForce GTX 1060     48
                          ... 
AMD Radeon R5 520            1
AMD Radeon R7                1
Intel HD Graphics 540        1
AMD Radeon 540               1
ARM Mali T860 MP4            1
Name: count, Length: 110, dtype: int64

In [137]:
def change_gpu(name):
    if "GTX" in name:
        return "Nvidia GTX"
    if "RTX" in name:
        return "Nvidia RTX"
    if "Nvidia" in name:
        return "Nvidia"
    if "AMD" in name:
        return "AMD"
    if "Intel" in name:
        return "Intel"
    return name
df["Gpu"] = df["Gpu"].apply(change_gpu)

In [138]:
df["Gpu"].value_counts()

Gpu
Intel                722
Nvidia GTX           239
AMD                  180
Nvidia               161
ARM Mali T860 MP4      1
Name: count, dtype: int64

In [139]:
df.rename(columns = {'Gpu':'GPU'}, inplace = True)
df.rename(columns = {'OpSys':'OS'}, inplace = True)

In [140]:
df = df[df["GPU"] != "ARM Mali T860 MP4"]

In [141]:
def change_os(name):
    if "Linux" in name:
        return "Linux"
    if "Windows" in name:
        return "Windows"
    if "mac" in name.lower() :
        return "Mac"
    return "Other"
df["OS"] = df["OS"].apply(change_os)

In [142]:
df = df.drop(columns=["laptop_ID", "Inches", "ScreenResolution", "Cpu", "Product"])

In [143]:
df.head()

Unnamed: 0,Company,TypeName,Ram,GPU,OS,Weight,Price_euros,Touch,IPS,Processor
0,Apple,Ultrabook,8,Intel,Mac,1.37,1339.69,0,1,Intel Core i5
1,Apple,Ultrabook,8,Intel,Mac,1.34,898.94,0,0,Intel Core i5
2,HP,Notebook,8,Intel,Other,1.86,575.0,0,0,Intel Core i5
3,Apple,Ultrabook,16,AMD,Mac,1.83,2537.45,0,1,Intel Core i7
4,Apple,Ultrabook,8,Intel,Mac,1.37,1803.6,0,1,Intel Core i5


In [146]:
df["Company"].value_counts()

Company
Dell       297
Lenovo     297
HP         274
Asus       158
Acer       103
MSI         54
Other       50
Toshiba     48
Apple       21
Name: count, dtype: int64

In [106]:
df.shape

(1302, 10)

In [107]:
df = pd.get_dummies(df)

In [108]:
df.head()

Unnamed: 0,Ram,Weight,Price_euros,Touch,IPS,Company_Acer,Company_Apple,Company_Asus,Company_Dell,Company_HP,...,GPU_Nvidia GTX,OS_Linux,OS_Mac,OS_Other,OS_Windows,Processor_AMD,Processor_Intel Core i3,Processor_Intel Core i5,Processor_Intel Core i7,Processor_Other
0,8,1.37,1339.69,0,1,False,True,False,False,False,...,False,False,True,False,False,False,False,True,False,False
1,8,1.34,898.94,0,0,False,True,False,False,False,...,False,False,True,False,False,False,False,True,False,False
2,8,1.86,575.0,0,0,False,False,False,False,True,...,False,False,False,True,False,False,False,True,False,False
3,16,1.83,2537.45,0,1,False,True,False,False,False,...,False,False,True,False,False,False,False,False,True,False
4,8,1.37,1803.6,0,1,False,True,False,False,False,...,False,False,True,False,False,False,False,True,False,False


In [109]:
x = df.drop("Price_euros", axis=1)
y = df["Price_euros"]

In [110]:
x_train, x_test, y_train, y_test = train_test_split(x, y)

In [111]:
def get_model_accuracy(model):
    model.fit(x_train, y_train)
    accuracy = model.score(x_test, y_test)
    print(f'{str(model)} Accuracy --> {str(accuracy)}')

In [112]:
from sklearn.linear_model import LinearRegression
lr_model = LinearRegression()
cross_val_score(lr_model, x, y, cv=5).mean()
# get_model_accuracy(lr_model)

0.6824002220099864

In [113]:
from sklearn.linear_model import Lasso
lasso_model = Lasso()
cross_val_score(lasso_model, x, y, cv=5).mean()
# get_model_accuracy(lasso_model)

0.6833918328068679

In [114]:
from sklearn.tree import DecisionTreeRegressor
dt_model = DecisionTreeRegressor()
cross_val_score(dt_model, x, y, cv=5).mean()
# get_model_accuracy(dt_model)

0.6249917755315664

In [115]:
from sklearn.ensemble import RandomForestRegressor
rf_model = RandomForestRegressor()
# cross_val_score(rf_model, x, y, cv=5).mean()
get_model_accuracy(rf_model)

RandomForestRegressor() Accuracy --> 0.7917517167168965


In [116]:
from sklearn.model_selection import GridSearchCV
params = {
    'n_estimators': [10, 50, 100],
    'criterion': ["squared_error", "friedman_mse", "squared_error"]
}

grid = GridSearchCV(estimator=rf_model, param_grid=params)
grid_fit = grid.fit(x_train, y_train)
best = grid_fit.best_estimator_
best

In [117]:
best.score(x_test, y_test)

0.7957617255080809

In [118]:
# with open("model.pickle", "wb") as file:
#     pickle.dump(best, file)

In [147]:
x_train.columns

Index(['Ram', 'Weight', 'Touch', 'IPS', 'Company_Acer', 'Company_Apple',
       'Company_Asus', 'Company_Dell', 'Company_HP', 'Company_Lenovo',
       'Company_MSI', 'Company_Other', 'Company_Toshiba',
       'TypeName_2 in 1 Convertible', 'TypeName_Gaming', 'TypeName_Netbook',
       'TypeName_Notebook', 'TypeName_Ultrabook', 'TypeName_Workstation',
       'GPU_AMD', 'GPU_Intel', 'GPU_Nvidia', 'GPU_Nvidia GTX', 'OS_Linux',
       'OS_Mac', 'OS_Other', 'OS_Windows', 'Processor_AMD',
       'Processor_Intel Core i3', 'Processor_Intel Core i5',
       'Processor_Intel Core i7', 'Processor_Other'],
      dtype='object')

In [121]:
df["Pr"].value_counts()

KeyError: 'CPU'