# **Predicting Future Laptop Prices**

Dataset: https://www.kaggle.com/datasets/muhammetvarl/laptop-price

----

Independed Var: Company, Product, Operating System

Dependent Var: Laptop Price

----

In [197]:
import opendatasets as od

# download the dataset (this is a Kaggle dataset)
# during download you will be required to input your Kaggle username and password
od.download("https://www.kaggle.com/datasets/muhammetvarl/laptop-price")

Skipping, found downloaded files in "./laptop-price" (use force=True to force download)


In [198]:
import numpy as np
import pandas as pd
import matplotlib as plt
import sklearn
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error




In [199]:
#Data information 
raw_data = pd.read_csv('laptop_price.csv')
print("Data :", len(raw_data))
print("rows :", len(raw_data.columns))
raw_data.head()

Data : 1303
rows : 13


Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.0
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2537.45
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.6


In [200]:
raw_data.iloc[1282]

laptop_ID                                           1300
Company                                               HP
Product                                 Stream 11-Y000na
TypeName                                         Netbook
Inches                                              11.6
ScreenResolution                                1366x768
Cpu                 Intel Celeron Dual Core N3060 1.6GHz
Ram                                                  2GB
Memory                                32GB Flash Storage
Gpu                                Intel HD Graphics 400
OpSys                                         Windows 10
Weight                                            1.17kg
Price_euros                                        209.0
Name: 1282, dtype: object

In [201]:
#Gets every unique value from each collum specified
CompanyColValues = pd.unique(raw_data['Company'])
RamColValues = pd.unique(raw_data["Ram"])
GpuColValues = pd.unique(raw_data['Gpu'])
ScreenSizeColValues = pd.unique(raw_data['Inches'])
ProductColValues = pd.unique(raw_data["Product"])
TypeNameColValues = pd.unique(raw_data["TypeName"])
CpuColValues = pd.unique(raw_data['Cpu'])
MemmoryColValues = pd.unique(raw_data['Memory'])

print(len(GpuColValues))

print("Company: ", CompanyColValues)
print("Ram: ", RamColValues)
print("Gpu: ", GpuColValues)
print("ScreenSize: ", ScreenSizeColValues)
print("Product: ", ProductColValues)
print("Type: ", TypeNameColValues)

110
Company:  ['Apple' 'HP' 'Acer' 'Asus' 'Dell' 'Lenovo' 'Chuwi' 'MSI' 'Microsoft'
 'Toshiba' 'Huawei' 'Xiaomi' 'Vero' 'Razer' 'Mediacom' 'Samsung' 'Google'
 'Fujitsu' 'LG']
Ram:  ['8GB' '16GB' '4GB' '2GB' '12GB' '6GB' '32GB' '24GB' '64GB']
Gpu:  ['Intel Iris Plus Graphics 640' 'Intel HD Graphics 6000'
 'Intel HD Graphics 620' 'AMD Radeon Pro 455'
 'Intel Iris Plus Graphics 650' 'AMD Radeon R5' 'Intel Iris Pro Graphics'
 'Nvidia GeForce MX150' 'Intel UHD Graphics 620' 'Intel HD Graphics 520'
 'AMD Radeon Pro 555' 'AMD Radeon R5 M430' 'Intel HD Graphics 615'
 'AMD Radeon Pro 560' 'Nvidia GeForce 940MX' 'Intel HD Graphics 400'
 'Nvidia GeForce GTX 1050' 'AMD Radeon R2' 'AMD Radeon 530'
 'Nvidia GeForce 930MX' 'Intel HD Graphics' 'Intel HD Graphics 500'
 'Nvidia GeForce 930MX ' 'Nvidia GeForce GTX 1060' 'Nvidia GeForce 150MX'
 'Intel Iris Graphics 540' 'AMD Radeon RX 580' 'Nvidia GeForce 920MX'
 'AMD Radeon R4 Graphics' 'AMD Radeon 520' 'Nvidia GeForce GTX 1070'
 'Nvidia GeForce GTX 1050

In [202]:
#Get the data I want and sets for the X Value
X = raw_data[['Ram', 'Gpu','Cpu','Memory']].values

In [203]:
#encode each differnt colunm to a number, so sklearn can
#predict a model off of it

#Sets the values from the X list into numbers
from sklearn import preprocessing
LE_Gpu = preprocessing.LabelEncoder()
LE_Gpu.fit(RamColValues)
X[:,0] = LE_Gpu.transform(X[:,0])

LE_RAM = preprocessing.LabelEncoder()
LE_RAM.fit(GpuColValues)
X[:,1] = LE_RAM.transform(X[:,1])

LE_Company = preprocessing.LabelEncoder()
LE_Company.fit(CpuColValues)
X[:,2] = LE_Company.transform(X[:,2])

LE_Companya = preprocessing.LabelEncoder()
LE_Companya.fit(MemmoryColValues)
X[:,3] = LE_Companya.transform(X[:,3])
X


array([[8, 58, 65, 4],
       [8, 51, 63, 2],
       [8, 53, 74, 16],
       ...,
       [3, 40, 34, 35],
       [7, 21, 89, 10],
       [5, 40, 34, 26]], dtype=object)

In [204]:
#Dependent Variable
y = raw_data["Price_euros"]
y[0:5]

0    1339.69
1     898.94
2     575.00
3    2537.45
4    1803.60
Name: Price_euros, dtype: float64

In [205]:
#splits in to train, test split
X_trainset, X_testset, y_trainset, y_testset = train_test_split(X, y, test_size=0.2, random_state=5119)


In [206]:
#Actual Model
regr = linear_model.LinearRegression()
regr.fit(X_trainset, y_trainset)


# Final Ouput Areas

Coefficients

In [207]:
#Coefficients
print ('Coefficients: ', regr.coef_)

Coefficients:  [-37.66159507   7.16630247  11.65235284  14.21595291]


R2 Score

In [208]:
#r2 Score and MSE
y_hat = regr.predict(X_testset)
print('R2 Score' , r2_score(y_testset,y_hat))
print('MSE:', mean_squared_error(y_testset,y_hat))

print('Predicted Values:', y_hat[0:5])
print('Actual Values:', y_testset[0:5])

R2 Score 0.5212756438984736
MSE: 178281.28483713986
Predicted Values: [ 534.44643885 1929.45449178 1023.25190306  -17.64330438  894.9208957 ]
Actual Values: 1200     499.0
186     2397.0
81      1510.0
371      426.0
1301     764.0
Name: Price_euros, dtype: float64


## Maximise the best seed

In [209]:
#Does a loop to find the best seed in the first 10000 seeds possible
#bumps up the F2 Score by like 0.2-0.3

ldict = {}
for i in range(1,10000):
    X_trainset, X_testset, y_trainset, y_testset = train_test_split(X, y, test_size=0.2, random_state=i)
    regr = linear_model.LinearRegression()
    regr.fit(X_trainset, y_trainset)
    y_hat = regr.predict(X_testset)
    ldict[i] = r2_score(y_testset,y_hat)


Keymax = max(zip(ldict.values(), ldict.keys()))[1]
print(Keymax)



5119


In [210]:
print(ldict[7202])

0.5054985557906466


## Predicting Value

(Doesn't work atm)

In [211]:
valueToPredict = np.array([["16GB","AMD Radeon Pro 455", "Intel Core i5 2.3GHz", "500GB HHD"]])
print("Predicting Price for: ", valueToPredict)


pLE_Gpu = preprocessing.LabelEncoder()
pLE_Gpu.fit(RamColValues)
valueToPredict[:,0] = pLE_Gpu.transform(X[:,0])

pLE_RAM = preprocessing.LabelEncoder()
pLE_RAM.fit(GpuColValues)
valueToPredict[:,1] = pLE_RAM.transform(X[:,1])

pLE_Company = preprocessing.LabelEncoder()
pLE_Company.fit(CpuColValues)
valueToPredict[:,2] = pLE_Company.transform(X[:,2])

pLE_Companya = preprocessing.LabelEncoder()
pLE_Companya.fit(MemmoryColValues)
valueToPredict[:,3] = pLE_Companya.transform(X[:,3])
valueToPredict

my_array = np.array(valueToPredict, dtype=int)
print(my_array)

pred = regr.predict(my_array)
#print(pred)

Predicting Price for:  [['16GB' 'AMD Radeon Pro 455' 'Intel Core i5 2.3GHz' '500GB HHD']]


ValueError: y contains previously unseen labels: 8