# **Predicting Future Laptop Prices**

Dataset: https://www.kaggle.com/datasets/muhammetvarl/laptop-price

----

Independed Var: Company, Product, Operating System

Dependent Var: Laptop Price

----

In [1]:
import opendatasets as od

# download the dataset (this is a Kaggle dataset)
# during download you will be required to input your Kaggle username and password
od.download("https://www.kaggle.com/datasets/muhammetvarl/laptop-price")

Skipping, found downloaded files in "./laptop-price" (use force=True to force download)


In [57]:
import numpy as np
import pandas as pd
import matplotlib as plt
import sklearn
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error




In [3]:
#Data information 
raw_data = pd.read_csv('laptop_price.csv')
print("Data :", len(raw_data))
print("rows :", len(raw_data.columns))
raw_data.head()

Data : 1303
rows : 13


Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.0
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2537.45
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.6


In [5]:
#Gets every unique value from each collum specified
CompanyColValues = pd.unique(raw_data['Company'])
RamColValues = pd.unique(raw_data["Ram"])
GpuColValues = pd.unique(raw_data['Gpu'])

print(len(GpuColValues))

print("Company: ", CompanyColValues)
print("Ram: ", RamColValues)
print("Gpu: ", GpuColValues)

110
Company:  ['Apple' 'HP' 'Acer' 'Asus' 'Dell' 'Lenovo' 'Chuwi' 'MSI' 'Microsoft'
 'Toshiba' 'Huawei' 'Xiaomi' 'Vero' 'Razer' 'Mediacom' 'Samsung' 'Google'
 'Fujitsu' 'LG']
Ram:  ['8GB' '16GB' '4GB' '2GB' '12GB' '6GB' '32GB' '24GB' '64GB']
Gpu:  ['Intel Iris Plus Graphics 640' 'Intel HD Graphics 6000'
 'Intel HD Graphics 620' 'AMD Radeon Pro 455'
 'Intel Iris Plus Graphics 650' 'AMD Radeon R5' 'Intel Iris Pro Graphics'
 'Nvidia GeForce MX150' 'Intel UHD Graphics 620' 'Intel HD Graphics 520'
 'AMD Radeon Pro 555' 'AMD Radeon R5 M430' 'Intel HD Graphics 615'
 'AMD Radeon Pro 560' 'Nvidia GeForce 940MX' 'Intel HD Graphics 400'
 'Nvidia GeForce GTX 1050' 'AMD Radeon R2' 'AMD Radeon 530'
 'Nvidia GeForce 930MX' 'Intel HD Graphics' 'Intel HD Graphics 500'
 'Nvidia GeForce 930MX ' 'Nvidia GeForce GTX 1060' 'Nvidia GeForce 150MX'
 'Intel Iris Graphics 540' 'AMD Radeon RX 580' 'Nvidia GeForce 920MX'
 'AMD Radeon R4 Graphics' 'AMD Radeon 520' 'Nvidia GeForce GTX 1070'
 'Nvidia GeForce GTX 1050

In [36]:
#Get the data I want
X = raw_data[['Gpu', 'Ram',"Company"]].values

In [37]:
X

array([['Intel Iris Plus Graphics 640', '8GB', 'Apple'],
       ['Intel HD Graphics 6000', '8GB', 'Apple'],
       ['Intel HD Graphics 620', '8GB', 'HP'],
       ...,
       ['Intel HD Graphics', '2GB', 'Lenovo'],
       ['AMD Radeon R5 M330', '6GB', 'HP'],
       ['Intel HD Graphics', '4GB', 'Asus']], dtype=object)

In [38]:
from sklearn import preprocessing
LE_Gpu = preprocessing.LabelEncoder()
LE_Gpu.fit(GpuColValues)
X[:,0] = LE_Gpu.transform(X[:,0])

LE_RAM = preprocessing.LabelEncoder()
LE_RAM.fit(RamColValues)
X[:,1] = LE_RAM.transform(X[:,1])

LE_Company = preprocessing.LabelEncoder()
LE_Company.fit(CompanyColValues)
X[:,2] = LE_Company.transform(X[:,2])

In [39]:
y = raw_data["Price_euros"]
y[0:5]

0    1339.69
1     898.94
2     575.00
3    2537.45
4    1803.60
Name: Price_euros, dtype: float64

In [46]:
X_trainset, X_testset, y_trainset, y_testset = train_test_split(X, y, test_size=0.2, random_state=5)


(1042, 3)
(261, 3)


In [60]:
regr = linear_model.LinearRegression()
regr.fit(X, y)


# Final Ouput Areas

Coefficients

In [61]:
print ('Coefficients: ', regr.coef_)

Coefficients:  [ 13.34555146 -52.06180602  21.8681515 ]


R2 Score

In [63]:
y_hat = regr.predict(X_testset)
print(r2_score(y_testset,y_hat))
print(mean_squared_error(y_testset,y_hat))

0.205083045608924
317147.2770669707


## Predicting Value

In [34]:
valueToPredict = np.array([["Nvidia GeForce MX130", "16GB", "HP"]])
print("Predicting Price for: ", valueToPredict)



pLE_Gpu = preprocessing.LabelEncoder()
pLE_Gpu.fit(GpuColValues)
valueToPredict[:,0] = pLE_Gpu.transform(valueToPredict[:,0])

pLE_RAM = preprocessing.LabelEncoder()
pLE_RAM.fit(RamColValues)
valueToPredict[:,1] = pLE_RAM.transform(valueToPredict[:,1])

pLE_Company = preprocessing.LabelEncoder()
pLE_Company.fit(CompanyColValues)
valueToPredict[:,2] = pLE_Company.transform(valueToPredict[:,2])

my_array = np.array(valueToPredict, dtype=int)
print(my_array)

pred = regr.predict(my_array)
#print(pred)

Predicting Price for:  [['Nvidia GeForce MX130' '16GB' 'HP']]
[[97  1  7]]


ValueError: X has 3 features, but LinearRegression is expecting 4 features as input.