## Predicting laptop Price

In [1]:
import numpy as np 
import pandas as pd

In [31]:
dataset = pd.read_csv('laptop_price_processed.csv')

In [32]:
dataset

Unnamed: 0,laptop_ID,Company,TypeName,Inches,Ram,OpSys,Weight,Price_euros,IPS_Panel,Retina_Display,...,Total_Pixels,High_Resolution,Product_Series,Cpu_Brand,Cpu_Series,Clock_Speed,Gpu_Manufacturer,Gpu_Series,Memory_Type,Memory_Size(GB)
0,1,Apple,Ultrabook,13.3,8,macOS,1.37,1339.69,1,1,...,4096000,1,MacBook,Intel,Core i5,2.3,Intel,Iris Plus,SSD,128
1,2,Apple,Ultrabook,13.3,8,macOS,1.34,898.94,0,0,...,1296000,0,Macbook,Intel,Core i5,1.8,Intel,HD,Flash Storage,128
2,3,HP,Notebook,15.6,8,No OS,1.86,575.00,0,0,...,2073600,0,250 G6,Intel,Core i5,2.5,Intel,HD,SSD,256
3,4,Apple,Ultrabook,15.4,16,macOS,1.83,2537.45,1,1,...,5184000,1,MacBook,Intel,Core i7,2.7,AMD,Radeon,SSD,512
4,5,Apple,Ultrabook,13.3,8,macOS,1.37,1803.60,1,1,...,4096000,1,MacBook,Intel,Core i5,3.1,Intel,Iris Plus,SSD,256
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1298,1316,Lenovo,2 in 1 Convertible,14.0,4,Windows 10,1.80,638.00,1,0,...,2073600,0,Yoga,Intel,Core i7,2.5,Intel,HD,SSD,128
1299,1317,Lenovo,2 in 1 Convertible,13.3,16,Windows 10,1.30,1499.00,1,0,...,5760000,1,Yoga,Intel,Core i7,2.5,Intel,HD,SSD,512
1300,1318,Lenovo,Notebook,14.0,2,Windows 10,1.50,229.00,0,0,...,1049088,0,IdeaPad,Intel,Celeron,1.6,Intel,HD,Flash Storage,64
1301,1319,HP,Notebook,15.6,6,Windows 10,2.19,764.00,0,0,...,1049088,0,15-AC110nv,Intel,Core i7,2.5,AMD,Radeon,HDD,1024


In [33]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 23 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   laptop_ID         1303 non-null   int64  
 1   Company           1303 non-null   object 
 2   TypeName          1303 non-null   object 
 3   Inches            1303 non-null   float64
 4   Ram               1303 non-null   int64  
 5   OpSys             1303 non-null   object 
 6   Weight            1303 non-null   float64
 7   Price_euros       1303 non-null   float64
 8   IPS_Panel         1303 non-null   int64  
 9   Retina_Display    1303 non-null   int64  
 10  Touchscreen       1303 non-null   int64  
 11  Resolution_X      1303 non-null   int64  
 12  Resolution_Y      1303 non-null   int64  
 13  Total_Pixels      1303 non-null   int64  
 14  High_Resolution   1303 non-null   int64  
 15  Product_Series    1303 non-null   object 
 16  Cpu_Brand         1303 non-null   object 


### Preprocessing data

In [38]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

categorical_col = ["Company", "TypeName", "OpSys", "Cpu_Brand", "Cpu_Series", "Gpu_Manufacturer", "Gpu_Series", "Memory_Type"]
numeric_col = ["Inches", "Ram", "Weight", "IPS_Panel", "Retina_Display", "Touchscreen", "Resolution_X", "Resolution_Y", "Clock_Speed", "Memory_Size(GB)"]

preprocessor = ColumnTransformer([
    ("num", StandardScaler(), numeric_col),
    ("cat", OneHotEncoder(handle_unknown='ignore'), categorical_col)
])

### spliting training and testing data

In [39]:
X = dataset.drop(columns=['Price_euros', 'laptop_ID']) 
y = dataset['Price_euros']

In [40]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=21)

### linear regression

In [8]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
lr_pipeline = Pipeline([
  ('preprocessor', preprocessor),
  ('model', LinearRegression())
  ])
lr_pipeline.fit(X_train, y_train)

In [9]:
y_pred = lr_pipeline.predict(X_test)

In [10]:
from sklearn.metrics import mean_squared_error, r2_score
mse = mean_squared_error(y_test, y_pred)
score = r2_score(y_test, y_pred)

In [11]:
print(f'Mean Squared Error {mse}\nr2 score {score}')

Mean Squared Error 92542.11957875443
r2 score 0.8371850904191669


### random forest

In [12]:
from sklearn.ensemble import RandomForestRegressor
rfc_pipeline = Pipeline([
  ('preprocessor', preprocessor),
  ('model', RandomForestRegressor(n_estimators=100, random_state=21, max_depth=10))
])
rfc_pipeline.fit(X_train, y_train)

In [13]:
y_pred2 = rfc_pipeline.predict(X_test)

In [14]:
mse2 = mean_squared_error(y_test, y_pred2)
score2 = r2_score(y_test, y_pred2)
print(f'Mean Squared Error {mse2} \nr2 score {score2}')

Mean Squared Error 95678.23972743553 
r2 score 0.8316675258683829


### XGBoost

In [15]:
from xgboost import XGBRegressor
xgb_pipeline = Pipeline([
  ('preprocessor', preprocessor),
  ('model', XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42, max_depth=3))
])
xgb_pipeline.fit(X_train, y_train)

In [16]:
y_pred3 = xgb_pipeline.predict(X_test)

In [17]:
mse3 = mean_squared_error(y_test, y_pred3)
score3 = r2_score(y_test, y_pred3)
print(f'Mean squared error {mse3} \nr2 score {score3}')

Mean squared error 70489.57253269147 
r2 score 0.8759834610386799


In [18]:
from sklearn.svm import SVR
svr_pipeline = Pipeline([
  ('preprocessor', preprocessor),
  ('model', SVR(kernel='sigmoid'))
])
svr_pipeline.fit(X_train, y_train)

In [19]:
y_pred4 = svr_pipeline.predict(X_test)

In [20]:
mse4 = mean_squared_error(y_test, y_pred4)
score4 = r2_score(y_test, y_pred4)
print(f'Mean squared error {mse4} \nr2 score {score4}')

Mean squared error 481439.1059740155 
r2 score 0.15297526288956942


### function to predict price using required parameters

In [21]:
def predict_price(model, user_input):
    input_df = pd.DataFrame([user_input])
    predicted_price = model.predict(input_df)[0]
    return f"Predicted Laptop Price: €{predicted_price:.2f}"

In [22]:
user_input = {
    "Company": "Dell",
    "TypeName": "Ultrabook",
    "Inches": 14.0,
    "Ram": 8,
    "OpSys": "Windows 10",
    "Weight": 1.5,
    "IPS_Panel": 0,
    "Retina_Display": 0,
    "Touchscreen": 0,
    "Resolution_X": 1920,
    "Resolution_Y": 1080,
    "Cpu_Brand": "Intel",
    "Cpu_Series": "i7",
    "Clock_Speed": 2.8,
    "Gpu_Manufacturer": "Intel",
    "Gpu_Series": "UHD",
    "Memory_Type": "SSD",
    "Memory_Size(GB)": 512
}

In [23]:
print(predict_price(xgb_pipeline, user_input))

Predicted Laptop Price: €1464.16
