In [1]:
# Importing required libraries
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split

# Load the dataset.
# The CSV location can be overridden with the LAPTOP_DATA_CSV environment
# variable so the notebook can run on machines other than the author's.
# When the variable is unset or empty, the original absolute path is used.
path = os.environ.get("LAPTOP_DATA_CSV") or (
    r"C:\Users\WELCOME\OneDrive\Desktop\Laptop-Price-Prediction-SmartTechCo\data\laptop_data.csv"
)
df = pd.read_csv(path)

# Display first 5 rows
df.head()


Unnamed: 0.1,Unnamed: 0,Company,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price
0,0,Apple,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,71378.6832
1,1,Apple,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,47895.5232
2,2,HP,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,30636.0
3,3,Apple,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,135195.336
4,4,Apple,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,96095.808


In [7]:
# Drop the 'Unnamed: 0' column (it looks like a row index that was saved
# into the CSV -- its values mirror the row numbers in the preview above).
# errors='ignore' keeps this cell safe to re-run: once the column is gone the
# drop is a no-op instead of raising KeyError.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')


In [10]:


# Strip the 'GB' / 'kg' unit suffixes and convert to numeric.
# astype(str) comes first so the cell can be re-run after the columns are
# already numeric (the .str accessor raises on non-string data), and
# regex=False pins literal matching regardless of the pandas default.
df['Ram'] = df['Ram'].astype(str).str.replace('GB', '', regex=False).astype(int)
df['Weight'] = df['Weight'].astype(str).str.replace('kg', '', regex=False).astype(float)

# Derive binary storage-type flags from 'Memory', then drop the raw column.
# These are presence indicators only (1 = mentioned, 0 = not); the storage
# capacity is not extracted. 'Flash Storage' is counted as SSD.
# The guard makes a re-run a no-op once 'Memory' has already been dropped.
if 'Memory' in df.columns:
    df['SSD'] = df['Memory'].str.contains('SSD|Flash Storage').astype(int)
    df['HDD'] = df['Memory'].str.contains('HDD', regex=False).astype(int)
    df = df.drop(columns='Memory')

# Simplify CPU names: keep only the first 3 whitespace-separated words
df['Cpu'] = df['Cpu'].str.split().str[:3].str.join(' ')
df.head()


Unnamed: 0,Company,TypeName,Inches,ScreenResolution,Cpu,Ram,Gpu,OpSys,Weight,Price,SSD,HDD
0,Apple,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5,8,Intel Iris Plus Graphics 640,macOS,1.37,71378.6832,1,0
1,Apple,Ultrabook,13.3,1440x900,Intel Core i5,8,Intel HD Graphics 6000,macOS,1.34,47895.5232,1,0
2,HP,Notebook,15.6,Full HD 1920x1080,Intel Core i5,8,Intel HD Graphics 620,No OS,1.86,30636.0,1,0
3,Apple,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7,16,AMD Radeon Pro 455,macOS,1.83,135195.336,1,0
4,Apple,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5,8,Intel Iris Plus Graphics 650,macOS,1.37,96095.808,1,0


In [11]:
# One-hot encode the categorical features so every column is numeric.
# drop_first=True omits the first level of each encoded feature.
categorical_cols = [
    'Company',
    'TypeName',
    'ScreenResolution',
    'Cpu',
    'Gpu',
    'OpSys',
]
df_encoded = pd.get_dummies(
    data=df,
    columns=categorical_cols,
    drop_first=True,
)

df_encoded.head()


Unnamed: 0,Inches,Ram,Weight,Price,SSD,HDD,Company_Apple,Company_Asus,Company_Chuwi,Company_Dell,...,Gpu_Nvidia Quadro M620,Gpu_Nvidia Quadro M620M,OpSys_Chrome OS,OpSys_Linux,OpSys_Mac OS X,OpSys_No OS,OpSys_Windows 10,OpSys_Windows 10 S,OpSys_Windows 7,OpSys_macOS
0,13.3,8,1.37,71378.6832,1,0,True,False,False,False,...,False,False,False,False,False,False,False,False,False,True
1,13.3,8,1.34,47895.5232,1,0,True,False,False,False,...,False,False,False,False,False,False,False,False,False,True
2,15.6,8,1.86,30636.0,1,0,False,False,False,False,...,False,False,False,False,False,True,False,False,False,False
3,15.4,16,1.83,135195.336,1,0,True,False,False,False,...,False,False,False,False,False,False,False,False,False,True
4,13.3,8,1.37,96095.808,1,0,True,False,False,False,...,False,False,False,False,False,False,False,False,False,True


In [12]:
# 'Price' is the prediction target; every other column is a feature.
y = df_encoded['Price']
X = df_encoded.drop(columns=['Price'])

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


In [13]:
# Fit a 100-tree random forest on the training split (seeded for repeatability).
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict on the held-out rows
y_pred = model.predict(X_test)

# Report R² and root-mean-squared error on the held-out rows
r2 = r2_score(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))
print(f"R² Score: {r2:.2f}")
print(f"RMSE: {rmse:.2f}")


R² Score: 0.80
RMSE: 17006.18


In [14]:
# Save the fitted model and the list of feature column names.
# Each file is opened in a `with` block so the handle is flushed and closed
# deterministically, even if pickling raises part-way through.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

with open('model_columns.pkl', 'wb') as columns_file:
    pickle.dump(X.columns.tolist(), columns_file)
