In [87]:
import pandas as pd
import numpy as np
import pickle

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LinearRegression


In [89]:
df = pd.read_csv("data.csv")
df.head()


Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,brand,name,price,spec_rating,processor,CPU,Ram,Ram_type,ROM,ROM_type,GPU,display_size,resolution_width,resolution_height,OS,warranty
0,0,0,HP,Victus 15-fb0157AX Gaming Laptop,49900,73.0,5th Gen AMD Ryzen 5 5600H,"Hexa Core, 12 Threads",8GB,DDR4,512GB,SSD,4GB AMD Radeon RX 6500M,15.6,1920.0,1080.0,Windows 11 OS,1
1,1,1,HP,15s-fq5007TU Laptop,39900,60.0,12th Gen Intel Core i3 1215U,"Hexa Core (2P + 4E), 8 Threads",8GB,DDR4,512GB,SSD,Intel UHD Graphics,15.6,1920.0,1080.0,Windows 11 OS,1
2,2,2,Acer,One 14 Z8-415 Laptop,26990,69.323529,11th Gen Intel Core i3 1115G4,"Dual Core, 4 Threads",8GB,DDR4,512GB,SSD,Intel Iris Xe Graphics,14.0,1920.0,1080.0,Windows 11 OS,1
3,3,3,Lenovo,Yoga Slim 6 14IAP8 82WU0095IN Laptop,59729,66.0,12th Gen Intel Core i5 1240P,"12 Cores (4P + 8E), 16 Threads",16GB,LPDDR5,512GB,SSD,Intel Integrated Iris Xe,14.0,2240.0,1400.0,Windows 11 OS,1
4,4,4,Apple,MacBook Air 2020 MGND3HN Laptop,69990,69.323529,Apple M1,Octa Core (4P + 4E),8GB,DDR4,256GB,SSD,Apple M1 Integrated Graphics,13.3,2560.0,1600.0,Mac OS,1


In [90]:
df.columns = df.columns.str.strip().str.lower()
df.columns


Index(['unnamed: 0.1', 'unnamed: 0', 'brand', 'name', 'price', 'spec_rating',
       'processor', 'cpu', 'ram', 'ram_type', 'rom', 'rom_type', 'gpu',
       'display_size', 'resolution_width', 'resolution_height', 'os',
       'warranty'],
      dtype='object')

In [92]:
df.drop(columns=["name", "unnamed: 0", "unnamed: 0.1"], inplace=True, errors="ignore")


In [93]:
def clean_ram(val):
    val = str(val).upper().strip()
    return int(val.replace("GB", ""))

df["ram"] = df["ram"].apply(clean_ram)


In [94]:
def clean_rom(val):
    val = str(val).upper().strip()
    if "TB" in val:
        return int(float(val.replace("TB", "")) * 1024)
    elif "GB" in val:
        return int(float(val.replace("GB", "")))
    else:
        return np.nan

df["rom"] = df["rom"].apply(clean_rom)


In [95]:
FEATURE_COLS = [
    "ram",
    "ram_type",
    "rom",
    "rom_type",
    "gpu",
    "display_size",
    "resolution_width",
    "resolution_height",
    "os",
    "warranty",
    "spec_rating"
]

TARGET_COL = "price"

X = df[FEATURE_COLS]
y = df[TARGET_COL]

X.head()


Unnamed: 0,ram,ram_type,rom,rom_type,gpu,display_size,resolution_width,resolution_height,os,warranty,spec_rating
0,8,DDR4,512,SSD,4GB AMD Radeon RX 6500M,15.6,1920.0,1080.0,Windows 11 OS,1,73.0
1,8,DDR4,512,SSD,Intel UHD Graphics,15.6,1920.0,1080.0,Windows 11 OS,1,60.0
2,8,DDR4,512,SSD,Intel Iris Xe Graphics,14.0,1920.0,1080.0,Windows 11 OS,1,69.323529
3,16,LPDDR5,512,SSD,Intel Integrated Iris Xe,14.0,2240.0,1400.0,Windows 11 OS,1,66.0
4,8,DDR4,256,SSD,Apple M1 Integrated Graphics,13.3,2560.0,1600.0,Mac OS,1,69.323529


In [96]:
cat_cols = [
    "ram_type",
    "rom_type",
    "gpu",
    "os",
    "warranty"
]

num_cols = [
    "ram",
    "rom",
    "display_size",
    "resolution_width",
    "resolution_height",
    "spec_rating"
]


In [97]:
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=False), cat_cols),
        ("num", StandardScaler(), num_cols)
    ]
)

pipe = Pipeline([
    ("preprocess", preprocessor),
    ("model", LinearRegression())
])


In [98]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [99]:
pipe.fit(X_train, y_train)
print("✅ Model trained successfully")


✅ Model trained successfully


In [100]:
with open("laptop_price_model.pkl", "wb") as f:
    pickle.dump(pipe, f)

print("✅ Model saved")


✅ Model saved
