In [2]:
# Import Libraries
import pandas as pd
import numpy as np
import re
import pickle
from sklearn.preprocessing import OrdinalEncoder

In [3]:
# Load Model and Data
model_path = "../Models/cat_model-v4.pkl"
data_path = "../Data/laptop_price - dataset.csv"

# Load Model
with open(model_path, "rb") as f:
    model = pickle.load(f)
print("✅ Model loaded successfully.")

# Load Data
df = pd.read_csv(data_path)
print(f"✅ Data loaded with shape {df.shape}.")


✅ Model loaded successfully.
✅ Data loaded with shape (1275, 15).


In [4]:
# Memory Processing Function
def memory_split(memory):
    try:
        if '+' in memory:
            mem1, mem2 = memory.split('+')
            mem_type = mem1.split(' ')[1] + '+' + mem2.split(' ')[1]
            mem1_capacity = int(re.findall(r'\d+', mem1)[0])
            mem2_capacity = int(re.findall(r'\d+', mem2)[0])
            if 'GB' in mem1 and 'GB' in mem2:
                total_capacity = mem1_capacity + mem2_capacity
            elif 'TB' in mem1 and 'GB' in mem2:
                total_capacity = mem1_capacity * 1024 + mem2_capacity
            elif 'GB' in mem1 and 'TB' in mem2:
                total_capacity = mem1_capacity + mem2_capacity * 1024
            else:
                total_capacity = mem1_capacity * 1024 + mem2_capacity * 1024
            return total_capacity, mem_type
        else:
            capacity = int(re.findall(r'\d+', memory)[0])
            mem_type = memory.split(' ')[1]
            if 'TB' in memory:
                capacity *= 1024
            return capacity, mem_type
    except:
        return np.nan, np.nan


In [5]:
# Extract Screen Resolution
def process_screen_resolution(df):
    df[['Resolution_Width', 'Resolution_Height']] = df['ScreenResolution'].str.extract(r'(\d{3,4})x(\d{3,4})').astype(int)
    df['Contains_HD'] = df['ScreenResolution'].str.contains('HD', case=False).astype(int)
    df['Contains_IPS'] = df['ScreenResolution'].str.contains('IPS', case=False).astype(int)
    df['Contains_Touchscreen'] = df['ScreenResolution'].str.contains('Touchscreen', case=False).astype(int)
    df['Contains_4K'] = df['ScreenResolution'].str.contains('4K', case=False).astype(int)
    df['Contains_Quad_HD_plus'] = df['ScreenResolution'].str.contains('Quad HD+', case=False).astype(int)
    df.drop(['ScreenResolution'], axis=1, inplace=True)


In [6]:
# Extract CPU Features
def extract_family(cpu_type, company):
    patterns = {
        "Intel": r'^(Core|Xeon|Pentium|Celeron|Atom|Core M)',
        "AMD": r'^(Ryzen|A[0-9]|FX|Athlon|E[0-9]|Pro|Sempron)',
        "Samsung": r'^(Exynos)',
    }
    match = re.search(patterns.get(company, 'Unknown'), cpu_type, re.IGNORECASE)
    return match.group(1) if match else 'Unknown'

def extract_generation(cpu_type, company):
    if company == 'Intel':
        match = re.search(r'(\d{4,5}[A-Za-z]*)$', cpu_type)
    elif company == 'AMD':
        match = re.search(r'(\d{4,5})$', cpu_type)
    elif company == 'Samsung':
        match = re.search(r'Exynos (\d+)', cpu_type, re.IGNORECASE)
    else:
        match = None
    return match.group(1)[:1] if match else 'Unknown'

In [7]:
# GPU Processing
def process_gpu(df):
    df['GPU_Family'] = df['GPU_Type'].apply(lambda x: x.split(' ')[0])
    df['GPU_Series'] = df['GPU_Type'].str.extract(r'(\d+)').fillna(df['GPU_Type'])
    df.drop(['GPU_Type'], axis=1, inplace=True)

In [8]:
# Apply Preprocessing to a Random Row
random_row = df.sample(1).reset_index(drop=True)

# Apply Memory Preprocessing
random_row['Memory Capacity'], random_row['Memory Type'] = zip(*random_row['Memory'].apply(memory_split))
random_row.drop(['Memory'], axis=1, inplace=True)

# Apply Screen Resolution Processing
process_screen_resolution(random_row)

# Apply CPU Processing
random_row['CPU_Family'] = random_row.apply(lambda row: extract_family(row['CPU_Type'], row['CPU_Company']), axis=1)
random_row['CPU_Generation'] = random_row.apply(lambda row: extract_generation(row['CPU_Type'], row['CPU_Company']), axis=1)
random_row.drop(['CPU_Type'], axis=1, inplace=True)

# Apply GPU Processing
process_gpu(random_row)

In [9]:
# Encoding Columns
ordinal_cols = ['CPU_Family', 'CPU_Generation', 'GPU_Family', 'GPU_Series']

# Apply Ordinal Encoding
ordinal_encoder = OrdinalEncoder()
random_row[ordinal_cols] = ordinal_encoder.fit_transform(random_row[ordinal_cols].astype(str))

# Apply One-Hot Encoding
encoding_cols = ['Company', 'Product', 'TypeName', 'CPU_Company', 'Memory Type', 'GPU_Company', 'OpSys']
random_row_encoded = pd.get_dummies(random_row, columns=encoding_cols, drop_first=True)

In [10]:
# Reorder Columns to Match Model's Expected Features
model_features = model.feature_names_
random_row_encoded = random_row_encoded.reindex(columns=model_features, fill_value=0)

# Check for Missing and Extra Features
missing_columns = set(model_features) - set(random_row_encoded.columns)
extra_columns = set(random_row_encoded.columns) - set(model_features)

# Display Results
if not missing_columns and not extra_columns:
    print("✅ The preprocessed DataFrame matches the model's expected features!")
else:
    print("🚫 Feature Mismatch Detected!")
    if missing_columns:
        print(f"**Missing Columns:** {missing_columns}")
    if extra_columns:
        print(f"**Extra Columns:** {extra_columns}")

✅ The preprocessed DataFrame matches the model's expected features!


In [11]:
# Predict Laptop Price for Random Row
prediction = model.predict(random_row_encoded)
print(f"💻 Predicted Laptop Price: {round(prediction[0], 2)}")

💻 Predicted Laptop Price: 3854.69
