<a href="https://colab.research.google.com/github/Gaurab-Kharal/Advance-data-analysis---sql/blob/main/second_phase_train_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the libraries this notebook uses into the Colab runtime (-q keeps pip's output short).
!pip install -q scikit-learn pandas joblib

# Mount Google Drive so the dataset and model folders under /content/drive are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd

# Cleaned laptop dataset stored on the mounted Google Drive.
DATA_PATH = "/content/drive/MyDrive/laptop_price_project/data/laptops_cleaned.csv"

# Read the whole CSV into a DataFrame.
# NOTE(review): the preview below shows an "Unnamed: 0" column, which looks like a
# row index written out by an earlier to_csv call. It is not in the feature list
# used for training — confirm, and consider reading with index_col=0.
df = pd.read_csv(DATA_PATH)

# Print (rows, columns), then show the first rows as the cell's rich output.
print(df.shape)
df.head()


(893, 25)


Unnamed: 0,brand,price,spec_rating,processor,CPU,Ram,Ram_type,ROM,ROM_type,GPU,...,Ram_GB,ROM_GB,cpu_brand,cpu_family,cpu_generation,cpu_cores,cpu_threads,gpu_brand,gpu_vram_gb,OS_simple
0,Hp,49900,73.0,5th Gen AMD Ryzen 5 5600H,"Hexa Core, 12 Threads",8,DDR4,512GB,SSD,4GB AMD Radeon RX 6500M,...,8,512.0,AMD,Ryzen5,5.0,6.0,12.0,AMD,4.0,Windows
1,Hp,39900,60.0,12th Gen Intel Core i3 1215U,"Hexa Core (2P + 4E), 8 Threads",8,DDR4,512GB,SSD,Intel UHD Graphics,...,8,512.0,Intel,I3,12.0,6.0,8.0,Intel,,Windows
2,Acer,26990,69.323529,11th Gen Intel Core i3 1115G4,"Dual Core, 4 Threads",8,DDR4,512GB,SSD,Intel Iris Xe Graphics,...,8,512.0,Intel,I3,11.0,,4.0,Intel,,Windows
3,Lenovo,59729,66.0,12th Gen Intel Core i5 1240P,"12 Cores (4P + 8E), 16 Threads",16,LPDDR5,512GB,SSD,Intel Integrated Iris Xe,...,16,512.0,Intel,I5,12.0,12.0,16.0,Intel,,Windows
4,Apple,69990,69.323529,Apple M1,Octa Core (4P + 4E),8,DDR4,256GB,SSD,Apple M1 Integrated Graphics,...,8,256.0,Apple,,,8.0,,Apple,,Mac


In [None]:
from sklearn.model_selection import train_test_split

# Column the model learns to predict.
target = "price"

# Predictor columns, listed one per line.
features = [
    'brand',
    'spec_rating',
    'Ram_GB',
    'ROM_GB',
    'cpu_brand',
    'cpu_family',
    'cpu_generation',
    'cpu_cores',
    'cpu_threads',
    'gpu_brand',
    'gpu_vram_gb',
    'OS_simple',
]

# Separate the predictors from the target.
X, y = df[features], df[target]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor

# Columns treated as numbers.
numeric_features = [
    'spec_rating',
    'Ram_GB',
    'ROM_GB',
    'cpu_generation',
    'cpu_cores',
    'cpu_threads',
    'gpu_vram_gb',
]

# Columns treated as categories.
categorical_features = [
    'brand',
    'cpu_brand',
    'cpu_family',
    'gpu_brand',
    'OS_simple',
]

# Numeric branch: fill gaps with the column median, then standardise.
numeric_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
    ]
)

# Categorical branch: fill gaps with the literal label 'missing', then one-hot encode.
# Categories not seen during fitting are ignored at prediction time instead of raising.
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ]
)

# Route each column group through its own branch.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ]
)

# Full estimator: preprocessing followed by a seeded random forest that uses all CPU cores.
forest = RandomForestRegressor(n_estimators=120, random_state=42, n_jobs=-1)
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('model', forest),
    ]
)


In [None]:
# Fit the preprocessing steps and the random forest on the training split.
pipeline.fit(X_train, y_train)
print("Model training complete.")


Model training complete.


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Score the fitted pipeline on the held-out rows.
preds = pipeline.predict(X_test)

# MAE is the mean absolute error; RMSE is the square root of the mean squared error.
# Both are in the same units as the price column.
mse = mean_squared_error(y_test, preds)
mae, rmse = mean_absolute_error(y_test, preds), np.sqrt(mse)

print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")


MAE: 13746.18
RMSE: 25845.54


In [None]:
import json
import os

import joblib

# Folder on the mounted Drive that receives the trained artefacts.
MODEL_DIR = "/content/drive/MyDrive/laptop_price_project/model/"

# Create the folder on first use. Without this, joblib.dump and open() raise
# FileNotFoundError when the model directory has not been created by hand.
os.makedirs(MODEL_DIR, exist_ok=True)

# Persist the whole fitted pipeline (preprocessing + model) as a single file.
joblib.dump(pipeline, os.path.join(MODEL_DIR, "pipeline.joblib"))

# Store the feature list next to the model so whatever loads it can build its
# input frame with the same column names.
meta = {
    "features": features
}

with open(os.path.join(MODEL_DIR, "feature_metadata.json"), "w", encoding="utf-8") as f:
    json.dump(meta, f)

print("Model saved to Drive.")


Model saved to Drive.


In [None]:
from google.colab import files

# files.download("/content/drive/MyDrive/laptop_price_project/model/pipeline.joblib")
# files.download("/content/drive/MyDrive/laptop_price_project/model/feature_metadata.json")


In [None]:
# Report the installed scikit-learn version.
# NOTE(review): presumably recorded so the environment that later loads
# pipeline.joblib can use the same version — confirm.
import sklearn
print(sklearn.__version__)

1.6.1


In [None]:
# Report the installed joblib version (the library used to write pipeline.joblib).
import joblib
print(joblib.__version__)

1.5.3


In [None]:
!pip install -q gradio

import gradio as gr
import pandas as pd
import joblib

# Restore the fitted pipeline from the file on the mounted Drive.
PIPELINE_PATH = "/content/drive/MyDrive/laptop_price_project/model/pipeline.joblib"
pipeline = joblib.load(PIPELINE_PATH)

# Column labels for the one-row frame built in predict_price, listed in the
# same order as that function's arguments.
features = [
    'brand', 'spec_rating', 'Ram_GB', 'ROM_GB',
    'cpu_brand', 'cpu_family', 'cpu_generation',
    'cpu_cores', 'cpu_threads',
    'gpu_brand', 'gpu_vram_gb', 'OS_simple',
]

def predict_price(brand, spec_rating, Ram_GB, ROM_GB, cpu_brand, cpu_family,
                  cpu_generation, cpu_cores, cpu_threads, gpu_brand, gpu_vram_gb, OS_simple):
    """Predict a laptop price from the values entered in the Gradio form.

    The inputs are normalised before they reach the model so that a blank
    field is handled the same way a missing value was handled in training:

    * Text fields are stripped of surrounding whitespace, and a blank (or
      None) value becomes NaN. The training pipeline fills NaN categoricals
      with the literal category 'missing'; an empty string is not treated as
      missing by the imputer and would instead be encoded as an unseen
      category.
    * Numeric fields are coerced with ``pd.to_numeric(errors="coerce")``, so
      an empty Number box (expected to arrive as None) becomes NaN and is
      median-imputed by the pipeline.

    Args:
        brand, cpu_brand, cpu_family, gpu_brand, OS_simple: text fields.
        spec_rating, Ram_GB, ROM_GB, cpu_generation, cpu_cores, cpu_threads,
            gpu_vram_gb: numeric fields.

    Returns:
        str: ``"Estimated Laptop Price: NPR <amount>"`` with the amount
        rounded to a whole number and thousands separators.
    """
    missing = float("nan")

    text_inputs = {
        'brand': brand,
        'cpu_brand': cpu_brand,
        'cpu_family': cpu_family,
        'gpu_brand': gpu_brand,
        'OS_simple': OS_simple,
    }
    number_inputs = {
        'spec_rating': spec_rating,
        'Ram_GB': Ram_GB,
        'ROM_GB': ROM_GB,
        'cpu_generation': cpu_generation,
        'cpu_cores': cpu_cores,
        'cpu_threads': cpu_threads,
        'gpu_vram_gb': gpu_vram_gb,
    }

    row = {}
    for name, value in text_inputs.items():
        cleaned = "" if value is None else str(value).strip()
        row[name] = cleaned if cleaned else missing
    row.update(number_inputs)

    # Build the row by column name so the values cannot drift out of step
    # with the order of `features`.
    data = pd.DataFrame([row], columns=features)

    # Keep text columns as object dtype even when the only value is NaN, and
    # make numeric columns truly numeric (None -> NaN).
    for name in text_inputs:
        data[name] = data[name].astype(object)
    for name in number_inputs:
        data[name] = pd.to_numeric(data[name], errors="coerce")

    pred = pipeline.predict(data)[0]
    return f"Estimated Laptop Price: NPR {pred:,.0f}"

# (widget class, label) for every form field, in predict_price's argument order.
field_specs = [
    (gr.Textbox, "Brand"),
    (gr.Number, "Spec Rating"),
    (gr.Number, "RAM (GB)"),
    (gr.Number, "Storage (GB)"),
    (gr.Textbox, "CPU Brand"),
    (gr.Textbox, "CPU Family"),
    (gr.Number, "CPU Generation"),
    (gr.Number, "CPU Cores"),
    (gr.Number, "CPU Threads"),
    (gr.Textbox, "GPU Brand"),
    (gr.Number, "GPU VRAM (GB)"),
    (gr.Textbox, "OS"),
]
form_inputs = [widget(label=caption) for widget, caption in field_specs]

# Wire the form to predict_price; the result is shown as plain text.
interface = gr.Interface(
    fn=predict_price,
    inputs=form_inputs,
    outputs="text",
    title="Laptop Price Predictor",
    description="Enter laptop specifications to predict price.",
)

# share=True asks Gradio for a public URL in addition to serving the app locally.
interface.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://c14e2043f16d5df08e.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


