<a href="https://colab.research.google.com/github/Malaiyarasan/price-prediction-system/blob/main/notebooks/price_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# --- CELL 1 ---
# Install required libs
!pip install -q pandas numpy scikit-learn gradio joblib

import pandas as pd
import numpy as np
from pathlib import Path

# Folder that holds the dataset CSV; created on first run.
DATA_DIR = Path("data")
DATA_DIR.mkdir(exist_ok=True)

# The synthetic dataset is only generated when no CSV is present yet,
# so a real dataset dropped at this path is never overwritten.
csv_path = DATA_DIR / "product_prices.csv"

if csv_path.exists():
    print("Dataset already exists:", csv_path)
else:
    # Fixed seed: the generated rows are the same on every fresh run.
    np.random.seed(42)
    n = 500

    # Feature columns (the order of the random draws is significant for
    # reproducibility, so it must not be rearranged).
    df = pd.DataFrame({
        "product_weight": np.random.uniform(0.2, 10.0, n),
        "product_size": np.random.uniform(5, 50, n),
        "category_encoded": np.random.randint(0, 5, n),
        "rating": np.random.uniform(1.0, 5.0, n),
        "discount_percent": np.random.uniform(0, 60, n),
    })

    # Target: linear in weight, size and rating, reduced by the discount,
    # plus Gaussian noise. `category_encoded` does not enter the price.
    base_price = (
        df["product_weight"] * 120
        + df["product_size"] * 8
        + df["rating"] * 40
        - df["discount_percent"] * 2.5
    )
    noise = np.random.normal(0, 25, n)
    df["price"] = (base_price + noise).round(2)

    df.to_csv(csv_path, index=False)
    print("Created dataset at:", csv_path)

# Always load from disk so the frame shown below is what later cells read.
df = pd.read_csv(csv_path)
df.head()




Created dataset at: data/product_prices.csv


Unnamed: 0,product_weight,product_size,category_encoded,rating,discount_percent,price
0,3.870493,36.417277,3,3.351537,41.273047,727.94
1,9.517,29.124336,2,4.794333,22.515594,1518.88
2,7.373541,18.928743,4,2.010111,32.32174,1046.77
3,6.066853,41.620776,0,2.437003,31.44955,1113.0
4,1.728983,35.812903,4,2.517434,50.882867,498.02


In [3]:
# --- CELL 2 ---
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib
from pathlib import Path

# Folder where the fitted estimator is persisted; created on first run.
MODEL_DIR = Path("models")
MODEL_DIR.mkdir(exist_ok=True)

# Load the dataset from the CSV on disk.
df = pd.read_csv("data/product_prices.csv")

# Features & target: every column except "price" is a model input.
X = df.drop("price", axis=1)
y = df["price"]

# Train/Test Split: hold out 20% of the rows; the fixed random_state makes
# the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Model
model = RandomForestRegressor(
    n_estimators=240,
    max_depth=12,
    random_state=42
)

# Train
print("Training model...")
model.fit(X_train, y_train)

# Predict on the held-out rows only.
y_pred = model.predict(X_test)

# Evaluation (RMSE is derived from MSE so it is in the same units as price).
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("\nModel Performance:")
print("MAE :", mae)
print("RMSE:", rmse)
print("R²  :", r2)

# Save model so the demo cell can load it without retraining.
model_path = MODEL_DIR / "price_model.joblib"
joblib.dump(model, model_path)
print("\nSaved price prediction model to:", model_path)


Training model...

Model Performance:
MAE : 51.11258529307526
RMSE: 62.687783262993825
RÂ²  : 0.9702712448845467

Saved price prediction model to: models/price_model.joblib


In [4]:
# --- CELL 3: Gradio Live Demo ---
import gradio as gr
import joblib
import numpy as np
import pandas as pd
from pathlib import Path

# Load the persisted estimator, failing early with a pointer to the training
# cell when the file is absent.
# NOTE: joblib.load unpickles the file, so only load model files you trust.
model_path = Path("models") / "price_model.joblib"
if model_path.exists():
    model = joblib.load(model_path)
else:
    raise FileNotFoundError("Model not found! Run Cell 2 first.")

def predict_price(weight, size, category, rating, discount):
    """Return a formatted price estimate for one product.

    The five inputs are passed to the module-level ``model`` as a single-row
    DataFrame whose column names and order match the training features
    (every dataset column except ``price``). scikit-learn warns when an
    estimator fitted on named columns is given an unnamed array, so the
    names are supplied explicitly.

    Args:
        weight: Product weight (``product_weight``).
        size: Product size (``product_size``).
        category: Encoded category (``category_encoded``).
        rating: Product rating (``rating``).
        discount: Discount in percent (``discount_percent``).

    Returns:
        A display string with the estimated price, or a string starting with
        ``"Error:"`` when an input is missing or prediction fails.
    """
    features = {
        "product_weight": weight,
        "product_size": size,
        "category_encoded": category,
        "rating": rating,
        "discount_percent": discount,
    }

    # An emptied numeric field reaches this function as None; report it
    # clearly instead of letting the estimator fail on it.
    missing = [name for name, value in features.items() if value is None]
    if missing:
        return f"Error: missing value for {', '.join(missing)}"

    try:
        input_data = pd.DataFrame([features])
        pred = model.predict(input_data)[0]
        return f"💰 Estimated Price: ₹{pred:.2f}"
    except Exception as e:
        # Best-effort UI: surface the failure as text rather than crashing
        # the demo.
        return f"Error: {str(e)}"

# Gradio UI: five numeric inputs, in the same order as predict_price's
# parameters, and a single text output.
demo = gr.Interface(
    fn=predict_price,
    inputs=[
        gr.Number(label="Product Weight (kg)", value=1.0),
        gr.Number(label="Product Size (cm)", value=20),
        gr.Number(label="Category (0–4)", value=1),
        gr.Number(label="Rating (1–5)", value=4.5),
        gr.Number(label="Discount (%)", value=10),
    ],
    outputs="text",
    title="Price Prediction Model",
    description="Enter product features to estimate its selling price.",
)

# Launch with public URL for portfolio.
# launch() returns a tuple (app, local_url, share_url); the share URL must be
# taken from that tuple, it is not an attribute of the returned object.
app, local_url, share_url = demo.launch(share=True)
print("\nPublic URL:", share_url if share_url else "Check above output")



Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://803a16f2a145630f00.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)



Public URL: Check above output
