In [1]:
# Make project root importable so `import src...` works
from pathlib import Path
import sys

# The notebook is expected to be launched from notebooks/, so the project root
# is the parent of the current working directory.
PROJECT_ROOT = Path().resolve().parents[0]
_root_entry = str(PROJECT_ROOT)
if _root_entry not in sys.path:
    # Prepend (rather than append) so `import src...` is looked up here first.
    sys.path.insert(0, _root_entry)

print("Project root on sys.path:", PROJECT_ROOT)


Project root on sys.path: C:\Users\User\Desktop\Projects\Car_Price


In [2]:
from pathlib import Path
import pandas as pd
import numpy as np
import json, joblib

from src.inference import load_artifacts, predict_and_cluster
from src.recommend import load_catalog, recommend_by_budget_only, recommend_minimal

# Project directories, resolved relative to the notebook's working directory.
# PROJECT_ROOT is recomputed here so this cell does not rely on an earlier one.
PROJECT_ROOT = Path().resolve().parents[0]
MODELS_DIR = PROJECT_ROOT.joinpath("models")
REPORTS_DIR = PROJECT_ROOT.joinpath("reports", "results")
REPORTS_DIR.mkdir(parents=True, exist_ok=True)  # later cells write their CSVs here

# Load everything the smoke tests need once, up front.
artifacts = load_artifacts(PROJECT_ROOT)
catalog = load_catalog(PROJECT_ROOT)

# Short summary of what was loaded, one item per line.
print(
    "Loaded:",
    f" - model: {type(artifacts['model']).__name__}",
    f" - features: {len(artifacts['feature_columns'])}",
    f" - kmeans feats: {artifacts['kmeans_features']}",
    sep="\n",
)


Loaded:
 - model: LGBMRegressor
 - features: 65
 - kmeans feats: ['Mileage(km)', 'Year', 'Horsepower', 'EngineSize(L)']


In [3]:
# Two hand-written listings used as smoke-test inputs for the price model.
# Key order is kept exactly as authored.
samples = [
    {
        "Brand": "Toyota",
        "Model": "Corolla",
        "Year": 2018,
        "Condition": "Used",
        "Mileage(km)": 84000,
        "EngineSize(L)": 1.6,
        "FuelType": "Gasoline",
        "Horsepower": 132,
        "Torque": 128,
        "Transmission": "Automatic",
        "DriveType": "FWD",
        "BodyType": "Sedan",
        "Doors": 4,
        "Seats": 5,
        "Color": "White",
        "Interior": "Cloth",
        "City": "Berlin",
        "AccidentHistory": "No",
        "Insurance": "Valid",
        "RegistrationStatus": "Complete",
        "FuelEfficiency(L/100km)": 6.8,
        "Options": "bluetooth, rear camera, navigation",
    },
    {
        "Brand": "BMW",
        "Model": "3 Series",
        "Year": 2019,
        "Condition": "Used",
        "Mileage(km)": 60000,
        "EngineSize(L)": 2.0,
        "FuelType": "Gasoline",
        "Horsepower": 181,
        "Torque": 200,
        "Transmission": "Automatic",
        "DriveType": "RWD",
        "BodyType": "Sedan",
        "Doors": 4,
        "Seats": 5,
        "Color": "Black",
        "Interior": "Leather",
        "City": "Paris",
        "AccidentHistory": "No",
        "Insurance": "Valid",
        "RegistrationStatus": "Complete",
        "FuelEfficiency(L/100km)": 7.2,
        "Options": "sunroof, navigation, heated seats",
    },
]

# Run each sample through the pipeline, check the result, and collect one
# record per sample (sample ids are 1-based).
pred_rows = []
for sample_id, sample in enumerate(samples, start=1):
    result = predict_and_cluster(sample, artifacts)
    print(f"Sample {sample_id} →", result)

    # Sanity: the price must be positive and the cluster label a plain int.
    assert out_ok if False else result["predicted_price"] > 0
    assert isinstance(result["cluster_label"], int)

    pred_rows.append({"sample_id": sample_id, **result})

# Persist the predictions next to the other smoke-test reports and display them.
pred_df = pd.DataFrame(pred_rows)
pred_path = REPORTS_DIR.joinpath("smoke_predict_samples.csv")
pred_df.to_csv(pred_path, index=False)
print("Saved:", pred_path)
pred_df


Sample 1 → {'predicted_price': 12540.410306948284, 'cluster_label': 0, 'cluster_name': 'Budget'}
Sample 2 → {'predicted_price': 11447.931733040676, 'cluster_label': 0, 'cluster_name': 'Budget'}
Saved: C:\Users\User\Desktop\Projects\Car_Price\reports\results\smoke_predict_samples.csv


Unnamed: 0,sample_id,predicted_price,cluster_label,cluster_name
0,1,12540.410307,0,Budget
1,2,11447.931733,0,Budget


In [4]:
# Budget-only recommendation smoke test: sedans around an 18000 budget.
rec_budget = recommend_by_budget_only(
    budget_usd=18000,
    catalog=catalog,
    pct=0.15,
    top_n=10,
    filters={"BodyType": "Sedan"},  # optional
)

# Sanity checks: a non-empty result, the expected columns, integer door counts.
assert not rec_budget.empty
assert all(
    column in rec_budget.columns
    for column in ("Price($)", "Doors", "similarity")
)
assert pd.api.types.is_integer_dtype(rec_budget["Doors"])

# Save the recommendations with the other smoke-test reports and preview them.
out_path = REPORTS_DIR.joinpath("smoke_reco_budget.csv")
rec_budget.to_csv(out_path, index=False)
print("Saved:", out_path)
rec_budget.head(10)


Saved: C:\Users\User\Desktop\Projects\Car_Price\reports\results\smoke_reco_budget.csv


Unnamed: 0,rec_id,PredPriceUser,Brand,Model,Year,Condition,Price($),Mileage(km),EngineSize(L),Horsepower,Doors,Seats,FuelType,Transmission,DriveType,BodyType,Color,Interior,City,similarity
0,1040,18000,Tesla,Model 3,2021,Used,18000,11,0.0,399,4,5,electric,automatic,AWD,Sedan,black,cloth,Paris,1.0
1,6657,18000,Mercedes-Benz,C-Class,2018,Used,18002,12,2.0,315,4,5,gasoline,automatic,RWD,Sedan,blue,leather,Toronto,1.0
2,25407,18000,Audi,A6,2014,Used,18002,12,2.7,285,4,5,diesel,automatic,AWD,Sedan,red,cloth,Cape Town,1.0
3,27081,18000,Mercedes-Benz,E-Class,2020,Damaged,17997,11,2.2,289,4,5,diesel,manual,RWD,Sedan,silver,leather,Cape Town,1.0
4,31525,18000,Tesla,Model S,2011,Used,17994,12,0.0,835,5,5,electric,automatic,AWD,Sedan,red,cloth,Los Angeles,1.0
5,10378,18000,Honda,Accord,2022,Used,18006,11,1.9,251,4,5,hybrid,automatic,FWD,Sedan,white,cloth,Dubai,1.0
6,37383,18000,Tesla,Model 3,2018,Used,18006,12,0.0,364,4,5,electric,automatic,RWD,Sedan,black,leather,Tokyo,1.0
7,38961,18000,Audi,A6,2015,Used,18007,12,2.0,292,4,5,gasoline,automatic,AWD,Sedan,black,cloth,Los Angeles,1.0
8,11279,18000,BMW,5 Series,2014,Used,18008,12,2.9,366,4,5,hybrid,manual,AWD,Sedan,red,cloth,Tokyo,1.0
9,8938,18000,Mercedes-Benz,S-Class,2009,Used,18011,12,3.0,503,4,4,gasoline,manual,RWD,Sedan,blue,leather,Sao Paulo,0.999


In [5]:
# Minimal-input recommendation smoke test: only four numeric fields are given.
minimal_input = {"Mileage(km)": 85000, "Year": 2018, "Horsepower": 130, "EngineSize(L)": 1.6}

rec_min = recommend_minimal(
    min_input=minimal_input,
    catalog=catalog,
    artifacts=artifacts,
    top_n=10,
    filters={"BodyType": "Sedan"},
)

# NOTE(review): in the saved run below every returned row has similarity 0.03
# and catalog `Mileage(km)` values around 13, while the input mileage is 85000.
# Presumably the raw input and the catalog features are on different scales;
# confirm inside src.recommend. The checks below only validate the result's shape.

# Sanity checks: a non-empty result, the expected columns, integer door counts.
assert not rec_min.empty
assert all(
    column in rec_min.columns
    for column in ("Price($)", "Doors", "similarity")
)
assert pd.api.types.is_integer_dtype(rec_min["Doors"])

# Save the recommendations with the other smoke-test reports and preview them.
out_path = REPORTS_DIR.joinpath("smoke_reco_minimal.csv")
rec_min.to_csv(out_path, index=False)
print("Saved:", out_path)
rec_min.head(10)


Saved: C:\Users\User\Desktop\Projects\Car_Price\reports\results\smoke_reco_minimal.csv


Unnamed: 0,rec_id,Brand,Model,Year,Condition,Price($),Mileage(km),EngineSize(L),Horsepower,Doors,Seats,FuelType,Transmission,DriveType,BodyType,Color,Interior,City,similarity
0,39971,Tesla,Model S,2005,Used,8500,13,0.0,848,5,5,electric,automatic,AWD,Sedan,silver,cloth,Delhi,0.03
1,28718,Tesla,Model S,2005,Used,8172,13,0.0,848,5,5,electric,automatic,AWD,Sedan,gray,cloth,Berlin,0.03
2,33859,Tesla,Model S,2005,Used,8610,13,0.0,848,5,5,electric,automatic,AWD,Sedan,black,leather,Sao Paulo,0.03
3,13762,Tesla,Model S,2005,Used,8521,13,0.0,848,5,5,electric,automatic,AWD,Sedan,red,leather,Tokyo,0.03
4,36582,Tesla,Model S,2005,Used,7461,13,0.0,848,5,5,electric,automatic,AWD,Sedan,black,leather,Delhi,0.03
5,3826,Tesla,Model S,2005,Used,8453,13,0.0,842,5,5,electric,automatic,AWD,Sedan,silver,cloth,Toronto,0.03
6,3217,Tesla,Model S,2005,Used,8384,13,0.0,837,5,5,electric,automatic,AWD,Sedan,silver,leather,Berlin,0.03
7,34366,Tesla,Model S,2005,Used,8462,13,0.0,832,5,5,electric,automatic,AWD,Sedan,red,leather,Sao Paulo,0.03
8,6841,Tesla,Model S,2005,Used,8550,13,0.0,826,5,5,electric,automatic,AWD,Sedan,gray,cloth,Tehran,0.03
9,25615,Tesla,Model S,2005,Used,7800,13,0.0,820,5,5,electric,automatic,AWD,Sedan,blue,cloth,Tehran,0.03
