In [1]:
import pandas as pd
import numpy as np
import joblib
import re

# -----------------------------
# Load Model & Dataset
# -----------------------------

MODEL_PATH = "../04_modeling/models/price_prediction_model.pkl"
model = joblib.load(MODEL_PATH)

cars = pd.read_csv("../02_data_cleaning/clean_data/cars_merged.csv")
print("Loaded model and dataset.")
cars.head()

Loaded model and dataset.


Unnamed: 0,ref_no,title,make,model,year,mileage,engine_cc,transmission,fuel,seats,location,price_usd,total_price_usd,price_ugx,total_price_ugx,url,source
0,BY759022,2006 HONDA ACTY TRUCK,Honda,Acty,2006,111250.0,650.0,MT,Petrol,2,Location,2370,39730.0,8532000,143028000.0,/honda/acty-truck/by759022/id/11910020/,BeForward
1,BY759024,2013 MITSUBISHI CANTER,Mitsubishi,Canter,2013,312401.0,2990.0,Semi AT,Diesel,3,Location,3920,77470.0,14112000,278892000.0,/mitsubishi/canter/by759024/id/11910065/,BeForward
2,CA462307,2014 TOYOTA HIACE VAN DX,Toyota,Hiace,2014,161352.0,2980.0,AT,Diesel,3,Location,5960,84920.0,21456000,305712000.0,/toyota/hiace-van/ca462307/id/12565207/,BeForward
3,CA740447,2011 TOYOTA WISH\n ...,Toyota,Wish,2011,154001.0,1790.0,AT,Petrol,7,Location,2430,45440.0,8748000,163584000.0,/toyota/wish/ca740447/id/12831915/,BeForward
4,CB026605,2017 HONDA FIT HYBRID\n ...,Honda,Fit,2017,111072.0,1490.0,AT,Hybrid(Petrol),5,Location,4790,66330.0,17244000,238788000.0,/honda/fit-hybrid/cb026605/id/13106276/,BeForward


In [3]:
def force_numeric(val):
    if pd.isna(val):
        return np.nan
    s = str(val).strip().lower()
    if s in ["ask", "-", "--", "", "nan", "none"]:
        return np.nan
    digits = re.sub(r"\D", "", s)
    return float(digits) if digits else np.nan


In [4]:
def get_user_input():
    print("Enter car details:\n")

    data = {}

    data["make"] = input("Make (e.g., Toyota): ").strip().title()
    data["model"] = input("Model (e.g., Premio): ").strip().title()
    data["year"] = force_numeric(input("Year (e.g., 2012): "))
    data["mileage"] = force_numeric(input("Mileage (km): "))
    data["engine_cc"] = force_numeric(input("Engine CC (e.g., 1800): "))
    data["transmission"] = input("Transmission (Automatic/Manual/CVT): ").strip().title()
    data["fuel"] = input("Fuel Type (Petrol/Diesel/Hybrid): ").strip().title()
    data["seats"] = force_numeric(input("Seats (e.g., 4): "))
    data["source"] = "User"

    return pd.DataFrame([data])

In [5]:
def recommend_price(input_df):
    predicted = model.predict(input_df)[0]
    return predicted

In [6]:
def find_similar_cars(df_input, k=5):
    df = cars.copy()

    # Prefer exact make/model match
    df = df[
        (df["make"].str.lower() == df_input["make"][0].lower()) &
        (df["model"].str.lower() == df_input["model"][0].lower())
    ]

    if df.empty:
        # fallback â€” same make only
        df = cars[cars["make"].str.lower() == df_input["make"][0].lower()]

    # Compute similarity score
    df["score"] = (
        (df["year"] - df_input["year"][0]).abs() * 0.4 +
        (df["engine_cc"] - df_input["engine_cc"][0]).abs() * 0.3 +
        (df["mileage"] - df_input["mileage"][0]).abs() * 0.3
    )

    df = df.sort_values("score").head(k)

    return df


In [7]:
def price_range(optimal):
    low = optimal * 0.9
    high = optimal * 1.1
    return low, optimal, high


In [9]:
print("### CAR PRICE RECOMMENDATION SYSTEM ###\n")

user_df = get_user_input()

# Predict
recommended_price = recommend_price(user_df)

low, fair, high = price_range(recommended_price)

print("\n### RECOMMENDED PRICE (UGX) ###")
print(f"Optimal Price: {fair:,.0f} UGX")
print(f"Suggested Price Range: {low:,.0f} UGX  -  {high:,.0f} UGX")

# Show similar cars
print("\n### SIMILAR CARS ###")
similar = find_similar_cars(user_df, k=5)
display(similar[[
    "make","model","year","mileage","engine_cc","price_ugx","source","url"
]])


### CAR PRICE RECOMMENDATION SYSTEM ###

Enter car details:


### RECOMMENDED PRICE (UGX) ###
Optimal Price: 10,015,830 UGX
Suggested Price Range: 9,014,247 UGX  -  11,017,413 UGX

### SIMILAR CARS ###


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["score"] = (


Unnamed: 0,make,model,year,mileage,engine_cc,price_ugx,source,url
303,Toyota,Rav4,2007,156600.0,2200.0,16452000,BeForward,/toyota/rav4/cb205702/id/13263119/
2110,Toyota,Aqua,2012,155417.0,1500.0,10404000,BeForward,/toyota/aqua/cb203122/id/13260511/
307,Toyota,Rav4,2009,159000.0,2200.0,24300000,BeForward,/toyota/rav4/cb205706/id/13263123/
86,Toyota,Hilux,2002,155200.0,2494.0,38088000,BeForward,/toyota/hilux/cb205879/id/13263298/
1251,Toyota,Hiace,2009,160253.0,2000.0,39852000,BeForward,/toyota/hiace-van/ca631632/id/12718008/
