In [1]:
import pandas as pd
import numpy as np
import joblib
import re

In [2]:
MODEL_PATH = "models/overpricing_model.pkl"
clf = joblib.load(MODEL_PATH)

cars = pd.read_csv("../02_data_cleaning/clean_data/cars_merged.csv")
print("Loaded overpricing model and dataset.")

Loaded overpricing model and dataset.


In [3]:
def force_numeric(val):
    if pd.isna(val):
        return np.nan
    s = str(val).strip().lower()
    if s in ["ask", "-", "--", "", "nan", "none"]:
        return np.nan
    digits = re.sub(r"\D", "", s)
    return float(digits) if digits else np.nan


In [4]:
def get_user_input():
    print("Enter car details:\n")

    data = {}

    data["make"] = input("Make (e.g., Toyota): ").strip().title()
    data["model"] = input("Model (e.g., Premio): ").strip().title()
    data["year"] = force_numeric(input("Year (e.g., 2012): "))
    data["mileage"] = force_numeric(input("Mileage (km): "))
    data["engine_cc"] = force_numeric(input("Engine CC (e.g., 1800): "))
    data["transmission"] = input("Transmission (Automatic/Manual/CVT): ").strip().title()
    data["fuel"] = input("Fuel Type (Petrol/Diesel/Hybrid): ").strip().title()
    data["seats"] = force_numeric(input("Seats (e.g., 4): "))
    data["source"] = "User"

    data["price_ugx"] = force_numeric(input("Sellerâ€™s Asking Price (UGX): "))

    return pd.DataFrame([data])


In [5]:
def classify_overpricing(input_df):
    prediction = clf.predict(input_df)[0]
    return prediction

In [6]:
def find_similar_cars(df_input, k=5):
    df = cars.copy()

    df = df[
        (df["make"].str.lower() == df_input["make"][0].lower()) &
        (df["model"].str.lower() == df_input["model"][0].lower())
    ]

    if df.empty:
        df = cars[cars["make"].str.lower() == df_input["make"][0].lower()]

    df["score"] = (
        (df["year"] - df_input["year"][0]).abs() * 0.4 +
        (df["engine_cc"] - df_input["engine_cc"][0]).abs() * 0.3 +
        (df["mileage"] - df_input["mileage"][0]).abs() * 0.3
    )

    return df.sort_values("score").head(k)


In [8]:
print("### CAR PRICE REASONABLENESS CHECKER ###\n")

input_df = get_user_input()

# Predict classification (underpriced / fair / overpriced)
status = classify_overpricing(input_df)

print("\n### MODEL PREDICTION ###")
print(f"Price Status: **{status.upper()}**")

# Show similar cars
print("\n### SIMILAR CARS ###")
similar = find_similar_cars(input_df, k=5)

display(similar[[
    "make","model","year","mileage","engine_cc",
    "price_ugx","source","url"
]])


### CAR PRICE REASONABLENESS CHECKER ###

Enter car details:


### MODEL PREDICTION ###
Price Status: **FAIR**

### SIMILAR CARS ###


Unnamed: 0,make,model,year,mileage,engine_cc,price_ugx,source,url
319,Toyota,Rav4,2015,63250.0,2200.0,65844000,BeForward,/toyota/rav4/cb205718/id/13263135/
304,Toyota,Rav4,2007,65018.0,2000.0,31932000,BeForward,/toyota/rav4/cb205703/id/13263120/
317,Toyota,Rav4,2014,69000.0,2000.0,65376000,BeForward,/toyota/rav4/cb205716/id/13263133/
301,Toyota,Rav4,2005,70000.0,2000.0,57276000,BeForward,/toyota/rav4/cb205700/id/13263117/
2376,Toyota,Rav4,2021,50969.0,1980.0,57996000,BeForward,/toyota/rav4/cb202575/id/13259963/
