In [1]:
import os
import joblib
import tkinter as tk
from tkinter import ttk, messagebox
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [2]:
# Training data; read by the training functions, the pipeline probe, and the GUI drop-down.
CSV_PATH = "carprices4.csv"
# Where the fitted dummy-variables pipeline is persisted with joblib.
DUMMY_PATH = "model_dummy_pipeline.joblib"
# Where the fitted label-then-one-hot pipeline is persisted with joblib.
LABEL_OHE_PATH = "model_label_ohe_pipeline.joblib"

In [3]:
# Train the dummy-variables pipeline (one-hot encode "Car Model", dropping the first category as the baseline)
def train_dummy_pipeline(csv_path=CSV_PATH, dump_path=DUMMY_PATH, save=True):
    """Fit a linear-regression pipeline that dummy-encodes "Car Model".

    Parameters
    ----------
    csv_path : str
        CSV file providing the "Car Model", "Mileage", "Age(yrs)" and
        "Sell Price($)" columns.
    dump_path : str
        Destination for the fitted pipeline; ignored when falsy.
    save : bool
        When true (and ``dump_path`` is set) the fitted pipeline is written
        to ``dump_path`` with ``joblib.dump``.

    Returns
    -------
    sklearn.pipeline.Pipeline
        The fitted pipeline (preprocessing step "pre", estimator step "est").
    """
    data = pd.read_csv(csv_path)
    features = data[["Car Model", "Mileage", "Age(yrs)"]]
    target = data["Sell Price($)"]

    # One-hot encode the model name and drop the first category so it acts as
    # the baseline; the numeric columns pass through untouched.
    encoder = OneHotEncoder(handle_unknown="ignore", drop="first", sparse_output=False)
    preprocessor = ColumnTransformer(
        transformers=[("cat", encoder, ["Car Model"])],
        remainder="passthrough",
    )

    model = Pipeline([("pre", preprocessor), ("est", LinearRegression())])
    model.fit(features, target)

    if save and dump_path:
        joblib.dump(model, dump_path)
    return model


In [None]:
# Train the Label One-Hot pipeline (label codes, then dummy columns, drop first)
def train_label_then_ohe_pipeline(csv_path=CSV_PATH, dump_path=LABEL_OHE_PATH, save=True):
    """Fit a linear-regression pipeline that label-encodes, then one-hot encodes "Car Model".

    Parameters
    ----------
    csv_path : str
        CSV file providing the "Car Model", "Mileage", "Age(yrs)" and
        "Sell Price($)" columns.
    dump_path : str
        Destination for the fitted pipeline; ignored when falsy.
    save : bool
        When true (and ``dump_path`` is set) the fitted pipeline is written
        to ``dump_path`` with ``joblib.dump``.

    Returns
    -------
    sklearn.pipeline.Pipeline
        The fitted pipeline (preprocessing step "pre", estimator step "est").
    """
    data = pd.read_csv(csv_path)
    features = data[["Car Model", "Mileage", "Age(yrs)"]]
    target = data["Sell Price($)"]

    # Stage 1 maps each model name to an integer code (unseen names become -1);
    # stage 2 expands those codes into dummy columns, dropping the first one.
    to_codes = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
    to_dummies = OneHotEncoder(handle_unknown="ignore", drop="first", sparse_output=False)
    categorical_steps = Pipeline([("label", to_codes), ("ohe", to_dummies)])

    preprocessor = ColumnTransformer(
        [("cat", categorical_steps, ["Car Model"])],
        remainder="passthrough",
    )

    model = Pipeline([("pre", preprocessor), ("est", LinearRegression())])
    model.fit(features, target)

    if save and dump_path:
        joblib.dump(model, dump_path)
    return model
    

In [5]:
# Try to load a saved pipeline
def _load_or_retrain(path, trainer, df_for_probe):
    if not os.path.exists(path):
        return trainer()
    try:
        pipe = joblib.load(path)
        probe = df_for_probe.iloc[[0]][["Car Model", "Mileage", "Age(yrs)"]]
        _ = pipe.predict(probe)
        return pipe
    except Exception as e:
        print(f"[warn] Rebuilding {path} due to: {e}")
        return trainer()

In [6]:
# Make sure both pipelines exist and are usable
def ensure_pipelines():
    """Load both saved pipelines, retraining whichever is missing or unusable.

    Returns
    -------
    tuple
        ``(dummy_pipeline, label_then_ohe_pipeline)``.
    """
    probe_frame = pd.read_csv(CSV_PATH)
    # (saved file, fallback trainer) for each encoding strategy, in return order.
    sources = (
        (DUMMY_PATH, train_dummy_pipeline),
        (LABEL_OHE_PATH, train_label_then_ohe_pipeline),
    )
    dummy_pipe, label_pipe = (
        _load_or_retrain(saved_path, trainer, probe_frame)
        for saved_path, trainer in sources
    )
    return dummy_pipe, label_pipe

In [7]:
# Load (or retrain) both pipelines once so the GUI callbacks can reuse them.
PIPE_DUMMY, PIPE_LABEL = ensure_pipelines()

# Load the dataset and collect the distinct, non-missing car models,
# sorted for the drop-down list.
DF_ALL = pd.read_csv(CSV_PATH)
MODEL_CHOICES = DF_ALL["Car Model"].dropna().unique().tolist()
MODEL_CHOICES.sort()

TypeError: OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'

In [None]:
def calculate_price():
    """Callback for the "Calculate Price" button: predict and display a price.

    Reads the selected car model, age and mileage from the GUI widgets and
    validates them. Validation problems are reported in ``result_label`` and
    no prediction is made. Otherwise the pipeline chosen by the encoding radio
    buttons predicts the price, which is shown in ``result_label``; any error
    raised while predicting is shown in an error dialog.
    """
    name = (car_model.get() or "").strip()
    if not name:
        result_label.config(text="Select a car model.")
        return

    try:
        age = int(age_entry.get())
        mileage = int(mileage_entry.get())
    except ValueError:
        result_label.config(text="Enter valid integers for age and mileage.")
        return

    # Negative numbers parse as valid integers, but a negative age or mileage
    # is meaningless and would make the model extrapolate a bogus price.
    if age < 0 or mileage < 0:
        result_label.config(text="Age and mileage cannot be negative.")
        return

    # Column names must match the ones the pipelines were trained on.
    row = pd.DataFrame([{"Car Model": name, "Mileage": mileage, "Age(yrs)": age}])
    # Any radio value other than "dummy" selects the label-then-one-hot pipeline.
    pipe = PIPE_DUMMY if encoding_choice.get() == "dummy" else PIPE_LABEL
    try:
        pred = float(pipe.predict(row)[0])
        result_label.config(text=f"Estimated Price: ${pred:,.2f}")
    except Exception as e:
        messagebox.showerror("Prediction error", str(e))

In [None]:
def reset_fields():
    """Callback for the "Reset" button: clear every input and the result text."""
    car_model.set("")
    # Both free-text entries are wiped the same way, from the first character to the end.
    for entry in (age_entry, mileage_entry):
        entry.delete(0, tk.END)
    result_label.config(text="Estimated Price: ")

In [None]:
# Tkinter GUI: build the main window, lay out the input widgets top to bottom,
# wire the buttons to the callbacks, then start the event loop.
root = tk.Tk()
root.title("Car Price Estimator")
root.geometry("420x320")

# Encoding strategy selector; calculate_price() uses PIPE_DUMMY for "dummy"
# and PIPE_LABEL for any other value.
tk.Label(root, text="Encoding Strategy:").pack(pady=(8, 0))
encoding_choice = tk.StringVar(value="dummy")
# The two radio buttons sit side by side in their own frame (grid inside, pack outside).
rb = tk.Frame(root); rb.pack()
tk.Radiobutton(rb, text="Dummy variables", variable=encoding_choice, value="dummy").grid(row=0, column=0, padx=6)
tk.Radiobutton(rb, text="Label → One-Hot", variable=encoding_choice, value="label_then_ohe").grid(row=0, column=1, padx=6)

# Car model drop-down, populated from MODEL_CHOICES; created in "readonly" state.
tk.Label(root, text="Car Model:").pack(pady=(10, 0))
car_model = ttk.Combobox(root, values=MODEL_CHOICES, state="readonly"); car_model.pack()

# Free-text entries; calculate_price() parses both with int().
tk.Label(root, text="Age (years):").pack(pady=(10, 0))
age_entry = tk.Entry(root); age_entry.pack()

# NOTE(review): the label says km, but nothing here shows the unit of the
# dataset's "Mileage" column — confirm against the CSV.
tk.Label(root, text="Mileage (km):").pack(pady=(10, 0))
mileage_entry = tk.Entry(root); mileage_entry.pack()

# Output line updated by calculate_price() and cleared by reset_fields().
result_label = tk.Label(root, text="Estimated Price: ", font=("Arial", 12))
result_label.pack(pady=12)

# Action buttons, side by side in their own frame.
btn = tk.Frame(root); btn.pack(pady=10)
tk.Button(btn, text="Calculate Price", command=calculate_price).grid(row=0, column=0, padx=6)
tk.Button(btn, text="Reset", command=reset_fields).grid(row=0, column=1, padx=6)

# Start the Tk event loop; this call does not return until the window is closed.
root.mainloop()