# In [1]:
import tkinter as tk
from tkinter import ttk, messagebox
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# ---------------------------
# Data Loading and Model Training
# ---------------------------
# The dataset is read from "Housing.csv", resolved against the working directory.
df = pd.read_csv("Housing.csv")

# Expand categorical columns into indicator columns (first level of each is
# dropped), then separate the "price" target from the model inputs.
df_encoded = pd.get_dummies(df, drop_first=True)
y = df_encoded["price"]
X = df_encoded.drop(columns=["price"])
feature_columns = X.columns.tolist()

# Hold out 20% of the rows with a fixed seed and fit a linear regression
# on the remaining 80%.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
model = LinearRegression().fit(X_train, y_train)

# The input form is driven by the raw (un-encoded) columns, split by dtype:
# object columns become drop-downs, everything else becomes a text entry.
original_features = df.columns.drop("price")
_raw_inputs = df[original_features]
categorical_features = _raw_inputs.select_dtypes(include=["object"]).columns
numerical_features = _raw_inputs.select_dtypes(exclude=["object"]).columns

# Distinct values seen in the dataset for each categorical column.
valid_categories = {}
for feature in categorical_features:
    valid_categories[feature] = df[feature].unique().tolist()

# ---------------------------
# Tkinter GUI Setup with Scrollable Frame
# ---------------------------
root = tk.Tk()
root.title("Housing Price Prediction")
root.geometry("450x600")  # Initial window size; adjust as needed

# A canvas paired with a vertical scrollbar provides the scrolling viewport.
canvas = tk.Canvas(root)
scrollbar = ttk.Scrollbar(root, orient="vertical", command=canvas.yview)
canvas["yscrollcommand"] = scrollbar.set

# All form widgets live in this frame, which is embedded in the canvas.
scrollable_frame = ttk.Frame(canvas)


def _refresh_scrollregion(_event):
    """Resize the canvas scroll region to cover everything drawn on it."""
    canvas.configure(scrollregion=canvas.bbox("all"))


# Recompute the scrollable area whenever the frame's size changes.
scrollable_frame.bind("<Configure>", _refresh_scrollregion)

# Embed the frame at the canvas origin, anchored by its top-left corner.
canvas.create_window((0, 0), window=scrollable_frame, anchor="nw")

# Canvas fills the window; the scrollbar hugs the right edge.
canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

# ---------------------------
# Bind Mouse Wheel for Scrolling
# ---------------------------
def on_mousewheel(event):
    """Scroll the canvas vertically in response to a ``<MouseWheel>`` event.

    ``event.delta`` is converted to scroll units at 120 per unit. Deltas whose
    magnitude is below 120 would truncate to zero units and make the wheel
    appear dead, so any non-zero delta scrolls at least one unit in the
    direction of its sign. A positive delta scrolls up (negative units).

    Args:
        event: Tk event object; only its ``delta`` attribute is read.
    """
    delta = event.delta
    if not delta:
        return  # nothing to scroll

    steps = int(delta / 120)
    if steps == 0:
        # NOTE(review): small deltas are reportedly what macOS and
        # high-resolution wheels deliver -- confirm against the Tk docs.
        steps = 1 if delta > 0 else -1

    canvas.yview_scroll(-steps, "units")

# Windows and macOS deliver wheel motion as <MouseWheel> events.
canvas.bind_all("<MouseWheel>", on_mousewheel)
# Linux (X11) delivers it as button presses: Button-4 scrolls up, Button-5 down.
for _sequence, _units in (("<Button-4>", -1), ("<Button-5>", 1)):
    canvas.bind_all(
        _sequence,
        # Bind the step as a default argument so each handler keeps its own value.
        lambda event, _units=_units: canvas.yview_scroll(_units, "units"),
    )

# ---------------------------
# Create Input Fields with Example Hints
# ---------------------------
# Maps each feature name to the Tkinter StringVar backing its input widget.
# StringVar is used for numerical and categorical fields alike so that every
# field starts out empty.
entries = {}
row = 0

for feature in original_features:
    # Human-readable caption: underscores become spaces, words are capitalized.
    caption = feature.replace("_", " ").title()
    tk.Label(scrollable_frame, text=caption, font=("Helvetica", 10, "bold")).grid(
        row=row, column=0, padx=10, pady=5, sticky=tk.W
    )

    field_var = tk.StringVar()
    if feature in categorical_features:
        # Read-only drop-down limited to the values seen in the dataset,
        # starting with nothing selected.
        choices = valid_categories[feature]
        widget = ttk.Combobox(
            scrollable_frame, textvariable=field_var, state="readonly", values=choices
        )
        widget.set('')
        widget.grid(row=row, column=1, padx=10, pady=5, sticky=tk.W)
        # The first allowed option doubles as the example.
        example_text = f"Example: {choices[0]}"
    else:
        widget = tk.Entry(scrollable_frame, textvariable=field_var)
        widget.grid(row=row, column=1, padx=10, pady=5, sticky=tk.W)
        # The first non-null value in the dataset serves as the example.
        example_text = f"Example: {df[feature].dropna().iloc[0]}"
    entries[feature] = field_var

    # Small gray hint on the grid row directly beneath the input widget.
    tk.Label(scrollable_frame, text=example_text, font=("Helvetica", 8), fg="gray").grid(
        row=row + 1, column=1, padx=10, pady=(0, 5), sticky=tk.W
    )
    # Each feature occupies two grid rows: the input row and the hint row.
    row += 2

# ---------------------------
# Prediction Function
# ---------------------------
def predict_price():
    """Validate the form, predict a price, and display it in ``result_label``.

    Every field in ``entries`` is read. On the first empty or invalid field an
    error dialog is shown and the function returns without predicting.
    Otherwise the input is encoded into the columns the model was trained on,
    a price is predicted, and the prediction is shown together with the price
    of the closest dataset row and the absolute difference between the two.

    Returns:
        None. Results and errors are reported through the GUI only.
    """
    # Collect and validate user input, one single-element list per feature.
    user_input = {}
    for feature in original_features:
        label = feature.replace('_', ' ').title()
        value = entries[feature].get().strip()

        if feature in categorical_features:
            if not value:
                messagebox.showerror("Invalid input", f"Please select a value for {label}.")
                return
            user_input[feature] = [value]
            continue

        if not value:
            messagebox.showerror("Invalid input", f"Please enter a number for {label}.")
            return
        try:
            num_value = float(value)
        except ValueError:
            messagebox.showerror("Invalid input", f"Invalid number for {label}.")
            return
        if not np.isfinite(num_value):
            # float() happily parses "nan" and "inf"; neither is a usable
            # feature value, so reject them like any other bad number.
            messagebox.showerror("Invalid input", f"Invalid number for {label}.")
            return
        user_input[feature] = [num_value]

    input_df = pd.DataFrame(user_input)

    # One-hot encode WITHOUT drop_first: on a single-row frame drop_first would
    # discard the only level of every categorical column and the user's
    # choices would never reach the model. reindex() then keeps exactly the
    # training columns, zero-fills indicators for levels the user did not
    # pick, and discards indicators for the levels dropped during training.
    input_aligned = (
        pd.get_dummies(input_df)
        .reindex(columns=feature_columns, fill_value=0)
        .astype(float)
    )

    # Predict the housing price using the trained model.
    predicted_price = model.predict(input_aligned)[0]

    # ---------------------------
    # Optional Deviation Calculation
    # ---------------------------
    # If the dataset contains a 'price' column, find the closest matching row.
    if "price" in df.columns:
        df_numeric = df.drop(columns=['price'])
        input_df_numeric = input_df.copy()
        for col in categorical_features:
            dataset_cat = df_numeric[col].astype('category')
            df_numeric[col] = dataset_cat.cat.codes
            # Encode the input with the dataset's own category set so that the
            # same label maps to the same integer code on both sides.
            input_df_numeric[col] = input_df_numeric[col].astype(dataset_cat.dtype).cat.codes

        # Euclidean distance from the input to every dataset row.
        # NOTE(review): features are not scaled, so columns with large
        # magnitudes dominate the distance.
        distances = np.sqrt(((df_numeric - input_df_numeric.values) ** 2).sum(axis=1))
        closest_index = distances.idxmin()
        actual_price = df.loc[closest_index, 'price']
        deviation = abs(predicted_price - actual_price)

        result_text = (
            f"Predicted Housing Price: ${predicted_price:,.2f}\n\n"
            f"Actual Price (closest match): ${actual_price:,.2f}\n"
            f"Deviation: ${deviation:,.2f}"
        )
    else:
        result_text = f"Predicted Housing Price: ${predicted_price:,.2f}"

    # Display the result in the result_label.
    result_label.config(text=result_text)

# ---------------------------
# Add Predict Button and Result Display
# ---------------------------
# The button spans both grid columns, directly below the last example hint.
predict_button = tk.Button(
    scrollable_frame,
    text="Predict Price",
    font=("Helvetica", 10, "bold"),
    command=predict_price,
)
predict_button.grid(row=row, column=0, columnspan=2, padx=10, pady=15)

# predict_price() writes its output into this label, one grid row further down.
row += 1
result_label = tk.Label(
    scrollable_frame,
    text="",
    font=("Helvetica", 11),
    justify=tk.LEFT,
)
result_label.grid(row=row, column=0, columnspan=2, padx=10, pady=10)

# Hand control to Tk's event loop.
root.mainloop()