In [17]:
import pandas as pd
import tkinter as tk
from tkinter import messagebox
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

In [18]:
# Load the synthetic real estate dataset
df = pd.read_csv("synthetic_real_estate_data.csv")

# Use only 4 features for simplicity; the GUI collects exactly these inputs
selected_features = ["Square_Feet", "Bedrooms", "Bathrooms", "Year_Built"]
X = df[selected_features]
y = df["Price"]

# Hold out 20% of the rows; the fixed seed keeps the split identical across runs
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Random Forest Regressor.
# Seeded like the split above: without random_state each kernel restart grows
# a different forest, so the same GUI input would yield a different price.
rf = RandomForestRegressor(random_state=42)
rf.fit(X_train, y_train)

In [22]:
# Build the main application window
root = tk.Tk()
root.title("Real Estate Price Predictor")

# One right-aligned caption plus one text entry per model input,
# stacked top to bottom in grid rows 0-3
field_captions = ("Square Feet:", "Bedrooms:", "Bathrooms:", "Year Built:")
input_entries = []
for grid_row, caption in enumerate(field_captions):
    caption_label = tk.Label(root, text=caption)
    caption_label.grid(row=grid_row, column=0, padx=10, pady=5, sticky="e")

    entry = tk.Entry(root)
    entry.grid(row=grid_row, column=1, pady=5)
    input_entries.append(entry)

# Bind each entry to the module-level name the prediction callback reads
entry_sqft, entry_bed, entry_bath, entry_year = input_entries
# Prediction function
def predict_price():
    """Read the four form fields, predict a price and show it in the result label.

    Used as the button callback: takes no arguments and returns None.
    Square feet is parsed as a float; bedrooms, bathrooms and year built are
    parsed as ints. The values are placed in a one-row DataFrame whose columns
    follow ``selected_features`` and passed to the trained ``rf`` model.

    Any ValueError raised inside the ``try`` block (non-numeric text, a value
    outside the accepted range, or one raised while predicting) clears the
    previously displayed result and opens an error dialog.
    """
    try:
        sqft = float(entry_sqft.get())
        bed = int(entry_bed.get())
        bath = int(entry_bath.get())
        year = int(entry_year.get())

        # Reject impossible values instead of silently pricing them.
        # `not (sqft > 0)` is deliberate: it is also true for NaN, which
        # float() accepts when the user types "nan".
        if not (sqft > 0) or bed < 0 or bath < 0 or year <= 0:
            raise ValueError("input value out of range")

        # Create DataFrame with the same column names the model was fitted on
        input_data = pd.DataFrame([[sqft, bed, bath, year]], columns=selected_features)
        prediction = rf.predict(input_data)

        result_label.config(text=f"Predicted Price: ${prediction[0]:,.2f}")
    except ValueError:
        # Do not leave a stale prediction on screen next to rejected input
        result_label.config(text="")
        messagebox.showerror("Input Error", "Please enter valid numeric values.")

# Label that the prediction callback writes its result into (grid row 5)
result_label = tk.Label(root, text="", font=("Helvetica", 14))
result_label.grid(row=5, column=0, columnspan=2, pady=10)

# Button that triggers the prediction, spanning both columns in grid row 4
predict_button = tk.Button(root, text="Predict Price", command=predict_price)
predict_button.grid(row=4, column=0, columnspan=2, pady=10)

# Hand control to the Tk event loop
root.mainloop()

In [23]:
# Bare expression: the notebook renders the DataFrame loaded from the CSV as this cell's output
df

Unnamed: 0,Location,Square_Feet,Bedrooms,Bathrooms,Year_Built,Garage,Has_Garden,Nearby_Schools,Crime_Rate,Proximity_to_City_Center_km,Interest_Rate,Employment_Rate,Price
0,Beachfront,1095,5,1,1970,1,1,0,3.88,26.7,5.21,88.7,326277
1,City Center,1228,1,3,1971,1,1,3,3.31,10.1,4.21,71.3,291755
2,Countryside,1865,5,3,1997,1,0,4,0.84,27.1,5.43,97.4,437638
3,City Center,4992,5,2,1986,0,1,0,4.39,29.9,5.80,94.4,667006
4,City Center,1198,3,2,1964,1,0,0,2.74,24.8,3.09,78.6,224883
...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,Downtown,1451,3,3,1956,0,0,2,2.83,19.2,2.69,89.8,512342
496,Countryside,1222,4,1,1984,1,1,4,0.29,19.1,2.89,70.7,391080
497,Downtown,3889,1,3,2004,0,0,3,3.37,12.4,2.68,90.0,1163804
498,Downtown,2579,1,2,1963,0,0,1,1.68,22.7,3.24,79.2,370069
