In [10]:
import pandas as pd
import tkinter as tk
from tkinter import messagebox
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [11]:
# Read the real-estate listings CSV (path is relative to the notebook's
# working directory) into the DataFrame used by every later cell.
df = pd.read_csv(filepath_or_buffer="real_estate_dataset.csv")

In [12]:
# Display the loaded DataFrame as this cell's output for a visual sanity check.
df

Unnamed: 0,Property ID,City,Neighborhood,Property Type,Bedrooms,Bathrooms,Size (sqft),Year Built,Sale Price,Listing Price,Date of Sale,Days on Market,Proximity to Public Transport (m),Crime Rate,School Quality Score,Monthly Rent Estimate,Property Tax (Annual),Economic Index,Interest Rate (%)
0,1,New York,Queens,Condo,4,2,1628,1998,737262,785736,2020-07-15,16,367,6.36,4,5247,10691,0.55,6.65
1,2,Phoenix,Downtown,Condo,2,1,4953,1982,357036,388059,2023-09-11,118,592,1.38,9,2551,4813,1.01,6.08
2,3,Los Angeles,Venice,Townhouse,2,4,1671,1972,769150,851651,2024-09-10,108,820,6.60,1,6184,11318,0.91,3.69
3,4,Los Angeles,Beverly Hills,Condo,3,1,3498,1997,561241,529452,2024-02-08,59,1555,3.71,10,3459,3500,1.19,3.33
4,5,Los Angeles,Venice,Apartment,3,3,4849,2007,432830,462741,2024-02-28,15,1509,2.54,8,2597,3758,0.70,6.27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,96,Los Angeles,Beverly Hills,House,2,4,4448,2009,430906,413717,2023-09-30,20,304,9.54,2,2361,3180,1.04,5.60
96,97,Chicago,Hyde Park,House,5,4,1458,1990,954033,905131,2024-08-06,82,1944,3.92,10,7478,10931,0.50,6.78
97,98,Los Angeles,Venice,Townhouse,1,3,2766,2002,919848,870160,2023-05-24,112,545,1.65,10,7941,12035,1.23,4.78
98,99,Houston,The Heights,Townhouse,5,3,3509,1990,889542,816806,2022-01-10,80,928,9.92,2,6609,5217,1.42,3.13


In [13]:
# Registry of fitted encoders, keyed by the name of the categorical column
# each one was fitted on. It starts empty and is filled by the encoding loop.
label_encoders = dict()

In [14]:
# Swap every categorical text column for integer codes. The fitted encoder is
# stored per column so the same text -> code mapping can be reused later.
for column in ('City', 'Neighborhood', 'Property Type'):
    le = LabelEncoder()
    encoded_values = le.fit_transform(df[column])
    df[column] = encoded_values
    label_encoders[column] = le

In [15]:
# Derive the two investment columns.
# 'ROI' is stored as the absolute difference (sale price minus listing price),
# not as a percentage. 'Good Investment' is 'Yes' only when that difference is
# strictly positive; every other row is 'No'.
df['ROI'] = df['Sale Price'].sub(df['Listing Price'])
df['Good Investment'] = df['ROI'].gt(0).map({True: 'Yes', False: 'No'})

In [16]:

# Model input columns. The order matters: it fixes the column order of the
# training matrix built from this list.
features = [
    # physical attributes
    "Bedrooms",
    "Bathrooms",
    "Size (sqft)",
    "Year Built",
    # listing information
    "Listing Price",
    "Days on Market",
    # surroundings
    "Proximity to Public Transport (m)",
    "Crime Rate",
    "School Quality Score",
    # financials and macro indicators
    "Monthly Rent Estimate",
    "Property Tax (Annual)",
    "Economic Index",
    "Interest Rate (%)",
    # label-encoded categoricals
    "City",
    "Neighborhood",
    "Property Type",
]

In [17]:
# Feature matrix: the columns listed in `features`, in that order.
X = df.loc[:, features]
# Regression target: the realised sale price.
y_price = df.loc[:, "Sale Price"]
# Investment target: the 'Yes'/'No' flag turned into 1/0.
y_invest = df.loc[:, "Good Investment"].map({'Yes': 1, 'No': 0})

In [18]:
# Hold out 20% of the rows for each target. Both calls use the same
# test_size and random_state on the same X.
(X_train_price, X_test_price,
 y_train_price, y_test_price) = train_test_split(
    X, y_price, test_size=0.2, random_state=42
)
(X_train_invest, X_test_invest,
 y_train_invest, y_test_invest) = train_test_split(
    X, y_invest, test_size=0.2, random_state=42
)

In [19]:

# Train models
# A fixed random_state makes the fitted forests (and therefore the
# predictions they produce) reproducible across a kernel restart. It uses the
# same seed value that is already passed to train_test_split.
rf_price = RandomForestRegressor(random_state=42)
rf_price.fit(X_train_price, y_train_price)

# The investment target is encoded as 0/1, so this regressor's output is a
# score between 0 and 1 rather than a hard class label.
rf_invest = RandomForestRegressor(random_state=42)
rf_invest.fit(X_train_invest, y_train_invest)

In [22]:

# Create the interface
def predict():
    """Read the form fields, run both models, and show the result in a dialog.

    Numeric fields are parsed from the Tk entry widgets and the three
    categorical fields are converted with the encoders stored in
    ``label_encoders``. The resulting single row is passed to ``rf_price``
    and ``rf_invest``; an ``rf_invest`` prediction above 0.5 is reported
    as "Yes".

    Returns nothing. The outcome is shown with ``messagebox.showinfo``;
    failures are shown with ``messagebox.showerror``.
    """
    try:
        row = {
            "Bedrooms": int(entry_bedrooms.get()),
            "Bathrooms": float(entry_bathrooms.get()),
            "Size (sqft)": float(entry_sqft.get()),
            "Year Built": int(entry_year.get()),
            "Listing Price": float(entry_listing.get()),
            "Days on Market": int(entry_days.get()),
            "Proximity to Public Transport (m)": float(entry_transport.get()),
            "Crime Rate": float(entry_crime.get()),
            "School Quality Score": float(entry_school.get()),
            "Monthly Rent Estimate": float(entry_rent.get()),
            "Property Tax (Annual)": float(entry_tax.get()),
            "Economic Index": float(entry_economic.get()),
            "Interest Rate (%)": float(entry_interest.get()),
            # Surrounding whitespace is stripped so " Queens" still matches
            # the label the encoder was fitted on.
            "City": label_encoders['City'].transform(
                [entry_city.get().strip()])[0],
            "Neighborhood": label_encoders['Neighborhood'].transform(
                [entry_neighborhood.get().strip()])[0],
            "Property Type": label_encoders['Property Type'].transform(
                [entry_type.get().strip()])[0],
        }

        # Build a one-row DataFrame whose columns follow `features`, the same
        # list used to build the training matrix. This replaces the previous
        # np.array(...) call, which failed because numpy was never imported,
        # and it passes named columns to models that were fitted on a
        # DataFrame.
        input_data = pd.DataFrame([row], columns=features)

        predicted_price = rf_price.predict(input_data)[0]
        predicted_investment = rf_invest.predict(input_data)[0]
        investment_decision = "Yes" if predicted_investment > 0.5 else "No"

        messagebox.showinfo("Prediction Result", f"Predicted Sale Price: ${predicted_price:,.2f}\nGood Investment: {investment_decision}")

    except ValueError as e:
        # Raised by int()/float() on malformed numbers and by
        # LabelEncoder.transform on a label it was not fitted on.
        messagebox.showerror("Input Error", str(e))
    except Exception as e:
        # Anything else is not the user's typing; report it under a different
        # title so programming or model errors are not mistaken for bad input.
        messagebox.showerror("Prediction Error", f"{type(e).__name__}: {e}")

In [24]:
# GUI setup
# Build the main window: one labelled text field per model input, followed by
# a button that triggers predict().
app = tk.Tk()
app.title("Real Estate Price & Investment Predictor")

# (label shown to the user, key under which the Entry widget is stored)
labels_entries = [
    ("Bedrooms", "entry_bedrooms"),
    ("Bathrooms", "entry_bathrooms"),
    ("Size (sqft)", "entry_sqft"),
    ("Year Built", "entry_year"),
    ("Listing Price", "entry_listing"),
    ("Days on Market", "entry_days"),
    ("Proximity to Public Transport (m)", "entry_transport"),
    ("Crime Rate", "entry_crime"),
    ("School Quality Score", "entry_school"),
    ("Monthly Rent Estimate", "entry_rent"),
    ("Property Tax (Annual)", "entry_tax"),
    ("Economic Index", "entry_economic"),
    ("Interest Rate (%)", "entry_interest"),
    ("City", "entry_city"),
    ("Neighborhood", "entry_neighborhood"),
    ("Property Type", "entry_type"),
]

# Lay the form out on a two-column grid: caption on the left, input on the
# right, one row per field.
entries = {}
for row_index, (caption, entry_key) in enumerate(labels_entries):
    caption_widget = tk.Label(app, text=caption)
    caption_widget.grid(row=row_index, column=0)

    input_widget = tk.Entry(app)
    input_widget.grid(row=row_index, column=1)

    entries[entry_key] = input_widget

# Expose each Entry widget under its own module-level name; predict() reads
# the fields through these names.
entry_bedrooms = entries["entry_bedrooms"]
entry_bathrooms = entries["entry_bathrooms"]
entry_sqft = entries["entry_sqft"]
entry_year = entries["entry_year"]
entry_listing = entries["entry_listing"]
entry_days = entries["entry_days"]
entry_transport = entries["entry_transport"]
entry_crime = entries["entry_crime"]
entry_school = entries["entry_school"]
entry_rent = entries["entry_rent"]
entry_tax = entries["entry_tax"]
entry_economic = entries["entry_economic"]
entry_interest = entries["entry_interest"]
entry_city = entries["entry_city"]
entry_neighborhood = entries["entry_neighborhood"]
entry_type = entries["entry_type"]

# The button sits on the row below the last field and spans both columns.
predict_button = tk.Button(app, text="Predict", command=predict)
predict_button.grid(row=len(labels_entries), column=0, columnspan=2)

# Hand control to Tk; this call blocks until the window is closed.
app.mainloop()