# In [None]:
import pandas as pd
import tkinter as tk
from tkinter import messagebox
from tkinter import ttk
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load the dataset
data = pd.read_csv('real_estate_dataset.csv')

# Data pre-processing: drop every row that has a missing value in any column.
data = data.dropna()

# Define features and target
target = 'Sale Price'
features = ['City', 'Neighborhood', 'Property Type', 'Bedrooms', 'Bathrooms',
            'Size (sqft)', 'Year Built', 'Days on Market',
            'Proximity to Public Transport (m)', 'Crime Rate',
            'School Quality Score', 'Monthly Rent Estimate',
            'Property Tax (Annual)', 'Economic Index', 'Interest Rate (%)']

# Work on a copy so encoding the categorical columns does not alter `data`,
# which the GUI still needs with its original text labels.
X = data[features].copy()
y = data[target]

# Encode categorical variables as integer codes. The fitted encoders are kept
# so the GUI can map a selected label to the same code at prediction time.
label_encoders = {}
for col in ['City', 'Neighborhood', 'Property Type']:
    le = LabelEncoder()
    # Replace the column outright. `X.loc[:, col] = ...` writes into the
    # existing object-dtype column under pandas 2.x, which would leave the
    # integer codes stored as Python objects instead of a numeric dtype.
    X[col] = le.fit_transform(X[col])
    label_encoders[col] = le

# Split data (80 % train / 20 % test, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict & evaluate on the held-out test split
y_pred = model.predict(X_test)
mae = (y_test - y_pred).abs().mean()
r2 = model.score(X_test, y_test)

# Identify investment opportunities: annual rent as a fraction of sale price.
# Rows with a non-positive sale price get NaN instead of an infinite (or
# negative) ratio, so they sort last rather than topping the ranking.
data['Rent_to_Price_Ratio'] = (
    data['Monthly Rent Estimate'] * 12 / data['Sale Price']
).where(data['Sale Price'] > 0)
investment_opportunities = data[['Property ID', 'City', 'Neighborhood', 'Sale Price',
                                 'Monthly Rent Estimate', 'Rent_to_Price_Ratio']]
investment_opportunities = investment_opportunities.sort_values('Rent_to_Price_Ratio', ascending=False)
# NOTE(review): only the ten best rows are saved, and the GUI's city and
# minimum-ratio filters read this file, so they operate on just these rows.
investment_opportunities.head(10).to_csv('investment_opportunities.csv', index=False)


# Tkinter App
class RealEstateApp:
    """Tkinter front end for the sale-price model and the investment ranking.

    The main window shows one input per model feature, a button that runs the
    trained regressor on those inputs, the model's MAE and R² score, and a
    button that opens a filterable table of the rows stored in
    ``investment_opportunities.csv``.

    The constructor reads the module-level ``model``, ``label_encoders``,
    ``data``, ``features``, ``mae`` and ``r2`` objects.
    """

    # Features picked from a dropdown and encoded with a fitted LabelEncoder.
    CATEGORICAL_FEATURES = ('City', 'Neighborhood', 'Property Type')
    # Rows whose rent-to-price ratio exceeds this value are highlighted green.
    HIGH_RATIO_THRESHOLD = 0.08

    def __init__(self, root):
        """Build the main window's widgets inside *root*."""
        self.root = root
        self.root.title("Real Estate Market Analysis")
        self.model = model
        self.label_encoders = label_encoders
        self.data = data
        # Column order the model was trained with; predictions must match it.
        self.features = list(features)

        # Input fields: feature name -> StringVar (dropdown) or Entry (number).
        self.entries = {}
        for row, name in enumerate(self.features):
            tk.Label(root, text=name).grid(row=row, column=0, padx=5, pady=5)
            if name in self.CATEGORICAL_FEATURES:
                # Sorted so long lists are easy to scan, like the city filter
                # in the investments window.
                choices = sorted(self.data[name].unique())
                var = tk.StringVar()
                tk.OptionMenu(root, var, *choices).grid(row=row, column=1, padx=5, pady=5)
                self.entries[name] = var
            else:
                entry = tk.Entry(root)
                entry.grid(row=row, column=1, padx=5, pady=5)
                self.entries[name] = entry

        # Everything below the inputs is stacked in consecutive grid rows.
        base_row = len(self.features)

        tk.Button(root, text="Predict Sale Price", command=self.predict).grid(
            row=base_row, column=0, columnspan=2, pady=10)

        tk.Label(root, text=f"Model MAE: ${mae:,.2f}").grid(row=base_row + 1, column=0, columnspan=2)
        tk.Label(root, text=f"Model R² Score: {r2:.4f}").grid(row=base_row + 2, column=0, columnspan=2)

        tk.Button(root, text="Show Investment Opportunities", command=self.show_investments).grid(
            row=base_row + 3, column=0, columnspan=2, pady=10)

        self.result_label = tk.Label(root, text="")
        self.result_label.grid(row=base_row + 4, column=0, columnspan=2)

    @staticmethod
    def _parse_numeric(feature, raw):
        """Convert the text typed for *feature* into a finite float.

        Args:
            feature: Feature name, used only in error messages.
            raw: Text taken from the entry widget.

        Returns:
            The parsed value as a float.

        Raises:
            ValueError: If *raw* is blank, is not a number, or is NaN or
                infinite. The message names the feature so it can be shown
                to the user as-is.
        """
        import math  # local import keeps the class independent of the file header

        text = raw.strip()
        if not text:
            raise ValueError(f"Please enter a value for {feature}")
        try:
            number = float(text)
        except ValueError:
            raise ValueError(f"{feature} must be a number, got {text!r}") from None
        # float() accepts "nan" and "inf", which are not usable model inputs.
        if not math.isfinite(number):
            raise ValueError(f"{feature} must be a finite number, got {text!r}")
        return number

    def predict(self):
        """Predict a sale price from the current inputs and display it.

        Categorical selections are encoded with the stored label encoders and
        numeric fields are parsed as floats. Any problem (missing selection,
        invalid number, model error) is shown in an error dialog instead of
        being raised.
        """
        try:
            input_data = {}
            for feature, widget in self.entries.items():
                raw = widget.get()
                if feature in self.CATEGORICAL_FEATURES:
                    if not raw:
                        raise ValueError(f"Please select a {feature}")
                    input_data[feature] = self.label_encoders[feature].transform([raw])[0]
                else:
                    input_data[feature] = self._parse_numeric(feature, raw)

            # Single-row frame with the training column order and names.
            row = [input_data[feature] for feature in self.features]
            input_frame = pd.DataFrame([row], columns=self.features)

            prediction = self.model.predict(input_frame)[0]
            self.result_label.config(text=f"Predicted Sale Price: ${prediction:,.2f}")
        except Exception as e:
            # UI boundary: surface every failure to the user.
            messagebox.showerror("Error", str(e))

    def show_investments(self):
        """Open a window listing the saved investment opportunities.

        Reads ``investment_opportunities.csv`` and shows up to ten rows, best
        rent-to-price ratio first, with a city filter, a minimum-ratio slider
        and click-to-sort column headers. Failures (for example a missing
        file) are shown in an error dialog.
        """
        try:
            investments = pd.read_csv('investment_opportunities.csv')
            # Display strings are prepared once; filtering and highlighting
            # keep using the raw numeric columns.
            investments['Formatted Price'] = investments['Sale Price'].apply(lambda x: f"${x:,.2f}")
            investments['Formatted Rent'] = investments['Monthly Rent Estimate'].apply(lambda x: f"${x:,.2f}")
            investments['Formatted Ratio'] = investments['Rent_to_Price_Ratio'].apply(lambda x: f"{x:.4f}")

            investment_window = tk.Toplevel(self.root)
            investment_window.title("Top Investment Opportunities")
            investment_window.geometry("950x500")

            filter_frame = tk.Frame(investment_window)
            filter_frame.pack(fill='x', pady=10)

            tk.Label(filter_frame, text="Filter by City:").pack(side='left', padx=(10, 5))
            cities = sorted(investments['City'].unique())
            city_var = tk.StringVar(value="All")
            city_dropdown = ttk.Combobox(filter_frame, textvariable=city_var,
                                         values=["All"] + cities, state='readonly')
            city_dropdown.pack(side='left', padx=5)

            tk.Label(filter_frame, text="Min Rent-to-Price Ratio:").pack(side='left', padx=(20, 5))
            ratio_var = tk.DoubleVar(value=0.05)
            tk.Scale(filter_frame, from_=0.01, to=0.20, resolution=0.01, orient='horizontal',
                     variable=ratio_var).pack(side='left')

            def refresh_table():
                """Refill the table with the rows matching the current filters."""
                for item in tree.get_children():
                    tree.delete(item)

                filtered = investments
                selected_city = city_var.get()
                if selected_city != "All":
                    filtered = filtered[filtered['City'] == selected_city]
                filtered = filtered[filtered['Rent_to_Price_Ratio'] >= ratio_var.get()]
                filtered = filtered.sort_values('Rent_to_Price_Ratio', ascending=False).head(10)

                for _, row in filtered.iterrows():
                    tag = 'high' if row['Rent_to_Price_Ratio'] > self.HIGH_RATIO_THRESHOLD else 'low'
                    tree.insert('', 'end', values=(
                        row['Property ID'], row['City'], row['Neighborhood'],
                        row['Formatted Price'], row['Formatted Rent'], row['Formatted Ratio']
                    ), tags=(tag,))

            def sort_column(treeview, col, reverse):
                """Sort the displayed rows by *col*; the next click reverses."""
                # str() because Tk may hand back numeric-looking cells as numbers.
                cells = [(str(treeview.set(item, col)), item)
                         for item in treeview.get_children('')]
                try:
                    # Numeric order when every cell parses once "$" and "," are removed.
                    keyed = [(float(text.replace("$", "").replace(",", "")), item)
                             for text, item in cells]
                except ValueError:
                    # Text column (e.g. city names): fall back to string order.
                    keyed = cells
                keyed.sort(key=lambda pair: pair[0], reverse=reverse)
                for index, (_, item) in enumerate(keyed):
                    treeview.move(item, '', index)
                treeview.heading(col, command=lambda: sort_column(treeview, col, not reverse))

            tk.Button(filter_frame, text="Apply Filters", command=refresh_table).pack(side='left', padx=10)

            columns = ['Property ID', 'City', 'Neighborhood', 'Formatted Price', 'Formatted Rent', 'Formatted Ratio']
            tree = ttk.Treeview(investment_window, columns=columns, show='headings')

            for col in columns:
                # c=col binds the current column; a plain closure would see
                # only the last value of the loop variable.
                tree.heading(col, text=col, command=lambda c=col: sort_column(tree, c, False))
                tree.column(col, width=140, anchor='center')

            tree.tag_configure('high', background='#e0ffe0')  # Light green
            tree.tag_configure('low', background='#ffe0e0')   # Light red

            scrollbar = ttk.Scrollbar(investment_window, orient='vertical', command=tree.yview)
            tree.configure(yscrollcommand=scrollbar.set)
            scrollbar.pack(side='right', fill='y')
            tree.pack(expand=True, fill='both', padx=10, pady=10)

            refresh_table()

        except Exception as e:
            # UI boundary: surface every failure to the user.
            messagebox.showerror("Error", str(e))




# Run app: start the GUI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    root = tk.Tk()
    app = RealEstateApp(root)  # builds all widgets inside the root window
    root.mainloop()  # hand control to Tk's event loop
