# In [1]:
import pandas as pd
import tkinter as tk
from tkinter import messagebox
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Read the listings and discard every row that has a missing value in any
# column, so everything downstream works on complete records only.
data = pd.read_csv('real_estate_dataset.csv').dropna()

# Column to predict, followed by the predictor columns. The order of
# `features` is the column order of the training matrix.
target = 'Sale Price'
features = [
    'City',
    'Neighborhood',
    'Property Type',
    'Bedrooms',
    'Bathrooms',
    'Size (sqft)',
    'Year Built',
    'Days on Market',
    'Proximity to Public Transport (m)',
    'Crime Rate',
    'School Quality Score',
    'Monthly Rent Estimate',
    'Property Tax (Annual)',
    'Economic Index',
    'Interest Rate (%)',
]

# Predictors are taken as an independent copy so that later changes to X do
# not write back into `data`; the target column is selected directly.
X = data.loc[:, features].copy()
y = data.loc[:, target]

# Replace each categorical column with integer codes and keep the fitted
# encoder per column so the same mapping can be applied to new inputs later.
#
# Assign with X[col] = ... rather than X.loc[:, col] = ...: since pandas 2.0
# the .loc form tries to write into the existing column in place, which leaves
# the codes stored in an object-dtype column instead of producing an integer
# column. X is an explicit copy, so plain column assignment does not trigger
# SettingWithCopyWarning.
label_encoders = {}
for col in ['City', 'Neighborhood', 'Property Type']:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])
    label_encoders[col] = le

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest on the training portion
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict the held-out rows
y_pred = model.predict(X_test)

# Summarise hold-out performance: the model's own score and the mean
# absolute difference between actual and predicted prices
r2 = model.score(X_test, y_test)
mae = abs(y_test - y_pred).mean()

# Identify investment opportunities (high rent-to-price ratio): annual rent
# estimate divided by sale price.
# Rows whose sale price is not positive are masked to NaN before dividing, so
# they yield a NaN ratio instead of an infinite or negative one. sort_values
# places NaN last, so such rows can no longer top the ranking.
data['Rent_to_Price_Ratio'] = (
    data['Monthly Rent Estimate'] * 12
    / data['Sale Price'].where(data['Sale Price'] > 0)
)
investment_opportunities = data[['Property ID', 'City', 'Neighborhood', 'Sale Price',
                                 'Monthly Rent Estimate', 'Rent_to_Price_Ratio']]
investment_opportunities = investment_opportunities.sort_values('Rent_to_Price_Ratio', ascending=False)

# Save the ten highest-ratio rows; the file is read back when the user asks
# to see the investment opportunities.
investment_opportunities.head(10).to_csv('investment_opportunities.csv', index=False)

# Tkinter GUI
class RealEstateApp:
    """Tkinter window for sale-price prediction and investment browsing.

    The window shows one input widget per feature (a drop-down for every
    feature that has a label encoder, a text entry otherwise), a button that
    runs the model on the entered values, the model's MAE and R² score, and a
    button that lists the top rows of ``investment_opportunities.csv``.

    Construction reads the module-level names ``model``, ``label_encoders``,
    ``data``, ``features``, ``mae`` and ``r2``.
    """

    def __init__(self, root):
        """Create all widgets inside *root*.

        Args:
            root: The Tk window to populate; its title is set here.
        """
        self.root = root
        self.root.title("Real Estate Market Analysis")
        self.model = model
        self.label_encoders = label_encoders
        self.data = data
        # Feature names in the order used to lay out the form and to build
        # the single-row frame passed to the model in predict().
        self.features = list(features)

        # Maps feature name -> StringVar (drop-downs) or Entry (text fields);
        # both expose .get().
        self.entries = {}
        for row, label in enumerate(self.features):
            tk.Label(root, text=label).grid(row=row, column=0, padx=5, pady=5)
            if label in self.label_encoders:
                # Encoded (categorical) feature: offer the values present in
                # the data as a drop-down.
                values = self.data[label].unique()
                var = tk.StringVar()
                tk.OptionMenu(root, var, *values).grid(row=row, column=1, padx=5, pady=5)
                self.entries[label] = var
            else:
                entry = tk.Entry(root)
                entry.grid(row=row, column=1, padx=5, pady=5)
                self.entries[label] = entry

        # Everything below the form is stacked in the rows after the last field.
        next_row = len(self.features)

        # Predict button
        tk.Button(root, text="Predict Sale Price", command=self.predict).grid(
            row=next_row, column=0, columnspan=2, pady=10)

        # Model performance
        tk.Label(root, text=f"Model MAE: ${mae:,.2f}").grid(
            row=next_row + 1, column=0, columnspan=2)
        tk.Label(root, text=f"Model R² Score: {r2:.4f}").grid(
            row=next_row + 2, column=0, columnspan=2)

        # Investment opportunities button
        tk.Button(root, text="Show Investment Opportunities", command=self.show_investments).grid(
            row=next_row + 3, column=0, columnspan=2, pady=10)

        # Label that predict() fills with the result
        self.result_label = tk.Label(root, text="")
        self.result_label.grid(row=next_row + 4, column=0, columnspan=2)

    def _read_inputs(self):
        """Read the form and return ``{feature: value}`` ready for the model.

        Encoded features are passed through their label encoder; all other
        features are parsed as floats.

        Raises:
            ValueError: If a drop-down has no selection, a text field is empty
                or whitespace-only, or a text field is not a number. The
                message names the offending field.
        """
        inputs = {}
        for feature, widget in self.entries.items():
            if feature in self.label_encoders:
                choice = widget.get()
                if not choice:
                    raise ValueError(f"Please select a {feature}")
                inputs[feature] = self.label_encoders[feature].transform([choice])[0]
                continue

            text = widget.get().strip()
            if not text:
                raise ValueError(f"Please enter a value for {feature}")
            try:
                inputs[feature] = float(text)
            except ValueError:
                # float()'s own message does not say which field was wrong.
                raise ValueError(f"{feature} must be a number, got '{text}'") from None
        return inputs

    def predict(self):
        """Predict the sale price for the current form values.

        On success the result label is updated; on any failure the error
        message is shown in a dialog instead.
        """
        try:
            input_data = self._read_inputs()

            # Single-row frame with the columns in self.features order
            input_row = [input_data[feature] for feature in self.features]
            input_array = pd.DataFrame([input_row], columns=self.features)

            prediction = self.model.predict(input_array)[0]
            self.result_label.config(text=f"Predicted Sale Price: ${prediction:,.2f}")
        except Exception as e:
            # Callback boundary: report every failure to the user in a dialog.
            messagebox.showerror("Error", str(e))

    def show_investments(self):
        """Show the first five rows of ``investment_opportunities.csv`` in a dialog.

        Any failure (for example a missing file or column) is shown in an
        error dialog.
        """
        try:
            investments = pd.read_csv('investment_opportunities.csv')
            lines = [
                (f"Property ID: {row['Property ID']}, "
                 f"City: {row['City']}, "
                 f"Neighborhood: {row['Neighborhood']}, "
                 f"Sale Price: ${row['Sale Price']:,.2f}, "
                 f"Rent Estimate: ${row['Monthly Rent Estimate']:,.2f}/month, "
                 f"Rent-to-Price Ratio: {row['Rent_to_Price_Ratio']:.4f}\n")
                for _, row in investments.head(5).iterrows()
            ]
            investment_text = "Top 5 Investment Opportunities:\n\n" + "".join(lines)
            messagebox.showinfo("Investment Opportunities", investment_text)
        except Exception as e:
            messagebox.showerror("Error", str(e))

# Open the window and enter the Tk event loop, but only when this file is
# executed directly (not when it is imported)
if __name__ == "__main__":
    main_window = tk.Tk()
    app = RealEstateApp(main_window)
    main_window.mainloop()