# Real Estate Investment Decision Support System
## Multi-Criteria Analysis with ELECTRE III and Pareto Optimization

This notebook demonstrates the complete analysis pipeline for identifying optimal rental property investments in the Paris region.

### 1. Setup and Imports

In [None]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from src.main import run_pipeline
from src.preprocessing import PropertyDataPreprocessor
from src.electre import ELECTREIII
from src.pareto import ParetoOptimizer

# Pandas display: never truncate columns, show two decimal places
pd.options.display.max_columns = None
pd.options.display.precision = 2

# Plot defaults: seaborn white-grid theme and a wide default figure
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

print("Setup complete!")

### 2. Run Complete Analysis Pipeline

In [None]:
# Run the analysis pipeline on sample data (use_sample_data=True).
# NOTE(review): later cells read CSVs from ../data/processed/ — presumably
# run_pipeline writes them; confirm in src/main.py.
# `results` keeps the pipeline's return value; it is not referenced again in
# the cells visible in this notebook.
results = run_pipeline(use_sample_data=True)

### 3. Load and Explore Results

In [None]:
# Read the four result tables from the processed-data folder, in a fixed order
df_processed, df_ranked, pareto_front, robust_core = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/processed/properties_processed.csv',
        '../data/processed/electre_ranked.csv',
        '../data/processed/pareto_front.csv',
        '../data/processed/robust_core.csv',
    )
)

# Report how many rows each set contains
for label, frame in (
    ("Processed properties", df_processed),
    ("Pareto front", pareto_front),
    ("Robust core", robust_core),
):
    print(f"{label}: {len(frame)}")

### 4. Top Investment Opportunities

In [None]:
# Show the key financial columns for the first five robust-core rows
summary_columns = [
    'price',
    'surface',
    'price_per_sqm',
    'rental_yield',
    'roi',
    'annual_cash_flow',
    'aggregate_score',
]
top_5 = robust_core.iloc[:5]
display(top_5.loc[:, summary_columns])

### 5. Visualizations

In [None]:
# Two side-by-side scatter panels, each overlaying the same three property sets:
# left = ROI vs rental yield, right = annual cash flow vs price.
layers = [
    (df_ranked, dict(alpha=0.3, label='All properties')),
    (pareto_front, dict(alpha=0.6, label='Pareto front', color='orange')),
    (robust_core, dict(s=100, label='Robust core', color='red')),
]
panels = [
    ('rental_yield', 'roi', 'Rental Yield (%)', 'ROI (%)', 'ROI vs Rental Yield'),
    ('price', 'annual_cash_flow', 'Price (€)', 'Annual Cash Flow (€)', 'Price vs Annual Cash Flow'),
]

fig, axes = plt.subplots(1, 2, figsize=(12, 6))
for ax, (x_col, y_col, x_label, y_label, title) in zip(axes, panels):
    # Draw the layers back to front so the robust core ends up on top
    for frame, marker_style in layers:
        ax.scatter(frame[x_col], frame[y_col], **marker_style)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

### 6. Summary Statistics

In [None]:
# Print describe() for the same four metrics on each property set
stat_columns = ['price', 'rental_yield', 'roi', 'annual_cash_flow']
report_sections = (
    ("\n=== All Properties ===", df_ranked),
    ("\n=== Pareto Front ===", pareto_front),
    ("\n=== Robust Core (Optimal Investments) ===", robust_core),
)

for heading, frame in report_sections:
    print(heading)
    print(frame[stat_columns].describe())

    # Keep the blank separator line only between sections, not after the last
    if frame is not robust_core:
        print()

### 7. Criteria Distribution

In [None]:
# Plot the distribution of four key criteria, overlaying the robust core on
# top of all ranked properties (one panel per criterion, 2x2 grid).
#
# Both histograms in a panel share the same bin edges. Passing bins=30 to each
# hist() call separately would bin each data set over its own range, giving
# bars of different widths whose frequencies cannot be compared.
N_BINS = 30
criteria = [
    # (column, x-axis label, panel title)
    ('price', 'Price (€)', 'Price Distribution'),
    ('rental_yield', 'Rental Yield (%)', 'Rental Yield Distribution'),
    ('roi', 'ROI (%)', 'ROI Distribution'),
    ('annual_cash_flow', 'Annual Cash Flow (€)', 'Annual Cash Flow Distribution'),
]

fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# axes.flat walks the grid row by row: (0,0), (0,1), (1,0), (1,1)
for ax, (column, x_label, title) in zip(axes.flat, criteria):
    # Drop missing values up front: the bin-edge computation needs a finite range
    all_values = df_ranked[column].dropna()
    core_values = robust_core[column].dropna()

    # Edges span both sets so no robust-core value can fall outside the bins
    bin_edges = np.histogram_bin_edges(
        pd.concat([all_values, core_values]).to_numpy(), bins=N_BINS
    )

    ax.hist(all_values, bins=bin_edges, alpha=0.5, label='All')
    ax.hist(core_values, bins=bin_edges, alpha=0.7, label='Robust core')
    ax.set_xlabel(x_label)
    ax.set_ylabel('Frequency')
    ax.set_title(title)
    ax.legend()

plt.tight_layout()
plt.show()

### 8. Conclusion

This analysis has identified the optimal investment opportunities using:
- Multi-criteria evaluation across 8 financial metrics
- ELECTRE III for robust outranking analysis
- Pareto optimization to identify non-dominated solutions

The robust core contains the properties that represent the best trade-offs across all criteria.