In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Load data
file_paths = {
    "sales_data": "Metro_sales_count_now_uc_sfrcondo_month.csv",
    "zhvf_growth": "Metro_zhvf_growth_uc_sfrcondo_tier_0.33_0.67_sm_sa_month.csv",
    "zori": "Metro_zori_uc_sfrcondomfr_sm_month.csv",
}

# Read the data files
dataframes = {key: pd.read_csv(path) for key, path in file_paths.items()}

# Sales ROI Analysis
sales_data = dataframes["sales_data"]

# Identify the latest date and growth columns
latest_date = sales_data.columns[sales_data.columns.str.match(r"\d{4}-\d{2}-\d{2}")].max()
growth_column = sales_data.columns[sales_data.columns.str.contains("growth", case=False)].max()

# Ensure numeric columns for calculations and handle missing data
sales_data[latest_date] = pd.to_numeric(sales_data[latest_date], errors="coerce")
sales_data[growth_column] = pd.to_numeric(sales_data[growth_column], errors="coerce")
sales_data.dropna(subset=[latest_date, growth_column], inplace=True)

# Calculate purchase price and projected price
purchase_price = sales_data[latest_date]
sales_data["Projected_Price"] = purchase_price * (1 + sales_data[growth_column] / 100)

# Mortgage Calculations
down_payment = purchase_price * 0.20
loan_amount = purchase_price - down_payment
annual_interest_rate = 0.03
loan_term_years = 30
monthly_mortgage = (
    loan_amount * annual_interest_rate / 12 /
    (1 - (1 + annual_interest_rate / 12) ** (-loan_term_years * 12))
)
total_mortgage_payments = monthly_mortgage * loan_term_years * 12

# Profit and ROI
sales_data["Profit"] = sales_data["Projected_Price"] - purchase_price - total_mortgage_payments
sales_data["Sales_ROI (%)"] = (sales_data["Profit"] / purchase_price) * 100

# Rental ROI Analysis
zori = dataframes["zori"]

# Identify the latest rent date
latest_rent_date = zori.columns[zori.columns.str.match(r"\d{4}-\d{2}-\d{2}")].max()

# Ensure numeric columns for calculations and handle missing data
zori[latest_rent_date] = pd.to_numeric(zori[latest_rent_date], errors="coerce")
zori.dropna(subset=[latest_rent_date], inplace=True)

# Merge rental data with sales data
rental_data = sales_data.merge(
    zori[["RegionName", latest_rent_date]],
    on="RegionName",
    suffixes=("_sales", "_rent")
)

# Calculate monthly and annual rental income
rental_data["Monthly_Rental_Income"] = rental_data[latest_rent_date]
rental_data["Annual_Rental_Income"] = rental_data["Monthly_Rental_Income"] * 12

# Assume maintenance costs and net annual rental income
maintenance_costs = 200 * 12  # $200/month for maintenance
annual_taxes = 0.01 * purchase_price  # 1% of purchase price annually
rental_data["Net_Annual_Rental_Income"] = (
    rental_data["Annual_Rental_Income"] - maintenance_costs - annual_taxes
)

# Calculate rental ROI
rental_data["Rental_ROI (%)"] = (rental_data["Net_Annual_Rental_Income"] / purchase_price) * 100

# Combined ROI Analysis
combined_roi = sales_data[["RegionName", "Sales_ROI (%)"]].merge(
    rental_data[["RegionName", "Rental_ROI (%)"]],
    on="RegionName"
)

# Handle outliers and invalid values in combined ROI
combined_roi.replace([np.inf, -np.inf], np.nan, inplace=True)
combined_roi.dropna(subset=["Sales_ROI (%)", "Rental_ROI (%)"], inplace=True)

# Normalize ROI values
scaler = MinMaxScaler()
combined_roi[["Sales_ROI_norm", "Rental_ROI_norm"]] = scaler.fit_transform(
    combined_roi[["Sales_ROI (%)", "Rental_ROI (%)"]]
)

# Calculate overall ROI score (weighted equally for sales and rental)
combined_roi["Overall_ROI_Score"] = (
    combined_roi["Sales_ROI_norm"] + combined_roi["Rental_ROI_norm"]
) / 2

# Rank regions based on the overall ROI score
top_overall_regions = combined_roi.sort_values(by="Overall_ROI_Score", ascending=False).head(10)

# Display top regions
print(top_overall_regions[["RegionName", "Sales_ROI (%)", "Rental_ROI (%)", "Overall_ROI_Score"]])


KeyError: nan