In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# --- Configuration ---
START_DATE = datetime(2023, 1, 1)
END_DATE = datetime(2024, 12, 31)
NUM_DAYS = (END_DATE - START_DATE).days + 1  # Inclusive of both endpoints
NUM_PROPERTIES = 1000  # Total unique properties
CITIES = [
    {"name": "New York", "lat": 40.7128, "lon": -74.0060, "country": "USA", "state": "NY"},
    {"name": "Los Angeles", "lat": 34.0522, "lon": -118.2437, "country": "USA", "state": "CA"},
    {"name": "London", "lat": 51.5074, "lon": -0.1278, "country": "UK", "state": "England"},
    {"name": "Paris", "lat": 48.8566, "lon": 2.3522, "country": "France", "state": "Ile-de-France"},
    {"name": "Sydney", "lat": -33.8688, "lon": 151.2093, "country": "Australia", "state": "NSW"},
    {"name": "Toronto", "lat": 43.6532, "lon": -79.3832, "country": "Canada", "state": "ON"},
    {"name": "Berlin", "lat": 52.5200, "lon": 13.4050, "country": "Germany", "state": "Berlin"},
]
PROPERTY_TYPES = ['Apartment', 'House', 'Commercial']
RANDOM_SEED = 45  # Seeds both `random` and `numpy.random` for reproducibility

# --- Generate Property IDs ---
property_ids = [f"PROP-{i:04d}" for i in range(NUM_PROPERTIES)]


# --- Generate Daily Data Points for Properties ---
def generate_property_rows(prop_id, start_date=START_DATE, num_days=NUM_DAYS):
    """Return one synthetic record per day for a single property.

    Static attributes (city, type, size, base price/yield, map coordinates)
    are drawn once; sale price, rental yield and the market indices are
    re-drawn for every day. Draws come from the global ``random`` and
    ``numpy.random`` states, so seed them first for reproducible output.

    Args:
        prop_id: Identifier stored in the ``PropertyID`` column.
        start_date: ``datetime`` of the first generated day.
        num_days: Number of consecutive days to generate.

    Returns:
        A list of ``num_days`` dicts, one per day, in chronological order.
    """
    city_data = random.choice(CITIES)
    prop_type = random.choice(PROPERTY_TYPES)
    is_commercial = prop_type == 'Commercial'
    # Commercial can have 0-1 beds
    bedrooms = random.choice([0, 1]) if is_commercial else random.choice([1, 2, 3, 4])
    bathrooms = random.choice([1, 1.5, 2])
    sq_footage = random.randint(1000, 10000) if is_commercial else random.randint(500, 3000)

    # Base price/yield for the property
    base_price = np.random.uniform(200000, 1500000)
    base_yield = np.random.uniform(0.03, 0.07)  # 3-7% yield

    # Introduce some variation per property type
    if prop_type == 'House':
        base_price *= 1.2
        base_yield *= 0.9
    elif is_commercial:
        base_price *= 2.0
        base_yield *= 1.1

    # Small random offset for map. Drawn once per property: a building does
    # not move, so every daily row must share the same coordinates.
    latitude = city_data['lat'] + np.random.uniform(-0.1, 0.1)
    longitude = city_data['lon'] + np.random.uniform(-0.1, 0.1)

    rows = []
    for i in range(num_days):
        current_date = start_date + timedelta(days=i)

        # Daily noise as a *fraction* of the base price (sigma = 0.5%). It is
        # added inside the (1 + ...) multiplier below, so it must be
        # dimensionless; scaling it by base_price would blow prices up to
        # +/- billions.
        price_noise = np.random.normal(0, 0.005)
        seasonal_factor = np.sin(current_date.month / 12 * 2 * np.pi) * 0.02  # Monthly seasonality
        trend = i / 365 * 0.02  # 2% linear appreciation per year since start_date

        sale_price = base_price * (1 + trend + seasonal_factor + price_noise)
        rental_yield = base_yield * (1 + np.random.normal(0, 0.005) + seasonal_factor)

        # Simulate market indicators, clamped to [0, 100]. The noise term is
        # drawn per row, so values differ between properties on the same day.
        market_demand = max(0, min(100, 50 + np.sin(i / 30 * np.pi) * 20 + np.random.normal(0, 5)))
        market_supply = max(0, min(100, 50 - np.cos(i / 45 * np.pi) * 15 + np.random.normal(0, 4)))

        rows.append({
            'PropertyID': prop_id,
            'Date': current_date.strftime('%Y-%m-%d'),
            'City': city_data['name'],
            'State': city_data['state'],
            'Country': city_data['country'],
            'Latitude': latitude,
            'Longitude': longitude,
            'PropertyType': prop_type,
            'Bedrooms': bedrooms,
            'Bathrooms': bathrooms,
            'SquareFootage': sq_footage,
            'SalePrice': round(sale_price, 2),
            'RentalYield': round(rental_yield, 4),  # Store as decimal for Power BI
            'MarketDemandIndex': round(market_demand, 2),
            'MarketSupplyIndex': round(market_supply, 2),
        })
    return rows


def generate_market_data(prop_ids=None, start_date=START_DATE, num_days=NUM_DAYS,
                         seed=RANDOM_SEED):
    """Generate the full synthetic dataset as a list of row dicts.

    Args:
        prop_ids: Iterable of property identifiers; defaults to the
            module-level ``property_ids``.
        start_date: ``datetime`` of the first generated day.
        num_days: Number of consecutive days generated per property.
        seed: Seed applied to both ``numpy.random`` and ``random`` so that
            repeated calls with the same arguments return identical rows.

    Returns:
        A list with ``len(prop_ids) * num_days`` dicts, grouped by property
        and ordered by date within each property.
    """
    if prop_ids is None:
        prop_ids = property_ids

    np.random.seed(seed)
    random.seed(seed)

    rows = []
    for prop_id in prop_ids:
        rows.extend(generate_property_rows(prop_id, start_date, num_days))
    return rows


# Runs when executed as a script or notebook cell (where __name__ is
# "__main__"); the names below stay module-level so later cells can use them.
if __name__ == "__main__":
    data = generate_market_data(property_ids)

    real_estate_df = pd.DataFrame(data)
    real_estate_df.to_csv('real_estate_market_data.csv', index=False)
    print("Generated real_estate_market_data.csv")
    print(real_estate_df.head())
    # info() writes its report to stdout itself and returns None, so it is
    # called directly rather than wrapped in print().
    real_estate_df.info()
    print(real_estate_df.describe())


Generated real_estate_market_data.csv
  PropertyID        Date    City    State Country   Latitude  Longitude  \
0  PROP-0000  2023-01-01  London  England      UK  51.578636  -0.097780   
1  PROP-0000  2023-01-02  London  England      UK  51.509340  -0.205308   
2  PROP-0000  2023-01-03  London  England      UK  51.527601  -0.146441   
3  PROP-0000  2023-01-04  London  England      UK  51.552727  -0.165785   
4  PROP-0000  2023-01-05  London  England      UK  51.591856  -0.094534   

  PropertyType  Bedrooms  Bathrooms  SquareFootage     SalePrice  RentalYield  \
0        House         4        1.5            835 -6.278213e+09       0.0466   
1        House         4        1.5            835 -2.474749e+08       0.0474   
2        House         4        1.5            835 -1.929297e+09       0.0469   
3        House         4        1.5            835 -7.937438e+09       0.0469   
4        House         4        1.5            835 -3.012814e+09       0.0473   

   MarketDemandIndex  Ma