### Setup and imports

In [1]:
import numpy as np
import pandas as pd
import random

In [2]:
random.seed(42)
np.random.seed(42)

### Base data

In [4]:
# Example values
locations = ['Milan', 'Rome', 'Naples', 'Florence', 'Turin', 'Bologna', 'Palermo', 'Genoa']
energy_classes = ['A', 'B', 'C', 'D', 'E', 'F', 'G']

n_rows = 150

### Generate data rows function

In [10]:
def generate_property(index):
    size = np.random.randint(40, 200)  # m²
    rooms = np.random.randint(2, 7)
    bathrooms = np.random.randint(1, 4)
    year_built = np.random.randint(1950, 2023)
    floor = np.random.randint(0, 5)
    building_floors = np.random.randint(floor + 1, 10)
    
    has_elevator = int(building_floors >= 4)
    has_garden = int(random.random() < 0.3)
    has_balcony = int(random.random() < 0.6)
    garage = int(random.random() < 0.5)
    
    energy_class = random.choice(energy_classes)
    humidity = round(np.random.uniform(30, 70), 1)  # in percento
    temperature = round(np.random.uniform(12, 25), 1)
    noise = np.random.randint(20, 80)  # 0 = silenzio, 100 = forte
    aqi = np.random.randint(30, 150)  # AQI
    location = random.choice(locations)

    # Stima del valore base
    base_price = size * np.random.uniform(1.2, 3.5)  # €/m² in migliaia
    if energy_class in ['A', 'B']:
        base_price *= 1.05
    if has_garden:
        base_price += 10
    if has_balcony:
        base_price += 5
    if garage:
        base_price += 7

    valuation = round(base_price / 1000, 2)  # in k€

    return {
        "asset_id": f"asset_{index:04}",
        "location": location,
        "size_m2": size,
        "rooms": rooms,
        "bathrooms": bathrooms,
        "year_built": year_built,
        "floor": floor,
        "building_floors": building_floors,
        "has_elevator": has_elevator,
        "has_garden": has_garden,
        "has_balcony": has_balcony,
        "garage": garage,
        "energy_class": energy_class,
        "humidity_level": humidity,
        "temperature_avg": temperature,
        "noise_level": noise,
        "air_quality_index": aqi,
        "valuation_k": valuation
    }

### Generate DataFrame

In [9]:
data = [generate_property(i) for i in range(150)]
df = pd.DataFrame(data)

df.head()

Unnamed: 0,asset_id,location,size_m2,rooms,bathrooms,year_built,floor,building_floors,has_elevator,has_garden,has_balcony,garage,energy_class,humidity_level,temperature_avg,noise_level,air_quality_index,valuation_k
0,asset_0000,Genoa,70,4,2,1957,2,9,1,0,0,1,G,68.3,14.9,27,100,0.24
1,asset_0001,Florence,154,3,1,2006,2,8,1,0,0,0,D,60.1,18.9,24,146,0.34
2,asset_0002,Bologna,99,3,3,1994,4,6,1,0,0,1,E,39.5,13.4,48,52,0.24
3,asset_0003,Rome,145,3,3,1989,1,9,1,0,1,0,D,41.0,23.7,25,141,0.47
4,asset_0004,Florence,106,3,3,1992,0,4,1,0,0,0,C,43.0,15.0,38,34,0.34


### Export CSV

In [11]:
output_path = "../data/property_dataset_mvp.csv"
df.to_csv(output_path, index=False)

print(f"Dataset saved in: {output_path}")

Dataset saved in: ../data/property_dataset_mvp.csv
