In [None]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

In [None]:
# Simulation parameters
SEED = 42  # fixed seed so a fresh "Restart & Run All" regenerates the same dataset
num_meters = 50
start_date = datetime(2025, 4, 1)
end_date = datetime(2025, 5, 1)  # exclusive upper bound of the simulated window
time_interval = timedelta(minutes=15)  # finer granularity

# The notebook draws from both the stdlib and the NumPy global generators
# (pandas sampling falls back to NumPy's global state), so seed both up front.
random.seed(SEED)
np.random.seed(SEED)

In [None]:
# Fixed lookup lists used by the simulation.
# Meter identifiers are numbered from 1: "meter_1" ... "meter_<num_meters>".
meter_ids = [f"meter_{number}" for number in range(1, num_meters + 1)]
meter_types = [
    'residential',
    'industrial',
]
districts = [
    'North',
    'South',
    'Central',
    'East',
    'West',
]

In [None]:
# Draw each meter's fixed attributes once, before the time loop, so that every
# reading of a given meter_id reports the same type, district and coordinates.
# (Previously these were re-drawn per reading, so a single meter changed type,
# district and location at every time step.)
meter_profiles = {
    meter_id: {
        "meter_type": random.choice(meter_types),
        "district": random.choice(districts),
        "latitude": round(np.random.uniform(6.4, 6.6), 6),
        "longitude": round(np.random.uniform(3.3, 3.5), 6),
    }
    for meter_id in meter_ids
}

data = []

# Main simulation loop: one reading per meter for every time step in
# [start_date, end_date), advancing by time_interval.
current_time = start_date
while current_time < end_date:
    for meter_id in meter_ids:
        profile = meter_profiles[meter_id]
        meter_type = profile["meter_type"]

        # Status is drawn per reading: 90% active, 5% offline, 5% maintenance.
        status = random.choices(['active', 'offline', 'maintenance'], weights=[0.9, 0.05, 0.05])[0]

        # Base values: flow is centred on 15 for residential meters and 35
        # otherwise (std 5); pressure is centred on 60 (std 10).
        flow = np.random.normal(loc=15 if meter_type == 'residential' else 35, scale=5)
        pressure = np.random.normal(loc=60, scale=10)

        # Sensor errors, each injected independently with 2% probability:
        # a negative flow reading and a missing pressure reading.
        if random.random() < 0.02:
            flow = -abs(flow)
        if random.random() < 0.02:
            pressure = None

        # The record is marked as updated 1-10 minutes (inclusive) after the reading.
        updated_at = current_time + timedelta(minutes=random.randint(1, 10))

        data.append({
            "meter_id": meter_id,
            "timestamp": current_time,
            "flow_lmin": round(flow, 2),
            "pressure_psi": round(pressure, 2) if pressure is not None else None,
            "latitude": profile["latitude"],
            "longitude": profile["longitude"],
            "updated_at": updated_at,
            "meter_type": meter_type,
            "district": profile["district"],
            "status": status
        })
    current_time += time_interval

# Build the frame once from the accumulated records.
df = pd.DataFrame(data)

In [None]:
# Inject duplicate rows: re-sample 5% of the readings, push each copy's
# `updated_at` forward by 1-4 whole minutes, and append the copies to the frame.
# NOTE: this reassigns `df`, so re-running the cell adds a further batch of duplicates.
duplicates = df.sample(frac=0.05)
minute_offsets = np.random.randint(1, 5, size=len(duplicates))
duplicates = duplicates.assign(
    updated_at=duplicates['updated_at'] + pd.to_timedelta(minute_offsets, unit='m')
)
df = pd.concat([df, duplicates], ignore_index=True)

In [None]:
# Write the simulated readings (including the injected duplicates) to a CSV in
# the current working directory, omitting the DataFrame index column.
df.to_csv(path_or_buf="smart_water_data.csv", index=False)
print("Data exported to smart_water_data.csv")

Data exported to smart_water_data.csv
