In [2]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

In [10]:
# Parameters
start_date = datetime(2025, 3, 10)  # First simulated day
n_days = 300                        # Number of consecutive days to simulate
n_minutes = 240                     # Readings per day: one per minute for 4 hours (08:00-11:59)
random_seed = 42                    # Fixed seed so Restart & Run All regenerates identical data

# A dedicated Generator keeps this cell reproducible without touching NumPy's global random state.
rng = np.random.default_rng(random_seed)

# Timestamps: every day contributes n_minutes consecutive one-minute readings starting at 08:00.
# Broadcasting (n_days, 1) day starts against (1, n_minutes) offsets and flattening row-major
# yields day-major order, i.e. all of day 0's minutes, then day 1's, and so on.
window_open = start_date.replace(hour=8, minute=0, second=0)
day_starts = pd.date_range(window_open, periods=n_days, freq="D").to_numpy()
minute_offsets = pd.to_timedelta(np.arange(n_minutes), unit="min").to_numpy()
timestamps = (day_starts[:, None] + minute_offsets[None, :]).ravel()

# Water usage: Beta(0.5, 4) puts most of its mass near 0, so after the affine rescale
# value = beta_value * 1.6 + 0.4 the readings lie in [0.4, 2.0] and cluster near 0.4.
usage = rng.beta(0.5, 4, size=(n_days, n_minutes)) * 1.6 + 0.4

# Create DataFrame and save to CSV (the file name follows n_days so it never misreports the span)
df = pd.DataFrame({
    'timestamp': timestamps,
    'water_usage_liters': np.round(usage.ravel(), 2),
})
assert len(df) == n_days * n_minutes, "expected one row per simulated minute"
df.to_csv(f"water_usage_{n_days}_days.csv", index=False)
df


Unnamed: 0,timestamp,water_usage_liters
0,2025-03-10 08:00:00,1.00
1,2025-03-10 08:01:00,0.64
2,2025-03-10 08:02:00,0.43
3,2025-03-10 08:03:00,0.40
4,2025-03-10 08:04:00,0.67
...,...,...
71995,2026-01-03 11:55:00,0.40
71996,2026-01-03 11:56:00,0.50
71997,2026-01-03 11:57:00,0.53
71998,2026-01-03 11:58:00,0.42


In [6]:
# Display the generated water-usage DataFrame (pandas truncates the rendering to head/tail rows).
# NOTE(review): the output recorded under this cell (indices up to ~2398, last day 2025-04-18,
# times within 08:00-08:59) does not match the 300-day x 240-minute frame built in In [10], and
# this cell's execution count (6) precedes that one (10) - the output looks stale; re-run to confirm.
df

Unnamed: 0,timestamp,water_usage_liters
0,2025-03-10 08:00:00,0.47
1,2025-03-10 08:01:00,0.76
2,2025-03-10 08:02:00,1.07
3,2025-03-10 08:03:00,0.46
4,2025-03-10 08:04:00,0.41
...,...,...
2395,2025-04-18 08:55:00,0.40
2396,2025-04-18 08:56:00,0.62
2397,2025-04-18 08:57:00,0.59
2398,2025-04-18 08:58:00,0.97
