In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Synthetic Data One

In [2]:
def generate_building_data():
    # Initialize lists to store data
    data = []
    
    # Start date and time
    start_date = datetime(2024, 1, 1, 7, 0)  # Starting from 7:00 AM on a Sunday
    days = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']
    
    # Generate data for 7 days
    for day_num in range(7):
        current_date = start_date + timedelta(days=day_num)
        
        # Generate 72 entries per day (20-minute intervals)
        for interval in range(72):
            current_time = current_date + timedelta(minutes=20*interval)
            time_str = current_time.strftime("%H:%M")
            
            # Temperature pattern (°C): Lower at night, higher during day
            hour = current_time.hour
            base_temp = 20  # Base temperature
            temp_variation = 5 * np.sin(np.pi * (hour - 6) / 12)  # Peak at 12-1 PM
            temp = round(base_temp + temp_variation + np.random.normal(0, 0.5), 1)
            
            # Humidity pattern (%): Higher at night and early morning
            base_humidity = 60
            humidity_variation = -10 * np.sin(np.pi * (hour - 6) / 12)
            humidity = round(min(85, max(40, base_humidity + humidity_variation + np.random.normal(0, 2))), 1)
            
            # Light intensity (lux): Following daylight pattern
            if 6 <= hour < 18:  # Daytime
                light = round(max(0, 1000 * np.sin(np.pi * (hour - 6) / 12) + np.random.normal(0, 100)))
            else:  # Night time
                light = round(max(0, np.random.normal(5, 2)))  # Very low light at night
            
            # Occupancy pattern
            is_weekend = days[day_num] in ['Saturday', 'Sunday']
            if is_weekend:
                occupancy = 1 if (10 <= hour <= 18) and np.random.random() < 0.3 else 0
            else:  # Weekday
                if 9 <= hour <= 17:  # Working hours
                    occupancy = 1 if np.random.random() < 0.8 else 0
                elif 7 <= hour < 9 or 17 < hour <= 19:  # Transition hours
                    occupancy = 1 if np.random.random() < 0.4 else 0
                else:  # Night time
                    occupancy = 0
            
            # Energy consumption (kWh): Based on all factors
            base_energy = 5  # Base load
            
            # HVAC impact (temperature-dependent)
            temp_impact = 0.5 * abs(temp - 22)  # Optimal temperature is 22°C
            
            # Occupancy impact
            occupancy_impact = 3 if occupancy else 0
            
            # Time of day impact
            time_impact = 2 * np.sin(np.pi * (hour - 6) / 12)
            
            # Light usage impact
            light_impact = 0.5 if light < 200 and occupancy == 1 else 0
            
            # Weekend/Weekday factor
            day_factor = 0.7 if is_weekend else 1.0
            
            # Calculate total energy with some random variation
            energy = round(day_factor * (base_energy + temp_impact + occupancy_impact + time_impact + light_impact) + 
                         np.random.normal(0, 0.5), 2)
            
            # Append the data
            data.append({
                'Time': time_str,
                'Day': days[day_num],
                'Temp': temp,
                'Humidity': humidity,
                'Light_Intensity': light,
                'Occupancy': occupancy,
                'Energy': energy
            })
    
    df = pd.DataFrame(data)
    return df

In [3]:
df = generate_building_data()

df.to_csv('building_energy_data.csv', index=False)

print("\nDataset shape:", df.shape)
display(df.head())


Dataset shape: (504, 7)


Unnamed: 0,Time,Day,Temp,Humidity,Light_Intensity,Occupancy,Energy
0,07:00,Sunday,22.3,58.5,209,0,3.33
1,07:20,Sunday,21.4,57.8,115,0,3.73
2,07:40,Sunday,20.8,57.2,164,0,5.56
3,08:00,Sunday,21.9,55.7,530,0,4.01
4,08:20,Sunday,22.5,56.4,591,0,3.71


In [4]:
df['Occupancy'].value_counts()

Occupancy
0    354
1    150
Name: count, dtype: int64

# Synthetic Data Two

In [5]:
def generate_building_data(num_rows=2000):
    # Initialize lists to store data
    data = []
    
    # Start date and time
    start_date = datetime(2024, 1, 1, 7, 0)  # Starting from 7:00 AM on a Sunday
    days = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']
    
    # Calculate number of complete days needed
    intervals_per_day = 72  # 20-minute intervals in a day
    
    # Generate data for the specified number of rows
    for row in range(num_rows):
        current_date = start_date + timedelta(minutes=20*row)
        time_str = current_date.strftime("%H:%M")
        day_name = days[current_date.weekday()]
        
        # Temperature pattern (°C): Lower at night, higher during day
        hour = current_date.hour
        # Add some day-to-day variation in base temperature
        daily_temp_variation = np.sin(2 * np.pi * row / (intervals_per_day * 7)) * 2  # Weekly temperature cycle
        base_temp = 20 + daily_temp_variation
        temp_variation = 5 * np.sin(np.pi * (hour - 6) / 12)  # Peak at 12-1 PM
        temp = round(base_temp + temp_variation + np.random.normal(0, 0.5), 1)
        
        # Humidity pattern (%): Higher at night and early morning
        base_humidity = 60
        humidity_variation = -10 * np.sin(np.pi * (hour - 6) / 12)
        # Add some day-to-day variation in humidity
        daily_humidity_variation = np.cos(2 * np.pi * row / (intervals_per_day * 7)) * 5
        humidity = round(min(85, max(40, base_humidity + humidity_variation + daily_humidity_variation + np.random.normal(0, 2))), 1)
        
        # Light intensity (lux): Following daylight pattern
        if 6 <= hour < 18:  # Daytime
            # Add some variation for cloudy days
            cloud_factor = max(0.3, min(1, np.random.normal(0.8, 0.2)))
            light = round(max(0, cloud_factor * 1000 * np.sin(np.pi * (hour - 6) / 12) + np.random.normal(0, 100)))
        else:  # Night time
            light = round(max(0, np.random.normal(5, 2)))
        
        # Occupancy pattern
        is_weekend = day_name in ['Saturday', 'Sunday']
        if is_weekend:
            occupancy = 1 if (10 <= hour <= 18) and np.random.random() < 0.3 else 0
        else:  # Weekday
            if 9 <= hour <= 17:  # Working hours
                occupancy = 1 if np.random.random() < 0.8 else 0
            elif 7 <= hour < 9 or 17 < hour <= 19:  # Transition hours
                occupancy = 1 if np.random.random() < 0.4 else 0
            else:  # Night time
                occupancy = 0
        
        # Energy consumption (kWh): Based on all factors
        base_energy = 5  # Base load
        
        # HVAC impact (temperature-dependent)
        temp_impact = 0.5 * abs(temp - 22)  # Optimal temperature is 22°C
        
        # Occupancy impact
        occupancy_impact = 3 if occupancy else 0
        
        # Time of day impact
        time_impact = 2 * np.sin(np.pi * (hour - 6) / 12)
        
        # Light usage impact
        light_impact = 0.5 if light < 200 and occupancy == 1 else 0
        
        # Weekend/Weekday factor
        day_factor = 0.7 if is_weekend else 1.0
        
        # Seasonal variation (simulating a month-long pattern)
        seasonal_factor = 1 + 0.1 * np.sin(2 * np.pi * row / num_rows)
        
        # Calculate total energy with some random variation
        energy = round(seasonal_factor * day_factor * (base_energy + temp_impact + occupancy_impact + 
                                                     time_impact + light_impact) + np.random.normal(0, 0.5), 2)
        
        # Append the data
        data.append({
            'Time': time_str,
            'Day': day_name,
            'Temp': temp,
            'Humidity': humidity,
            'Light_Intensity': light,
            'Occupancy': occupancy,
            'Energy': energy
        })
    
    df = pd.DataFrame(data)
    return df

In [8]:
data = generate_building_data(2000)

In [9]:
print("Dataset shape:", data.shape)
print("\nSummary statistics:")
print(data.describe())

print("\nFirst few rows:")
display(data)

data.to_csv('generated_two.csv', index=False)

Dataset shape: (2000, 7)

Summary statistics:
              Temp     Humidity  Light_Intensity    Occupancy       Energy
count  2000.000000  2000.000000      2000.000000  2000.000000  2000.000000
mean     20.033550    59.912500       252.529500     0.298000     7.126735
std       3.817697     8.069426       316.731801     0.457494     2.483395
min      12.400000    40.400000         0.000000     0.000000     2.730000
25%      16.900000    53.600000         5.000000     0.000000     5.200000
50%      20.050000    59.700000         9.000000     0.000000     6.460000
75%      23.200000    66.300000       499.250000     1.000000     8.855000
max      27.700000    78.100000      1258.000000     1.000000    14.610000

First few rows:


Unnamed: 0,Time,Day,Temp,Humidity,Light_Intensity,Occupancy,Energy
0,07:00,Sunday,21.2,63.2,295,0,3.79
1,07:20,Sunday,22.0,61.5,245,0,3.61
2,07:40,Sunday,21.2,63.7,326,0,3.55
3,08:00,Sunday,22.8,59.2,275,0,3.98
4,08:20,Sunday,22.4,59.2,323,0,4.03
...,...,...,...,...,...,...,...
1995,00:00,Sunday,13.6,77.2,5,0,4.87
1996,00:20,Sunday,14.5,73.8,2,0,4.14
1997,00:40,Sunday,14.5,76.7,8,0,3.76
1998,01:00,Sunday,14.6,75.7,6,0,5.10
