In [34]:
import pandas as pd
import holidays

# Load the raw load time series.
df = pd.read_csv('Entire_load_data_2021_2024.csv')

# pandas labels a column whose CSV header is empty 'Unnamed: 0'; give it a
# meaningful name. rename() returns a new frame and leaves the columns untouched
# when that label is absent, so no in-place mutation is needed.
df = df.rename(columns={'Unnamed: 0': 'Time'})

# Parse the timestamps as timezone-aware UTC.
# NOTE(review): every calendar feature and the holiday flag below are read off the
# UTC clock, whereas German public holidays follow local time. Confirm whether
# 'Time' should be converted to Europe/Berlin before deriving them.
df['Time'] = pd.to_datetime(df['Time'], utc=True)

# Calendar-based features derived from the timestamp.
df['hour'] = df['Time'].dt.hour              # Hour of the day (0-23)
df['day_of_week'] = df['Time'].dt.dayofweek  # Day of the week (0=Monday, 6=Sunday)
df['month'] = df['Time'].dt.month            # Month (1-12)
df['day_of_year'] = df['Time'].dt.dayofyear  # Day of the year
df['is_weekend'] = df['day_of_week'].isin([5, 6]).astype(int)  # Saturday=5, Sunday=6

# German public-holiday calendar. No subdivision is passed, so state-specific
# holidays are presumably not included (verify against the holidays package docs).
german_holidays = holidays.Germany()

# Holiday flag (1 = public holiday, 0 = otherwise).
# Many rows share the same calendar date, so each distinct date is tested against
# the holiday calendar once and the rows are then flagged with a vectorised isin(),
# instead of running a Python-level membership test for every row.
observed_dates = df['Time'].dt.date
holiday_dates = [day for day in observed_dates.unique() if day in german_holidays]
df['is_holiday'] = observed_dates.isin(holiday_dates).astype(int)

# Quick visual check of the derived columns.
print(df[['Time', 'hour', 'day_of_week', 'month', 'is_weekend', 'is_holiday']].head())

# Persist the enriched dataset (row index omitted).
df.to_csv('updated_load_data_with_german_holidays.csv', index=False)


                       Time  hour  day_of_week  month  is_weekend  is_holiday
0 2021-06-01 00:00:00+00:00     0            1      6           0           0
1 2021-06-01 00:15:00+00:00     0            1      6           0           0
2 2021-06-01 00:30:00+00:00     0            1      6           0           0
3 2021-06-01 00:45:00+00:00     0            1      6           0           0
4 2021-06-01 01:00:00+00:00     1            1      6           0           0


In [35]:
# Write every column of the frame (including 'Actual Load') to disk,
# leaving out the row index.
df.to_csv(
    path_or_buf='merged_load_data_with_calendar_features.csv',
    index=False,
)


In [36]:
# Show which columns the frame currently holds.
column_index = df.columns
print(column_index)


Index(['Time', 'Actual Load', 'hour', 'day_of_week', 'month', 'day_of_year',
       'is_weekend', 'is_holiday'],
      dtype='object')


In [38]:
# Recompute the weekday number on a 1-based scale (1=Monday ... 7=Sunday).
# This overwrites whatever the 'day_of_week' column held before.
df['day_of_week'] = df['Time'].dt.weekday.add(1)


In [39]:
# Write the current state of the frame to CSV, leaving out the row index.
df.to_csv(
    path_or_buf='updated_load_data.csv',
    index=False,
)
