# TFT (Long forecasting)
#### Sliding Window Forecasting - 3 years in, 1 year out

### Needed for running in Colab

In [None]:
# Optional Google Colab bootstrap. Everything below is intentionally commented
# out; uncomment only when running this notebook in Colab.

# !pip install torch torchvision torchaudio pandas numpy scikit-learn

# from google.colab import files
# uploaded = files.upload()

# # Mount Google Drive
# from google.colab import drive
# drive.mount('/content/drive')

## Importing Data

In [1]:
import pandas as pd

# Location of the raw CSV, kept in one place so switching environments is a
# one-line change. On Colab (after uploading the file) use:
# DATA_PATH = 'ConsumptionIndustry.csv'
DATA_PATH = '/Users/casper/Documents/GitHub/p9-energy/Dataset/ConsumptionIndustry.csv'

# The file is read as semicolon-separated.
df = pd.read_csv(DATA_PATH, sep=';')

# Convert HourDK to datetime
df['HourDK'] = pd.to_datetime(df['HourDK'])

# Convert ConsumptionkWh to numeric. The column is expected to arrive as text
# with a decimal comma, so swap the comma for a dot before casting.
# regex=False makes the replacement a literal one on every pandas version.
df['ConsumptionkWh'] = (
    df['ConsumptionkWh'].str.replace(",", ".", regex=False).astype(float)
)

print(df.head())
print('\n')
print(df.tail())
print('\n')
# DataFrame.info() prints its report itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
df.info()
print('\n')

            HourUTC              HourDK  MunicipalityNo Branche  \
0  2021-01-01 00:00 2021-01-01 01:00:00             851  Privat   
1  2021-01-01 01:00 2021-01-01 02:00:00             851  Privat   
2  2021-01-01 02:00 2021-01-01 03:00:00             851  Privat   
3  2021-01-01 03:00 2021-01-01 04:00:00             851  Privat   
4  2021-01-01 04:00 2021-01-01 05:00:00             851  Privat   

   ConsumptionkWh  
0       35086.772  
1       31777.762  
2       28423.659  
3       25675.926  
4       24283.909  


                HourUTC              HourDK  MunicipalityNo Branche  \
33834  2024-11-10 18:00 2024-11-10 19:00:00             851  Privat   
33835  2024-11-10 19:00 2024-11-10 20:00:00             851  Privat   
33836  2024-11-10 20:00 2024-11-10 21:00:00             851  Privat   
33837  2024-11-10 21:00 2024-11-10 22:00:00             851  Privat   
33838  2024-11-10 22:00 2024-11-10 23:00:00             851  Privat   

       ConsumptionkWh  
33834       48584.696  


## Data preparation + Feature Engineering

In [None]:
import numpy as np

# Holidays in Denmark from 2021 to 2024 (source: https://publicholidays.dk/)
# NOTE(review): several entries (e.g. 2023-03-24..26, 2024-03-30, 2024-05-05,
# the yearly 06-24) do not look like Danish public holidays, and some expected
# ones appear to be missing - verify the list against the cited source.
holidays = pd.to_datetime([
    # 2021
    '2021-01-01', '2021-04-01', '2021-04-02', '2021-04-05', '2021-05-13',
    '2021-05-21', '2021-06-01', '2021-06-24', '2021-12-24', '2021-12-25',
    '2021-12-26', '2021-12-31',
    # 2022
    '2022-01-01', '2022-04-14', '2022-04-15', '2022-04-18', '2022-05-05',
    '2022-05-13', '2022-05-26', '2022-06-05', '2022-06-24', '2022-12-24',
    '2022-12-25', '2022-12-26', '2022-12-31',
    # 2023
    '2023-01-01', '2023-03-24', '2023-03-25', '2023-03-26', '2023-04-07',
    '2023-05-05', '2023-05-13', '2023-05-26', '2023-06-05', '2023-06-24',
    '2023-12-24', '2023-12-25', '2023-12-26', '2023-12-31',
    # 2024
    '2024-01-01', '2024-03-28', '2024-03-29', '2024-03-30', '2024-04-05',
    '2024-05-05', '2024-05-13', '2024-05-26', '2024-06-05', '2024-06-24',
])


def add_time_series_features(frame, holiday_dates):
    """Return a new frame with lag, rolling, holiday and calendar features.

    The input frame is not modified. Rows that contain NaN in any column are
    dropped from the result; this removes at least the first 168 rows, for
    which the one-week lag and rolling window are undefined.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain 'HourDK' (datetime-like) and 'ConsumptionkWh' (numeric).
        Lags are positional (row shifts), so rows are assumed to be
        consecutive hours in chronological order - TODO confirm.
    holiday_dates : pandas.DatetimeIndex
        Midnight timestamps of the days to flag as holidays.

    Returns
    -------
    pandas.DataFrame
        Copy of ``frame`` with the engineered columns appended.
    """
    timestamps = pd.to_datetime(frame['HourDK'])
    consumption = frame['ConsumptionkWh']
    day_of_week = timestamps.dt.dayofweek

    # Angles for the cyclical (sine/cosine) encodings.
    hour_angle = 2 * np.pi * timestamps.dt.hour / 24
    day_angle = 2 * np.pi * day_of_week / 7
    month_angle = 2 * np.pi * timestamps.dt.month / 12

    enriched = frame.assign(
        HourDK=timestamps,
        # Lag features: previous hour, previous day, previous week.
        ConsumptionkWh_lag1=consumption.shift(1),
        ConsumptionkWh_lag24=consumption.shift(24),
        ConsumptionkWh_lag168=consumption.shift(168),
        # Rolling averages. The window ends at the current row, so it includes
        # the current hour's consumption.
        # NOTE(review): if these are used as inputs for predicting the same
        # timestamp this leaks the target - confirm how the model uses them.
        ConsumptionkWh_roll24=consumption.rolling(window=24).mean(),
        ConsumptionkWh_roll168=consumption.rolling(window=168).mean(),
        # Compare calendar days as normalized timestamps rather than per-row
        # datetime.date objects. Stored as 0/1 like is_weekend, so a mixed
        # bool/float selection does not turn `.values` into an object array.
        is_holiday=timestamps.dt.normalize().isin(holiday_dates).astype(int),
        # Weekday (Monday=0) and weekend flag.
        day_of_week=day_of_week,
        is_weekend=day_of_week.isin([5, 6]).astype(int),
        # Hour of the day (0-23), day of the week (0-6) and month (1-12)
        # as sine/cosine pairs.
        hour_sin=np.sin(hour_angle),
        hour_cos=np.cos(hour_angle),
        day_sin=np.sin(day_angle),
        day_cos=np.cos(day_angle),
        month_sin=np.sin(month_angle),
        month_cos=np.cos(month_angle),
    )

    # drop NaN values
    return enriched.dropna()


# Re-running this cell on an already processed frame would shift the trimmed
# series again and silently drop another 168 rows, so only compute the
# features once per freshly loaded frame.
if 'ConsumptionkWh_lag168' in df.columns:
    print('Feature columns already present - re-run the data loading cell to recompute them.')
else:
    df = add_time_series_features(df, holidays)

print(df.head())
# DataFrame.info() prints its report itself and returns None; no print() needed.
df.info()

              HourUTC              HourDK  MunicipalityNo Branche  \
168  2021-01-08 00:00 2021-01-08 01:00:00             851  Privat   
169  2021-01-08 01:00 2021-01-08 02:00:00             851  Privat   
170  2021-01-08 02:00 2021-01-08 03:00:00             851  Privat   
171  2021-01-08 03:00 2021-01-08 04:00:00             851  Privat   
172  2021-01-08 04:00 2021-01-08 05:00:00             851  Privat   

     ConsumptionkWh  ConsumptionkWh_lag1  ConsumptionkWh_lag24  \
168       26017.693            28924.472             26466.212   
169       24636.978            26017.693             24937.988   
170       24047.257            24636.978             24296.799   
171       24098.255            24047.257             24204.419   
172       25498.785            24098.255             25616.628   

     ConsumptionkWh_lag168  ConsumptionkWh_roll24  ConsumptionkWh_roll168  \
168              35086.772           42256.679583            42076.515690   
169              31777.762        