In [2]:
import pandas as pd
import numpy as np

def elongate(df):
    df_long = pd.wide_to_long(df, i = "PRICES", j = "hour", stubnames=["Hour"], sep = " ").reset_index()
    df_long.rename(columns={"Hour": "price", "PRICES": "date"}, inplace = True)
    df_long['datetime'] = pd.to_datetime(df_long['date']) + pd.to_timedelta(df_long['hour'], unit='h')
    df_long.sort_values(['datetime'], ascending=[True], inplace=True)
    df_long['price'] = df_long['price'].astype(float) / 1000 # Convert price per MWh to price per KWh
    return df_long

train = elongate(pd.read_excel('data/train.xlsx'))
val = elongate(pd.read_excel('data/validate.xlsx'))

In [19]:
### FOURIER TRANSFORM ###

def fourier_top_freq(data, segment_size=72):
    '''
    Applies Fourier transform to segments of the 'price' data and extracts the top 3 frequencies.
    
    Parameters:
    data (DataFrame): Input data.
    segment_size (int): Number of data points in each segment for Fourier transform.
    
    Returns:
    DataFrame: The input data with top 3 Fourier frequencies for each segment.
    '''

    # Create new columns for the top 3 fourier frequencies
    for i in range(1, 4):
        data[f'fourier_freq_{i}'] = np.nan

    # For each range of data points, calculate the Fourier transform
    for i in range(segment_size, len(data), 1): # Start at <segment_size>
        # Fourier transform of the last <segment_size> data points
        segment = data['price'][i - segment_size:i]
        fourier_coeffs = np.fft.fft(segment)
        freqs = np.fft.fftfreq(segment_size, d=1)  # Assuming hourly data, hence d=1

        # Get indices of top 3 frequencies based on magnitude of Fourier coefficients
        indices = np.argsort(np.abs(fourier_coeffs))[::-1][1:4] # ::-1 to sort in descending order

        for j in range(3):
            column_name = f'fourier_freq_{j + 1}'
            data.loc[i, column_name] = freqs[indices[j]]

    return data

train = fourier_top_freq(train, segment_size=72)
val = fourier_top_freq(val)


            date  hour    price            datetime  fourier_freq_1  \
16439 2009-12-31    15  0.03300 2009-12-31 15:00:00        0.041667   
17535 2009-12-31    16  0.02982 2009-12-31 16:00:00       -0.041667   
18631 2009-12-31    17  0.03028 2009-12-31 17:00:00       -0.041667   
19727 2009-12-31    18  0.04000 2009-12-31 18:00:00       -0.041667   
20823 2009-12-31    19  0.04048 2009-12-31 19:00:00       -0.041667   
21919 2009-12-31    20  0.03600 2009-12-31 20:00:00        0.041667   
23015 2009-12-31    21  0.02900 2009-12-31 21:00:00       -0.041667   
24111 2009-12-31    22  0.02482 2009-12-31 22:00:00        0.041667   
25207 2009-12-31    23  0.03128 2009-12-31 23:00:00       -0.041667   
26303 2009-12-31    24  0.03100 2010-01-01 00:00:00       -0.041667   

       fourier_freq_2  fourier_freq_3  
16439       -0.041667        0.013889  
17535        0.041667        0.083333  
18631        0.041667        0.083333  
19727        0.041667        0.083333  
20823        0.041

In [None]:
### GRADIENT FEATURES ###

In [None]:
### HISTORIS FEATURES ###

In [None]:
### 