In [None]:
import pandas as pd
import time
import numpy as np
import datetime

In [None]:
import numpy as np
import pandas as pd

def positional_encoding(df, time_col='datetime_seconds', period=1.0, embedding_dim=64):
    """
    Add sinusoidal positional-encoding columns to a time-series DataFrame.

    For a row with time value ``t`` and each ``i`` in ``range(embedding_dim)``,
    column ``pos_enc_{i}`` is set to ``sin(a) + cos(a)`` where
    ``a = (2 * pi * t / period) * i / embedding_dim``.

    Parameters:
    - df: pandas DataFrame containing time series data (modified in place)
    - time_col: name of the numeric column holding the time values,
      expressed in the same unit as ``period``
    - period: the duration of one period in the time series (e.g., 1 month)
    - embedding_dim: the dimensionality of the positional encoding, i.e. the
      number of ``pos_enc_*`` columns that are added

    Returns:
    - the same DataFrame object, with columns ``pos_enc_0`` ..
      ``pos_enc_{embedding_dim - 1}`` added (one encoding row per input row)
    """
    time_steps = df[time_col].values

    # Keep the time axis as a column vector, shape (n_rows, 1), so that
    # broadcasting against the (embedding_dim,) position vector produces one
    # matrix row per DataFrame row. The opposite orientation yields an
    # (embedding_dim, n_rows) matrix, which pandas rejects on assignment
    # unless n_rows happens to equal embedding_dim.
    angle_rads = (2 * np.pi * time_steps / period)[:, np.newaxis]
    position = np.arange(embedding_dim)
    scaled_angles = angle_rads * position / embedding_dim

    # NOTE(review): sine and cosine are summed into a single value per
    # dimension rather than kept as separate channels; preserved as-is so the
    # encoding values themselves are unchanged.
    pos_enc_matrix = np.sin(scaled_angles) + np.cos(scaled_angles)

    # Add positional encoding columns to the original DataFrame
    pos_enc_columns = [f'pos_enc_{i}' for i in range(embedding_dim)]
    df[pos_enc_columns] = pos_enc_matrix

    return df

# Example usage:
# Build a frame of hourly timestamps (in seconds) spanning one 30-day month,
# then request a 64-dimensional positional encoding whose period is that month.
SECONDS_PER_HOUR = 60 * 60
SECONDS_PER_MONTH = 30 * 24 * SECONDS_PER_HOUR

hourly_timestamps = np.arange(0, SECONDS_PER_MONTH, SECONDS_PER_HOUR)
df = pd.DataFrame({'datetime_seconds': hourly_timestamps})
df_with_pos_enc = positional_encoding(
    df,
    period=SECONDS_PER_MONTH,
    embedding_dim=64,
)

# Show the first rows of the returned frame
print(df_with_pos_enc.head())

In [None]:
# Run process_data on the raw train and test files.
# NOTE(review): process_data is defined outside this view; its result is
# presumably a pandas DataFrame since to_csv is called on it - confirm.
train_dataset = process_data('Data/train_raw.csv')
test_dataset = process_data('Data/test_raw.csv')

# Write each processed dataset to its CSV path, omitting the index column.
for processed, destination in (
    (train_dataset, r'Data/train_dataset.csv'),
    (test_dataset, r'Data/test_dataset.csv'),
):
    processed.to_csv(destination, index=False)