## 1. Imports and definitions

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Apply the 'seaborn-v0_8-darkgrid' style sheet to every figure created after this point.
plt.style.use('seaborn-v0_8-darkgrid')

In [9]:
# Cyclical encoding of direction features
# ==============================================================================
def cyclical_encoding(data: pd.Series, cycle_length: int) -> pd.DataFrame:
    """
    Project a periodic feature onto the unit circle as a sine/cosine pair.

    Every value is turned into the angle ``2 * pi * value / cycle_length``,
    so the feature is expected to start at 0 and wrap around at
    ``cycle_length``.

    Parameters
    ----------
    data : pd.Series
        Feature to encode. Its ``name`` becomes the prefix of the output
        column names.
    cycle_length : int
        Length of one full cycle, e.g. 12 for months, 24 for hours or
        360 for angles expressed in degrees.

    Returns
    -------
    encoded : pd.DataFrame
        Two columns, ``<name>_sin`` and ``<name>_cos``, carrying the same
        index as ``data``.

    """

    prefix = data.name

    # Angle in radians, computed once and shared by both components.
    angle = 2 * np.pi * data/cycle_length

    encoded = pd.DataFrame({
        f"{prefix}_sin": np.sin(angle),
        f"{prefix}_cos": np.cos(angle),
    })

    return encoded

## 2. Read and transform data 

In [3]:
# Read data in
# `file` keeps its leading slash because the full path is built by plain
# string concatenation with `data_dir`.
data_dir = 'Data/spain/four years'
file = '/spain_data_all.csv'

# Load the CSV, parse the 'datetime' column, use it as the index and conform
# the frame to a regular hourly grid (timestamps missing from the file become
# NaN rows). Built as a single chain with no in-place mutation so that
# re-running the cell always yields the same `df`.
# 'h' is used instead of 'H': the uppercase hourly alias is deprecated in
# recent pandas releases and emits a FutureWarning.
df = (
    pd.read_csv(data_dir + file)
    .assign(datetime=lambda frame: pd.to_datetime(frame['datetime']))
    .set_index('datetime')
    .asfreq('h')
)

df.head()

Unnamed: 0_level_0,wave_height,wave_period,wave_direction,current_speed,current_direction,water_temp,wind_speed,wind_direction
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-06-18 00:00:00,0.78,4.92,333.0,0.212,105.55,17.592,1.82,232.0
2020-06-18 01:00:00,0.76,5.06,334.0,0.206,100.62,17.56,1.4,230.0
2020-06-18 02:00:00,0.74,5.16,334.0,0.203,95.08,17.536,1.37,251.0
2020-06-18 03:00:00,0.73,5.22,335.0,0.206,90.01,17.52,1.46,227.0
2020-06-18 04:00:00,0.73,5.28,336.0,0.21,87.01,17.512,1.88,228.0


In [6]:
# Select the target variables into their own frame.
# The copy keeps later edits to `df_target` from touching `df`.
target_vars = ['wave_height', 'wave_period', 'wave_direction']
df_target = df.loc[:, target_vars].copy()
df_target

Unnamed: 0_level_0,wave_height,wave_period,wave_direction
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-06-18 00:00:00,0.78,4.92,333.0
2020-06-18 01:00:00,0.76,5.06,334.0
2020-06-18 02:00:00,0.74,5.16,334.0
2020-06-18 03:00:00,0.73,5.22,335.0
2020-06-18 04:00:00,0.73,5.28,336.0
...,...,...,...
2024-06-18 19:00:00,0.93,3.61,21.0
2024-06-18 20:00:00,0.92,3.82,21.0
2024-06-18 21:00:00,0.86,4.08,19.0
2024-06-18 22:00:00,0.84,4.20,19.0


In [14]:
# Transform wave direction into a cyclical variable, as the original variable is in degrees, from 0 to 360
direction_cycle = cyclical_encoding(df['wave_direction'], cycle_length=360)

# Replace the raw direction with the sine component of the encoding.
# NOTE(review): sine on its own does not identify a direction uniquely, since
# sin(theta) == sin(180 - theta) (e.g. 30 and 150 degrees both give 0.5).
# Keeping 'wave_direction_cos' as well would be required to recover the angle;
# confirm that a single component is really intended for the target.
#
# Written without in-place mutation and with errors='ignore' so the cell can be
# re-run: previously a second run raised KeyError because 'wave_direction' had
# already been dropped from `df_target`.
df_target = (
    df_target
    .assign(wave_direction_sine=direction_cycle['wave_direction_sin'])
    .drop(columns=['wave_direction'], errors='ignore')
)
df_target


Unnamed: 0_level_0,wave_height,wave_period,wave_direction_sine
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-06-18 00:00:00,0.78,4.92,-0.453990
2020-06-18 01:00:00,0.76,5.06,-0.438371
2020-06-18 02:00:00,0.74,5.16,-0.438371
2020-06-18 03:00:00,0.73,5.22,-0.422618
2020-06-18 04:00:00,0.73,5.28,-0.406737
...,...,...,...
2024-06-18 19:00:00,0.93,3.61,0.358368
2024-06-18 20:00:00,0.92,3.82,0.358368
2024-06-18 21:00:00,0.86,4.08,0.325568
2024-06-18 22:00:00,0.84,4.20,0.325568
