In [None]:
# Standard libraries
import pandas as pd
import numpy as np
from datetime import datetime

# Plot
import seaborn as sns
import matplotlib.pyplot as plt


In [None]:
# Load the raw weather export from disk
data_weather = pd.read_csv('../raw_data/open-meteo-paris.csv')
# NOTE: this is an alias, not a copy -- in-place edits made through
# df_weather are also visible through data_weather
df_weather = data_weather

# Parse the 'time' strings (layout like '2021-01-01T00:00') into datetimes.
# pd.to_datetime converts the whole column at once instead of calling
# datetime.strptime row by row through apply().
df_weather['timestamp'] = pd.to_datetime(df_weather['time'], format='%Y-%m-%dT%H:%M')


# Encode the day of the week as a cyclical (sine/cosine) feature pair
# Weekday number read from the timestamp (pandas: Monday=0 ... Sunday=6)
weekday_number = df_weather['timestamp'].dt.weekday
# A week repeats every 7 days
period = 7
# Map each weekday onto an angle (in radians) around the unit circle
weekday_angle = 2 * np.pi * weekday_number / period
# Keep only the two circle coordinates as features; the intermediate
# weekday number and angle are never stored on the frame
df_weather['day_of_week_sin'] = np.sin(weekday_angle)
df_weather['day_of_week_cos'] = np.cos(weekday_angle)


# The raw 'time' string column is superseded by 'timestamp'; drop it.
# Rebinding (rather than dropping in place) leaves data_weather untouched
# from here on.
df_weather = df_weather.drop(columns='time')


# Encode the calendar month as a cyclical (sine/cosine) feature pair
month_number = df_weather['timestamp'].dt.month
# A year repeats every 12 months
period = 12
# Map each month onto an angle (in radians) around the unit circle
month_angle = 2 * np.pi * month_number / period
# Keep only the two circle coordinates as features
df_weather['month_sin'] = np.sin(month_angle)
df_weather['month_cos'] = np.cos(month_angle)
# Use the timestamp as the row index
df_weather = df_weather.set_index('timestamp')


# List of holiday dates (as 'YYYY-M-D' strings) used to flag rows
# NOTE(review): the list stops at 2022-08-15 -- extend it if the weather
# data runs past that date
holidays = [
    '2021-1-1',
    '2021-4-5',
    '2021-5-1',
    '2021-5-8',
    '2021-5-13',
    '2021-5-24',
    '2021-7-14',
    '2021-8-15',
    '2021-11-1',
    '2021-11-11',
    '2021-12-25',
    '2022-1-1',
    '2022-4-18',
    '2022-5-1',
    '2022-5-8',
    '2022-5-26',
    '2022-6-6',
    '2022-7-14',
    '2022-8-15',
]
# Convert each date string to a (midnight) pandas Timestamp
holidays = [pd.to_datetime(holiday) for holiday in holidays]
# Add isHoliday column to df_weather (1 on a holiday, 0 otherwise).
# The index carries a time of day while the holiday entries are all at
# midnight, so the index is normalized to midnight before the membership
# test; comparing the raw timestamps would flag only the 00:00 row of each
# holiday instead of the whole day.
df_weather['isHoliday'] = df_weather.index.normalize().isin(holidays).astype('int64')
