In [13]:
# paquetes para importar
import requests
import json
import numpy as np
import datetime
import string
import pandas as pd

### 0) Dataset original

In [14]:
# Load the original dataset from CSV; it is kept as version 0 for reference.
df_version_0 = pd.read_csv('my_data.csv')

In [15]:
# Preview the first rows of the original dataset.
df_version_0.head()

Unnamed: 0,date,hour,value
0,2014/04/01,0,92.75
1,2014/04/01,1,86.46
2,2014/04/01,2,78.82
3,2014/04/01,3,77.47
4,2014/04/01,4,76.9


### 1) Versión 1: añadimos día de la semana y día laboral

### a) Día de la semana:

Vamos a añadir una nueva columna `weekday` en la que habrá un valor según el día de la semana: Lunes=0, ..., Domingo=6.

In [17]:
# Version 1 starts from a fresh read of the original CSV, so version 0 stays untouched.
df_version_1 = pd.read_csv('my_data.csv')

In [18]:
# Parse `date` and derive the day of the week with the vectorized `.dt` accessor
# (Monday=0, ..., Sunday=6) instead of calling a Python lambda once per row.
df_version_1['weekday'] = pd.to_datetime(df_version_1['date']).dt.weekday

In [19]:
# Preview the first rows to check the new `weekday` column.
df_version_1.head()

Unnamed: 0,date,hour,value,weekday
0,2014/04/01,0,92.75,1
1,2014/04/01,1,86.46,1
2,2014/04/01,2,78.82,1
3,2014/04/01,3,77.47,1
4,2014/04/01,4,76.9,1


In [20]:
# Persist version 1 (with the `weekday` column) without writing the row index.
df_version_1.to_csv('my_data_version_1.csv', index=False)

### b) Día laboral o festivo

Vamos a añadir una nueva columna `working_day`: si es sábado/domingo o festivo nacional será 0, si es un día laboral normal será 1.

In [90]:
# Reload the version-1 CSV (the file written earlier with the `weekday` column).
df_version_1 = pd.read_csv('my_data_version_1.csv')

In [91]:
# Summarize columns, non-null counts and dtypes of the reloaded frame.
df_version_1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 70872 entries, 0 to 70871
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   date     70872 non-null  object 
 1   hour     70872 non-null  int64  
 2   value    70872 non-null  float64
 3   weekday  70872 non-null  int64  
dtypes: float64(1), int64(2), object(1)
memory usage: 2.2+ MB


In [92]:
# List the distinct weekday codes present in the data.
df_version_1['weekday'].unique()

array([1, 2, 3, 4, 5, 6, 0])

Creamos una función para determinar si es sábado/domingo (0) o lunes-viernes (1):

In [93]:
def is_working_day(day):
    """Flag whether a weekday code falls on Monday-Friday.

    Args:
        day: Weekday code using the Monday=0, ..., Sunday=6 convention.

    Returns:
        0 when ``day`` is 5 or 6 (Saturday or Sunday), 1 for any other value.
    """
    return 0 if day in (5, 6) else 1

In [94]:
# Spot check: weekday code 5 (Saturday) must not count as a working day.
is_working_day(5)

0

In [95]:
# Spot check on real data: classify the weekday stored in the row labelled 10.
is_working_day(df_version_1.loc[10, 'weekday'])

1

In [96]:
# First pass of `working_day`: 0 for Saturday/Sunday, 1 for Monday-Friday.
df_version_1['working_day'] = df_version_1['weekday'].map(is_working_day)

In [97]:
# Confirm that the flag only takes the values 0 and 1.
df_version_1['working_day'].unique()

array([1, 0])

Ahora vamos a ver si nuestra fecha `date` es festivo nacional (0) o no (1):

Comprobamos que un festivo nacional que cayó en día laboral aparece como laboral (`working_day` = 1):

In [99]:
# Inspect the rows for 2014/08/15 (a holiday on a weekday) before holidays are applied.
df_version_1.loc[df_version_1['date'].eq('2014/08/15')]

Unnamed: 0,date,hour,value,weekday,working_day
3264,2014/08/15,0,109.26,4,1
3265,2014/08/15,1,110.66,4,1
3266,2014/08/15,2,114.31,4,1
3267,2014/08/15,3,115.16,4,1
3268,2014/08/15,4,120.58,4,1
3269,2014/08/15,5,119.8,4,1
3270,2014/08/15,6,121.51,4,1
3271,2014/08/15,7,116.75,4,1
3272,2014/08/15,8,117.14,4,1
3273,2014/08/15,9,117.78,4,1


Buscamos los festivos de cada año entre 2014 y 2022 y los concatenamos después:

In [102]:
# Holiday dates for 2014 as 'YYYY/MM/DD' strings (same format as the `date` column).
# NOTE(review): unlike 2015-2022, this list has no '2014/10/12' entry and it is the
# only year with a June date ('2014/06/19') -- confirm both against the official calendar.
festives_spain_2014 = [
    '2014/01/01',
    '2014/01/06',
    '2014/04/17',
    '2014/04/18',
    '2014/05/01',
    '2014/06/19',
    '2014/08/15',
    '2014/11/01',
    '2014/12/06',
    '2014/12/08',
    '2014/12/25',
]

In [103]:
# Holiday dates for 2015 as 'YYYY/MM/DD' strings (same format as the `date` column).
# NOTE(review): unlike 2014 and 2016-2022, this list has no '2015/08/15' entry --
# confirm whether the omission is intentional.
festives_spain_2015 = [
    '2015/01/01',
    '2015/01/06',
    '2015/04/02',
    '2015/04/03',
    '2015/05/01',
    '2015/10/12',
    '2015/11/01',
    '2015/12/06',
    '2015/12/08',
    '2015/12/25',
]

In [104]:
# Holiday dates for 2016 as 'YYYY/MM/DD' strings (same format as the `date` column).
festives_spain_2016 = [
    '2016/01/01',
    '2016/01/06',
    '2016/03/24',
    '2016/03/25',
    '2016/05/01',
    '2016/08/15',
    '2016/10/12',
    '2016/11/01',
    '2016/12/06',
    '2016/12/08',
    '2016/12/25',
]

In [105]:
# Holiday dates for 2017 as 'YYYY/MM/DD' strings (same format as the `date` column).
festives_spain_2017 = [
    '2017/01/01',
    '2017/01/06',
    '2017/04/13',
    '2017/04/14',
    '2017/05/01',
    '2017/08/15',
    '2017/10/12',
    '2017/11/01',
    '2017/12/06',
    '2017/12/08',
    '2017/12/25',
]

In [106]:
# Holiday dates for 2018 as 'YYYY/MM/DD' strings (same format as the `date` column).
festives_spain_2018 = [
    '2018/01/01',
    '2018/01/06',
    '2018/03/29',
    '2018/03/30',
    '2018/05/01',
    '2018/08/15',
    '2018/10/12',
    '2018/11/01',
    '2018/12/06',
    '2018/12/08',
    '2018/12/25',
]

In [107]:
# Holiday dates for 2019 as 'YYYY/MM/DD' strings (same format as the `date` column).
festives_spain_2019 = [
    '2019/01/01',
    '2019/01/06',
    '2019/04/18',
    '2019/04/19',
    '2019/05/01',
    '2019/08/15',
    '2019/10/12',
    '2019/11/01',
    '2019/12/06',
    '2019/12/08',
    '2019/12/25',
]

In [108]:
# Holiday dates for 2020 as 'YYYY/MM/DD' strings (same format as the `date` column).
festives_spain_2020 = [
    '2020/01/01',
    '2020/01/06',
    '2020/04/09',
    '2020/04/10',
    '2020/05/01',
    '2020/08/15',
    '2020/10/12',
    '2020/11/01',
    '2020/12/06',
    '2020/12/08',
    '2020/12/25',
]

In [109]:
# Holiday dates for 2021 as 'YYYY/MM/DD' strings (same format as the `date` column).
festives_spain_2021 = [
    '2021/01/01',
    '2021/01/06',
    '2021/04/01',
    '2021/04/02',
    '2021/05/01',
    '2021/08/15',
    '2021/10/12',
    '2021/11/01',
    '2021/12/06',
    '2021/12/08',
    '2021/12/25',
]

In [110]:
# Holiday dates for 2022 as 'YYYY/MM/DD' strings (same format as the `date` column).
festives_spain_2022 = [
    '2022/01/01',
    '2022/01/06',
    '2022/04/14',
    '2022/04/15',
    '2022/05/01',
    '2022/08/15',
    '2022/10/12',
    '2022/11/01',
    '2022/12/06',
    '2022/12/08',
    '2022/12/25',
]

In [111]:
# Merge the yearly holiday lists, in chronological order, into one lookup list.
# NOTE: despite the `_2014_2021` suffix, the 2022 list is included as well.
festives_spain_2014_2021 = [
    *festives_spain_2014,
    *festives_spain_2015,
    *festives_spain_2016,
    *festives_spain_2017,
    *festives_spain_2018,
    *festives_spain_2019,
    *festives_spain_2020,
    *festives_spain_2021,
    *festives_spain_2022,
]

In [112]:
# Display the merged holiday list to verify the concatenation.
festives_spain_2014_2021

['2014/01/01',
 '2014/01/06',
 '2014/04/17',
 '2014/04/18',
 '2014/05/01',
 '2014/06/19',
 '2014/08/15',
 '2014/11/01',
 '2014/12/06',
 '2014/12/08',
 '2014/12/25',
 '2015/01/01',
 '2015/01/06',
 '2015/04/02',
 '2015/04/03',
 '2015/05/01',
 '2015/10/12',
 '2015/11/01',
 '2015/12/06',
 '2015/12/08',
 '2015/12/25',
 '2016/01/01',
 '2016/01/06',
 '2016/03/24',
 '2016/03/25',
 '2016/05/01',
 '2016/08/15',
 '2016/10/12',
 '2016/11/01',
 '2016/12/06',
 '2016/12/08',
 '2016/12/25',
 '2017/01/01',
 '2017/01/06',
 '2017/04/13',
 '2017/04/14',
 '2017/05/01',
 '2017/08/15',
 '2017/10/12',
 '2017/11/01',
 '2017/12/06',
 '2017/12/08',
 '2017/12/25',
 '2018/01/01',
 '2018/01/06',
 '2018/03/29',
 '2018/03/30',
 '2018/05/01',
 '2018/08/15',
 '2018/10/12',
 '2018/11/01',
 '2018/12/06',
 '2018/12/08',
 '2018/12/25',
 '2019/01/01',
 '2019/01/06',
 '2019/04/18',
 '2019/04/19',
 '2019/05/01',
 '2019/08/15',
 '2019/10/12',
 '2019/11/01',
 '2019/12/06',
 '2019/12/08',
 '2019/12/25',
 '2020/01/01',
 '2020/01/

In [113]:
def is_national_festive(date):
    """Return the working-day flag implied by the holiday list for ``date``.

    Note the inverted sense relative to the function name: the result follows
    the ``working_day`` convention used in this notebook.

    Args:
        date: Date value looked up in ``festives_spain_2014_2021``; it must use
            the same representation as the entries of that list
            ('YYYY/MM/DD' strings).

    Returns:
        0 when ``date`` appears in ``festives_spain_2014_2021`` (holiday),
        1 otherwise.
    """
    return 0 if date in festives_spain_2014_2021 else 1

In [114]:
# A day is a working day only if it is BOTH Monday-Friday AND not a national holiday.
# Both helpers return 1 for "working" and 0 otherwise, so their product is the logical AND.
# Assigning the holiday flag alone would overwrite the weekend information and mark
# ordinary Saturdays/Sundays as working days again.
# The flag is recomputed from `weekday` and `date` (not from the existing `working_day`
# column), so re-running this cell always yields the same result.
df_version_1['working_day'] = (
    df_version_1['weekday'].apply(is_working_day)
    * df_version_1['date'].apply(is_national_festive)
)

In [115]:
# Re-inspect 2014/08/15: the holiday should now be flagged with working_day = 0.
df_version_1.loc[df_version_1['date'].eq('2014/08/15')]

Unnamed: 0,date,hour,value,weekday,working_day
3264,2014/08/15,0,109.26,4,0
3265,2014/08/15,1,110.66,4,0
3266,2014/08/15,2,114.31,4,0
3267,2014/08/15,3,115.16,4,0
3268,2014/08/15,4,120.58,4,0
3269,2014/08/15,5,119.8,4,0
3270,2014/08/15,6,121.51,4,0
3271,2014/08/15,7,116.75,4,0
3272,2014/08/15,8,117.14,4,0
3273,2014/08/15,9,117.78,4,0
