In [1]:
# paquetes para importar
import requests
import json
import numpy as np
import datetime
import string
import pandas as pd

### 0) Dataset original

In [14]:
df_version_0 = pd.read_csv('my_data.csv')

In [15]:
df_version_0.head()

Unnamed: 0,date,hour,value
0,2014/04/01,0,92.75
1,2014/04/01,1,86.46
2,2014/04/01,2,78.82
3,2014/04/01,3,77.47
4,2014/04/01,4,76.9


### 1) Versión 1: añadimos día de la semana y día laboral

### a) Día de la semana:

Vamos a añadir una nueva columna `weekday` en el que va a haber un valor según el día: Lunes=0, ..., Domingo=6.

In [117]:
df_version_1 = pd.read_csv('my_data.csv')

In [118]:
df_version_1['weekday'] = pd.to_datetime(df_version_1['date']).apply(lambda x: x.weekday())

In [119]:
df_version_1.head()

Unnamed: 0,date,hour,value,weekday
0,2014/04/01,0,92.75,1
1,2014/04/01,1,86.46,1
2,2014/04/01,2,78.82,1
3,2014/04/01,3,77.47,1
4,2014/04/01,4,76.9,1


In [120]:
df_version_1.to_csv(r'my_data_version_1.csv', index=False)

### b) Día laboral o festivo

Vamos a añadir una nueva columna `working_day`: si es sábado/domingo o festivo nacional será 0, si es un día laboral normal será 1.

In [121]:
df_version_1 = pd.read_csv('my_data_version_1.csv')

In [122]:
df_version_1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 70872 entries, 0 to 70871
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   date     70872 non-null  object 
 1   hour     70872 non-null  int64  
 2   value    70872 non-null  float64
 3   weekday  70872 non-null  int64  
dtypes: float64(1), int64(2), object(1)
memory usage: 2.2+ MB


In [123]:
df_version_1.weekday.unique()

array([1, 2, 3, 4, 5, 6, 0])

Creamos una función para determinar si es sábado/domingo (0) o lunes-viernes (1):

In [124]:
# if weekday is 5 or 6 (Saturday or Sunday) is not working day, else working day:
def is_working_day(day):
    if day == 5 or day == 6:
        return 0
    else:
        return 1

In [125]:
is_working_day(5)

0

In [126]:
is_working_day(df_version_1['weekday'][10])

1

In [127]:
df_version_1['working_day'] = df_version_1['weekday'].apply(is_working_day)

In [128]:
df_version_1.working_day.unique()

array([1, 0])

Ahora vamos a ver si nuestra fecha `date` es festivo nacional (0) o no (1):

Comprobamos que un festivo nacional que cayó en día laboral aparece como laboral (`working_day` = 1):

In [129]:
df_version_1.loc[df_version_1['date'] == '2014/08/15']

Unnamed: 0,date,hour,value,weekday,working_day
3264,2014/08/15,0,109.26,4,1
3265,2014/08/15,1,110.66,4,1
3266,2014/08/15,2,114.31,4,1
3267,2014/08/15,3,115.16,4,1
3268,2014/08/15,4,120.58,4,1
3269,2014/08/15,5,119.8,4,1
3270,2014/08/15,6,121.51,4,1
3271,2014/08/15,7,116.75,4,1
3272,2014/08/15,8,117.14,4,1
3273,2014/08/15,9,117.78,4,1


Buscamos los festivos de cada año entre 2014 y 2022 y los concatenamos después:

In [130]:
festives_spain_2014 = [
    '2014/01/01', '2014/01/06', '2014/04/17', '2014/04/18', '2014/05/01', 
    '2014/06/19', '2014/08/15', '2014/11/01', '2014/12/06', '2014/12/08', '2014/12/25'
]

In [131]:
festives_spain_2015 = [
    '2015/01/01', '2015/01/06', '2015/04/02', '2015/04/03',
    '2015/05/01', '2015/10/12', '2015/11/01', 
    '2015/12/06', '2015/12/08', '2015/12/25'
]

In [132]:
festives_spain_2016 = [
    '2016/01/01', '2016/01/06', '2016/03/24', '2016/03/25',
    '2016/05/01', '2016/08/15', '2016/10/12', 
    '2016/11/01', '2016/12/06', '2016/12/08',
    '2016/12/25'
]

In [133]:
festives_spain_2017 = [
    '2017/01/01', '2017/01/06', '2017/04/13', '2017/04/14', '2017/05/01',
    '2017/08/15', '2017/10/12', '2017/11/01', '2017/12/06',
    '2017/12/08', '2017/12/25'
]

In [134]:
festives_spain_2018 = [
    '2018/01/01', '2018/01/06', '2018/03/29', '2018/03/30', '2018/05/01',
    '2018/08/15', '2018/10/12', '2018/11/01', '2018/12/06',
    '2018/12/08', '2018/12/25'
]

In [135]:
festives_spain_2019 = [
    '2019/01/01', '2019/01/06', '2019/04/18', '2019/04/19',
    '2019/05/01', '2019/08/15', '2019/10/12', '2019/11/01',
    '2019/12/06', '2019/12/08', '2019/12/25'
]

In [136]:
festives_spain_2020 = [
    '2020/01/01', '2020/01/06', '2020/04/09', '2020/04/10',
    '2020/05/01', '2020/08/15', '2020/10/12', '2020/11/01',
    '2020/12/06', '2020/12/08', '2020/12/25'
]

In [137]:
festives_spain_2021 = [
    '2021/01/01', '2021/01/06', '2021/04/01', '2021/04/02',
    '2021/05/01', '2021/08/15', '2021/10/12', '2021/11/01',
    '2021/12/06', '2021/12/08', '2021/12/25'
]

In [138]:
festives_spain_2022 = [
    '2022/01/01', '2022/01/06', '2022/04/14', '2022/04/15',
    '2022/05/01', '2022/08/15', '2022/10/12', '2022/11/01',
    '2022/12/06', '2022/12/08', '2022/12/25'
]

In [139]:
festives_spain_2014_2021 = festives_spain_2014 + festives_spain_2015 + festives_spain_2016 + festives_spain_2017 + festives_spain_2018 + festives_spain_2019 + festives_spain_2020 + festives_spain_2021 + festives_spain_2022

In [140]:
festives_spain_2014_2021

['2014/01/01',
 '2014/01/06',
 '2014/04/17',
 '2014/04/18',
 '2014/05/01',
 '2014/06/19',
 '2014/08/15',
 '2014/11/01',
 '2014/12/06',
 '2014/12/08',
 '2014/12/25',
 '2015/01/01',
 '2015/01/06',
 '2015/04/02',
 '2015/04/03',
 '2015/05/01',
 '2015/10/12',
 '2015/11/01',
 '2015/12/06',
 '2015/12/08',
 '2015/12/25',
 '2016/01/01',
 '2016/01/06',
 '2016/03/24',
 '2016/03/25',
 '2016/05/01',
 '2016/08/15',
 '2016/10/12',
 '2016/11/01',
 '2016/12/06',
 '2016/12/08',
 '2016/12/25',
 '2017/01/01',
 '2017/01/06',
 '2017/04/13',
 '2017/04/14',
 '2017/05/01',
 '2017/08/15',
 '2017/10/12',
 '2017/11/01',
 '2017/12/06',
 '2017/12/08',
 '2017/12/25',
 '2018/01/01',
 '2018/01/06',
 '2018/03/29',
 '2018/03/30',
 '2018/05/01',
 '2018/08/15',
 '2018/10/12',
 '2018/11/01',
 '2018/12/06',
 '2018/12/08',
 '2018/12/25',
 '2019/01/01',
 '2019/01/06',
 '2019/04/18',
 '2019/04/19',
 '2019/05/01',
 '2019/08/15',
 '2019/10/12',
 '2019/11/01',
 '2019/12/06',
 '2019/12/08',
 '2019/12/25',
 '2020/01/01',
 '2020/01/

In [141]:
def is_national_festive(date):
    if date in festives_spain_2014_2021:
        return 0
    else:
        return 1

In [142]:
df_version_1['working_day'] = df_version_1['date'].apply(is_national_festive)

Comprobamos que el día festivo que usamos como referencia, ahora figura como día festivo:

In [143]:
df_version_1.loc[df_version_1['date'] == '2014/08/15']

Unnamed: 0,date,hour,value,weekday,working_day
3264,2014/08/15,0,109.26,4,0
3265,2014/08/15,1,110.66,4,0
3266,2014/08/15,2,114.31,4,0
3267,2014/08/15,3,115.16,4,0
3268,2014/08/15,4,120.58,4,0
3269,2014/08/15,5,119.8,4,0
3270,2014/08/15,6,121.51,4,0
3271,2014/08/15,7,116.75,4,0
3272,2014/08/15,8,117.14,4,0
3273,2014/08/15,9,117.78,4,0


Guardamos en un nuevo fichero:

In [144]:
df_version_1.to_csv(r'my_data_version_1.csv', index=False)

Comprobamos que una vez guardado, el fichero tiene los últimos cambios:

In [145]:
df_version_1 = pd.read_csv('my_data_version_1.csv')

In [146]:
df_version_1.loc[df_version_1['date'] == '2014/08/15']

Unnamed: 0,date,hour,value,weekday,working_day
3264,2014/08/15,0,109.26,4,0
3265,2014/08/15,1,110.66,4,0
3266,2014/08/15,2,114.31,4,0
3267,2014/08/15,3,115.16,4,0
3268,2014/08/15,4,120.58,4,0
3269,2014/08/15,5,119.8,4,0
3270,2014/08/15,6,121.51,4,0
3271,2014/08/15,7,116.75,4,0
3272,2014/08/15,8,117.14,4,0
3273,2014/08/15,9,117.78,4,0


### Versión 2: Añadimos la temperatura

In [11]:
# en terminal: grib_get_data madrid_2014_2022.grib > madrid_2014_2022.csv para obtener el csv
madrid_2014_2022 = pd.read_csv('madrid_2014_2022.csv')
madrid_2014_2022

Unnamed: 0,Latitude,Longitude,Value
0,40.420 -3.780 2.7763142395e+02,,
1,40.420 -3.680 2.7791194153e+02,,
2,40.420 -3.580 2.7808610535e+02,,
3,40.320 -3.780 2.7803109741e+02,,
4,40.320 -3.680 2.7823220825e+02,,
...,...,...,...
500809,40.420 -3.680 2.8030026245e+02,,
500810,40.420 -3.580 2.8084033203e+02,,
500811,40.320 -3.780 2.7984280396e+02,,
500812,40.320 -3.680 2.8060583496e+02,,


In [2]:
# por qué faltan las otras columnas?¿ 
# necesitamos fecha

In [2]:
import xarray as xr
import matplotlib.pyplot as plt



In [None]:
# ds = xr.tutorial.load_dataset("madrid_2014_2022.grib", engine="cfgrib")

In [16]:
# import xarray

# grib_data = xarray.open_dataset(
#     'madrid_2014_2022.grib', 
#     engine='cfgrib', 
#     backend_kwargs={'filter_by_keys':{'typeOfLevel': 'heightAboveGround','level': 2}}
# )

In [None]:
grib_data

In [None]:
# grib_data.to_dataframe()

In [5]:
ds = xr.open_dataset('madrid_2014_2022.grib', engine='cfgrib')

In [8]:
ds_dataframe = ds.to_dataframe()

In [9]:
ds_dataframe

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,number,surface,valid_time,t2m
time,step,latitude,longitude,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2013-12-31,0 days 01:00:00,40.42,-3.78,0,0.0,2013-12-31 01:00:00,
2013-12-31,0 days 01:00:00,40.42,-3.68,0,0.0,2013-12-31 01:00:00,
2013-12-31,0 days 01:00:00,40.42,-3.58,0,0.0,2013-12-31 01:00:00,
2013-12-31,0 days 01:00:00,40.32,-3.78,0,0.0,2013-12-31 01:00:00,
2013-12-31,0 days 01:00:00,40.32,-3.68,0,0.0,2013-12-31 01:00:00,
...,...,...,...,...,...,...,...
2022-02-28,1 days 00:00:00,40.42,-3.68,0,0.0,2022-03-01 00:00:00,280.300262
2022-02-28,1 days 00:00:00,40.42,-3.58,0,0.0,2022-03-01 00:00:00,280.840332
2022-02-28,1 days 00:00:00,40.32,-3.78,0,0.0,2022-03-01 00:00:00,279.842804
2022-02-28,1 days 00:00:00,40.32,-3.68,0,0.0,2022-03-01 00:00:00,280.605835


In [19]:
ds_dataframe.to_csv(r'madrid_2014_2022.csv', index=True)

In [22]:
pd.read_csv('madrid_2014_2022.csv')

Unnamed: 0,time,step,latitude,longitude,number,surface,valid_time,t2m
0,2013-12-31,0 days 01:00:00,40.42,-3.78,0,0.0,2013-12-31 01:00:00,
1,2013-12-31,0 days 01:00:00,40.42,-3.68,0,0.0,2013-12-31 01:00:00,
2,2013-12-31,0 days 01:00:00,40.42,-3.58,0,0.0,2013-12-31 01:00:00,
3,2013-12-31,0 days 01:00:00,40.32,-3.78,0,0.0,2013-12-31 01:00:00,
4,2013-12-31,0 days 01:00:00,40.32,-3.68,0,0.0,2013-12-31 01:00:00,
...,...,...,...,...,...,...,...,...
429403,2022-02-28,1 days 00:00:00,40.42,-3.68,0,0.0,2022-03-01 00:00:00,280.30026
429404,2022-02-28,1 days 00:00:00,40.42,-3.58,0,0.0,2022-03-01 00:00:00,280.84033
429405,2022-02-28,1 days 00:00:00,40.32,-3.78,0,0.0,2022-03-01 00:00:00,279.84280
429406,2022-02-28,1 days 00:00:00,40.32,-3.68,0,0.0,2022-03-01 00:00:00,280.60583
