In [1]:
# paquetes para importar
import requests
import json
import numpy as np
import datetime
import string
import pandas as pd

### 0) Dataset original

In [14]:
df_version_0 = pd.read_csv('my_data.csv')

In [15]:
df_version_0.head()

Unnamed: 0,date,hour,value
0,2014/04/01,0,92.75
1,2014/04/01,1,86.46
2,2014/04/01,2,78.82
3,2014/04/01,3,77.47
4,2014/04/01,4,76.9


### 1) Versión 1: añadimos día de la semana y día laboral

### a) Día de la semana:

Vamos a añadir una nueva columna `weekday` en el que va a haber un valor según el día: Lunes=0, ..., Domingo=6.

In [117]:
df_version_1 = pd.read_csv('my_data.csv')

In [118]:
df_version_1['weekday'] = pd.to_datetime(df_version_1['date']).apply(lambda x: x.weekday())

In [119]:
df_version_1.head()

Unnamed: 0,date,hour,value,weekday
0,2014/04/01,0,92.75,1
1,2014/04/01,1,86.46,1
2,2014/04/01,2,78.82,1
3,2014/04/01,3,77.47,1
4,2014/04/01,4,76.9,1


In [120]:
df_version_1.to_csv(r'my_data_version_1.csv', index=False)

### b) Día laboral o festivo

Vamos a añadir una nueva columna `working_day`: si es sábado/domingo o festivo nacional será 0, si es un día laboral normal será 1.

In [121]:
df_version_1 = pd.read_csv('my_data_version_1.csv')

In [122]:
df_version_1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 70872 entries, 0 to 70871
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   date     70872 non-null  object 
 1   hour     70872 non-null  int64  
 2   value    70872 non-null  float64
 3   weekday  70872 non-null  int64  
dtypes: float64(1), int64(2), object(1)
memory usage: 2.2+ MB


In [123]:
df_version_1.weekday.unique()

array([1, 2, 3, 4, 5, 6, 0])

Creamos una función para determinar si es sábado/domingo (0) o lunes-viernes (1):

In [124]:
# if weekday is 5 or 6 (Saturday or Sunday) is not working day, else working day:
def is_working_day(day):
    if day == 5 or day == 6:
        return 0
    else:
        return 1

In [125]:
is_working_day(5)

0

In [126]:
is_working_day(df_version_1['weekday'][10])

1

In [127]:
df_version_1['working_day'] = df_version_1['weekday'].apply(is_working_day)

In [128]:
df_version_1.working_day.unique()

array([1, 0])

Ahora vamos a ver si nuestra fecha `date` es festivo nacional (0) o no (1):

Comprobamos que un festivo nacional que cayó en día laboral aparece como laboral (`working_day` = 1):

In [129]:
df_version_1.loc[df_version_1['date'] == '2014/08/15']

Unnamed: 0,date,hour,value,weekday,working_day
3264,2014/08/15,0,109.26,4,1
3265,2014/08/15,1,110.66,4,1
3266,2014/08/15,2,114.31,4,1
3267,2014/08/15,3,115.16,4,1
3268,2014/08/15,4,120.58,4,1
3269,2014/08/15,5,119.8,4,1
3270,2014/08/15,6,121.51,4,1
3271,2014/08/15,7,116.75,4,1
3272,2014/08/15,8,117.14,4,1
3273,2014/08/15,9,117.78,4,1


Buscamos los festivos de cada año entre 2014 y 2022 y los concatenamos después:

In [130]:
festives_spain_2014 = [
    '2014/01/01', '2014/01/06', '2014/04/17', '2014/04/18', '2014/05/01', 
    '2014/06/19', '2014/08/15', '2014/11/01', '2014/12/06', '2014/12/08', '2014/12/25'
]

In [131]:
festives_spain_2015 = [
    '2015/01/01', '2015/01/06', '2015/04/02', '2015/04/03',
    '2015/05/01', '2015/10/12', '2015/11/01', 
    '2015/12/06', '2015/12/08', '2015/12/25'
]

In [132]:
festives_spain_2016 = [
    '2016/01/01', '2016/01/06', '2016/03/24', '2016/03/25',
    '2016/05/01', '2016/08/15', '2016/10/12', 
    '2016/11/01', '2016/12/06', '2016/12/08',
    '2016/12/25'
]

In [133]:
festives_spain_2017 = [
    '2017/01/01', '2017/01/06', '2017/04/13', '2017/04/14', '2017/05/01',
    '2017/08/15', '2017/10/12', '2017/11/01', '2017/12/06',
    '2017/12/08', '2017/12/25'
]

In [134]:
festives_spain_2018 = [
    '2018/01/01', '2018/01/06', '2018/03/29', '2018/03/30', '2018/05/01',
    '2018/08/15', '2018/10/12', '2018/11/01', '2018/12/06',
    '2018/12/08', '2018/12/25'
]

In [135]:
festives_spain_2019 = [
    '2019/01/01', '2019/01/06', '2019/04/18', '2019/04/19',
    '2019/05/01', '2019/08/15', '2019/10/12', '2019/11/01',
    '2019/12/06', '2019/12/08', '2019/12/25'
]

In [136]:
festives_spain_2020 = [
    '2020/01/01', '2020/01/06', '2020/04/09', '2020/04/10',
    '2020/05/01', '2020/08/15', '2020/10/12', '2020/11/01',
    '2020/12/06', '2020/12/08', '2020/12/25'
]

In [137]:
festives_spain_2021 = [
    '2021/01/01', '2021/01/06', '2021/04/01', '2021/04/02',
    '2021/05/01', '2021/08/15', '2021/10/12', '2021/11/01',
    '2021/12/06', '2021/12/08', '2021/12/25'
]

In [138]:
festives_spain_2022 = [
    '2022/01/01', '2022/01/06', '2022/04/14', '2022/04/15',
    '2022/05/01', '2022/08/15', '2022/10/12', '2022/11/01',
    '2022/12/06', '2022/12/08', '2022/12/25'
]

In [139]:
festives_spain_2014_2021 = festives_spain_2014 + festives_spain_2015 + festives_spain_2016 + festives_spain_2017 + festives_spain_2018 + festives_spain_2019 + festives_spain_2020 + festives_spain_2021 + festives_spain_2022

In [140]:
festives_spain_2014_2021

['2014/01/01',
 '2014/01/06',
 '2014/04/17',
 '2014/04/18',
 '2014/05/01',
 '2014/06/19',
 '2014/08/15',
 '2014/11/01',
 '2014/12/06',
 '2014/12/08',
 '2014/12/25',
 '2015/01/01',
 '2015/01/06',
 '2015/04/02',
 '2015/04/03',
 '2015/05/01',
 '2015/10/12',
 '2015/11/01',
 '2015/12/06',
 '2015/12/08',
 '2015/12/25',
 '2016/01/01',
 '2016/01/06',
 '2016/03/24',
 '2016/03/25',
 '2016/05/01',
 '2016/08/15',
 '2016/10/12',
 '2016/11/01',
 '2016/12/06',
 '2016/12/08',
 '2016/12/25',
 '2017/01/01',
 '2017/01/06',
 '2017/04/13',
 '2017/04/14',
 '2017/05/01',
 '2017/08/15',
 '2017/10/12',
 '2017/11/01',
 '2017/12/06',
 '2017/12/08',
 '2017/12/25',
 '2018/01/01',
 '2018/01/06',
 '2018/03/29',
 '2018/03/30',
 '2018/05/01',
 '2018/08/15',
 '2018/10/12',
 '2018/11/01',
 '2018/12/06',
 '2018/12/08',
 '2018/12/25',
 '2019/01/01',
 '2019/01/06',
 '2019/04/18',
 '2019/04/19',
 '2019/05/01',
 '2019/08/15',
 '2019/10/12',
 '2019/11/01',
 '2019/12/06',
 '2019/12/08',
 '2019/12/25',
 '2020/01/01',
 '2020/01/

In [141]:
def is_national_festive(date):
    if date in festives_spain_2014_2021:
        return 0
    else:
        return 1

In [142]:
df_version_1['working_day'] = df_version_1['date'].apply(is_national_festive)

Comprobamos que el día festivo que usamos como referencia, ahora figura como día festivo:

In [143]:
df_version_1.loc[df_version_1['date'] == '2014/08/15']

Unnamed: 0,date,hour,value,weekday,working_day
3264,2014/08/15,0,109.26,4,0
3265,2014/08/15,1,110.66,4,0
3266,2014/08/15,2,114.31,4,0
3267,2014/08/15,3,115.16,4,0
3268,2014/08/15,4,120.58,4,0
3269,2014/08/15,5,119.8,4,0
3270,2014/08/15,6,121.51,4,0
3271,2014/08/15,7,116.75,4,0
3272,2014/08/15,8,117.14,4,0
3273,2014/08/15,9,117.78,4,0


Guardamos en un nuevo fichero:

In [144]:
df_version_1.to_csv(r'my_data_version_1.csv', index=False)

Comprobamos que una vez guardado, el fichero tiene los últimos cambios:

In [145]:
df_version_1 = pd.read_csv('my_data_version_1.csv')

In [146]:
df_version_1.loc[df_version_1['date'] == '2014/08/15']

Unnamed: 0,date,hour,value,weekday,working_day
3264,2014/08/15,0,109.26,4,0
3265,2014/08/15,1,110.66,4,0
3266,2014/08/15,2,114.31,4,0
3267,2014/08/15,3,115.16,4,0
3268,2014/08/15,4,120.58,4,0
3269,2014/08/15,5,119.8,4,0
3270,2014/08/15,6,121.51,4,0
3271,2014/08/15,7,116.75,4,0
3272,2014/08/15,8,117.14,4,0
3273,2014/08/15,9,117.78,4,0


### Versión 2: Añadimos la temperatura

In [11]:
# en terminal: grib_get_data madrid_2014_2022.grib > madrid_2014_2022.csv para obtener el csv
madrid_2014_2022 = pd.read_csv('madrid_2014_2022.csv')
madrid_2014_2022

Unnamed: 0,Latitude,Longitude,Value
0,40.420 -3.780 2.7763142395e+02,,
1,40.420 -3.680 2.7791194153e+02,,
2,40.420 -3.580 2.7808610535e+02,,
3,40.320 -3.780 2.7803109741e+02,,
4,40.320 -3.680 2.7823220825e+02,,
...,...,...,...
500809,40.420 -3.680 2.8030026245e+02,,
500810,40.420 -3.580 2.8084033203e+02,,
500811,40.320 -3.780 2.7984280396e+02,,
500812,40.320 -3.680 2.8060583496e+02,,


In [2]:
# por qué faltan las otras columnas?¿ 
# necesitamos fecha

In [3]:
import xarray as xr
import matplotlib.pyplot as plt



In [None]:
# ds = xr.tutorial.load_dataset("madrid_2014_2022.grib", engine="cfgrib")

In [16]:
# import xarray

# grib_data = xarray.open_dataset(
#     'madrid_2014_2022.grib', 
#     engine='cfgrib', 
#     backend_kwargs={'filter_by_keys':{'typeOfLevel': 'heightAboveGround','level': 2}}
# )

In [None]:
grib_data

In [None]:
# grib_data.to_dataframe()

In [5]:
ds = xr.open_dataset('ficheros_temperatura/madrid_2014_2022_new.grib', engine='cfgrib')

In [6]:
ds_dataframe = ds.to_dataframe()

In [7]:
ds_dataframe

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,number,surface,valid_time,t2m
time,step,latitude,longitude,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2013-12-31,0 days 01:00:00,40.41,-3.71,0,0.0,2013-12-31 01:00:00,
2013-12-31,0 days 02:00:00,40.41,-3.71,0,0.0,2013-12-31 02:00:00,
2013-12-31,0 days 03:00:00,40.41,-3.71,0,0.0,2013-12-31 03:00:00,
2013-12-31,0 days 04:00:00,40.41,-3.71,0,0.0,2013-12-31 04:00:00,
2013-12-31,0 days 05:00:00,40.41,-3.71,0,0.0,2013-12-31 05:00:00,
...,...,...,...,...,...,...,...
2022-02-28,0 days 20:00:00,40.41,-3.71,0,0.0,2022-02-28 20:00:00,284.110107
2022-02-28,0 days 21:00:00,40.41,-3.71,0,0.0,2022-02-28 21:00:00,282.633789
2022-02-28,0 days 22:00:00,40.41,-3.71,0,0.0,2022-02-28 22:00:00,281.246094
2022-02-28,0 days 23:00:00,40.41,-3.71,0,0.0,2022-02-28 23:00:00,280.459717


In [10]:
ds_dataframe.to_csv(r'ficheros_temperatura/madrid_2014_2022.csv', index=True)

In [185]:
ds_dataframe = pd.read_csv('ficheros_temperatura/madrid_2014_2022.csv')
ds_dataframe

Unnamed: 0,time,step,latitude,longitude,number,surface,valid_time,t2m
0,2013-12-31,0 days 01:00:00,40.41,-3.71,0,0.0,2013-12-31 01:00:00,
1,2013-12-31,0 days 02:00:00,40.41,-3.71,0,0.0,2013-12-31 02:00:00,
2,2013-12-31,0 days 03:00:00,40.41,-3.71,0,0.0,2013-12-31 03:00:00,
3,2013-12-31,0 days 04:00:00,40.41,-3.71,0,0.0,2013-12-31 04:00:00,
4,2013-12-31,0 days 05:00:00,40.41,-3.71,0,0.0,2013-12-31 05:00:00,
...,...,...,...,...,...,...,...,...
71563,2022-02-28,0 days 20:00:00,40.41,-3.71,0,0.0,2022-02-28 20:00:00,284.11010
71564,2022-02-28,0 days 21:00:00,40.41,-3.71,0,0.0,2022-02-28 21:00:00,282.63380
71565,2022-02-28,0 days 22:00:00,40.41,-3.71,0,0.0,2022-02-28 22:00:00,281.24610
71566,2022-02-28,0 days 23:00:00,40.41,-3.71,0,0.0,2022-02-28 23:00:00,280.45972


In [186]:
ds_dataframe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 71568 entries, 0 to 71567
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   time        71568 non-null  object 
 1   step        71568 non-null  object 
 2   latitude    71568 non-null  float64
 3   longitude   71568 non-null  float64
 4   number      71568 non-null  int64  
 5   surface     71568 non-null  float64
 6   valid_time  71568 non-null  object 
 7   t2m         71545 non-null  float64
dtypes: float64(4), int64(1), object(3)
memory usage: 4.4+ MB


In [187]:
ds_dataframe['latitude'].unique()

array([40.41])

In [188]:
ds_dataframe['longitude'].unique()

array([-3.71])

In [189]:
ds_dataframe['surface'].unique()

array([0.])

In [190]:
ds_dataframe.loc[ds_dataframe['time'] == '2013-12-31']

Unnamed: 0,time,step,latitude,longitude,number,surface,valid_time,t2m
0,2013-12-31,0 days 01:00:00,40.41,-3.71,0,0.0,2013-12-31 01:00:00,
1,2013-12-31,0 days 02:00:00,40.41,-3.71,0,0.0,2013-12-31 02:00:00,
2,2013-12-31,0 days 03:00:00,40.41,-3.71,0,0.0,2013-12-31 03:00:00,
3,2013-12-31,0 days 04:00:00,40.41,-3.71,0,0.0,2013-12-31 04:00:00,
4,2013-12-31,0 days 05:00:00,40.41,-3.71,0,0.0,2013-12-31 05:00:00,
5,2013-12-31,0 days 06:00:00,40.41,-3.71,0,0.0,2013-12-31 06:00:00,
6,2013-12-31,0 days 07:00:00,40.41,-3.71,0,0.0,2013-12-31 07:00:00,
7,2013-12-31,0 days 08:00:00,40.41,-3.71,0,0.0,2013-12-31 08:00:00,
8,2013-12-31,0 days 09:00:00,40.41,-3.71,0,0.0,2013-12-31 09:00:00,
9,2013-12-31,0 days 10:00:00,40.41,-3.71,0,0.0,2013-12-31 10:00:00,


In [191]:
# vamos a descomponer 'valid_time' en 'date' y 'hour'

In [192]:
import datetime

In [193]:
# extracting date from timestamp
ds_dataframe['valid_time'] = pd.to_datetime(ds_dataframe['valid_time'])

In [194]:
ds_dataframe['date'] = [datetime.datetime.date(d) for d in ds_dataframe['valid_time']] 

In [195]:
ds_dataframe['hour'] = [datetime.datetime.time(d) for d in ds_dataframe['valid_time']] 

In [196]:
ds_dataframe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 71568 entries, 0 to 71567
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   time        71568 non-null  object        
 1   step        71568 non-null  object        
 2   latitude    71568 non-null  float64       
 3   longitude   71568 non-null  float64       
 4   number      71568 non-null  int64         
 5   surface     71568 non-null  float64       
 6   valid_time  71568 non-null  datetime64[ns]
 7   t2m         71545 non-null  float64       
 8   date        71568 non-null  object        
 9   hour        71568 non-null  object        
dtypes: datetime64[ns](1), float64(4), int64(1), object(4)
memory usage: 5.5+ MB


In [197]:
ds_dataframe.head()

Unnamed: 0,time,step,latitude,longitude,number,surface,valid_time,t2m,date,hour
0,2013-12-31,0 days 01:00:00,40.41,-3.71,0,0.0,2013-12-31 01:00:00,,2013-12-31,01:00:00
1,2013-12-31,0 days 02:00:00,40.41,-3.71,0,0.0,2013-12-31 02:00:00,,2013-12-31,02:00:00
2,2013-12-31,0 days 03:00:00,40.41,-3.71,0,0.0,2013-12-31 03:00:00,,2013-12-31,03:00:00
3,2013-12-31,0 days 04:00:00,40.41,-3.71,0,0.0,2013-12-31 04:00:00,,2013-12-31,04:00:00
4,2013-12-31,0 days 05:00:00,40.41,-3.71,0,0.0,2013-12-31 05:00:00,,2013-12-31,05:00:00


In [198]:
# simplificamos el dataset:

In [199]:
del ds_dataframe['time']
del ds_dataframe['step']
del ds_dataframe['latitude']
del ds_dataframe['longitude']
del ds_dataframe['number']
del ds_dataframe['surface']
del ds_dataframe['valid_time']

In [200]:
t2m = ds_dataframe.pop('t2m')
ds_dataframe = pd.concat([ds_dataframe, t2m], 1)
ds_dataframe.head()

  ds_dataframe = pd.concat([ds_dataframe, t2m], 1)


Unnamed: 0,date,hour,t2m
0,2013-12-31,01:00:00,
1,2013-12-31,02:00:00,
2,2013-12-31,03:00:00,
3,2013-12-31,04:00:00,
4,2013-12-31,05:00:00,


In [201]:
# borramos los valores con date='2013-12-31'

In [202]:
ds_dataframe = ds_dataframe.dropna().reset_index(drop=True)
ds_dataframe

Unnamed: 0,date,hour,t2m
0,2014-01-01,00:00:00,277.84985
1,2014-01-01,01:00:00,277.74854
2,2014-01-01,02:00:00,277.80054
3,2014-01-01,03:00:00,277.86353
4,2014-01-01,04:00:00,277.95654
...,...,...,...
71540,2022-02-28,20:00:00,284.11010
71541,2022-02-28,21:00:00,282.63380
71542,2022-02-28,22:00:00,281.24610
71543,2022-02-28,23:00:00,280.45972


In [203]:
ds_dataframe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 71545 entries, 0 to 71544
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   date    71545 non-null  object 
 1   hour    71545 non-null  object 
 2   t2m     71545 non-null  float64
dtypes: float64(1), object(2)
memory usage: 1.6+ MB


In [204]:
# ponemos la fecha con /, la hora tipo '00' y la temperatura en grados

In [205]:
## fecha
import datetime

ds_dataframe['date'] = pd.to_datetime(ds_dataframe['date'], format='%Y-%m-%d').dt.strftime('%Y/%m/%d')

In [206]:
## hora 
ds_dataframe['hour'] = pd.to_datetime(ds_dataframe['hour'], format='%H:%M:%S').dt.strftime('%H')

In [207]:
## temperatura
ds_dataframe['t2m'] = ds_dataframe['t2m']-273.15

In [208]:
ds_dataframe

Unnamed: 0,date,hour,t2m
0,2014/01/01,00,4.69985
1,2014/01/01,01,4.59854
2,2014/01/01,02,4.65054
3,2014/01/01,03,4.71353
4,2014/01/01,04,4.80654
...,...,...,...
71540,2022/02/28,20,10.96010
71541,2022/02/28,21,9.48380
71542,2022/02/28,22,8.09610
71543,2022/02/28,23,7.30972


In [209]:
# guardamos
ds_dataframe = ds_dataframe.to_csv(r'ficheros_temperatura/madrid_2014_2022_processed.csv', index=True)
ds_dataframe

In [210]:
ds_dataframe = pd.read_csv('ficheros_temperatura/madrid_2014_2022_processed.csv')

In [211]:
ds_dataframe.drop("Unnamed: 0",axis=1)

Unnamed: 0,date,hour,t2m
0,2014/01/01,0,4.69985
1,2014/01/01,1,4.59854
2,2014/01/01,2,4.65054
3,2014/01/01,3,4.71353
4,2014/01/01,4,4.80654
...,...,...,...
71540,2022/02/28,20,10.96010
71541,2022/02/28,21,9.48380
71542,2022/02/28,22,8.09610
71543,2022/02/28,23,7.30972


In [212]:
pd.read_csv('ficheros_temperatura/madrid_2014_2022_processed.csv')

Unnamed: 0.1,Unnamed: 0,date,hour,t2m
0,0,2014/01/01,0,4.69985
1,1,2014/01/01,1,4.59854
2,2,2014/01/01,2,4.65054
3,3,2014/01/01,3,4.71353
4,4,2014/01/01,4,4.80654
...,...,...,...,...
71540,71540,2022/02/28,20,10.96010
71541,71541,2022/02/28,21,9.48380
71542,71542,2022/02/28,22,8.09610
71543,71543,2022/02/28,23,7.30972


In [228]:
## convertir en una función
def convert_file_to_csv(grib_file):
    import xarray as xr
    import datetime
    # extraemos el nombre del archivo:
    file_name = grib_file.replace('.grib', '')
    # lectura del archivo
    ds = xr.open_dataset('ficheros_temperatura/'+grib_file, engine='cfgrib')
    # conversión a dataframe
    ds_dataframe = ds.to_dataframe()
    # convertimos a csv:
    ds_dataframe.to_csv(r'ficheros_temperatura/'+file_name+'.csv', index=True)
    # leemos el csv:
    ds_dataframe = pd.read_csv('ficheros_temperatura/'+file_name+'.csv')
    # transform dataframe:
    ds_dataframe['valid_time'] = pd.to_datetime(ds_dataframe['valid_time'])
    ds_dataframe['date'] = [datetime.datetime.date(d) for d in ds_dataframe['valid_time']]
    ds_dataframe['hour'] = [datetime.datetime.time(d) for d in ds_dataframe['valid_time']]

    del ds_dataframe['time']
    del ds_dataframe['step']
    del ds_dataframe['latitude']
    del ds_dataframe['longitude']
    del ds_dataframe['number']
    del ds_dataframe['surface']
    del ds_dataframe['valid_time']
    
    t2m = ds_dataframe.pop('t2m')
    ds_dataframe = pd.concat([ds_dataframe, t2m], 1)
    ds_dataframe.head()
    # limpiamos de NaN:
    ds_dataframe = ds_dataframe.dropna().reset_index(drop=True)
    # formateamos las columnas:
    ds_dataframe['date'] = pd.to_datetime(ds_dataframe['date'], format='%Y-%m-%d').dt.strftime('%Y/%m/%d')
    ds_dataframe['hour'] = pd.to_datetime(ds_dataframe['hour'], format='%H:%M:%S').dt.strftime('%H')
    ds_dataframe['t2m'] = ds_dataframe['t2m']-273.15
    # guardamos el csv procesado:
    new_file = file_name+'_processed.csv'
    ds_dataframe.to_csv(r'ficheros_temperatura/'+new_file, index=True)
    
    return new_file

In [222]:
ds_dataframe = convert_file_to_csv('madrid_2014_2022_new.grib')
ds_dataframe

  ds_dataframe = pd.concat([ds_dataframe, t2m], 1)


'madrid_2014_2022_new_processed.csv'

In [223]:
pd.read_csv('ficheros_temperatura/'+ds_dataframe)

Unnamed: 0.1,Unnamed: 0,date,hour,t2m
0,0,2014/01/01,0,4.69985
1,1,2014/01/01,1,4.59854
2,2,2014/01/01,2,4.65054
3,3,2014/01/01,3,4.71353
4,4,2014/01/01,4,4.80654
...,...,...,...,...
71540,71540,2022/02/28,20,10.96010
71541,71541,2022/02/28,21,9.48380
71542,71542,2022/02/28,22,8.09610
71543,71543,2022/02/28,23,7.30972


In [None]:
# Procesamiento de los ficheros
## madrid, barcelona, sevilla, bilbao, valencia
files = [
    'madrid_2014_2022_new.grib', 
    'barcelona_2014_2022_new.grib', 
    'sevilla_2014_2022_new.grib', 
    'bilbao_2014_2022_new.grib', 
    'valencia_2014_2022_new.grib'
]

files_processed = []
for file in files:
    # file_processed = {}
    file_processed = convert_file_to_csv(file)
    files_processed.append(file_processed)

  ds_dataframe = pd.concat([ds_dataframe, t2m], 1)
  ds_dataframe = pd.concat([ds_dataframe, t2m], 1)


In [None]:
files_processed