# Simplificando el trabajo con fechas y horas en Python/Pandas

## Importar librerías

In [1]:
from pathlib import Path
import numpy as np
import pandas as pd
import datetime
from pytz import timezone
from pytz import all_timezones

## Cargar los datos a utilizar

In [2]:
dir_prin = Path.cwd()
dir_entradas = dir_prin/'entradas'
dir_intermedios = dir_prin/'intermedios'
dir_salidas = dir_prin/'salidas'
df = pd.read_excel(dir_intermedios/ 'clima2018_format.xlsx')

In [3]:
ddias = {'Monday': 'lun',
'Tuesday': 'mar',
'Wednesday': 'mie',
'Thursday': 'jue',
'Friday': 'vie',
'Saturday': 'sab',
'Sunday': 'dom'}

In [4]:
dmeses = {'January': 'ene',
'February': 'feb',
'March': 'mar',
'April': 'abr',
'May': 'may',
'June': 'jun',
'July': 'jul',
'August': 'ago',
'September': 'sep',
'October': 'oct',
'November': 'nov',
'December': 'dic'}

## Revisar los datos

In [5]:
df.head()

Unnamed: 0,time,precip_intensity,temperature,apparent_temperature,dew_point,humidity,pressure,wind_speed,cloud_cover
0,2018-01-01 06:00:00,0.0,22.44,22.73,18.1,0.77,,0.0,0.19
1,2018-01-01 07:00:00,0.0,21.88,22.15,17.75,0.77,,0.0,
2,2018-01-01 08:00:00,0.0,21.33,21.63,17.94,0.81,,0.0,
3,2018-01-01 09:00:00,0.0,20.77,21.07,17.75,0.83,,0.0,
4,2018-01-01 10:00:00,0.0,19.11,19.33,16.79,0.86,,0.0,


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8642 entries, 0 to 8641
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   time                  8642 non-null   datetime64[ns]
 1   precip_intensity      3361 non-null   float64       
 2   temperature           8640 non-null   float64       
 3   apparent_temperature  8640 non-null   float64       
 4   dew_point             8640 non-null   float64       
 5   humidity              8640 non-null   float64       
 6   pressure              98 non-null     float64       
 7   wind_speed            8306 non-null   float64       
 8   cloud_cover           6121 non-null   float64       
dtypes: datetime64[ns](1), float64(8)
memory usage: 607.7 KB


## Manipulación de fechas y horas en Python

### Creación, obtención de información y manipulación de un objeto Date

#### Crear objeto date

In [7]:

fecha = datetime.datetime(2019, 4, 29)
print(fecha)

2019-04-29 00:00:00


In [8]:
type(fecha)

datetime.datetime

#### Obtener información de un objeto Date

In [9]:

print(datetime.date.today())
anio = datetime.date.today().year
mes = datetime.date.today().month
dia = datetime.date.today().day

print("Hoy es el dia {}, del mes {}, del año {}".format(dia, mes, anio))

2020-02-25
Hoy es el dia 25, del mes 2, del año 2020


#### Manipulación de un objeto Date

Restar o sumar un objeto Date

In [10]:
fecha_ant = str(datetime.date.today() - datetime.timedelta(days = 3))
print("Hace tres días la fecha era {}".format(fecha_ant))

Hace tres días la fecha era 2020-02-22


In [11]:
fecha_ant = str(datetime.date.today() + datetime.timedelta(days = 3))
print(fecha_ant)

2020-02-28


### Obtener fecha y hora actual en la zona horaria UTC

In [12]:
hora_utc = datetime.datetime.now(timezone('UTC'))
hora_utc

datetime.datetime(2020, 2, 26, 3, 33, 41, 721302, tzinfo=<UTC>)

In [13]:
fmt = "%Y-%m-%d %H:%M:%S %Z%z"
print("La fecha y hora actual UTC es: {}".format(hora_utc.strftime(fmt)))

La fecha y hora actual UTC es: 2020-02-26 03:33:41 UTC+0000


### Listado de zonas horarias disponibles

In [14]:
print(len(all_timezones))
for zona in all_timezones:
    print(zona)

592
Africa/Abidjan
Africa/Accra
Africa/Addis_Ababa
Africa/Algiers
Africa/Asmara
Africa/Asmera
Africa/Bamako
Africa/Bangui
Africa/Banjul
Africa/Bissau
Africa/Blantyre
Africa/Brazzaville
Africa/Bujumbura
Africa/Cairo
Africa/Casablanca
Africa/Ceuta
Africa/Conakry
Africa/Dakar
Africa/Dar_es_Salaam
Africa/Djibouti
Africa/Douala
Africa/El_Aaiun
Africa/Freetown
Africa/Gaborone
Africa/Harare
Africa/Johannesburg
Africa/Juba
Africa/Kampala
Africa/Khartoum
Africa/Kigali
Africa/Kinshasa
Africa/Lagos
Africa/Libreville
Africa/Lome
Africa/Luanda
Africa/Lubumbashi
Africa/Lusaka
Africa/Malabo
Africa/Maputo
Africa/Maseru
Africa/Mbabane
Africa/Mogadishu
Africa/Monrovia
Africa/Nairobi
Africa/Ndjamena
Africa/Niamey
Africa/Nouakchott
Africa/Ouagadougou
Africa/Porto-Novo
Africa/Sao_Tome
Africa/Timbuktu
Africa/Tripoli
Africa/Tunis
Africa/Windhoek
America/Adak
America/Anchorage
America/Anguilla
America/Antigua
America/Araguaina
America/Argentina/Buenos_Aires
America/Argentina/Catamarca
America/Argentina/ComodR

### Buscar zona horaria específica

In [15]:
for zona in all_timezones:
    if 'Sal' in zona:
        print(zona)

Africa/Dar_es_Salaam
America/Argentina/Salta
America/El_Salvador
America/North_Dakota/New_Salem


### Convertir a la zona horaria que necesitas

In [16]:
hora_slv = hora_utc.astimezone(timezone('America/El_Salvador'))
print(hora_slv.strftime(fmt))

2020-02-25 21:33:41 CST-0600


## Manipulación de fechas y horas en Pandas

### Definición de la zona horaria y cambiar la fecha y hora a otra zona horaria

In [17]:
df['time'][0]

Timestamp('2018-01-01 06:00:00')

In [18]:
df['time'][0].tz_localize('UTC').astimezone(timezone('America/El_Salvador'))

Timestamp('2018-01-01 00:00:00-0600', tz='America/El_Salvador')

In [19]:
df['time'].tz_localize('UTC').astimezone(timezone('America/El_Salvador'))

TypeError: index is not a valid DatetimeIndex or PeriodIndex

#### Ajustar el index

In [20]:
df = df.set_index('time')
df.head()

Unnamed: 0_level_0,precip_intensity,temperature,apparent_temperature,dew_point,humidity,pressure,wind_speed,cloud_cover
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-01-01 06:00:00,0.0,22.44,22.73,18.1,0.77,,0.0,0.19
2018-01-01 07:00:00,0.0,21.88,22.15,17.75,0.77,,0.0,
2018-01-01 08:00:00,0.0,21.33,21.63,17.94,0.81,,0.0,
2018-01-01 09:00:00,0.0,20.77,21.07,17.75,0.83,,0.0,
2018-01-01 10:00:00,0.0,19.11,19.33,16.79,0.86,,0.0,


In [21]:
df = df.tz_localize('UTC')
df.head()

Unnamed: 0_level_0,precip_intensity,temperature,apparent_temperature,dew_point,humidity,pressure,wind_speed,cloud_cover
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-01-01 06:00:00+00:00,0.0,22.44,22.73,18.1,0.77,,0.0,0.19
2018-01-01 07:00:00+00:00,0.0,21.88,22.15,17.75,0.77,,0.0,
2018-01-01 08:00:00+00:00,0.0,21.33,21.63,17.94,0.81,,0.0,
2018-01-01 09:00:00+00:00,0.0,20.77,21.07,17.75,0.83,,0.0,
2018-01-01 10:00:00+00:00,0.0,19.11,19.33,16.79,0.86,,0.0,


In [22]:
df = df.tz_convert('America/El_Salvador')
df.head()

Unnamed: 0_level_0,precip_intensity,temperature,apparent_temperature,dew_point,humidity,pressure,wind_speed,cloud_cover
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-01-01 00:00:00-06:00,0.0,22.44,22.73,18.1,0.77,,0.0,0.19
2018-01-01 01:00:00-06:00,0.0,21.88,22.15,17.75,0.77,,0.0,
2018-01-01 02:00:00-06:00,0.0,21.33,21.63,17.94,0.81,,0.0,
2018-01-01 03:00:00-06:00,0.0,20.77,21.07,17.75,0.83,,0.0,
2018-01-01 04:00:00-06:00,0.0,19.11,19.33,16.79,0.86,,0.0,


### Obtención de información (Año, mes, día, hora, etc.)

In [23]:
def info_datetime(df, lista):
    return pd.DataFrame({
        'anio': df.index[lista].year,
        'mes': df.index[lista].month,
        'nombre_mes': df.index[lista].month_name(),
        'dia_anio': df.index[lista].dayofyear,
        'dia': df.index[lista].day,
        'dia_semana': df.index[lista].dayofweek,
        'dia_nombre': df.index[lista].day_name(),
        'hora': df.index[lista].hour,
        'minuto':df.index[lista].minute
    })

In [24]:
info_datetime(df, [0, 200, 500, -100, -1])

Unnamed: 0,anio,mes,nombre_mes,dia_anio,dia,dia_semana,dia_nombre,hora,minuto
0,2018,1,January,1,1,0,Monday,0,0
1,2018,1,January,9,9,1,Tuesday,8,0
2,2018,1,January,21,21,6,Sunday,20,0
3,2018,12,December,361,27,3,Thursday,17,0
4,2018,12,December,365,31,0,Monday,23,0


### Agregar al dataframe una columna con el dato del día, mes u hora

#### Funciones tipo lambda

In [25]:
fdias = lambda nombre: ddias[nombre]
print(fdias('Wednesday'))

mie


In [26]:
fmeses = lambda nombre: dmeses[nombre]
print(fmeses('June'))

jun


#### Información de los días

In [33]:
df['dia'] = pd.Series(df.index.day_name()).apply(fdias).values

In [34]:
df.head()

Unnamed: 0_level_0,precip_intensity,temperature,apparent_temperature,dew_point,humidity,pressure,wind_speed,cloud_cover,dia
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2018-01-01 00:00:00-06:00,0.0,22.44,22.73,18.1,0.77,,0.0,0.19,lun
2018-01-01 01:00:00-06:00,0.0,21.88,22.15,17.75,0.77,,0.0,,lun
2018-01-01 02:00:00-06:00,0.0,21.33,21.63,17.94,0.81,,0.0,,lun
2018-01-01 03:00:00-06:00,0.0,20.77,21.07,17.75,0.83,,0.0,,lun
2018-01-01 04:00:00-06:00,0.0,19.11,19.33,16.79,0.86,,0.0,,lun


#### Información de los meses

In [35]:
df['mes'] = pd.Series(df.index.month_name()).apply(fmeses).values

#### Información de las horas

In [36]:
df['hora'] = pd.Series(df.index.hour).values

In [37]:
df.head()

Unnamed: 0_level_0,precip_intensity,temperature,apparent_temperature,dew_point,humidity,pressure,wind_speed,cloud_cover,dia,mes,hora
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2018-01-01 00:00:00-06:00,0.0,22.44,22.73,18.1,0.77,,0.0,0.19,lun,ene,0
2018-01-01 01:00:00-06:00,0.0,21.88,22.15,17.75,0.77,,0.0,,lun,ene,1
2018-01-01 02:00:00-06:00,0.0,21.33,21.63,17.94,0.81,,0.0,,lun,ene,2
2018-01-01 03:00:00-06:00,0.0,20.77,21.07,17.75,0.83,,0.0,,lun,ene,3
2018-01-01 04:00:00-06:00,0.0,19.11,19.33,16.79,0.86,,0.0,,lun,ene,4


In [38]:
df.to_csv(dir_salidas/ 'clima2018_ZTCorrec.csv')

In [39]:
df.to_excel(dir_intermedios/ 'clima2018_ZTCorrec.xlsx')

ValueError: Excel does not support datetimes with timezones. Please ensure that datetimes are timezone unaware before writing to Excel.

#### Quitar la información de la zona horaria

In [40]:
df = df.tz_localize(None)
df.head()

Unnamed: 0_level_0,precip_intensity,temperature,apparent_temperature,dew_point,humidity,pressure,wind_speed,cloud_cover,dia,mes,hora
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2018-01-01 00:00:00,0.0,22.44,22.73,18.1,0.77,,0.0,0.19,lun,ene,0
2018-01-01 01:00:00,0.0,21.88,22.15,17.75,0.77,,0.0,,lun,ene,1
2018-01-01 02:00:00,0.0,21.33,21.63,17.94,0.81,,0.0,,lun,ene,2
2018-01-01 03:00:00,0.0,20.77,21.07,17.75,0.83,,0.0,,lun,ene,3
2018-01-01 04:00:00,0.0,19.11,19.33,16.79,0.86,,0.0,,lun,ene,4


In [41]:
df.to_excel(dir_intermedios/ 'clima2018_ZTCorrec.xlsx')

### Función resample

In [42]:
index = pd.date_range('4/29/2019', periods=90720, freq='T')
serie1 = pd.Series(np.random.randint(0,1000, size = 90720), index=index)
serie1.head()

2019-04-29 00:00:00    639
2019-04-29 00:01:00      2
2019-04-29 00:02:00    795
2019-04-29 00:03:00    159
2019-04-29 00:04:00    428
Freq: T, dtype: int32

In [43]:
serie1.tail()

2019-06-30 23:55:00    897
2019-06-30 23:56:00    447
2019-06-30 23:57:00    702
2019-06-30 23:58:00     71
2019-06-30 23:59:00    324
Freq: T, dtype: int32

In [44]:
serie1.resample('2T').sum().head()

2019-04-29 00:00:00     641
2019-04-29 00:02:00     954
2019-04-29 00:04:00    1075
2019-04-29 00:06:00     831
2019-04-29 00:08:00    1315
Freq: 2T, dtype: int32

In [45]:
serie1.resample('2min').sum().head()

2019-04-29 00:00:00     641
2019-04-29 00:02:00     954
2019-04-29 00:04:00    1075
2019-04-29 00:06:00     831
2019-04-29 00:08:00    1315
Freq: 2T, dtype: int32

In [46]:
serie1.resample('1H').mean().head(10)

2019-04-29 00:00:00    467.783333
2019-04-29 01:00:00    565.700000
2019-04-29 02:00:00    489.633333
2019-04-29 03:00:00    478.066667
2019-04-29 04:00:00    506.083333
2019-04-29 05:00:00    473.333333
2019-04-29 06:00:00    460.250000
2019-04-29 07:00:00    460.950000
2019-04-29 08:00:00    530.016667
2019-04-29 09:00:00    568.833333
Freq: H, dtype: float64

In [47]:
serie1.resample('D').mean().head(15)

2019-04-29    507.927083
2019-04-30    500.411111
2019-05-01    502.736111
2019-05-02    486.997917
2019-05-03    501.096528
2019-05-04    493.948611
2019-05-05    483.519444
2019-05-06    491.832639
2019-05-07    513.713889
2019-05-08    499.726389
2019-05-09    486.541667
2019-05-10    501.460417
2019-05-11    492.178472
2019-05-12    517.785417
2019-05-13    503.687500
Freq: D, dtype: float64

In [48]:
serie1.resample('M').mean()

2019-04-30    504.169097
2019-05-31    497.872625
2019-06-30    499.474352
Freq: M, dtype: float64

In [49]:
serie1.resample('Y').mean()

2019-12-31    498.83524
Freq: A-DEC, dtype: float64

In [50]:
serie1.resample('30S').asfreq().head(10)

2019-04-29 00:00:00    639.0
2019-04-29 00:00:30      NaN
2019-04-29 00:01:00      2.0
2019-04-29 00:01:30      NaN
2019-04-29 00:02:00    795.0
2019-04-29 00:02:30      NaN
2019-04-29 00:03:00    159.0
2019-04-29 00:03:30      NaN
2019-04-29 00:04:00    428.0
2019-04-29 00:04:30      NaN
Freq: 30S, dtype: float64

In [52]:
serie1.resample('S').pad().head(10)

2019-04-29 00:00:00    639
2019-04-29 00:00:01    639
2019-04-29 00:00:02    639
2019-04-29 00:00:03    639
2019-04-29 00:00:04    639
2019-04-29 00:00:05    639
2019-04-29 00:00:06    639
2019-04-29 00:00:07    639
2019-04-29 00:00:08    639
2019-04-29 00:00:09    639
Freq: S, dtype: int32

# No olvides suscribirte a Simplificando Datos