# Parsing de Fechas

En este notebook de Jupyter se revisará el tema del parsing (conversión) de fechas con pandas.

In [1]:
# Prints a fixed greeting; presumably a smoke test that the kernel executes cells — confirm intent
print("Hello World!")

Hello World!


In [4]:
# Standard library
import datetime

# Third-party libraries used in this notebook
import numpy as np
import pandas as pd
import seaborn as sns

# Fix NumPy's global random seed so that any random draws are reproducible
np.random.seed(0)

# Load the landslide catalog (path is relative to the notebook's working directory)
landslides = pd.read_csv("./data/catalog.txt")

In [8]:
# Preview the first 23 rows of the freshly loaded frame
landslides.head(n=23)

Unnamed: 0,id,date,time,continent_code,country_name,country_code,state/province,population,city/town,distance,...,geolocation,hazard_type,landslide_type,landslide_size,trigger,storm_name,injuries,fatalities,source_name,source_link
0,34,3/2/07,Night,,United States,US,Virginia,16000,Cherry Hill,3.40765,...,"(38.600900000000003, -77.268199999999993)",Landslide,Landslide,Small,Rain,,,,NBC 4 news,http://www.nbc4.com/news/11186871/detail.html
1,42,3/22/07,,,United States,US,Ohio,17288,New Philadelphia,3.33522,...,"(40.517499999999998, -81.430499999999995)",Landslide,Landslide,Small,Rain,,,,Canton Rep.com,http://www.cantonrep.com/index.php?ID=345054&C...
2,56,4/6/07,,,United States,US,Pennsylvania,15930,Wilkinsburg,2.91977,...,"(40.4377, -79.915999999999997)",Landslide,Landslide,Small,Rain,,,,The Pittsburgh Channel.com,https://web.archive.org/web/20080423132842/htt...
3,59,4/14/07,,,Canada,CA,Quebec,42786,Châteauguay,2.98682,...,"(45.322600000000001, -73.777100000000004)",Landslide,Riverbank collapse,Small,Rain,,,,Le Soleil,http://www.hebdos.net/lsc/edition162007/articl...
4,61,4/15/07,,,United States,US,Kentucky,6903,Pikeville,5.66542,...,"(37.432499999999997, -82.493099999999998)",Landslide,Landslide,Small,Downpour,,,0.0,Matthew Crawford (KGS),
5,64,4/20/07,,,United States,US,Kentucky,6903,Pikeville,0.23715,...,"(37.481400000000001, -82.518600000000006)",Landslide,Landslide,Small,Rain,,,,Applalachain news-express,http://www.news-expressky.com/articles/2007/04...
6,67,4/24/07,,,United States,US,South Dakota,2540,Dakota Dunes,2.48033,...,"(42.494100000000003, -96.457599999999999)",Landslide,Landslide,Small,Rain,,,,Sioux City Journnal,http://www.siouxcityjournal.com/articles/2007/...
7,77,5/21/07,,SA,Colombia,CO,Risaralda,440118,Pereira,0.62022,...,"(4.8080999999999996, -75.694100000000006)",Landslide,Mudslide,Large,Rain,,,13.0,Reuters - AlertNet.org,http://www.reuters.com/news/video/videoStory?v...
8,105,6/27/07,,SA,Ecuador,EC,Zamora-Chinchipe,15276,Zamora,0.47714,...,"(-4.0650000000000004, -78.950999999999993)",Landslide,Landslide,Medium,Downpour,,,,Red Cross - Field reports,https://www-secure.ifrc.org/dmis/prepare/view_...
9,106,6/27/07,,SA,Ecuador,EC,Loja,117796,Loja,0.35649,...,"(-3.99, -79.204999999999998)",Landslide,Landslide,Medium,Downpour,,,,Red Cross - Field reports,https://www-secure.ifrc.org/dmis/prepare/view_...


In [6]:
# Show the leading entries of the raw date column (head() defaults to five rows)
print(landslides["date"].head(n=5))

0     3/2/07
1    3/22/07
2     4/6/07
3    4/14/07
4    4/15/07
Name: date, dtype: object


In [7]:
# Inspect each column's dtype; in the recorded output the raw date column is a generic object column
landslides.dtypes

id                        int64
date                     object
time                     object
continent_code           object
country_name             object
country_code             object
state/province           object
population                int64
city/town                object
distance                float64
location_description     object
latitude                float64
longitude               float64
geolocation              object
hazard_type              object
landslide_type           object
landslide_size           object
trigger                  object
storm_name               object
injuries                float64
fatalities              float64
source_name              object
source_link              object
dtype: object

In [9]:
# Parse the raw date strings with an explicit month/day/two-digit-year layout
# and store the result in a new date_parsed column.
landslides["date_parsed"] = pd.to_datetime(
    landslides["date"],
    format="%m/%d/%y",
)

In [10]:
# Preview the first 23 rows again; the frame now also carries the date_parsed column
landslides.head(n=23)

Unnamed: 0,id,date,time,continent_code,country_name,country_code,state/province,population,city/town,distance,...,hazard_type,landslide_type,landslide_size,trigger,storm_name,injuries,fatalities,source_name,source_link,date_parsed
0,34,3/2/07,Night,,United States,US,Virginia,16000,Cherry Hill,3.40765,...,Landslide,Landslide,Small,Rain,,,,NBC 4 news,http://www.nbc4.com/news/11186871/detail.html,2007-03-02
1,42,3/22/07,,,United States,US,Ohio,17288,New Philadelphia,3.33522,...,Landslide,Landslide,Small,Rain,,,,Canton Rep.com,http://www.cantonrep.com/index.php?ID=345054&C...,2007-03-22
2,56,4/6/07,,,United States,US,Pennsylvania,15930,Wilkinsburg,2.91977,...,Landslide,Landslide,Small,Rain,,,,The Pittsburgh Channel.com,https://web.archive.org/web/20080423132842/htt...,2007-04-06
3,59,4/14/07,,,Canada,CA,Quebec,42786,Châteauguay,2.98682,...,Landslide,Riverbank collapse,Small,Rain,,,,Le Soleil,http://www.hebdos.net/lsc/edition162007/articl...,2007-04-14
4,61,4/15/07,,,United States,US,Kentucky,6903,Pikeville,5.66542,...,Landslide,Landslide,Small,Downpour,,,0.0,Matthew Crawford (KGS),,2007-04-15
5,64,4/20/07,,,United States,US,Kentucky,6903,Pikeville,0.23715,...,Landslide,Landslide,Small,Rain,,,,Applalachain news-express,http://www.news-expressky.com/articles/2007/04...,2007-04-20
6,67,4/24/07,,,United States,US,South Dakota,2540,Dakota Dunes,2.48033,...,Landslide,Landslide,Small,Rain,,,,Sioux City Journnal,http://www.siouxcityjournal.com/articles/2007/...,2007-04-24
7,77,5/21/07,,SA,Colombia,CO,Risaralda,440118,Pereira,0.62022,...,Landslide,Mudslide,Large,Rain,,,13.0,Reuters - AlertNet.org,http://www.reuters.com/news/video/videoStory?v...,2007-05-21
8,105,6/27/07,,SA,Ecuador,EC,Zamora-Chinchipe,15276,Zamora,0.47714,...,Landslide,Landslide,Medium,Downpour,,,,Red Cross - Field reports,https://www-secure.ifrc.org/dmis/prepare/view_...,2007-06-27
9,106,6/27/07,,SA,Ecuador,EC,Loja,117796,Loja,0.35649,...,Landslide,Landslide,Medium,Downpour,,,,Red Cross - Field reports,https://www-secure.ifrc.org/dmis/prepare/view_...,2007-06-27


In [11]:
# Re-check the dtypes: in the recorded output date_parsed is datetime64[ns] while date stays object
landslides.dtypes

id                               int64
date                            object
time                            object
continent_code                  object
country_name                    object
country_code                    object
state/province                  object
population                       int64
city/town                       object
distance                       float64
location_description            object
latitude                       float64
longitude                      float64
geolocation                     object
hazard_type                     object
landslide_type                  object
landslide_size                  object
trigger                         object
storm_name                      object
injuries                       float64
fatalities                     float64
source_name                     object
source_link                     object
date_parsed             datetime64[ns]
dtype: object

In [15]:
# Let pandas work out the date layout by itself and store the result in date2.
# The infer_datetime_format flag is deprecated since pandas 2.0 (strict format
# inference is now the default), so it is omitted; older pandas versions still
# parse the column without it.
# NOTE(review): inference is convenient but can guess wrong on ambiguous
# day/month values; the explicit format used for date_parsed is the safer choice.
landslides["date2"] = pd.to_datetime(landslides["date"])

In [16]:
# Preview the first 23 rows to compare date_parsed with the inferred date2 column
landslides.head(n=23)

Unnamed: 0,id,date,time,continent_code,country_name,country_code,state/province,population,city/town,distance,...,landslide_type,landslide_size,trigger,storm_name,injuries,fatalities,source_name,source_link,date_parsed,date2
0,34,3/2/07,Night,,United States,US,Virginia,16000,Cherry Hill,3.40765,...,Landslide,Small,Rain,,,,NBC 4 news,http://www.nbc4.com/news/11186871/detail.html,2007-03-02,2007-03-02
1,42,3/22/07,,,United States,US,Ohio,17288,New Philadelphia,3.33522,...,Landslide,Small,Rain,,,,Canton Rep.com,http://www.cantonrep.com/index.php?ID=345054&C...,2007-03-22,2007-03-22
2,56,4/6/07,,,United States,US,Pennsylvania,15930,Wilkinsburg,2.91977,...,Landslide,Small,Rain,,,,The Pittsburgh Channel.com,https://web.archive.org/web/20080423132842/htt...,2007-04-06,2007-04-06
3,59,4/14/07,,,Canada,CA,Quebec,42786,Châteauguay,2.98682,...,Riverbank collapse,Small,Rain,,,,Le Soleil,http://www.hebdos.net/lsc/edition162007/articl...,2007-04-14,2007-04-14
4,61,4/15/07,,,United States,US,Kentucky,6903,Pikeville,5.66542,...,Landslide,Small,Downpour,,,0.0,Matthew Crawford (KGS),,2007-04-15,2007-04-15
5,64,4/20/07,,,United States,US,Kentucky,6903,Pikeville,0.23715,...,Landslide,Small,Rain,,,,Applalachain news-express,http://www.news-expressky.com/articles/2007/04...,2007-04-20,2007-04-20
6,67,4/24/07,,,United States,US,South Dakota,2540,Dakota Dunes,2.48033,...,Landslide,Small,Rain,,,,Sioux City Journnal,http://www.siouxcityjournal.com/articles/2007/...,2007-04-24,2007-04-24
7,77,5/21/07,,SA,Colombia,CO,Risaralda,440118,Pereira,0.62022,...,Mudslide,Large,Rain,,,13.0,Reuters - AlertNet.org,http://www.reuters.com/news/video/videoStory?v...,2007-05-21,2007-05-21
8,105,6/27/07,,SA,Ecuador,EC,Zamora-Chinchipe,15276,Zamora,0.47714,...,Landslide,Medium,Downpour,,,,Red Cross - Field reports,https://www-secure.ifrc.org/dmis/prepare/view_...,2007-06-27,2007-06-27
9,106,6/27/07,,SA,Ecuador,EC,Loja,117796,Loja,0.35649,...,Landslide,Medium,Downpour,,,,Red Cross - Field reports,https://www-secure.ifrc.org/dmis/prepare/view_...,2007-06-27,2007-06-27


In [17]:
# Extract the day-of-month component through the .dt accessor of the parsed dates
day_of_month_landslides = landslides["date_parsed"].dt.day
# NOTE(review): the recorded output is float64, presumably because some dates are missing (NaT) — confirm
day_of_month_landslides.head(n=5)

0     2.0
1    22.0
2     6.0
3    14.0
4    15.0
Name: date_parsed, dtype: float64