## 7.1 Converting Strings to Dates

In [2]:
import numpy as np
import pandas as pd

# Raw date strings: day-month-year followed by a 12-hour clock time
date_strings = np.array(['03-04-2005 11:35 PM',
                         '23-05-2010 12:01 AM',
                         '04-09-2009 09:09 PM'])

# Parse every string into a Timestamp with an explicit format:
# %d day, %m month, %Y four-digit year, %I 12-hour hour, %M minute, %p AM/PM
[
    pd.to_datetime(raw_date, format='%d-%m-%Y %I:%M %p')
    for raw_date in date_strings
]

[Timestamp('2005-04-03 23:35:00'),
 Timestamp('2010-05-23 00:01:00'),
 Timestamp('2009-09-04 21:09:00')]

In [3]:
# Same parse as before, but errors='coerce' makes any string that does not
# match the format come back as NaT instead of raising an exception
[
    pd.to_datetime(raw_date, format='%d-%m-%Y %I:%M %p', errors='coerce')
    for raw_date in date_strings
]

[Timestamp('2005-04-03 23:35:00'),
 Timestamp('2010-05-23 00:01:00'),
 Timestamp('2009-09-04 21:09:00')]

### See Also
* Complete list of Python string time codes (http://strftime.org/)

## 7.2 Handling Time Zones

In [4]:
import pandas as pd

# Create a timezone-aware timestamp in a single step by passing tz
# directly to the constructor
pd.Timestamp(
    '2017-05-01 06:00:00',
    tz='Europe/London',
)

Timestamp('2017-05-01 06:00:00+0100', tz='Europe/London')

In [5]:
# Start from a timezone-naive timestamp
date = pd.Timestamp('2017-05-01 06:00:00')

# Attach the London time zone; the wall-clock time stays 06:00
date_in_london = date.tz_localize(tz='Europe/London')

# Display the localized timestamp
date_in_london

Timestamp('2017-05-01 06:00:00+0100', tz='Europe/London')

In [8]:
# Re-express the London-localized timestamp in the Abidjan zone: the instant
# is unchanged, only the wall-clock representation (and offset) differs
date_in_london.tz_convert(tz='Africa/Abidjan')

Timestamp('2017-05-01 05:00:00+0000', tz='Africa/Abidjan')

In [9]:
# Three consecutive month-end dates starting from February 2002.
# The frequency is given as an offset object rather than the string alias 'M':
# that alias is deprecated since pandas 2.2 (renamed 'ME'), whereas
# pd.offsets.MonthEnd() behaves the same on old and new pandas releases.
dates = pd.Series(
    pd.date_range('2/2/2002', periods=3, freq=pd.offsets.MonthEnd())
)

# Localize every element at once through the Series .dt accessor
dates.dt.tz_localize('Africa/Abidjan')

0   2002-02-28 00:00:00+00:00
1   2002-03-31 00:00:00+00:00
2   2002-04-30 00:00:00+00:00
dtype: datetime64[ns, Africa/Abidjan]

## 7.3 Selecting Dates and Times
## 7.4 Breaking Up Date Data into Multiple Features
## 7.5 Calculating the Difference Between Dates
## 7.6 Encoding Days of the Week
## 7.7 Creating Lagged Features
## 7.8 Using Rolling Time Windows

In [10]:
import pandas as pd

# Five month-end dates for 2010 used as the time index.
# An offset object replaces the string alias 'M', which is deprecated since
# pandas 2.2 (renamed 'ME'); pd.offsets.MonthEnd() works across versions.
time_index = pd.date_range('01/01/2010', periods=5, freq=pd.offsets.MonthEnd())

# One price observation per month
df = pd.DataFrame({'Stock_Price': [1, 2, 3, 4, 5]}, index=time_index)

# Mean over a sliding window of two rows; the first row has no predecessor,
# so its result is NaN
df.rolling(window=2).mean()

Unnamed: 0,Stock_Price
2010-01-31,
2010-02-28,1.5
2010-03-31,2.5
2010-04-30,3.5
2010-05-31,4.5


### See Also
* pandas documentation: Rolling Windows (https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rolling.html)
* What are Moving Average or Smoothing Techniques? (https://www.itl.nist.gov/div898/handbook/pmc/section4/pmc42.htm)

## 7.9 Handling Missing Data in Time Series

In [11]:
import pandas as pd
import numpy as np

# Five month-end dates for 2010 used as the time index.
# An offset object replaces the string alias 'M', which is deprecated since
# pandas 2.2 (renamed 'ME'); pd.offsets.MonthEnd() works across versions.
time_index = pd.date_range('01/01/2010', periods=5, freq=pd.offsets.MonthEnd())

# Sales series with a two-month gap of missing values
df = pd.DataFrame({"Sales": [1.0, 2.0, np.nan, np.nan, 5.0]}, index=time_index)

# Fill the gap by linear interpolation (the default method)
df.interpolate()

Unnamed: 0,Sales
2010-01-31,1.0
2010-02-28,2.0
2010-03-31,3.0
2010-04-30,4.0
2010-05-31,5.0


In [12]:
# Forward-fill: each gap takes the last known value that precedes it
df.ffill(axis=0)

Unnamed: 0,Sales
2010-01-31,1.0
2010-02-28,2.0
2010-03-31,2.0
2010-04-30,2.0
2010-05-31,5.0


In [13]:
# Back-fill: each gap takes the next known value that follows it
df.bfill(axis=0)

Unnamed: 0,Sales
2010-01-31,1.0
2010-02-28,2.0
2010-03-31,5.0
2010-04-30,5.0
2010-05-31,5.0


In [14]:
# Assume a non-linear trend between known points and fit the gap with a
# quadratic curve (this method is delegated to SciPy, which must be installed)
df.interpolate(
    method="quadratic",
)

Unnamed: 0,Sales
2010-01-31,1.0
2010-02-28,2.0
2010-03-31,3.059808
2010-04-30,4.038069
2010-05-31,5.0


In [15]:
# Interpolate at most one consecutive missing value, working forward from the
# last known point; anything further into the gap stays NaN
df.interpolate(
    limit=1,
    limit_direction="forward",
)

Unnamed: 0,Sales
2010-01-31,1.0
2010-02-28,2.0
2010-03-31,3.0
2010-04-30,
2010-05-31,5.0
