# Converting Strings to Dates

In [5]:
import numpy as np
import pandas as pd

In [6]:
# Sample date-times written as day-month-year strings with a 12-hour clock.
date_strings = np.array([
    '03-04-2005 11:35 PM',
    '23-05-2010 12:01 AM',
    '04-09-2009 09:09 PM',
])

In [16]:
# Parse the whole array in one vectorized call rather than once per element;
# list() turns the result back into a list of Timestamps. With errors='coerce',
# strings that do not match the format become NaT instead of raising.
list(pd.to_datetime(date_strings, format='%d-%m-%Y %I:%M %p', errors='coerce'))

[Timestamp('2005-04-03 23:35:00'),
 Timestamp('2010-05-23 00:01:00'),
 Timestamp('2009-09-04 21:09:00')]

# Handling Time Zones

In [17]:
# Build a timestamp that already carries its time zone.
pd.Timestamp(
    '2017-05-01 06:00:00',
    tz='Europe/London',
)

Timestamp('2017-05-01 06:00:00+0100', tz='Europe/London')

In [18]:
# A timestamp created without a tz argument; a time zone is attached later.
date = pd.Timestamp(
    '2017-05-01 06:00:00',
)

In [23]:
# Attach a time zone to the existing timestamp; the wall-clock time is kept.
dateinlondon = date.tz_localize(tz='Europe/London')
dateinlondon

Timestamp('2017-05-01 06:00:00+0100', tz='Europe/London')

In [24]:
# Express the same instant in a different time zone.
dateinlondon.tz_convert(tz='Africa/Abidjan')

Timestamp('2017-05-01 05:00:00+0000', tz='Africa/Abidjan')

In [33]:
# Three consecutive hourly timestamps as a Series. The lowercase 'h' alias is
# used because the uppercase 'H' alias is deprecated in recent pandas releases.
# Note: this rebinds `date`, which held a single Timestamp in an earlier cell.
date = pd.Series(pd.date_range('2012/02/02', periods=3, freq='h'))
date

0   2012-02-02 00:00:00
1   2012-02-02 01:00:00
2   2012-02-02 02:00:00
dtype: datetime64[ns]

In [35]:
# For a Series, time-zone methods are reached through the .dt accessor.
date.dt.tz_localize(tz='Africa/Abidjan')

0   2012-02-02 00:00:00+00:00
1   2012-02-02 01:00:00+00:00
2   2012-02-02 02:00:00+00:00
dtype: datetime64[ns, Africa/Abidjan]

In [36]:
from pytz import all_timezones

In [41]:
# Show only the first twenty names rather than the whole list.
all_timezones[0:20]

['Africa/Abidjan',
 'Africa/Accra',
 'Africa/Addis_Ababa',
 'Africa/Algiers',
 'Africa/Asmara',
 'Africa/Asmera',
 'Africa/Bamako',
 'Africa/Bangui',
 'Africa/Banjul',
 'Africa/Bissau',
 'Africa/Blantyre',
 'Africa/Brazzaville',
 'Africa/Bujumbura',
 'Africa/Cairo',
 'Africa/Casablanca',
 'Africa/Ceuta',
 'Africa/Conakry',
 'Africa/Dakar',
 'Africa/Dar_es_Salaam',
 'Africa/Djibouti']

# Selecting Dates and Times

In [65]:
# Create a frame with a single hourly 'date' column. The lowercase 'h' alias is
# used because the uppercase 'H' alias is deprecated in recent pandas releases.
dataframe = pd.DataFrame({'date': pd.date_range('1/1/2001', periods=100000, freq='h')})
# Select observations after 01:00 and up to and including 04:00 on 2002-01-01
dataframe[(dataframe['date'] > '2002-1-1 01:00:00') & (dataframe['date'] <= '2002-1-1 04:00:00')]

Unnamed: 0,date
8762,2002-01-01 02:00:00
8763,2002-01-01 03:00:00
8764,2002-01-01 04:00:00


In [66]:
# Use the 'date' values as the index while keeping 'date' as a column too.
dataframe = dataframe.set_index('date', drop=False)

In [67]:
# Day of the week of every index entry as an integer (Monday is 0).
dataframe.index.dayofweek

Int64Index([0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ...
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
           dtype='int64', name='date', length=100000)

# Breaking Up Date Data into Multiple Features

In [71]:
# 150 consecutive month-end dates. An explicit MonthEnd offset is passed instead
# of the 'M' string alias, which is deprecated in recent pandas releases; the
# offset object is accepted by older and newer versions alike.
D = pd.date_range('2012/02/04', periods=150, freq=pd.offsets.MonthEnd())

In [74]:
# Break each date into separate numeric feature columns.
df = pd.DataFrame(
    {
        'date': D,
        'Year': D.year,
        'Month': D.month,
        'Day': D.day,
        'Minute': D.minute,
    }
)

In [76]:
# Preview the first five rows only.
df.head(5)

Unnamed: 0,date,Year,Month,Day,Minute
0,2012-02-29,2012,2,29,0
1,2012-03-31,2012,3,31,0
2,2012-04-30,2012,4,30,0
3,2012-05-31,2012,5,31,0
4,2012-06-30,2012,6,30,0


# Calculating the Difference Between Dates

In [90]:
# Departure ('left') and arrival ('arrived') dates. ISO year-month-day strings
# are used so the dates cannot be misread as day-first; the values are the same
# as the original month-first literals (4 and 6 January 2017).
# Note: this rebinds `df`, replacing the frame built in the previous section.
df = pd.DataFrame({
    'left': [pd.Timestamp('2017-01-01'), pd.Timestamp('2017-01-04')],
    'arrived': [pd.Timestamp('2017-01-01'), pd.Timestamp('2017-01-06')],
})

In [91]:
# Display the frame of departure and arrival dates.
df

Unnamed: 0,left,arrived
0,2017-01-01,2017-01-01
1,2017-01-04,2017-01-06


In [92]:
# Element-wise difference of the two date columns, giving timedeltas.
# The result is negative where 'arrived' is later than 'left'.
df['left'].sub(df['arrived'])

0    0 days
1   -2 days
dtype: timedelta64[ns]

# Encoding Days of the Week

In [118]:
# Four consecutive days starting on 2022-01-23.
# NOTE(review): the name `datetime` matches the standard-library module of the
# same name and would shadow it if that module were ever imported here.
datetime = pd.Series(
    pd.date_range('2022/01/23', periods=4, freq='D')
)

In [126]:
# Integer day of the week for each date (Monday is 0, Sunday is 6).
datetime.dt.dayofweek

0    6
1    0
2    1
3    2
dtype: int64

In [128]:
# The same four days, kept as a DatetimeIndex this time.
data = pd.date_range(start='2022/01/23', periods=4, freq='D')

In [129]:
# Format every date as its full weekday name.
data.strftime(date_format='%A')

Index(['Sunday', 'Monday', 'Tuesday', 'Wednesday'], dtype='object')

# Creating a Lagged Feature

In [131]:
# Five consecutive days, each with a price.
df2 = pd.DataFrame(
    {
        'dates': pd.date_range("1/1/2001", periods=5, freq="D"),
        'stock_price': [1.1, 2.2, 3.3, 4.4, 5.5],
    }
)
df2

Unnamed: 0,dates,stock_price
0,2001-01-01,1.1
1,2001-01-02,2.2
2,2001-01-03,3.3
3,2001-01-04,4.4
4,2001-01-05,5.5


In [133]:
# Lag the price by one row so each row also carries the previous row's value;
# the first row has no predecessor and is left missing.
df2['stock_price_previous'] = df2['stock_price'].shift(periods=1)
df2

Unnamed: 0,dates,stock_price,stock_price_previous
0,2001-01-01,1.1,
1,2001-01-02,2.2,1.1
2,2001-01-03,3.3,2.2
3,2001-01-04,4.4,3.3
4,2001-01-05,5.5,4.4


# Using Rolling Time Windows

In [141]:
# Five month-end dates, each with a price. An explicit MonthEnd offset is passed
# instead of the "M" string alias, which is deprecated in recent pandas
# releases; the offset object is accepted by older and newer versions alike.
df3 = pd.DataFrame({
    'date': pd.date_range("01/01/2010", periods=5, freq=pd.offsets.MonthEnd()),
    'stock_price': [1, 2, 3, 4, 5],
})

In [143]:
# NOTE: set_index returns a new frame and the result is not assigned here, so
# df3 itself is unchanged by this cell; it is only displayed with 'date' as index.
df3.set_index('date')

Unnamed: 0_level_0,stock_price
date,Unnamed: 1_level_1
2010-01-31,1
2010-02-28,2
2010-03-31,3
2010-04-30,4
2010-05-31,5


In [147]:
# Mean over a moving window of three rows; the first two rows have fewer than
# three observations available and come out as NaN.
df3['stock_price'].rolling(3).mean()

0    NaN
1    NaN
2    2.0
3    3.0
4    4.0
Name: stock_price, dtype: float64

In [148]:
# Number of non-missing observations in each three-row window.
# min_periods=0 is set explicitly so that partially filled windows at the start
# are counted (1, 2, 3, ...) regardless of the pandas version; without it,
# newer pandas releases default min_periods to the window size for count().
df3['stock_price'].rolling(window=3, min_periods=0).count()



0    1.0
1    2.0
2    3.0
3    3.0
4    3.0
Name: stock_price, dtype: float64

In [151]:
# Maximum over a moving window of three rows.
# Other aggregations such as .min(), .var() and .std() are called the same way.
df3['stock_price'].rolling(3).max()

0    NaN
1    NaN
2    3.0
3    4.0
4    5.0
Name: stock_price, dtype: float64

# Handling Missing Data in Time Series

In [152]:
# Monthly sales with two missing values in the middle. An explicit MonthEnd
# offset is passed instead of the "M" string alias, which is deprecated in
# recent pandas releases; the offset object works on older and newer versions.
df4 = pd.DataFrame({
    'time': pd.date_range("01/01/2010", periods=5, freq=pd.offsets.MonthEnd()),
    'Sales': [1.0, 2.0, np.nan, np.nan, 5.0],
})

In [153]:
# Display the frame, including the rows where 'Sales' is missing.
df4

Unnamed: 0,time,Sales
0,2010-01-31,1.0
1,2010-02-28,2.0
2,2010-03-31,
3,2010-04-30,
4,2010-05-31,5.0


In [155]:
# Back-fill: replace each missing value with the next valid value below it.
# bfill() is used instead of fillna(method='bfill'), whose `method` argument is
# deprecated in recent pandas releases; it also mirrors the ffill() call below.
df4.bfill()

Unnamed: 0,time,Sales
0,2010-01-31,1.0
1,2010-02-28,2.0
2,2010-03-31,5.0
3,2010-04-30,5.0
4,2010-05-31,5.0


In [156]:
# Forward-fill: replace each missing value with the last valid value above it.
df4.ffill(axis=0)

Unnamed: 0,time,Sales
0,2010-01-31,1.0
1,2010-02-28,2.0
2,2010-03-31,2.0
3,2010-04-30,2.0
4,2010-05-31,5.0
