# Time Variables - Feature Engineering

In [1]:
import pandas as pd
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
%matplotlib inline

In [2]:
## Dummy data: a single 'date' column holding 7 hourly timestamps.
## The lowercase 'h' alias is used because the uppercase 'H' hourly alias is
## deprecated in recent pandas releases.

date = pd.Series(pd.date_range('2015-1-5 11:20:00', periods=7, freq='h'))
df = pd.DataFrame(dict(date=date))
df

Unnamed: 0,date
0,2015-01-05 11:20:00
1,2015-01-05 12:20:00
2,2015-01-05 13:20:00
3,2015-01-05 14:20:00
4,2015-01-05 15:20:00
5,2015-01-05 16:20:00
6,2015-01-05 17:20:00


In [3]:
## Split every timestamp in 'date' into its hour, minute and second parts,
## storing each part in its own column.

for column, component in (('hour', 'hour'), ('min', 'minute'), ('sec', 'second')):
    df[column] = getattr(df['date'].dt, component)
df


Unnamed: 0,date,hour,min,sec
0,2015-01-05 11:20:00,11,20,0
1,2015-01-05 12:20:00,12,20,0
2,2015-01-05 13:20:00,13,20,0
3,2015-01-05 14:20:00,14,20,0
4,2015-01-05 15:20:00,15,20,0
5,2015-01-05 16:20:00,16,20,0
6,2015-01-05 17:20:00,17,20,0


In [4]:
## Keep only the time-of-day part of each timestamp in a new 'time' column.

timestamp_parts = df['date'].dt
df['time'] = timestamp_parts.time
df

Unnamed: 0,date,hour,min,sec,time
0,2015-01-05 11:20:00,11,20,0,11:20:00
1,2015-01-05 12:20:00,12,20,0,12:20:00
2,2015-01-05 13:20:00,13,20,0,13:20:00
3,2015-01-05 14:20:00,14,20,0,14:20:00
4,2015-01-05 15:20:00,15,20,0,15:20:00
5,2015-01-05 16:20:00,16,20,0,16:20:00
6,2015-01-05 17:20:00,17,20,0,17:20:00


In [10]:
## Rebuild hour / minute / second from the time objects stored in 'time'.
## The helper frame is given df's own index: column assignment from a
## DataFrame is aligned on index labels, so a default 0..n-1 index would
## produce NaN rows whenever df is not indexed 0..n-1.

df[['h', 'm', 's']] = pd.DataFrame(
    [(t.hour, t.minute, t.second) for t in df['time']],
    index=df.index,
    columns=['h', 'm', 's'],
)
df

Unnamed: 0,date,hour,min,sec,time,h,m,s
0,2015-01-05 11:20:00,11,20,0,11:20:00,11,20,0
1,2015-01-05 12:20:00,12,20,0,12:20:00,12,20,0
2,2015-01-05 13:20:00,13,20,0,13:20:00,13,20,0
3,2015-01-05 14:20:00,14,20,0,14:20:00,14,20,0
4,2015-01-05 15:20:00,15,20,0,15:20:00,15,20,0
5,2015-01-05 16:20:00,16,20,0,16:20:00,16,20,0
6,2015-01-05 17:20:00,17,20,0,17:20:00,17,20,0


In [11]:
## time difference

## Create a dummy dataset with a start column ('sdate') and an end column
## ('edate'), each holding 7 hourly timestamps.
## The lowercase 'h' alias is used because the uppercase 'H' hourly alias is
## deprecated in recent pandas releases.

date1 = pd.Series(pd.date_range('2013-1-5 11:20:00', periods=7, freq='h'))

date2 = pd.Series(pd.date_range('2015-1-5 21:45:00', periods=7, freq='h'))

df = pd.DataFrame(dict(sdate=date1, edate=date2))
df

Unnamed: 0,sdate,edate
0,2013-01-05 11:20:00,2015-01-05 21:45:00
1,2013-01-05 12:20:00,2015-01-05 22:45:00
2,2013-01-05 13:20:00,2015-01-05 23:45:00
3,2013-01-05 14:20:00,2015-01-06 00:45:00
4,2013-01-05 15:20:00,2015-01-06 01:45:00
5,2013-01-05 16:20:00,2015-01-06 02:45:00
6,2013-01-05 17:20:00,2015-01-06 03:45:00


In [15]:
## Row-wise elapsed time between the end and start timestamps.
df['edate'].sub(df['sdate'])

0   730 days 10:25:00
1   730 days 10:25:00
2   730 days 10:25:00
3   730 days 10:25:00
4   730 days 10:25:00
5   730 days 10:25:00
6   730 days 10:25:00
dtype: timedelta64[ns]

In [18]:
## Elapsed time expressed in seconds: dividing the timedelta by a one-second
## timedelta leaves a plain number of seconds.
## NOTE(review): the saved output of this cell already shows a 'diff_min'
## column that this cell does not create - presumably stale kernel state;
## confirm with Restart & Run All.

one_second = np.timedelta64(1, 's')
elapsed = df['edate'] - df['sdate']
df['diff_seconds'] = elapsed / one_second
df

Unnamed: 0,sdate,edate,diff_seconds,diff_min
0,2013-01-05 11:20:00,2015-01-05 21:45:00,63109500.0,1051825.0
1,2013-01-05 12:20:00,2015-01-05 22:45:00,63109500.0,1051825.0
2,2013-01-05 13:20:00,2015-01-05 23:45:00,63109500.0,1051825.0
3,2013-01-05 14:20:00,2015-01-06 00:45:00,63109500.0,1051825.0
4,2013-01-05 15:20:00,2015-01-06 01:45:00,63109500.0,1051825.0
5,2013-01-05 16:20:00,2015-01-06 02:45:00,63109500.0,1051825.0
6,2013-01-05 17:20:00,2015-01-06 03:45:00,63109500.0,1051825.0


In [19]:
## Elapsed time expressed in minutes: dividing the timedelta by a one-minute
## timedelta leaves a plain number of minutes.

one_minute = np.timedelta64(1, 'm')
elapsed = df['edate'] - df['sdate']
df['diff_min'] = elapsed / one_minute
df

Unnamed: 0,sdate,edate,diff_seconds,diff_min
0,2013-01-05 11:20:00,2015-01-05 21:45:00,63109500.0,1051825.0
1,2013-01-05 12:20:00,2015-01-05 22:45:00,63109500.0,1051825.0
2,2013-01-05 13:20:00,2015-01-05 23:45:00,63109500.0,1051825.0
3,2013-01-05 14:20:00,2015-01-06 00:45:00,63109500.0,1051825.0
4,2013-01-05 15:20:00,2015-01-06 01:45:00,63109500.0,1051825.0
5,2013-01-05 16:20:00,2015-01-06 02:45:00,63109500.0,1051825.0
6,2013-01-05 17:20:00,2015-01-06 03:45:00,63109500.0,1051825.0


In [22]:
## different time zones


## Create a dummy dataset whose single 'time' column mixes two time zones:
## 3 hourly timestamps in Europe/Berlin followed by 3 in US/Central.
## The lowercase 'h' alias is used because the uppercase 'H' hourly alias is
## deprecated in recent pandas releases.
## The two pieces are stacked as-is, so the row labels repeat (0,1,2,0,1,2).

df = pd.DataFrame()
df['time'] = pd.concat([
    pd.Series(pd.date_range('2013-1-5 11:20:00', periods=3, freq='h', tz='Europe/Berlin')),
    pd.Series(pd.date_range('2013-1-5 11:20:00', periods=3, freq='h', tz='US/Central')),
], axis=0)

df


Unnamed: 0,time
0,2013-01-05 11:20:00+01:00
1,2013-01-05 12:20:00+01:00
2,2013-01-05 13:20:00+01:00
0,2013-01-05 11:20:00-06:00
1,2013-01-05 12:20:00-06:00
2,2013-01-05 13:20:00-06:00


In [23]:
## the time zone is indicated by the UTC offset suffix: +01:00 (Europe/Berlin) and -06:00 (US/Central)

In [25]:
## To work with timestamps from different time zones, first bring them all to
## the same zone: utc=True converts every value to UTC.

utc_stamps = pd.to_datetime(df['time'], utc=True)
df['time_utc'] = utc_stamps

In [26]:
## Display the frame, which now includes the 'time_utc' column.
df


Unnamed: 0,time,time_utc
0,2013-01-05 11:20:00+01:00,2013-01-05 10:20:00+00:00
1,2013-01-05 12:20:00+01:00,2013-01-05 11:20:00+00:00
2,2013-01-05 13:20:00+01:00,2013-01-05 12:20:00+00:00
0,2013-01-05 11:20:00-06:00,2013-01-05 17:20:00+00:00
1,2013-01-05 12:20:00-06:00,2013-01-05 18:20:00+00:00
2,2013-01-05 13:20:00-06:00,2013-01-05 19:20:00+00:00


In [27]:
## Express the UTC timestamps in the Europe/London time zone.
london_zone = 'Europe/London'
df['time_london'] = df['time_utc'].dt.tz_convert(london_zone)
df

Unnamed: 0,time,time_utc,time_london
0,2013-01-05 11:20:00+01:00,2013-01-05 10:20:00+00:00,2013-01-05 10:20:00+00:00
1,2013-01-05 12:20:00+01:00,2013-01-05 11:20:00+00:00,2013-01-05 11:20:00+00:00
2,2013-01-05 13:20:00+01:00,2013-01-05 12:20:00+00:00,2013-01-05 12:20:00+00:00
0,2013-01-05 11:20:00-06:00,2013-01-05 17:20:00+00:00,2013-01-05 17:20:00+00:00
1,2013-01-05 12:20:00-06:00,2013-01-05 18:20:00+00:00,2013-01-05 18:20:00+00:00
2,2013-01-05 13:20:00-06:00,2013-01-05 19:20:00+00:00,2013-01-05 19:20:00+00:00


In [28]:
## Express the UTC timestamps in the US/Central time zone.
us_central_zone = 'US/Central'
df['time_US'] = df['time_utc'].dt.tz_convert(us_central_zone)
df

Unnamed: 0,time,time_utc,time_london,time_US
0,2013-01-05 11:20:00+01:00,2013-01-05 10:20:00+00:00,2013-01-05 10:20:00+00:00,2013-01-05 04:20:00-06:00
1,2013-01-05 12:20:00+01:00,2013-01-05 11:20:00+00:00,2013-01-05 11:20:00+00:00,2013-01-05 05:20:00-06:00
2,2013-01-05 13:20:00+01:00,2013-01-05 12:20:00+00:00,2013-01-05 12:20:00+00:00,2013-01-05 06:20:00-06:00
0,2013-01-05 11:20:00-06:00,2013-01-05 17:20:00+00:00,2013-01-05 17:20:00+00:00,2013-01-05 11:20:00-06:00
1,2013-01-05 12:20:00-06:00,2013-01-05 18:20:00+00:00,2013-01-05 18:20:00+00:00,2013-01-05 12:20:00-06:00
2,2013-01-05 13:20:00-06:00,2013-01-05 19:20:00+00:00,2013-01-05 19:20:00+00:00,2013-01-05 13:20:00-06:00
