# Dealing With Time Series in Python

In [1]:
from datetime import datetime
datetime.now()

datetime.datetime(2021, 4, 7, 21, 52, 5, 205979)

In [2]:
# time delta represents the temporal difference between two datetime objects
delta = datetime(2011,1,7) - datetime(2008,6,24,8,15)
delta

datetime.timedelta(926, 56700)

In [3]:
print(delta.days)
print(delta.seconds)

926
56700


In [6]:
datetime(2008,6,24,8,15)+delta

datetime.datetime(2011, 1, 7, 0, 0)

## Converting Between String and Datetime

In [10]:
#use str or strftime method to convert datetime to string
stamp = datetime(2011,1,3)
stamp.strftime('%Y/%m-%d-%w') #%w is weekday as integer 0(sunday)-6(saturday)

'2011/01-03-1'

In [11]:
# same method can be used to conver string to datetime
datetime.strptime('2011-01-03','%Y-%m-%d')

datetime.datetime(2011, 1, 3, 0, 0)

In [12]:
# automatic converting with parser
from dateutil.parser import parse
parse('2011/01/03')

datetime.datetime(2011, 1, 3, 0, 0)

In [14]:
# parse can identify more complicated string
parse('Jan 31, 1997 10:45 PM')
parse('6/12/2011',dayfirst = False)

datetime.datetime(1997, 1, 31, 22, 45)

In [16]:
# pandas use to_datetime convert string to datetime very quickly
import pandas as pd
datestrs = ['2011-07-06 12:00:00',"2019-02-28 00:00:00"]
pd.to_datetime(datestrs)

DatetimeIndex(['2011-07-06 12:00:00', '2019-02-28 00:00:00'], dtype='datetime64[ns]', freq=None)

In [30]:
delta = parse('2011-05-03 00:02:00-05')-parse('2011-05-03 00:01:00-05')

In [28]:
parse('2011-05-03 00:01:00-06')

datetime.datetime(2011, 5, 3, 0, 1, tzinfo=tzoffset(None, -21600))

In [29]:
parse('2011-05-03 00:02:00-05')

datetime.datetime(2011, 5, 3, 0, 2, tzinfo=tzoffset(None, -18000))

6.0

In [32]:
delta.days

0

## Generate Different Data Ranges
Pandas can handle variety of data ranges, just pass different keywords in freq argument;

In [35]:
pd.date_range('2021-01-01','2022-01-01',freq = 'BM') # last weekday of each month

DatetimeIndex(['2021-01-29', '2021-02-26', '2021-03-31', '2021-04-30',
               '2021-05-31', '2021-06-30', '2021-07-30', '2021-08-31',
               '2021-09-30', '2021-10-29', '2021-11-30', '2021-12-31'],
              dtype='datetime64[ns]', freq='BM')

In [37]:
pd.date_range('2000-01-01','2022-01-01',freq = 'A-JAN') # annual day anchored on the last calender day of given month

DatetimeIndex(['2000-01-31', '2001-01-31', '2002-01-31', '2003-01-31',
               '2004-01-31', '2005-01-31', '2006-01-31', '2007-01-31',
               '2008-01-31', '2009-01-31', '2010-01-31', '2011-01-31',
               '2012-01-31', '2013-01-31', '2014-01-31', '2015-01-31',
               '2016-01-31', '2017-01-31', '2018-01-31', '2019-01-31',
               '2020-01-31', '2021-01-31'],
              dtype='datetime64[ns]', freq='A-JAN')

In [50]:
pd.date_range('2000-01-01','2000-03-01',freq = 'W-FRI') # Friday of each week

DatetimeIndex(['2000-01-07', '2000-01-14', '2000-01-21', '2000-01-28',
               '2000-02-04', '2000-02-11', '2000-02-18', '2000-02-25'],
              dtype='datetime64[ns]', freq='W-FRI')

In [48]:
#period is an object that simpler and easier to handle as index. TimeStamp has issues like timezone. 
#Period is just a label of time and can be changed to different frequency. 
pd.Period(2000,freq = 'B')

Period('2000-01-03', 'B')

## DownSampling and UpSampling 
Change the aggregation level of time series.

In [62]:
rng = pd.date_range('2000-01-01',periods = 100,freq = 'min')
ts = pd.Series(np.random.randn(len(rng)),index = rng)
ts.resample('5min',label = 'right').mean()

2000-01-01 00:05:00    0.572602
2000-01-01 00:10:00   -0.386194
2000-01-01 00:15:00    0.603076
2000-01-01 00:20:00   -0.462564
2000-01-01 00:25:00    0.048983
2000-01-01 00:30:00    0.667300
2000-01-01 00:35:00    0.448396
2000-01-01 00:40:00   -0.362919
2000-01-01 00:45:00   -0.514536
2000-01-01 00:50:00    0.551266
2000-01-01 00:55:00    0.914251
2000-01-01 01:00:00   -0.374473
2000-01-01 01:05:00   -0.720779
2000-01-01 01:10:00    0.377241
2000-01-01 01:15:00   -0.148596
2000-01-01 01:20:00    0.214643
2000-01-01 01:25:00   -0.583427
2000-01-01 01:30:00    0.041354
2000-01-01 01:35:00   -0.572061
2000-01-01 01:40:00   -0.047272
Freq: 5T, dtype: float64

In [63]:
ts.resample('s',label = 'right').ffill()

2000-01-01 00:00:00    0.422013
2000-01-01 00:00:01    0.422013
2000-01-01 00:00:02    0.422013
2000-01-01 00:00:03    0.422013
2000-01-01 00:00:04    0.422013
                         ...   
2000-01-01 01:38:56   -1.421992
2000-01-01 01:38:57   -1.421992
2000-01-01 01:38:58   -1.421992
2000-01-01 01:38:59   -1.421992
2000-01-01 01:39:00   -1.358872
Freq: S, Length: 5941, dtype: float64

## Moving Window Functions

## Exponentially Weighted Functions

## Binary Moving Window Functions

## User Defined Moving Window Functions