## PANDAS TIME SERIES

In [1]:
from datetime import datetime

In [2]:
datetime.now()

datetime.datetime(2017, 4, 14, 15, 28, 1, 319894)

In [3]:
datetime(year = 2015,month=7,day=4)

datetime.datetime(2015, 7, 4, 0, 0)

In [4]:
# How to convert a string into a date time object
from dateutil import parser
date= parser.parse("24th November 2017")

In [5]:
date

datetime.datetime(2017, 11, 24, 0, 0)

In [6]:
# How to find what weekday it is
date.strftime('%A')

'Friday'

In [7]:
date.strftime('%Y')

'2017'

In [8]:
date.strftime('%m')

'11'

In [9]:
import numpy as np

In [10]:
date = np.array('2015-07-24', dtype=np.datetime64)

In [11]:
date

array(datetime.date(2015, 7, 24), dtype='datetime64[D]')

In [12]:
d = date+np.arange(10)

In [13]:
d

array(['2015-07-24', '2015-07-25', '2015-07-26', '2015-07-27',
       '2015-07-28', '2015-07-29', '2015-07-30', '2015-07-31',
       '2015-08-01', '2015-08-02'], dtype='datetime64[D]')

In [14]:
s=d[9]

In [15]:
str(s)

'2015-08-02'

In [16]:
np.arange(12)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

### CODE BREAKDOWN
#M --> Month
#W --> Week
#D --> Day
#h --> hour
#m --> Minute
#s --> second
#ms --> Millisecond
#us --> Microsecond
#ns --> Nanosecond
#ps --> Picosecond
#fs --> Femtosecond
#as --> Attosecond

## Pandas - DATE & TIME

In [17]:
import pandas as pd

In [18]:
#Convert string into timestamp object
date = pd.to_datetime('5th of July 2019')

In [19]:
date

Timestamp('2019-07-05 00:00:00')

In [20]:
date + pd.to_timedelta(np.arange(12),'M')

DatetimeIndex(['2019-07-05 00:00:00', '2019-08-04 10:29:06',
               '2019-09-03 20:58:12', '2019-10-04 07:27:18',
               '2019-11-03 17:56:24', '2019-12-04 04:25:30',
               '2020-01-03 14:54:36', '2020-02-03 01:23:42',
               '2020-03-04 11:52:48', '2020-04-03 22:21:54',
               '2020-05-04 08:51:00', '2020-06-03 19:20:06'],
              dtype='datetime64[ns]', freq=None)

In [21]:
date+pd.to_timedelta(np.arange(12),'Y')

DatetimeIndex(['2019-07-05 00:00:00', '2020-07-04 05:49:12',
               '2021-07-04 11:38:24', '2022-07-04 17:27:36',
               '2023-07-04 23:16:48', '2024-07-04 05:06:00',
               '2025-07-04 10:55:12', '2026-07-04 16:44:24',
               '2027-07-04 22:33:36', '2028-07-04 04:22:48',
               '2029-07-04 10:12:00', '2030-07-04 16:01:12'],
              dtype='datetime64[ns]', freq=None)

### INDEXING BY TIME:
- Objective is to use datetime as index column and not a regular column
- Before using datetime as index, convert date column as index

In [22]:
index = pd.DatetimeIndex(['2017-07-04','2017-08-04','2017-09-04','2017-10-04'])

In [23]:
index

DatetimeIndex(['2017-07-04', '2017-08-04', '2017-09-04', '2017-10-04'], dtype='datetime64[ns]', freq=None)

In [24]:
data = pd.Series([0,1,2,3],index = index)

In [25]:
data

2017-07-04    0
2017-08-04    1
2017-09-04    2
2017-10-04    3
dtype: int64

In [26]:
data['2017-07-04':'2017-09-04']

2017-07-04    0
2017-08-04    1
2017-09-04    2
dtype: int64

In [27]:
index = pd.DatetimeIndex(['2017-07-04','2017-08-04','2018-09-04','2018-10-04'])
data = pd.Series([0,1,2,3],index = index)

In [28]:
data['2017']

2017-07-04    0
2017-08-04    1
dtype: int64

In [29]:
pd.date_range('2017',periods=10,freq='A')

DatetimeIndex(['2017-12-31', '2018-12-31', '2019-12-31', '2020-12-31',
               '2021-12-31', '2022-12-31', '2023-12-31', '2024-12-31',
               '2025-12-31', '2026-12-31'],
              dtype='datetime64[ns]', freq='A-DEC')

In [30]:
pd.date_range('2018',periods=9,freq='m')

DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31', '2018-04-30',
               '2018-05-31', '2018-06-30', '2018-07-31', '2018-08-31',
               '2018-09-30'],
              dtype='datetime64[ns]', freq='M')

In [31]:
date = pd.to_datetime("5th of July 2019")

In [32]:
pd.date_range('2017-07-03',periods=10,freq='5w')

DatetimeIndex(['2017-07-09', '2017-08-13', '2017-09-17', '2017-10-22',
               '2017-11-26', '2017-12-31', '2018-02-04', '2018-03-11',
               '2018-04-15', '2018-05-20'],
              dtype='datetime64[ns]', freq='5W-SUN')

In [33]:
pd.timedelta_range(0,periods=9,freq='4H30T30D')

TimedeltaIndex([  '0 days 00:00:00',  '30 days 04:30:00',  '60 days 09:00:00',
                 '90 days 13:30:00', '120 days 18:00:00', '150 days 22:30:00',
                '181 days 03:00:00', '211 days 07:30:00', '241 days 12:00:00'],
               dtype='timedelta64[ns]', freq='43470T')

#### CODE DESCRIPTION
-D - Calendar day
-B - Business day
-W - weekly
-M - Month
-BM - Business Month
-Q - Quarter 
-BQ - Business Quarter
-A - Year end
-BA - Business Year
-H - Hours
-BH - Business Hours
-T - Minutes
-S - Seconds
-L - Milliseconds
-U - Microseconds
-N - Nanoseconds

## Pandas_datareader import

In [34]:
from pandas_datareader import data

In [35]:
goog = data.DataReader('GOOG',start='2014', end = '2016', data_source='google')

In [36]:
goog.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-01-02,557.17,558.32,553.58,556.0,
2014-01-03,556.94,557.91,551.91,551.95,
2014-01-06,555.95,558.87,552.67,558.1,
2014-01-07,561.94,569.28,560.02,568.86,
2014-01-08,572.43,573.09,566.08,570.04,


In [37]:
len(goog)

504

In [38]:
goog['2015-12-01':'2015-12-31']

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-12-01,747.11,768.95,746.7,767.04,2129940.0
2015-12-02,768.9,775.96,758.96,762.38,2195686.0
2015-12-03,766.01,769.0,745.63,752.54,2590641.0
2015-12-04,753.1,768.49,750.0,766.81,2757283.0
2015-12-07,767.77,768.73,755.09,763.25,1812314.0
2015-12-08,757.89,764.8,754.2,762.37,1829475.0
2015-12-09,759.17,764.23,737.0,751.61,2699990.0
2015-12-10,752.85,755.85,743.83,749.46,1988380.0
2015-12-11,741.16,745.71,736.75,738.87,2224410.0
2015-12-14,741.79,748.73,724.17,747.77,2412497.0


In [39]:
goog.max()

Open           776.60
High           779.98
Low            766.90
Close          776.60
Volume    11164943.00
dtype: float64

In [40]:
data = pd.read_csv('https://data.seattle.gov/api/views/65db-xm6k/rows.csv?accessType=DOWNLOAD',index_col='Date',parse_dates=True)

In [43]:
data.columns = ['East','West']

In [44]:
data

Unnamed: 0_level_0,East,West
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-10-03 00:00:00,4.0,9.0
2012-10-03 01:00:00,4.0,6.0
2012-10-03 02:00:00,1.0,1.0
2012-10-03 03:00:00,2.0,3.0
2012-10-03 04:00:00,6.0,1.0
2012-10-03 05:00:00,21.0,10.0
2012-10-03 06:00:00,105.0,50.0
2012-10-03 07:00:00,257.0,95.0
2012-10-03 08:00:00,291.0,146.0
2012-10-03 09:00:00,172.0,104.0


In [45]:
data['total'] = data['East']+ data['West']

In [46]:
data

Unnamed: 0_level_0,East,West,total
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2012-10-03 00:00:00,4.0,9.0,13.0
2012-10-03 01:00:00,4.0,6.0,10.0
2012-10-03 02:00:00,1.0,1.0,2.0
2012-10-03 03:00:00,2.0,3.0,5.0
2012-10-03 04:00:00,6.0,1.0,7.0
2012-10-03 05:00:00,21.0,10.0,31.0
2012-10-03 06:00:00,105.0,50.0,155.0
2012-10-03 07:00:00,257.0,95.0,352.0
2012-10-03 08:00:00,291.0,146.0,437.0
2012-10-03 09:00:00,172.0,104.0,276.0
