In [None]:
# Bismillah

In [None]:
import numpy as np
import pandas as pd
import datetime
import matplotlib.pyplot as plt
%matplotlib inline
# Display floats with 7 decimal places. The option is addressed by its full key because the
# bare pattern 'precision' is ambiguous on pandas versions that also define a Styler precision
# option, where set_option() raises OptionError.
pd.set_option('display.precision', 7)
from pandas_datareader import data

In [None]:
type(2014-8-1)
# It looks like a date to us, but to Python it is integer subtraction (2014 - 8 - 1 = 2005), so the
# result is an int and none of the operations permitted on dates can be performed on it.
# For the same reason, passing 2014-8-1 to datetime.datetime() raises an error: the call would
# receive a single integer instead of separate year, month and day arguments.

In [None]:
datetime.datetime(2014, 8, 1, 9, 30, 45, 100000)
# A datetime object is created, so date/time operations can now be performed on it.
# Year, month, day, hours, minutes, seconds and microseconds are passed as separate arguments,
# separated by commas.

In [None]:
type(datetime.datetime(2014, 8, 1, 9, 30, 45, 100000))
# Shows the class of the object returned by datetime.datetime(...).

In [None]:
# Build a list holding one datetime object for each of the first two days of August 2014.
dates = [datetime.datetime(2014, 8, day) for day in (1, 2)]
dates

In [None]:
type(dates)
# dates is a plain Python list.

In [None]:
type(dates[0])
# Each element of the list is a datetime.datetime object.

In [None]:
dti = pd.DatetimeIndex(dates)
dti
# A DatetimeIndex is created from the list of datetime objects. Each item in a DatetimeIndex is an
# object of the Timestamp class.

In [None]:
type(dti)
# Shows the class of the index object built by pd.DatetimeIndex(dates).

In [None]:
type(dti[0])
# Shows the class of a single element of the DatetimeIndex.

In [None]:
# A Series of two random values; with no index supplied, pandas assigns a zero-based integer index.
ts = pd.Series(data=np.random.randn(2))
ts

In [None]:
type(ts)
# Shows the class of the object returned by pd.Series(...).

In [None]:
# Passing the list of datetime objects (dates) as the index parameter makes the Series construct
# a DatetimeIndex automatically, so the index is a datetime index rather than the default
# zero-based integer index.
ts1 = pd.Series(data=np.random.randn(2), index=dates)
ts1

In [None]:
type(ts1)
# Shows the class of ts1.

In [None]:
# Here the ready-made DatetimeIndex (dti) is passed directly as the index parameter.
ts2 = pd.Series(data=np.random.randn(2), index=dti)
ts2

In [None]:
type(ts2)
# Shows the class of ts2.

In [None]:
ts2.index
# The index of the series is a DatetimeIndex, as can be confirmed with the type() call that follows.

In [None]:
type(ts2.index)
# Shows the class of the index of ts2.

In [None]:
# Important: The Series object has taken the datetime objects (contained in the variable dates) and constructed a
# DatetimeIndex from the date values they hold. Each value of the DatetimeIndex is a Timestamp object, and each
# element (Timestamp) of the DatetimeIndex can be used to access the corresponding value in the Series object.
# This is demonstrated in the following:

In [None]:
ts2
# Display ts2 again so that the values looked up in the following cells can be compared with it.

In [None]:
ts2['2014-08-01']
# The first row is retrieved using [] without slicing, loc or iloc. This is possible because a Series has
# only one column of values, so the key can only refer to a row (or rows) and pandas cannot confuse it
# with a column label.

In [None]:
ts2['2014-08-01':'2014-08-02']
# Slicing by date strings: unlike positional slicing, the end date is inclusive.

In [None]:
ts2[datetime.datetime(2014, 8, 1)]
# Instead of a date string with '-' separators, a datetime object (comma-separated arguments, no
# quotes) can be used as the key to produce the same output.
# The class is referenced as datetime.datetime because this notebook imports the datetime module
# itself; calling the module directly, datetime(2014, 8, 1), raises TypeError.

In [None]:
# The next method creates a DatetimeIndex from a list of dates given as strings, as shown below:

In [None]:
dates = ['2014-08-01', '2014-08-02']
dates
# The same two dates as before, this time written as strings (this rebinds the name dates).

In [None]:
type(dates)
# dates is a list; its elements are strings.

In [None]:
type(dates[0])
# The first element (date) in the list is of type str, and so is the second element.

In [None]:
ts3 = pd.Series(np.random.randn(2), pd.to_datetime(dates))
ts3
# The date strings are converted explicitly with pd.to_datetime() before being used as the index.
# Passing the raw list of strings as the index would keep them as plain string labels, so the index
# would not be a DatetimeIndex (its type is checked in the next cell).

In [None]:
type(ts3.index)
# Shows the class of the index that ts3 ended up with.

In [None]:
# convert a list of mixed type items/elements into DatetimeIndex using pandas' function, to_datetime().

In [None]:
dti2 = pd.to_datetime(['Aug 1, 2014', '2014-08-02', '2014.8.3', None])
dti2
# Note that every item in the list is a string except the last one, which is the keyword None.
# The strings, although written in different formats, are converted into the entries of a DatetimeIndex.
# None is converted into NaT (not-a-time), the marker used when the source value could not be converted
# into a datetime.
# NOTE(review): recent pandas versions infer one format from the first element and may reject the other
# formats in this list unless format='mixed' is passed - confirm against the installed pandas version.

In [None]:
type(dti2)
# Shows the class of the object returned by pd.to_datetime() for a list input.

In [None]:
#dti3 = pd.to_datetime(['Aug 1, 2014', '2014-08-02', '2014.8.3', 'foo'], errors = 'coerce')
# Without coercion this gives an error: pandas cannot parse 'foo' since it does not look like a date. Previously,
# pandas recognized the keyword None as a missing value, so it didn't raise and produced NaT. The `coerce = True`
# keyword used in the book is not accepted here; its replacement is `errors = 'coerce'`, which turns 'foo' into NaT.

In [None]:
# Changing the date order: The default order is month first, then day (i.e. dayfirst = False).

In [None]:
dti = pd.to_datetime(['2014, 8, 1'])
dti
# With the year written first, 8 is taken as the month and 1 as the day.

In [None]:
dti = pd.to_datetime(['8, 1, 2014'])
dti
# Same result with the year written last: 8 is the month and 1 is the day.

In [None]:
dti = pd.to_datetime(['8, 1, 2014'], dayfirst = True)
dti
# If we mean the 8th of January, we have to tell pandas that the first value is the day by passing
# 'dayfirst = True', since by default pandas treats the first value as the month and the second as
# the day.
# Now the date is the 8th of January, as desired.

In [None]:
dti[0]
# The single element of the index, shown on its own.

In [None]:
# Using the date_range function to create a DatetimeIndex (a sequence of Timestamps):

In [None]:
# Ten consecutive entries starting from the given date; date_range uses its default frequency
# because none is passed.
dates = pd.date_range(start='8, 1, 2014', periods=10)
dates

In [None]:
# Ten random values indexed by the ten dates produced by date_range.
s1 = pd.Series(data=np.random.randn(10), index=dates)
s1

In [None]:
s1.index
# Display the index that s1 was given.

In [None]:
type(s1.index)
# Shows the class of the index of s1.

# S&P 500 Example:

In [None]:
msft = data.DataReader('MSFT', 'yahoo', '2012-1-1', '2013-12-30')
msft[:5]
# Request MSFT data from the 'yahoo' source of pandas_datareader for 2012-1-1 through 2013-12-30
# and show the first five rows.
# NOTE(review): this cell needs network access and a working 'yahoo' reader - confirm it still runs.

In [None]:
msft.loc['2012-01-03']
# Selects the row labelled 2012-01-03. Thus, using the DatetimeIndex, any desired part of the
# dataframe can be accessed for any period.

In [None]:
msftAC = msft['Adj Close']
msftAC.head(5)
# Keep only the 'Adj Close' column and show its first five values.

In [None]:
msftAC['2012-01-03']
# Note that msftAC is a series; therefore, a row label (from its DatetimeIndex) can be used with []
# without slicing, loc or iloc.

In [None]:
msftAC['2012-01-01':'2012-01-05']
# The DatetimeIndex makes it easy to access any chunk of the series for any desired period.

In [None]:
msft.loc['2012-02']
# .loc is used so that the partial date string '2012-02' is looked up along the index (rows) and
# returns every row of February 2012. Plain msft['2012-02'] is a column lookup on a DataFrame; its
# fallback to row selection for date strings is deprecated and raises KeyError on newer pandas.

In [None]:
# Row slice by date strings: because the start is a partial date (month only), the slice starts
# at the beginning of that month.
msft.loc['2012-02':'2012-02-09']

# Creating time-series with specific frequencies:

In [None]:
dates = pd.date_range('2014-08-01', '2014-10-29 23:59:00', freq = 'min')
dates
# Create a DatetimeIndex with one entry per minute ('min' is the minute alias; 'T' is an older
# spelling of the same frequency that newer pandas deprecates). The default is 'D' for daily;
# hourly, weekly, monthly etc. are also possible.

In [None]:
# Consecutive integers starting at 0: one per minute for 90 days of 24 hours.
np_array = np.arange(90 * 24 * 60)
np_array

In [None]:
# The first argument supplies the values of the series and the second one supplies its index.
bymin = pd.Series(data=np_array, index=dates)
bymin

In [None]:
bymin['2014-08-01 12:30:00':'2014-08-01 12:59']
# The data can be extracted down to the minute; the slice bounds may be given with or without seconds.

### Representing intervals of time using periods:
#### `pd.Period()` creates a period: a span of time that starts at the given date and whose length is set by the specified frequency.

In [None]:
# A Period with monthly frequency. It stands for the whole of August 2014, from the start of
# the 1st to the end of the 31st, rather than for individual dates.
aug2014 = pd.Period(year=2014, month=8, freq='M')
aug2014

In [None]:
aug2014.start_time, aug2014.end_time
# When only the month is given (a partial date), pandas takes the start of the period to be the 1st of Aug.
# Using the one-month frequency, it then works out that the period ends on the 31st of Aug.

In [None]:
sep2014 = aug2014 + 1
sep2014
# Operator overloading: the plus sign here carries a different meaning than its usual numeric addition.
# Adding 1 to aug2014 creates the next month (sep2014), since the existing period (aug2014) is
# based on a monthly frequency. If it were based on a quarterly frequency, +1 would create the next quarter.

In [None]:
sep2014.start_time, sep2014.end_time
# Note that pandas itself has figured out that September has 30 days, unlike Aug, which ended on the 31st.

In [None]:
# Period Objects: can be combined to form a PeriodIndex as shown below:

In [None]:
# Build a PeriodIndex of monthly periods covering 2013 (its type is confirmed in the next cell).
# In a PeriodIndex the index labels are Period objects, whereas in a DatetimeIndex they are Timestamps.
mp2013 = pd.period_range(start='2013-01-01', end='2013-12-31', freq='M')
mp2013

In [None]:
type(mp2013)
# Shows the class of the object returned by pd.period_range().

In [None]:
type(mp2013[0])
# A PeriodIndex contains Period objects.

In [None]:
# Print one line per monthly period: the period itself, its frequency, and its start and end times.
# Each element of mp2013 is bound to p in turn; the numbered {} placeholders are filled, in order,
# by the arguments passed to format(). Because the elements are Period objects, they expose the
# .start_time and .end_time properties.
for p in mp2013:
    fields = (p, p.freq, p.start_time, p.end_time)
    print("{0} {1} {2} {3}".format(*fields))

In [None]:
s = np.random.randn(12)
s
# Twelve random values, one for each monthly period in mp2013.

In [None]:
# Index the random values by the monthly periods. The index labels are Period objects, so the index
# is a PeriodIndex rather than a DatetimeIndex. This kind of indexing is especially helpful when we
# are interested in month-level figures (e.g. average prices each month) rather than prices at
# specific dates.
pandas_series = pd.Series(data=s, index=mp2013)
pandas_series