# Introduction to Time Series with Pandas

## Python Datetime Review

In [1]:
from datetime import datetime

In [2]:
# Components are named in the positional order datetime() takes them:
# year, month, day, then hour, minute, second.
my_year, my_month, my_day = 2020, 6, 21
my_hour, my_minute, my_second = 13, 30, 15

In [3]:
# Build a datetime from the year, month and day only; no time components are passed
my_date = datetime(my_year,my_month,my_day)

In [4]:
# Echo the object: with no time arguments given, the repr shows hour and minute as 0
my_date 

datetime.datetime(2020, 6, 21, 0, 0)

In [5]:
# Build a datetime from all six components: year, month, day, hour, minute, second
my_date_time = datetime(my_year,my_month,my_day,my_hour,my_minute,my_second)

In [6]:
# Echo the object: every supplied component appears in the repr
my_date_time

datetime.datetime(2020, 6, 21, 13, 30, 15)

In [7]:
# Individual components are exposed as attributes; .day is the day of the month
my_date.day

21

In [8]:
# .hour returns the hour component that was passed to the constructor
my_date_time.hour

13

## NumPy Datetime Arrays

In [9]:
import numpy as np

In [10]:
# CREATE AN ARRAY FROM THREE DATES
# No unit is given in the dtype, so NumPy picks one from the strings
# (day precision here, as the echoed dtype 'datetime64[D]' shows)
np.array(['2016-03-15', '2017-05-24', '2018-08-09'], dtype='datetime64')

array(['2016-03-15', '2017-05-24', '2018-08-09'], dtype='datetime64[D]')

In [11]:
# Same dates with an explicit hour unit: each value is shown at hour 00 of its day
np.array(['2016-03-15', '2017-05-24', '2018-08-09'], dtype='datetime64[h]')

array(['2016-03-15T00', '2017-05-24T00', '2018-08-09T00'],
      dtype='datetime64[h]')

In [12]:
# Same dates with a year unit: month and day are dropped, leaving only the year
np.array(['2016-03-15', '2017-05-24', '2018-08-09'], dtype='datetime64[Y]')

array(['2016', '2017', '2018'], dtype='datetime64[Y]')

## NumPy Date Ranges

In [14]:
# AN ARRAY OF DATES FROM 6/1/20 TO 6/22/20 SPACED ONE WEEK APART
# The stop value is exclusive, so '2020-06-23' is used to keep 2020-06-22 in the result
np.arange('2020-06-01', '2020-06-23', 7, dtype='datetime64[D]')

array(['2020-06-01', '2020-06-08', '2020-06-15', '2020-06-22'],
      dtype='datetime64[D]')

By omitting the step value, we obtain every value at the dtype's precision — for example, one entry per year with <tt>datetime64[Y]</tt>.

In [15]:
# AN ARRAY OF DATES FOR EVERY YEAR FROM 1968 TO 1975
# The stop value '1976' is exclusive, so the last element is 1975
np.arange('1968', '1976', dtype='datetime64[Y]')

array(['1968', '1969', '1970', '1971', '1972', '1973', '1974', '1975'],
      dtype='datetime64[Y]')

## Pandas Datetime Index

In [16]:
import pandas as pd

The simplest way to build a DatetimeIndex is with the <tt><strong>pd.date_range()</strong></tt> function:

In [18]:
# THE WEEK OF JULY 8TH, 2020
# The start date is written as 'mm/dd/yyyy'; periods=7 with freq='D' (daily)
# produces seven consecutive days beginning on that date
idx = pd.date_range('7/8/2020', periods=7, freq='D')
idx

DatetimeIndex(['2020-07-08', '2020-07-09', '2020-07-10', '2020-07-11',
               '2020-07-12', '2020-07-13', '2020-07-14'],
              dtype='datetime64[ns]', freq='D')

Another way is to convert incoming text with the <tt><strong>pd.to_datetime()</strong></tt> function:

In [23]:
# Each string below uses a different date layout, so every element is parsed
# on its own. Passing the whole list to pd.to_datetime() in one call fails on
# pandas >= 2.0: the format is inferred from the first element and a later
# element that does not match it raises ValueError (pandas >= 2.0 also accepts
# format='mixed' for this case).
# The None entry ends up as NaT (Not a Time) in the resulting index.
idx = pd.DatetimeIndex(
    [pd.to_datetime(text) for text in ['Jan 01, 2020', '1/2/20', '03-Jan-2020', None]]
)
idx

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', 'NaT'], dtype='datetime64[ns]', freq=None)

In [35]:
# Parse month/day/year strings with an explicit format;
# the resulting index is displayed as YYYY-mm-dd.
idx = pd.to_datetime(
    ['1/2/2020', '2/4/2020'],
    format='%m/%d/%Y',
)
idx

DatetimeIndex(['2020-01-02', '2020-02-04'], dtype='datetime64[ns]', freq=None)

A third way is to pass a list or an array of datetime objects to the <tt><strong>pd.DatetimeIndex()</strong></tt> constructor:

In [36]:
# Build a day-precision NumPy datetime64 array from three date strings
some_dates = np.array(
    ['2016-03-15', '2017-05-24', '2018-08-09'],
    dtype='datetime64[D]',
)
some_dates

array(['2016-03-15', '2017-05-24', '2018-08-09'], dtype='datetime64[D]')

In [37]:
# Convert to an index
# Note the dtype change in the echoed result: the input array is datetime64[D],
# while the DatetimeIndex is shown as datetime64[ns]
idx = pd.DatetimeIndex(some_dates)
idx

DatetimeIndex(['2016-03-15', '2017-05-24', '2018-08-09'], dtype='datetime64[ns]', freq=None)

## Pandas Datetime Analysis

In [38]:
# Create some random data: a 3x2 array (three rows to match the three dates, two columns)
# NOTE: no random seed is set in the visible cells, so these values differ on every run
data = np.random.randn(3,2)
cols = ['A','B']
print(data)

[[-0.59824482 -0.92228719]
 [-0.75601625  0.81730779]
 [ 0.94955949  0.0641874 ]]


In [40]:
# Create a DataFrame with our random data, our date index, and our columns
# Each row of `data` is labelled by the corresponding date in `idx`
df = pd.DataFrame(data,index=idx,columns=cols)
df

Unnamed: 0,A,B
2016-03-15,-0.598245,-0.922287
2017-05-24,-0.756016,0.817308
2018-08-09,0.949559,0.064187


Now we can perform some typical analysis on our DataFrame's datetime index:

In [41]:
# The DataFrame's row index is the DatetimeIndex that was passed in as index=
df.index

DatetimeIndex(['2016-03-15', '2017-05-24', '2018-08-09'], dtype='datetime64[ns]', freq=None)

In [42]:
# Latest (maximum) date in the index, returned as a Timestamp
df.index.max()

Timestamp('2018-08-09 00:00:00')

In [43]:
# Integer position of the latest date within the index (0-based)
df.index.argmax()

2

In [44]:
# Earliest (minimum) date in the index, returned as a Timestamp
df.index.min()

Timestamp('2016-03-15 00:00:00')

In [45]:
# Integer position of the earliest date within the index (0-based)
df.index.argmin()

0