In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
from datetime import datetime

## 1. Datetime index

In [4]:
# Example1 :

my_yr = 2023
my_mon = 3
my_day = 23

my_hr = 9
my_min = 4
my_sec = 15

In [5]:
my_date = datetime(year = my_yr , month = my_mon ,day = my_day)
my_date

# it has returned datetime object

datetime.datetime(2023, 3, 23, 0, 0)

In [6]:
my_datetime = datetime(year = my_yr , month = my_mon ,day = my_day,
                       hour = my_hr , minute = my_min , second = my_sec)

my_datetime

datetime.datetime(2023, 3, 23, 9, 4, 15)

In [7]:
# accessing some of the attributes
my_date.year

2023

In [8]:
my_datetime.month

3

In [9]:
my_datetime.second

15

In [10]:
# Example 2 : Using numpy to create datetime object

arr = np.array(['2023-03-15','2023-03-18','2023-03-20','2023-03-22' ], dtype = 'datetime64')
arr

# dtype='datetime64[D]' : [D] means day level precision

array(['2023-03-15', '2023-03-18', '2023-03-20', '2023-03-22'],
      dtype='datetime64[D]')

In [11]:
# Example 3 : Using np.arange to create datetime object

arr2 = np.arange('2023-03-15','2023-03-23',2, dtype = 'datetime64')
arr2

# dtype='datetime64[D]' : [D] means day level precision

array(['2023-03-15', '2023-03-17', '2023-03-19', '2023-03-21'],
      dtype='datetime64[D]')

In [12]:
# Example 4 : Using np.arange to create datetime object

arr3 = np.arange('2015','2023', dtype = 'datetime64')
arr3

# dtype='datetime64[Y] : [Y] means Year level precision

array(['2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022'],
      dtype='datetime64[Y]')

In [13]:
# Example 5 : Using pandas to create datetime object

pd.date_range(start = '2023-03-10' ,end = '2023-03-24' ,periods = 7 )

# Returned equal time apart datetime object

DatetimeIndex(['2023-03-10 00:00:00', '2023-03-12 08:00:00',
               '2023-03-14 16:00:00', '2023-03-17 00:00:00',
               '2023-03-19 08:00:00', '2023-03-21 16:00:00',
               '2023-03-24 00:00:00'],
              dtype='datetime64[ns]', freq=None)

In [14]:
# Example 6 : Using pandas to create datetime object

pd.date_range(start = '2023-03-10'  ,periods = 7 , freq = 'D' )

# Returned equal time apart datetime object

DatetimeIndex(['2023-03-10', '2023-03-11', '2023-03-12', '2023-03-13',
               '2023-03-14', '2023-03-15', '2023-03-16'],
              dtype='datetime64[ns]', freq='D')

In [15]:
# Example 7 : Using pandas to create datetime object

# start must be in standard datetime format
pd.date_range(start = '1 Jan, 2023'  ,periods = 7 , freq = 'D' ) 

# Returned equal time apart datetime object

DatetimeIndex(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04',
               '2023-01-05', '2023-01-06', '2023-01-07'],
              dtype='datetime64[ns]', freq='D')

In [16]:
# Example 8: Using pandas to create datetime object

# start must be in standard datetime format
pd.to_datetime(['1/3/2023','3/3/2023','6/3/2023'] ,format = "%d/%m/%Y") 

DatetimeIndex(['2023-03-01', '2023-03-03', '2023-03-06'], dtype='datetime64[ns]', freq=None)

In [17]:
# Example 8: Using pandas to create datetime object

testdata = np.random.randn(3,2)
col = ['A' , 'B']

idx = pd.date_range(start = '20 Mar, 2023'  ,periods = 3 , freq = 'D')

In [18]:
pd.DataFrame(data = testdata , columns = col , index =idx)

Unnamed: 0,A,B
2023-03-20,-0.899021,0.814515
2023-03-21,2.616113,1.371483
2023-03-22,1.106739,-0.783097


## 2. Time resampling

In [19]:
# When calling .resample() we first need to pass in a 'rule' parameter, 
# then we need to call some sort of aggregation function.

# The 'rule' parameter describes the frequency with which to apply the aggregation function
# (daily, monthly, yearly, etc.)

# 'rule' parameter : is passed in using an "offset alias" 

# The aggregation function is needed because, due to resampling, we need some sort of 
# mathematical rule to join the rows (mean, sum, count, etc.)

TIME SERIES OFFSET ALIASES

ALIAS	DESCRIPTION

B	    business day frequency
C	    custom business day frequency (experimental)
D	    calendar day frequency
W	    weekly frequency
M	    month end frequency
SM	    semi-month end frequency (15th and end of month)
BM	    business month end frequency
CBM	    custom business month end frequency
MS	    month start frequency
SMS	    semi-month start frequency (1st and 15th)
BMS	    business month start frequency
CBMS    custom business month start frequency
Q	    quarter end frequency

 
ALIAS	DESCRIPTION
BQ	    business quarter endfrequency
QS	    quarter start frequency
BQS	    business quarter start frequency
A	    year end frequency
BA	    business year end frequency
AS	    year start frequency
BAS	    business year start frequency
BH	    business hour frequency
H	    hourly frequency
T,      min	minutely frequency
S	    secondly frequency
L,      ms	milliseconds
U,      us	microseconds
N	    nanoseconds

In [20]:
df = pd.read_csv("starbucks.csv" ,index_col = 'Date')
df.head()

Unnamed: 0_level_0,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-02,38.0061,6906098
2015-01-05,37.2781,11623796
2015-01-06,36.9748,7664340
2015-01-07,37.8848,9732554
2015-01-08,38.4961,13170548


In [21]:
df.index

# simple index not a datetime index

Index(['2015-01-02', '2015-01-05', '2015-01-06', '2015-01-07', '2015-01-08',
       '2015-01-09', '2015-01-12', '2015-01-13', '2015-01-14', '2015-01-15',
       ...
       '2018-12-17', '2018-12-18', '2018-12-19', '2018-12-20', '2018-12-21',
       '2018-12-24', '2018-12-26', '2018-12-27', '2018-12-28', '2018-12-31'],
      dtype='object', name='Date', length=1006)

In [22]:
df = pd.read_csv("starbucks.csv" ,index_col = 'Date' ,parse_dates = True)
df.head()

Unnamed: 0_level_0,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-02,38.0061,6906098
2015-01-05,37.2781,11623796
2015-01-06,36.9748,7664340
2015-01-07,37.8848,9732554
2015-01-08,38.4961,13170548


In [23]:
df.index

DatetimeIndex(['2015-01-02', '2015-01-05', '2015-01-06', '2015-01-07',
               '2015-01-08', '2015-01-09', '2015-01-12', '2015-01-13',
               '2015-01-14', '2015-01-15',
               ...
               '2018-12-17', '2018-12-18', '2018-12-19', '2018-12-20',
               '2018-12-21', '2018-12-24', '2018-12-26', '2018-12-27',
               '2018-12-28', '2018-12-31'],
              dtype='datetime64[ns]', name='Date', length=1006, freq=None)

In [24]:
# Example 1 : daily --> monthly start

# Using alias = MS (Monthly start frequency)   

df.resample(rule = 'MS').mean()

Unnamed: 0_level_0,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-01,38.72947,13368000.0
2015-02-01,42.862432,7908719.0
2015-03-01,44.321836,8905969.0
2015-04-01,45.508914,8540996.0
2015-05-01,47.48865,6723716.0
2015-06-01,49.761627,6856079.0
2015-07-01,52.437959,8087759.0
2015-08-01,52.797976,10648810.0
2015-09-01,53.006186,8935548.0
2015-10-01,57.051373,8893461.0


In [25]:
# Example 2 : daily --> monthly end

# Using alias = M (Monthly end frequency)   

df.resample(rule = 'M').mean()

Unnamed: 0_level_0,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-31,38.72947,13368000.0
2015-02-28,42.862432,7908719.0
2015-03-31,44.321836,8905969.0
2015-04-30,45.508914,8540996.0
2015-05-31,47.48865,6723716.0
2015-06-30,49.761627,6856079.0
2015-07-31,52.437959,8087759.0
2015-08-31,52.797976,10648810.0
2015-09-30,53.006186,8935548.0
2015-10-31,57.051373,8893461.0


In [26]:
# Example 3 : daily --> quarter end frequency

# Using alias = Q (quarter end frequency)   

df.resample(rule = 'Q').mean()

Unnamed: 0_level_0,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-03-31,42.033705,10058310.0
2015-06-30,47.622476,7375698.0
2015-09-30,52.742539,9206285.0
2015-12-31,57.498228,8002620.0
2016-03-31,55.171702,10938320.0
2016-06-30,54.050434,8582716.0
2016-09-30,53.350578,8996589.0
2016-12-31,53.040917,8753118.0
2017-03-31,54.447013,9846263.0
2017-06-30,58.427046,8131475.0


In [27]:
# Example 4 : daily --> year start frequency

# Using alias = Q (year start frequency)   

df.resample(rule = 'AS').mean()

Unnamed: 0_level_0,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-01,50.0781,8649190.0
2016-01-01,53.891732,9300633.0
2017-01-01,55.45731,9296078.0
2018-01-01,56.870005,11228830.0


In [28]:
# Example 5 : daily --> yearly end

# Using alias = A (year end frequency)   

df.resample(rule = 'A').mean()

Unnamed: 0_level_0,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-12-31,50.0781,8649190.0
2016-12-31,53.891732,9300633.0
2017-12-31,55.45731,9296078.0
2018-12-31,56.870005,11228830.0


In [29]:
# Example 6 : custom function

def first_day(entry):
    
    if len(entry):
        
        # returning first entry
        print(entry)
        return entry[0]
    

In [30]:
df.resample(rule = 'A').apply(first_day)

Date
2015-01-02    38.0061
2015-01-05    37.2781
2015-01-06    36.9748
2015-01-07    37.8848
2015-01-08    38.4961
               ...   
2015-12-24    57.0255
2015-12-28    56.9026
2015-12-29    57.7913
2015-12-30    57.4982
2015-12-31    56.7513
Name: Close, Length: 252, dtype: float64
Date
2016-01-04    55.0780
2016-01-05    55.4467
2016-01-06    54.9551
2016-01-07    53.5938
2016-01-08    53.5370
               ...   
2016-12-23    54.7028
2016-12-27    54.5589
2016-12-28    54.0695
2016-12-29    54.0407
2016-12-30    53.2731
Name: Close, Length: 252, dtype: float64
Date
2017-01-03    53.1100
2017-01-04    53.7241
2017-01-05    54.1750
2017-01-06    54.8179
2017-01-09    55.8446
               ...   
2017-12-22    56.0018
2017-12-26    55.8454
2017-12-27    55.9724
2017-12-28    56.5002
2017-12-29    56.1288
Name: Close, Length: 251, dtype: float64
Date
2018-01-02    56.3243
2018-01-03    57.3798
2018-01-04    57.5948
2018-01-05    58.2594
2018-01-08    57.9662
               ...   

Unnamed: 0_level_0,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-12-31,38.0061,6906098
2016-12-31,55.078,13521544
2017-12-31,53.11,7809307
2018-12-31,56.3243,7215978
