In [None]:
import pandas as pd
import numpy as np
import datetime

import matplotlib.pyplot as plt
import seaborn as sns

# Set the default figure size (width, height in inches) for all matplotlib plots in this notebook
plt.rcParams["figure.figsize"] = (8,6)

### Datetime objects in Python, Numpy and Pandas

##### Native Python dates and times

In [None]:
# Python: datetime module



In [None]:
# Once you have a datetime object, you can do things like printing the day of the week:



#### Numpy native time series dtype

In [None]:
# Now we can quickly do vectorized operations on it!



##### Pandas time series data type:

- Timestamp
- 'DatetimeIndex' - Datetime object as Index

In [None]:
# We can use various attributes of datetime object to figure out day, month, year, weekday etc


In [None]:
# weekday?


Timestamp documentation: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timestamp.html

## Demo: Working with Timeseries data in pandas

In [None]:
# We will use Ethereum price data from 2017-2020 to demo pandas' capabilities for time-series data

# Google Sheets CSV-export URL for the price data.
# NOTE(review): the path has a double slash before "export" and the `gid=` parameter is empty — confirm the URL still resolves.
sheet_url = 'https://docs.google.com/spreadsheets/d/1kTrAFSrr-xP3REs0Lly0TdV4ekrHahBXLg9r5qKxmV8//export?format=csv&gid='



In [None]:
# check for nulls


In [None]:
# use pandas .plot()



### Time series analysis: How to make pandas time-aware and leverage built-in functionality:

1. Convert 'date/time' column to datetime object in Pandas (pd.to_datetime())
2. Set the datetime column as Index
3. Sort the datetime index

In [None]:
# The pandas to_datetime method can convert strings/objects to datetime objects (aka Timestamp objects).
# Most of the time pandas is smart enough to parse the date without any help.



In [None]:
# Sometimes we have to specify the format of the datetime string to help pandas parse the date/time



Documentation on strftime() Format Codes:  
https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior

In [None]:
# Step 1: Let's convert the 'Date' column in our df to a pandas datetime object using pd.to_datetime()




In [None]:
# confirm if the 'Date' column is indeed a datetime object now



In [None]:
# Step 2 and 3 combined: Set 'Date' column as Index and sort the index:



In [None]:
# What is the Index type now?



In [None]:
# let's plot the closing price again and see how it compares to the previous plot



#### Pandas *datetimeIndex* attributes

https://pandas.pydata.org/docs/reference/api/pandas.DatetimeIndex.html

In [None]:
# add a new column called 'day' which is the 'weekday'



In [None]:
# Can I use groupby to see mean trading volume by day:


### How to choose subset of a dataframe?

In [None]:
# choose rows for '2017-07-01' (show only first 10 values)



In [None]:
# we can also use date range



### Changing the Period

- Upsampling - increasing frequency
    - This will result in empty observations which you may choose to fill with .ffill(),  .bfill(), or some other method.

- Downsampling - reducing frequency

In [None]:
# Raw GitHub gist URL for coffee_consumption.csv.
# NOTE(review): nothing in this cell loads it yet — presumably it is read with pd.read_csv; confirm.
url = "https://gist.githubusercontent.com/ryanorsinger/b309f8db19e0ca71b213d4877d835e77/raw/f5841017310e2f4ca070b313529ceec2375336ba/coffee_consumption.csv"


In [None]:
#Upsample



#### What did we do to our dataframe (size?)



#### Code Description   	
D	Calendar day	       
W	Weekly		  
M	Month end	          
Q	Quarter end            
A	Year end	         
H	Hours	              
T	Minutes		
S	Seconds		
L	Milliseconds		
U	Microseconds		
N	nanoseconds		
   


In [None]:
# Another example. Now select sample every 45 minutes



In [None]:
# How can we take care of new NaNs introduced in our data by asfreq method?

# use ffill/padding



In [None]:
#  how can we backfill the data:



In [None]:
# You can also use fillna to fill NaNs with a chosen value:



When to use 'ffill' or 'bfill' or None?

#### How can we go from more frequent to less frequent (Downsample)

In [None]:
# We can use .asfreq method. Here we go from data every hour to every 3 hours

#downsample

# show only first 10 values:


In [None]:
# selecting data based on daily frequency:



#### What does the code above do to your dataframe content and size?

### Resampling 

- Fundamentally data aggregation (similar to groupby but for dates)
    - as opposed to asfreq, which is more of a 'data selection'
- Only works on dataframes with a datetime index


In [None]:
# resample the whole dataframe every hour using mean as an aggregator



In [None]:
# resample monthly and use mean for aggregation



In [None]:
# resample yearly


In [None]:
# Plot raw data with monthly resampled data



### Rolling Averages/Windows

- Rolling averages, aka moving averages, are generally used to smooth out short-term fluctuations in time series data and highlight long-term trends

In [None]:
# What is the rolling average of 4 hours?



In [None]:
# Resampling and rolling average together



In [None]:
# %matplotlib notebook




In [None]:
# Min/max or any other aggregation


### Lagging and Lead

- Shift: move the data backwards and forwards by a given amount
- diff: find the difference with the previous observation (or a specified further back observation)
- Can be used to compute differences over time

In [None]:
# Use 'shift' to move the timeseries into the future



In [None]:
# create a new column 'diff(1)'


In [None]:
# Line plot of the Close column.
# NOTE(review): `df` is not assigned in any visible cell above — an earlier cell must create it for this to run on a fresh kernel.
plt.plot(df.Close)

In [None]:
# Use shift/diff to calculate the 1-year rate of return
# NOTE(review): this cell has no plotting call yet; the lines below only label and crop the current matplotlib axes.

plt.ylabel('% Return on Investment')
plt.title('1-Year Rate of return on ETH')
# Restrict the x-axis to the window from 2017-07 to 2019-06
plt.xlim(pd.Timestamp('2017-07'), pd.Timestamp('2019-06'))

### Time Zone

In [None]:
# datetime with no timezone specified (sometimes called timezone naive datetime object)


In [None]:
#specify timezone



In [None]:
# convert to a different timezone



In [None]:
# What is the date and time right now? Take any datetime object and use the .now() method



### Timedelta

In [None]:
# time delta between now (today's date) and t1



In [None]:
# Can we find the time delta between t and t1?
# NOTE(review): `t` and `t1` are not assigned in any visible cell — presumably they come from the cells above; confirm.

# Display both values as a tuple
t, t1


In [None]:
# To convert the above Timedelta object to a number:

