### Imports

In [92]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
# NOTE(review): pdr_override() is called *before* DataReader is imported below.
# Presumably the order matters because the override patches
# pandas_datareader.data so that DataReader is served by yfinance -- confirm
# against the installed yfinance version (TODO: check whether newer releases
# still provide pdr_override).
yf.pdr_override()
from pandas_datareader.data import DataReader


### Extract and View Data

In [93]:
# Ticker and requested date window (ISO 'YYYY-MM-DD' strings).
symbol = 'ETH-USD'
start_date, end_date = '2017-01-01', '2022-06-01'

# Download the daily price history for the window above into df.
# In the saved run the returned rows span 2017-11-09 .. 2022-05-31, i.e. the
# data begins later than the requested start date.
df = DataReader(symbol, start=start_date, end=end_date)
df.head()


[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-11-09,308.644989,329.451996,307.056,320.884003,320.884003,893249984
2017-11-10,320.67099,324.717987,294.541992,299.252991,299.252991,885985984
2017-11-11,298.585999,319.453003,298.191986,314.681,314.681,842300992
2017-11-12,314.690002,319.153015,298.513,307.90799,307.90799,1613479936
2017-11-13,307.024994,328.415009,307.024994,316.716003,316.716003,1041889984


In [94]:
# Show the last two rows to check where the downloaded history ends.
df.tail(2)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-05-30,1811.885986,2005.210815,1804.456055,1996.441284,1996.441284,19580808705
2022-05-31,1996.408081,2005.490967,1932.352051,1942.328003,1942.328003,18363115560


In [95]:
# Print the index range, column dtypes and non-null counts of the raw frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1665 entries, 2017-11-09 to 2022-05-31
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       1665 non-null   float64
 1   High       1665 non-null   float64
 2   Low        1665 non-null   float64
 3   Close      1665 non-null   float64
 4   Adj Close  1665 non-null   float64
 5   Volume     1665 non-null   int64  
dtypes: float64(5), int64(1)
memory usage: 91.1 KB


# Working with Returns

In [96]:
# Simple (arithmetic) daily return: close_t / close_{t-1} - 1.
df['Returns'] = df['Close'].pct_change()

# The first row has no previous close, so pct_change() leaves it as NaN.
# Drop only the rows whose 'Returns' is missing: restricting dropna to this
# column avoids discarding rows that merely have a gap in an unrelated column.
# (Filling the NaN with 0 instead would keep the row, at the cost of recording
# a return that was never observed.)
#
# NOTE: this cell mutates df in place and is not idempotent -- every re-run
# recomputes the returns and drops the then-first row again. Re-run the
# download cell before executing this cell a second time.
df.dropna(subset=['Returns'], inplace=True)

df.head(10)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-11-10,320.67099,324.717987,294.541992,299.252991,299.252991,885985984,-0.067411
2017-11-11,298.585999,319.453003,298.191986,314.681,314.681,842300992,0.051555
2017-11-12,314.690002,319.153015,298.513,307.90799,307.90799,1613479936,-0.021523
2017-11-13,307.024994,328.415009,307.024994,316.716003,316.716003,1041889984,0.028606
2017-11-14,316.763,340.177002,316.763,337.631012,337.631012,1069680000,0.066037
2017-11-15,337.963989,340.911987,329.812988,333.356995,333.356995,722665984,-0.012659
2017-11-16,333.442993,336.158997,323.605988,330.924011,330.924011,797254016,-0.007298
2017-11-17,330.166992,334.963989,327.52301,332.394012,332.394012,621732992,0.004442
2017-11-18,331.980011,349.615997,327.687012,347.612,347.612,649638976,0.045783
2017-11-19,347.401001,371.290985,344.73999,354.385986,354.385986,1181529984,0.019487


In [97]:
# Daily log return: ln(close_t / close_{t-1}).
# Log returns are additive over time, which the cumulative sum below relies on.
# The first row has no earlier close left in df, so its value is NaN here.
df['log_returns'] = np.log(df['Close'].div(df['Close'].shift(periods=1)))
df.head(n=10)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Returns,log_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2017-11-10,320.67099,324.717987,294.541992,299.252991,299.252991,885985984,-0.067411,
2017-11-11,298.585999,319.453003,298.191986,314.681,314.681,842300992,0.051555,0.05027
2017-11-12,314.690002,319.153015,298.513,307.90799,307.90799,1613479936,-0.021523,-0.021758
2017-11-13,307.024994,328.415009,307.024994,316.716003,316.716003,1041889984,0.028606,0.028204
2017-11-14,316.763,340.177002,316.763,337.631012,337.631012,1069680000,0.066037,0.063948
2017-11-15,337.963989,340.911987,329.812988,333.356995,333.356995,722665984,-0.012659,-0.01274
2017-11-16,333.442993,336.158997,323.605988,330.924011,330.924011,797254016,-0.007298,-0.007325
2017-11-17,330.166992,334.963989,327.52301,332.394012,332.394012,621732992,0.004442,0.004432
2017-11-18,331.980011,349.615997,327.687012,347.612,347.612,649638976,0.045783,0.044766
2017-11-19,347.401001,371.290985,344.73999,354.385986,354.385986,1181529984,0.019487,0.0193


In [98]:
# Running total of the daily log returns.
# With skipna=True the leading NaN stays NaN in the result but does not
# propagate into the sums of the following rows.
df['l_returns_cumsum'] = df['log_returns'].cumsum(skipna=True)
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Returns,log_returns,l_returns_cumsum
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2017-11-10,320.67099,324.717987,294.541992,299.252991,299.252991,885985984,-0.067411,,
2017-11-11,298.585999,319.453003,298.191986,314.681,314.681,842300992,0.051555,0.05027,0.05027
2017-11-12,314.690002,319.153015,298.513,307.90799,307.90799,1613479936,-0.021523,-0.021758,0.028512
2017-11-13,307.024994,328.415009,307.024994,316.716003,316.716003,1041889984,0.028606,0.028204,0.056716
2017-11-14,316.763,340.177002,316.763,337.631012,337.631012,1069680000,0.066037,0.063948,0.120664


In [99]:
# Convert the cumulative log return back into a cumulative simple return:
#   exp(sum of log returns) - 1  ==  close_t / close_base - 1
# np.expm1(x) evaluates exp(x) - 1 without the precision loss the explicit
# subtraction suffers when x is close to zero.
df['l_returns_normalized'] = np.expm1(df['l_returns_cumsum'])

# Zero-fill only the derived return columns, whose first row is NaN because
# no earlier close was available for it. A blanket df.fillna(0) would also
# overwrite any missing price or volume value with 0 and hide the data gap.
derived_cols = ['log_returns', 'l_returns_cumsum', 'l_returns_normalized']
df[derived_cols] = df[derived_cols].fillna(0)
df.head(5)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Returns,log_returns,l_returns_cumsum,l_returns_normalized
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2017-11-10,320.67099,324.717987,294.541992,299.252991,299.252991,885985984,-0.067411,0.0,0.0,0.0
2017-11-11,298.585999,319.453003,298.191986,314.681,314.681,842300992,0.051555,0.05027,0.05027,0.051555
2017-11-12,314.690002,319.153015,298.513,307.90799,307.90799,1613479936,-0.021523,-0.021758,0.028512,0.028922
2017-11-13,307.024994,328.415009,307.024994,316.716003,316.716003,1041889984,0.028606,0.028204,0.056716,0.058355
2017-11-14,316.763,340.177002,316.763,337.631012,337.631012,1069680000,0.066037,0.063948,0.120664,0.128246
