In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [47]:
def transform_bitcoin_data(data: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of the bitcoin dataframe with derived price/volume columns added.

    The input frame is NOT modified; all new columns are written to a copy, so
    calling this function repeatedly on the same input gives the same result.

    Columns added:
    %PriceChange: (Close - Open) / Open * 100, rounded to 3 decimals (change within a day)
    priceDiff: Close - Open, rounded to 3 decimals (per day absolute price change)
    normalizedVol: Volume divided by the maximum Volume in the frame (relative trading activity)
    7DayMovingAvgPrice: 7-row rolling mean of Close, rounded to 3 decimals
    30DayMovingAvgPrice: 30-row rolling mean of Close, rounded to 3 decimals
    7DayMovingAvgVol: 7-row rolling mean of normalizedVol
    30DayMovingAvgVol: 30-row rolling mean of normalizedVol

    Note: The first 6 (7-row window) or 29 (30-row window) moving-average values
        are NaN because a full window is not yet available; the first value appears
        on the 7th / 30th row. We have decided to leave these values as NaN to
        preserve data integrity and not fill them, which could distort our analysis.
        The windows count rows, so they equal calendar days only if the data has
        exactly one row per day.

    TODO: Add Event flag: Pre [1], During[2], Post[3]
    TODO: Add doctests to see if the calculations are correct

    :param data: bitcoin dataframe; must contain 'Open', 'Close' and 'Volume' columns
    :return: new bitcoin dataframe with the columns above appended
    """
    # Copy first so the caller's frame is never mutated as a side effect
    result = data.copy()

    # Open-to-close move, both as a percentage of Open and as an absolute amount
    open_to_close = result['Close'] - result['Open']
    result['%PriceChange'] = ((open_to_close / result['Open']) * 100).round(3)
    result['priceDiff'] = open_to_close.round(3)

    # Scale volume by the largest volume in the frame so the peak row equals 1.0
    result['normalizedVol'] = result['Volume'] / result['Volume'].max()

    # Rolling means over 7 and 30 rows to compare long-term results;
    # rows without a full window stay NaN (rolling's default min_periods)
    result['7DayMovingAvgPrice'] = result['Close'].rolling(window=7).mean().round(3)
    result['30DayMovingAvgPrice'] = result['Close'].rolling(window=30).mean().round(3)
    result['7DayMovingAvgVol'] = result['normalizedVol'].rolling(window=7).mean()
    result['30DayMovingAvgVol'] = result['normalizedVol'].rolling(window=30).mean()

    return result

In [48]:
def get_bitcoin_data(filename: str) -> pd.DataFrame:
    """
    Load the bitcoin csv file and return it with the derived columns added.

    The csv is read into a dataframe, its 'Date' column is parsed with
    pd.to_datetime, and the result is passed through transform_bitcoin_data.

    :param filename: bitcoin csv file name
    :return: bitcoin data as a pandas dataframe, including the transformed columns
    """
    return (
        pd.read_csv(filename)
        # 'Date' must be in pd.datetime format to perform calculations on it
        .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
        .pipe(transform_bitcoin_data)
    )

In [49]:
# Read the csv and add the derived price/volume columns in a single call
bitcoin_df = get_bitcoin_data('bitcoin_historical_data.csv')

In [50]:
# List the column labels to check that the derived columns are present
bitcoin_df.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Dividends',
       'Stock Splits', '%PriceChange', 'priceDiff', 'normalizedVol',
       '7DayMovingAvgPrice', '30DayMovingAvgPrice', '7DayMovingAvgVol',
       '30DayMovingAvgVol'],
      dtype='object')

In [51]:
# Show 45 rows so the preview runs past the 30-row rolling window,
# i.e. far enough to see the 30-day averages stop being NaN
bitcoin_df.head(45)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,%PriceChange,priceDiff,normalizedVol,7DayMovingAvgPrice,30DayMovingAvgPrice,7DayMovingAvgVol,30DayMovingAvgVol
0,2014-09-17 00:00:00+00:00,465.864014,468.174011,452.421997,457.334015,21056800,0.0,0.0,-1.831,-8.53,6e-05,,,,
1,2014-09-18 00:00:00+00:00,456.859985,456.859985,413.104004,424.440002,34483200,0.0,0.0,-7.096,-32.42,9.8e-05,,,,
2,2014-09-19 00:00:00+00:00,424.102997,427.834991,384.532013,394.79599,37919700,0.0,0.0,-6.91,-29.307,0.000108,,,,
3,2014-09-20 00:00:00+00:00,394.673004,423.29599,389.882996,408.903992,36863600,0.0,0.0,3.606,14.231,0.000105,,,,
4,2014-09-21 00:00:00+00:00,408.084991,412.425995,393.181,398.821014,26580100,0.0,0.0,-2.27,-9.264,7.6e-05,,,,
5,2014-09-22 00:00:00+00:00,399.100006,406.915985,397.130005,402.152008,24127600,0.0,0.0,0.765,3.052,6.9e-05,,,,
6,2014-09-23 00:00:00+00:00,402.09201,441.557007,396.196991,435.790985,45099500,0.0,0.0,8.381,33.699,0.000129,417.463,,9.2e-05,
7,2014-09-24 00:00:00+00:00,435.751007,436.112,421.131989,423.204987,30627700,0.0,0.0,-2.879,-12.546,8.7e-05,412.587,,9.6e-05,
8,2014-09-25 00:00:00+00:00,423.156006,423.519989,409.467987,411.574005,26814400,0.0,0.0,-2.737,-11.582,7.6e-05,410.749,,9.3e-05,
9,2014-09-26 00:00:00+00:00,411.428986,414.937988,400.009003,404.424988,21460800,0.0,0.0,-1.702,-7.004,6.1e-05,412.125,,8.6e-05,
