In [4]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta, date
import matplotlib.pyplot as plt

np.set_printoptions(suppress=True)

In [20]:
def generate_requirements(*modules):
    with open("requirements.txt",mode="a") as handle:
        for module in modules:
            handle.write(f"{module.__name__}=={module.__version__}\n")

In [7]:
# import
PATH = "BTC_USD_2012_present.csv"
records = pd.read_csv(PATH)

In [8]:
def convert_to_datetime(ts:int) -> datetime | None:
    """
    Converts timestamps to datetime objects
    Should the wrong intput be passed, return None
    """
    try:
        return datetime.fromtimestamp(ts)
    except (TypeError, ValueError):
        return None

In [13]:
def calculate_change(data: pd.Series, start: date, end: date) -> float | None:    
    """
        Calculates percentage change between two dates
        Slould a KeyError appear, returns None
    """
    try:
        return data.asof(end) / data.asof(start) - 1
    except KeyError:
        return None

In [9]:
# clean the data
records.dropna(inplace=True)
records["Timestamp"] = records["Timestamp"].map(convert_to_datetime)
records.rename(columns=lambda x: x.lower(), inplace=True)
records.set_index("timestamp", inplace=True)

In [10]:
records.tail()

Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-10-01 01:56:00,63303.0,63321.0,63303.0,63321.0,1.778449
2024-10-01 01:57:00,63321.0,63321.0,63321.0,63321.0,0.0
2024-10-01 01:58:00,63323.0,63332.0,63323.0,63332.0,0.774621
2024-10-01 01:59:00,63329.0,63329.0,63302.0,63302.0,6.581603
2024-10-01 02:00:00,63331.0,63400.0,63321.0,63400.0,8.828026


In [11]:
# aggregate the data
agg_close = records["close"].groupby(records.index.date).ohlc()
agg_volume = records["volume"].groupby(records.index.date).sum()

records = pd.concat([agg_close, agg_volume],axis="columns")

In [12]:
records.tail()

Unnamed: 0,open,high,low,close,volume
2024-09-27,65060.0,65223.0,64761.0,65175.0,51.58711
2024-09-28,65848.0,65898.0,65452.0,65680.0,389.040558
2024-09-29,65680.0,65949.0,65592.0,65872.0,48.879495
2024-09-30,64616.0,64616.0,63049.0,63699.0,1109.221052
2024-10-01,63699.0,63737.0,62910.0,63400.0,248.418448


In [14]:
# if you held BTC for a month what would your average profit be?
SPAN_DAYS = 30
DATASET_END_DATE = records.index[-1]

start_date = records.index[0]
end_date = start_date + timedelta(days=SPAN_DAYS)
pct_changes = []

In [15]:
while True:        
    value = calculate_change(records["close"], start_date, end_date)
    if value: 
        pct_changes.append(value)

    start_date += timedelta(days=1)
    end_date += timedelta(days=1)

    if end_date > DATASET_END_DATE: 
        break

In [18]:
# Describes the returns in percentage
(pd.Series(pct_changes)
 .describe(percentiles=[0.01,0.25,0.5,0.75,0.99])
 .drop(["count","mean","std"])
 .round(2) * 100
)

min    -58.0
1%     -39.0
25%     -8.0
50%      3.0
75%     21.0
99%    160.0
max    451.0
dtype: float64