1. time: epoch timestamp in seconds (in the UTC time zone) – indicates the day
2. high: highest BTC price of the day
3. low: lowest BTC price of the day
4. open: first BTC price of the day
5. close: last BTC price of the day
6. volumefrom: total volume (i.e., the total amount of currency exchanged) of the day in BTC
7. volumeto: total volume (i.e., the total amount of currency exchanged) of the day in USD


The data was collected daily between 28 April 2015 and 18 October 2020.


There are 2001 days between the start and end dates (both inclusive). Our dataset also consists of 2001 samples; thus, each record represents one day.


In [None]:
import pandas as pd
import numpy as np
import time
import calendar
from datetime import datetime
import csv
import seaborn as sns
import plotly.express as px
from functools import reduce

In [None]:
# Load the daily BTC records into a DataFrame.
df = pd.read_csv(filepath_or_buffer="cryptocompare_btc.csv")


In [None]:
# Preview the first five rows of the freshly loaded data.
df.head(n=5)


In [None]:
# Inspect the column dtypes as parsed by read_csv.
df.dtypes


In [None]:
# Convert the epoch-seconds `time` column into pandas timestamps.
df["dt"] = pd.to_datetime(
    arg=df["time"],
    unit="s",
)


In [None]:
# Preview the first five rows, now including the `dt` column.
df.head(n=5)


In [None]:
# Re-check the dtypes to see what type the new `dt` column received.
df.dtypes


In [None]:
# Daily USD volume divided by daily BTC volume, element-wise per row.
df["volume_ratio"] = df["volumeto"].div(df["volumefrom"])

In [None]:
# Preview the first five rows, now including `volume_ratio`.
df.head(n=5)


In [None]:
# Short-term trend: mean of `volume_ratio` over a rolling 3-row window.
df["short_average"] = df["volume_ratio"].rolling(3).mean()


In [None]:
# Long-term trend: mean of `volume_ratio` over a rolling 10-row window.
df["long_average"] = df["volume_ratio"].rolling(10).mean()


In [None]:
# Gap between the 3-row and 10-row rolling means (short minus long).
df["short_minus_long"] = df["short_average"].sub(df["long_average"])

In [None]:
# Preview the first five rows with the rolling-average columns in place.
df.head(n=5)

In [None]:
# Reshape to long format for plotting: one row per (dt, series) pair, with the
# series name in `variable` and its number in `value`.
plot_df = pd.melt(
    df,
    id_vars=["dt"],
    value_vars=[
        "volume_ratio",
        "short_average",
        "long_average",
        "short_minus_long",
    ],
)


In [None]:
# Preview the first five rows of the long-format plotting frame.
plot_df.head(n=5)

In [None]:
# Grouped bar chart of every melted series, restricted to the window
# 1 May 2017 – 12 June 2017.
start_date, end_date = "2017/05/01", "2017/06/12"

fig = px.bar(
    plot_df.loc[plot_df["dt"].between(start_date, end_date)],
    x="dt",
    y="value",
    color="variable",
    barmode="group",
)
fig.show()


In [None]:
# Grouped bar chart of every melted series, restricted to the window
# 5 September 2018 – 27 September 2018.
start_date, end_date = "2018-09-05", "2018-09-27"

fig = px.bar(
    plot_df.loc[plot_df["dt"].between(start_date, end_date)],
    x="dt",
    y="value",
    color="variable",
    barmode="group",
)
fig.show()


In [None]:
# Grouped bar chart of every melted series, restricted to the window
# 3 November 2019 – 14 November 2019.
start_date, end_date = "2019-11-03", "2019-11-14"

fig = px.bar(
    plot_df.loc[plot_df["dt"].between(start_date, end_date)],
    x="dt",
    y="value",
    color="variable",
    barmode="group",
)
fig.show()


# Part D

## Read CSV file

In [None]:
# Number of seconds in one day: 60 seconds * 60 minutes * 24 hours.
SECONDS_PER_DAY = 60 * 60 * 24

In [None]:
# Location of the raw CSV, and the container its parsed rows are loaded into.
dataset_file_path = "cryptocompare_btc.csv"
data = []

In [None]:
# Read every CSV row into `data` as a dict keyed by the header names; all
# values are kept as the strings found in the file.
# newline="" is what the csv module documentation asks for when a file object
# is handed to a reader, so that the module does its own newline handling.
with open(dataset_file_path, "r", newline="") as f:
    reader = csv.DictReader(f)
    data = list(reader)

In [None]:
# Rebuild each record with an extra `volume_ratio` entry: the USD volume
# divided by the BTC volume, both parsed from their string form.
data = [
    {**row, "volume_ratio": float(row["volumeto"]) / float(row["volumefrom"])}
    for row in data
]

# Daily high and low

In [None]:
# Date window for the high/low plot. Alternative windows, kept for reference:
#   "2015/05/04" – "2015/05/27"
#   "2016/02/01" – "2016/02/28"
start_date = "2016/12/08"
end_date = "2016/12/11"

In [None]:
# Keep only the rows inside the chosen date window, then reshape `high` and
# `low` into long format (series name in `variable`, number in `value`).
plot_df = pd.melt(
    df[df["dt"].between(start_date, end_date)],
    id_vars="dt",
    value_vars=["high", "low"],
)

In [None]:
# Preview the first five rows of the high/low plotting frame.
plot_df.head(n=5)

In [None]:
# Scatter the daily high and low values over time, coloured by series, with
# an ordinary-least-squares trendline requested from plotly.
fig = px.scatter(
    plot_df,
    x="dt",
    y="value",
    color="variable",
    trendline="ols",
)
fig.show()