Importing and cleaning dataset

In [66]:
import yfinance as yf
import pandas as pd
import statistics as st
import matplotlib.pyplot as plt
import numpy as np


# Create a yfinance Ticker object for the Bitcoin / US dollar pair.
btc = yf.Ticker("BTC-USD")

# Download daily price history for the requested window.
# NOTE: the yfinance docs describe `end` as exclusive, so the last day requested
# here is 2025-04-19.
btc_data = btc.history(start="2020-01-01", end="2025-04-20", interval="1d")

# Fail fast with a clear message if nothing came back, so a failed download is
# reported here instead of surfacing as an unrelated error in a later cell.
if btc_data.empty:
    raise RuntimeError(
        "yfinance returned no rows for BTC-USD; check the network connection and the date range"
    )

# `btc_data` is already a pandas DataFrame, so it does not need to be wrapped again.
# Keep only the four price columns by name (volume, dividends and stock splits are
# not needed for this analysis) and truncate the prices to whole numbers.
# NOTE: astype(int) drops the decimals rather than rounding, and raises if a price is NaN.
btc_df = btc_data[["Open", "High", "Low", "Close"]].astype(int)

# Replace the DatetimeIndex with plain calendar dates (removes time and timezone).
btc_df.index = btc_df.index.date

# Display the dataframe; for a long frame Jupyter shows only the first and last rows.
btc_df

Unnamed: 0,Open,High,Low,Close
2020-01-01,7194,7254,7174,7200
2020-01-02,7202,7212,6935,6985
2020-01-03,6984,7413,6914,7344
2020-01-04,7345,7427,7309,7410
2020-01-05,7410,7544,7400,7411
...,...,...,...,...
2025-04-15,84539,86429,83598,83668
2025-04-16,83674,85428,83100,84033
2025-04-17,84030,85449,83749,84895
2025-04-18,84900,85095,84298,84450


In [68]:
# Convert the index to a DatetimeIndex explicitly so the comparison below is
# datetime64 against datetime64, instead of relying on pandas to coerce an
# object-dtype index of datetime.date values. This also works unchanged if the
# index already holds timestamps.
observed_days = pd.DatetimeIndex(btc_df.index)

# Every calendar day from the first to the last observation, inclusive.
date_range = pd.date_range(start=observed_days.min(), end=observed_days.max(), freq='D')

# Calendar days that have no row in the data.
missing_dates = date_range.difference(observed_days)
if missing_dates.empty:
    print("No missing dates")
else:
    print("Missing dates:", missing_dates)

No missing dates


Applying simple statistics to the dataset

In [69]:
# Arithmetic mean of each price column, taken down the rows.
btc_df.mean(axis=0)

Open     39388.280475
High     40243.020145
Low      38491.389979
Close    39426.228306
dtype: float64

In [65]:
# Median of each price column, taken down the rows.
btc_df.median(axis=0)

Open     35068.5
High     36066.5
Low      33879.5
Close    35184.5
dtype: float64