In [61]:
import numpy as np
import datetime as datetime
import pandas as pd
import matplotlib.pyplot as plt
import requests
import os

In [62]:
def collect_data(
    assets: list,
    start: datetime.datetime,
    end: datetime.datetime,
    path: str = "binance_1h",
    step: str = '1h',
    quote_asset: str = "USDT",
) -> None:
    """
    Downloads historical close prices for several assets and saves one CSV file per asset.

    Parameters:
    - assets (list): List of asset symbols to collect data for.
    - start (datetime.datetime): Start date and time for data collection.
    - end (datetime.datetime): End date and time for data collection.
    - path (str): Directory where the CSV files are saved; created if missing. Default is "binance_1h".
    - step (str): Candle interval passed on to get_data. Default is '1h'.
    - quote_asset (str): Quote asset symbol passed on to get_data. Default is "USDT".

    Returns:
    - None. Each asset is written to "<path>/<asset>.csv", overwriting an existing file.
    """
    # exist_ok=True creates the directory only when needed, without a separate
    # (and racy) os.path.exists check
    os.makedirs(path, exist_ok=True)

    # Loop through each asset and collect data
    for asset in assets:
        prices = get_data(asset, start, end, step, quote_asset)
        prices.index = pd.to_datetime(prices.index)
        # get_data requests consecutive windows that share their boundary, so a
        # timestamp may show up more than once; keep the first occurrence only
        prices = prices[~prices.index.duplicated(keep='first')]
        prices.to_csv(os.path.join(path, asset + '.csv'))

# Seconds per interval unit. A month ("M") is counted as 28 days so that one request
# window never covers more candles than a single call is asked to return.
_INTERVAL_UNIT_SECONDS = {"s": 1, "m": 60, "h": 3600, "d": 86400, "w": 604800, "M": 2419200}


def _interval_to_timedelta(step) -> datetime.timedelta:
    """Converts an interval string such as '15m' or '1h' into a timedelta (one hour if it cannot be parsed)."""
    text = str(step)
    try:
        seconds = int(text[:-1]) * _INTERVAL_UNIT_SECONDS[text[-1]]
    except (KeyError, ValueError, IndexError):
        seconds = 0
    if seconds <= 0:
        # Unrecognised interval: keep the earlier assumption of hourly candles
        return datetime.timedelta(hours=1)
    return datetime.timedelta(seconds=seconds)


def get_data(asset: str, start: datetime.datetime, end: datetime.datetime, step: str, quote_asset:str) -> pd.DataFrame:
    """
    Retrieves historical market data for a specific asset.

    The range [start, end] is split into consecutive request windows of `limit`
    candles each (sized from `step`); no window extends past `end`.

    Parameters:
    - asset (str): Symbol of the asset.
    - start (datetime.datetime): Start date and time for data collection.
    - end (datetime.datetime): End date and time for data collection.
    - step (str): Candle interval, e.g. '1h'.
    - quote_asset (str): Quote asset symbol.

    Returns:
    - pd.DataFrame: DataFrame indexed by "Time" with a single "Close" column.
      It is empty when start is not before end. Consecutive windows share their
      boundary, so the same timestamp may appear twice.
    """
    limit = 1000  # candles requested per API call
    window = _interval_to_timedelta(step) * limit

    res = []
    start_time = start

    # Walk through the range one window at a time, clamping the last window to `end`
    while start_time < end:
        end_time = min(start_time + window, end)
        res += data_call(asset, quote_asset, step, start_time, end_time, limit)
        start_time = end_time

    return pd.DataFrame(data=res, columns=["Close", "Time"]).set_index("Time")

def data_call(asset: str, quote_asset: str, step: str, start_time: datetime.datetime, end_time: datetime.datetime, limit: int) -> list:
    """
    Makes API call to Binance to retrieve historical market data.

    Parameters:
    - asset (str): Symbol of the asset.
    - quote_asset (str): Quote asset symbol.
    - step (str): Time interval for data points, e.g. '1h'.
    - start_time (datetime.datetime): Start date and time for data collection.
    - end_time (datetime.datetime): End date and time for data collection.
    - limit (int): Maximum number of data points per API call.

    Returns:
    - list: List of lists containing historical market data, as produced by extract_data.

    Raises:
    - requests.HTTPError: If the server answers with an error status.
    - ValueError: If the response body is not a list of rows.
    """
    # Let requests build and encode the query string instead of concatenating it by hand
    params = {
        "symbol": asset + quote_asset,
        "interval": str(step),
        # The endpoint takes timestamps in milliseconds
        "startTime": int(start_time.timestamp()) * 1000,
        "endTime": int(end_time.timestamp()) * 1000,
        "limit": limit,
    }
    # A timeout keeps a stalled connection from blocking the caller indefinitely
    response = requests.get('https://api.binance.com/api/v3/klines', params=params, timeout=30)
    response.raise_for_status()

    data = response.json()
    if not isinstance(data, list):
        # Anything other than a list of rows cannot be handled by extract_data
        raise ValueError("Unexpected response from the klines endpoint: " + repr(data))
    return extract_data(data)

def extract_data(data):
    """
    Reduces raw API rows to [close price, timestamp] pairs.

    Parameters:
    - data: Iterable of rows from the API response. Element 4 of a row is used as
      the close price and element 6 as a millisecond timestamp (the candle close
      time in Binance's kline layout).

    Returns:
    - list: One [close_price, datetime] pair per row, in input order. The price is
      passed through unchanged; the datetime is naive local time.
    """
    def _to_pair(row):
        # Drop the last three digits (milliseconds -> whole seconds), then add one
        # second before converting to a datetime
        moment = datetime.datetime.fromtimestamp(int(str(row[6])[:-3]) + 1)
        return [row[4], moment]

    return [_to_pair(row) for row in data]

def load_data(assets: list, start: datetime.datetime, end: datetime.datetime, fields="Close", path="binance_1h"):
    """
    Load historical market data from CSV files for specified assets and time range.

    Parameters:
    - assets (list): List of asset symbols to load data for.
    - start (datetime.datetime): Start date and time for data loading (inclusive).
    - end (datetime.datetime): End date and time for data loading (inclusive).
    - fields (str or list): Column name, or list of column names, to load. Default is "Close".
    - path (str): Path to the directory where CSV files are stored. Default is "binance_1h".

    Returns:
    - pd.DataFrame: With a single field name, one column per asset. With a list of
      fields, columns are a two-level index of (asset, field).
    """
    data = {}

    # Iterate through each asset and load data
    for asset in assets:
        # Read CSV file and set the "Time" column as the index
        frame = pd.read_csv(os.path.join(path, asset + '.csv')).set_index("Time")

        # Convert the index to datetime format
        frame.index = pd.to_datetime(frame.index)

        # Keep only specified fields; sort first because label slicing on an unsorted
        # index fails when start/end are not exact index entries
        selected = frame[fields].sort_index(kind="mergesort")

        # Filter data for the specified time range and drop rows with missing values
        data[asset] = selected.loc[start:end].dropna()

    if isinstance(fields, str) or not data:
        # Each value is a Series: one column per asset
        return pd.DataFrame(data=data)

    # Each value is a DataFrame: place them side by side under an (asset, field) header
    return pd.concat(data, axis=1)


In [63]:
# Asset symbols to download and analyse
assets = [
    "BTC",
    "ETH",
    "BNB",
    "XRP",
    "SOL",
]

In [7]:
# Sample window: midnight 2 January 2023 to midnight 2 March 2023 (naive datetimes)
start = datetime.datetime(year=2023, month=1, day=2)
end = datetime.datetime(year=2023, month=3, day=2)

In [8]:
# Download Data: fetch every asset over the sample window and write one CSV per asset
collect_data(assets=assets, start=start, end=end)

In [64]:
# Load Local Data: read the saved CSV files back into one frame, one column per asset
data = load_data(assets=assets, start=start, end=end)
data

Unnamed: 0_level_0,BTC,ETH,BNB,XRP,SOL
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-01-02 01:00:00,16616.75,1200.34,244.4,0.3387,9.99
2023-01-02 02:00:00,16588.35,1198.16,243.6,0.3366,9.91
2023-01-02 03:00:00,16565.04,1195.16,242.1,0.3253,9.85
2023-01-02 04:00:00,16587.85,1196.79,242.4,0.3266,9.92
2023-01-02 05:00:00,16661.94,1201.94,243.9,0.3287,9.99
...,...,...,...,...,...
2023-03-01 20:00:00,23690.88,1655.01,302.3,0.3831,22.42
2023-03-01 21:00:00,23355.14,1634.79,300.9,0.3805,22.15
2023-03-01 22:00:00,23421.23,1642.54,301.6,0.3833,22.25
2023-03-01 23:00:00,23553.73,1657.01,302.3,0.3828,22.45


In [53]:
# Single-asset case: the BTC price column as a NumPy array
x_array = np.array(data["BTC"])

In [54]:
# Log returns: difference between consecutive log prices
log_prices = np.log(x_array)
log_returns = np.diff(log_prices)

In [55]:
# Mean of the log returns
mu = log_returns.mean()
mu

0.0002458992399343684

In [56]:
# Variance of the log returns (NumPy's default here is the population variance, ddof=0)
sigma2 = log_returns.var()
sigma2

2.3912831202046395e-05

In [57]:
# Set up the data: the whole price frame as a 2-D array
# (rows follow the time index, one column per asset)
x_array = np.array(data)

In [58]:
# Compute the logarithmic returns: difference consecutive rows of the log prices,
# separately for each column
log_prices = np.log(x_array)
log_returns = np.diff(log_prices, axis=0)

In [59]:
# Compute the expected values vector: mean log return of each column
mu = log_returns.mean(axis=0)
print("The Expected Value vector\n\n",mu)

The Expected Value vector

 [2.45899240e-04 2.27646787e-04 1.49555329e-04 8.74227651e-05
 5.70970103e-04]


In [60]:
# Compute the covariance matrix (np.cov treats each row as one variable, hence the transpose)
# Sigma is kept at full precision: its entries are of order 1e-5, so rounding the
# stored matrix to 7 decimals would leave only about 3 significant digits for any
# later calculation.
Sigma = np.cov(log_returns.T)
# Round for display only; the slice limits the printout to the top-left 7x7 corner
print("Covariance Matrix\n\n",np.round(Sigma[:7,:7],7))

Covariance Matrix

 [[2.390e-05 2.480e-05 1.910e-05 1.830e-05 3.740e-05]
 [2.480e-05 3.170e-05 2.340e-05 2.250e-05 4.670e-05]
 [1.910e-05 2.340e-05 2.920e-05 2.030e-05 4.050e-05]
 [1.830e-05 2.250e-05 2.030e-05 3.790e-05 4.080e-05]
 [3.740e-05 4.670e-05 4.050e-05 4.080e-05 1.404e-04]]
