In [1]:
import numpy as np
import datetime as datetime
import pandas as pd
import matplotlib.pyplot as plt
import requests
import os

In [2]:
def collect_data(assets: list, start: datetime.datetime, end: datetime.datetime, path="binance_1m",step='1m',quote_asset="USDT") -> None:
    """Download price history for every asset and store one CSV per asset.

    Args:
        assets: Base-asset tickers; each is fetched through ``get_data``.
        start: Beginning of the requested period.
        end: End of the requested period.
        path: Output directory; created when it does not exist yet.
        step: Candle interval forwarded to ``get_data``.
        quote_asset: Quote currency forwarded to ``get_data``.

    Each file is written to ``<path>/<asset>.csv`` with a datetime index,
    keeping only the first row for any repeated timestamp.
    """
    directory_missing = not os.path.exists(path)
    if directory_missing:
        os.makedirs(path)

    for ticker in assets:
        history = get_data(ticker, start, end, step, quote_asset)
        history.index = pd.to_datetime(history.index)
        # Keep the first occurrence of every timestamp, drop later repeats.
        repeated = history.index.duplicated(keep='first')
        history[~repeated].to_csv(path + '/' + ticker + '.csv')

def get_data(asset: str, start: datetime.datetime, end: datetime.datetime, step: str, quote_asset:str) -> pd.DataFrame:
    """Fetch close prices for one trading pair between ``start`` and ``end``.

    The period is walked in consecutive windows of ``limit`` minutes, one
    ``data_call`` per window. The last window is clamped to ``end`` so no
    request reaches past the requested period and no request is issued with a
    start later than its end.

    Args:
        asset: Base-asset ticker.
        start: Beginning of the period.
        end: End of the period; when ``start >= end`` nothing is requested.
        step: Candle interval string forwarded to ``data_call``.
        quote_asset: Quote currency forwarded to ``data_call``.

    Returns:
        DataFrame indexed by ``"Time"`` with a single ``"Close"`` column.
    """
    limit = 1000
    # NOTE(review): the window is always measured in minutes, so this
    # presumably assumes `step` is one minute or coarser - confirm before
    # using sub-minute intervals.
    window = datetime.timedelta(minutes=limit)

    res = []
    start_time = start
    while start_time < end:
        # Clamp the final window so it stops exactly at `end`.
        end_time = min(start_time + window, end)
        res += data_call(asset, quote_asset, step, start_time, end_time, limit)
        start_time = end_time

    return pd.DataFrame(data=res, columns=["Close", "Time"]).set_index("Time")

def data_call(asset: str, quote_asset: str, step: str, start_time: datetime.datetime, end_time: datetime.datetime, limit: int, timeout: float = 30.0) -> list:
    """Request one batch of klines from the Binance REST API.

    Args:
        asset: Base-asset ticker; concatenated with ``quote_asset`` to form
            the symbol.
        quote_asset: Quote currency.
        step: Candle interval string (for example ``'1m'``).
        start_time: Start of the batch; sent as milliseconds derived from
            ``start_time.timestamp()``.
        end_time: End of the batch; sent the same way.
        limit: Maximum number of klines to request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``extract_data`` result for the decoded JSON response.

    Raises:
        requests.HTTPError: The server answered with an error status.
        requests.Timeout: No answer arrived within ``timeout`` seconds.
    """
    params = {
        'symbol': asset + quote_asset,
        'interval': str(step),
        # Whole seconds scaled to milliseconds.
        'startTime': int(start_time.timestamp()) * 1000,
        'endTime': int(end_time.timestamp()) * 1000,
        'limit': limit,
    }
    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get('https://api.binance.com/api/v3/klines', params=params, timeout=timeout)
    # Fail here with a clear HTTP error instead of handing an error payload
    # to extract_data.
    response.raise_for_status()
    return extract_data(response.json())

def extract_data(data):
    """Turn raw kline rows into ``[close_price, time]`` pairs.

    For every row, element 4 is passed through unchanged as the price.
    Element 6 is read as an integer whose last three digits are dropped
    (presumably a millisecond close time, per the Binance kline layout);
    one second is added and the result is converted with
    ``datetime.datetime.fromtimestamp`` (local time).

    Returns:
        A list with one ``[price, datetime]`` pair per input row, in order.
    """
    def _row_time(row):
        whole_seconds = int(str(row[6])[:-3])
        # +1 s moves the truncated stamp onto the following second.
        return datetime.datetime.fromtimestamp(whole_seconds + 1)

    return [[row[4], _row_time(row)] for row in data]

def load_data(assets: list, start: datetime.datetime, end: datetime.datetime, fields="Close", path="binance_1m"):
    """Load saved per-asset CSVs into one frame keyed by asset.

    Args:
        assets: Tickers to load; each is read from ``<path>/<asset>.csv``.
        start: First timestamp to keep (label slice, inclusive).
        end: Last timestamp to keep (label slice, inclusive).
        fields: Column selection applied to each file.
        path: Directory holding the CSV files.

    Returns:
        DataFrame built from a dict mapping each asset to its selected,
        time-sliced, NaN-free data.
    """
    def _load_one(ticker):
        table = pd.read_csv(path + '/' + ticker + '.csv').set_index("Time")
        selected = table[fields]
        # The index is read as text; convert it so it can be sliced by datetime.
        selected.index = pd.to_datetime(selected.index)
        return selected.loc[start:end].dropna()

    return pd.DataFrame(data={ticker: _load_one(ticker) for ticker in assets})
    

In [3]:
# Base-asset tickers of the universe.
assets = [
    "BTC", "ETH", "BNB", "XRP",
    "ADA", "LTC", "SOL", "UNI",
    "AAVE", "LINK", "MKR", "FTM",
    "EOS", "SAND", "XTZ", "TRX",
]

In [None]:
# Number of tickers in the asset universe.
len(assets)

In [6]:
# Download period: 2024-01-02 00:00 to 2024-03-02 00:00.
start = datetime.datetime(2024, 1, 2)
end = datetime.datetime(2024, 3, 2)
# Fetches every asset again and rewrites its CSV each time this cell runs.
collect_data(assets, start, end)

In [None]:
# Read the saved per-asset CSVs back into one frame (one column per asset),
# restricted to the [start, end] period.
data = load_data(assets,start,end)
data

In [8]:
lag = 240   # number of consecutive rows in the window
i = 10000   # positional index of the first row of the window
# Take the window as a plain array, then rebase every column to its first
# value so each series starts at 1.
x_array = data.iloc[i:i + lag].to_numpy()
x_array = x_array / x_array[0]

In [9]:
def lin_regression_weights(X: np.ndarray, Y: np.ndarray):
    """Ordinary least-squares weights ``W`` minimising ``||X @ W - Y||``.

    Solved with ``np.linalg.lstsq`` rather than by explicitly inverting
    ``X.T @ X``, which loses accuracy when the columns of ``X`` are nearly
    collinear and fails outright when they are exactly dependent.

    Args:
        X: Design matrix of shape ``(n_samples, n_features)``.
        Y: Targets of shape ``(n_samples,)`` or ``(n_samples, n_targets)``.

    Returns:
        Weights of shape ``(n_features,)`` or ``(n_features, n_targets)``;
        column ``j`` holds the coefficients for target column ``j``. For a
        rank-deficient ``X`` the minimum-norm solution is returned.
    """
    # rcond=None selects NumPy's machine-precision based singular-value cutoff.
    weights, _residuals, _rank, _singular_values = np.linalg.lstsq(X, Y, rcond=None)
    return weights

In [10]:
# Fit every column of the window against all columns of the same window.
w = lin_regression_weights(x_array,x_array)

In [None]:
dims = (16,12)
k = 0  # column of x_array to inspect
# plt.subplots returns a (figure, axes) pair; unpack it instead of binding the
# whole tuple to `fig`.
fig, ax = plt.subplots(figsize=dims)
ax.plot(x_array[:,k], label="actual")
# With Y ~ X @ W, the coefficients for target column k are column k of W,
# not row k.
ax.plot(x_array@w[:,k], label="fitted")
ax.legend()
plt.show()

In [None]:
# Show the fitted weights rounded to six decimal places.
print(np.round(w, decimals=6))

In [43]:
def ridge_regression_weights(X: np.ndarray, Y: np.ndarray, lambda_=0.001):
    """Ridge-regression weights ``W = (X.T X + lambda_ I)^-1 X.T Y``.

    The regularised normal equations are solved directly with
    ``np.linalg.solve`` instead of forming the explicit inverse, which is
    both cheaper and more accurate.

    Args:
        X: Design matrix of shape ``(n_samples, n_features)``.
        Y: Targets of shape ``(n_samples,)`` or ``(n_samples, n_targets)``.
        lambda_: Penalty added to the diagonal of ``X.T @ X``.

    Returns:
        Weights of shape ``(n_features,)`` or ``(n_features, n_targets)``;
        column ``j`` holds the coefficients for target column ``j``.

    Raises:
        numpy.linalg.LinAlgError: The regularised matrix is singular.
    """
    gram_regularized = X.T @ X + lambda_ * np.eye(X.shape[1])
    return np.linalg.solve(gram_regularized, X.T @ Y)

In [44]:
# Same self-regression as before, now with a ridge penalty of 0.001;
# this overwrites the earlier `w`.
w = ridge_regression_weights(x_array,x_array,lambda_ = 0.001)

In [None]:
dims = (16,12)
k = 0  # column of x_array to inspect
# plt.subplots returns a (figure, axes) pair; unpack it instead of binding the
# whole tuple to `fig`.
fig, ax = plt.subplots(figsize=dims)
ax.plot(x_array[:,k], label="actual")
# With Y ~ X @ W, the coefficients for target column k are column k of W,
# not row k.
ax.plot(x_array@w[:,k], label="fitted")
ax.legend()
plt.show()

In [None]:
# Residual of the fit for column k: fitted values minus actual values.
# plt.subplots returns a (figure, axes) pair; unpack it instead of binding the
# whole tuple to `fig`.
fig, ax = plt.subplots(figsize=dims)
# Column k of w holds the coefficients for target column k (Y ~ X @ W).
ax.plot(x_array@w[:,k]-x_array[:,k])
plt.show()