In [292]:
import datetime
import functools
import glob
import time
from typing import Dict, List, Optional

import numpy as np
import pandas as pd

# Output directory for the factor-return CSV files. The trailing slash matters:
# file names are appended to this string by plain concatenation.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
STOREPath =  "/Users/maoguan/workspace/RL-AMM/data/factorData/"
# Input directory holding the gzip-compressed OHLC CSV files read below.
OHLCPath = "/Users/maoguan/workspace/RL-AMM/data/lakeAPIData/OHLC/"


# glob returns matches in arbitrary, filesystem-dependent order; sort them so
# the concatenated panel has the same row order on every run.
fileLists = sorted(glob.glob(OHLCPath + "*"))


def extractDateFromTime(timestamp:pd.Timestamp)->datetime.datetime:
    """
    Truncate a timestamp to midnight of its calendar day.

    Parameters
    ----------
    timestamp: the timestamp whose year, month and day are kept

    Returns: a naive ``datetime.datetime`` at 00:00:00 of the same day. Only the
    year, month and day are carried over, so time-of-day and any timezone
    information are discarded. (The value is a ``datetime.datetime``, not a
    ``datetime.date``; the annotation now says so.)
    -------

    """
    return datetime.datetime(
        year=timestamp.year,
        month=timestamp.month,
        day=timestamp.day,
    )

# Read every OHLC file, parse its timestamps and tag each row with its calendar day.
frames = []

for file in fileLists:

    minuteLevelOHLC = pd.read_csv(file, compression="gzip").convert_dtypes()
    minuteLevelOHLC['origin_time'] = pd.to_datetime(minuteLevelOHLC['origin_time'])
    minuteLevelOHLC['date'] = minuteLevelOHLC['origin_time'].apply(extractDateFromTime)
    frames.append(minuteLevelOHLC)

# pd.concat would fail with a generic "No objects to concatenate"; say which
# directory turned out to be empty instead.
if not frames:
    raise ValueError("No OHLC files found under " + OHLCPath)

data = pd.concat(frames)
# Per-bar simple return: (close - open) / open.
data['return'] = (data['close'] - data['open'])/data['open']
#Market return is defined as the average return of all the cryptocurrencies return
# The string "mean" is used instead of np.mean: passing NumPy callables to
# groupby.agg is deprecated in recent pandas and "mean" keeps the same result.
mktReturn = (
    data.groupby("origin_time")
    .agg({'return': 'mean'})
    .rename(columns={'return': 'mktReturn'})
)
data = pd.merge(data, mktReturn, on='origin_time', how='left')
# Drops every row that has a missing value in any column.
data = data.dropna()


In [293]:
# Number of distinct symbols present in the panel.
TOTALNUMOFCOINS = len(set(data['symbol']))
# One third of the coin count, truncated to an int; used as a slice end by the
# bucket functions to locate the low cut-off value.
LOWLEVEL = int(TOTALNUMOFCOINS / 3)
# Coin count minus one third of it, truncated to an int; used as a slice end by
# the bucket functions to locate the high cut-off value.
HIGHLEVEL = int(TOTALNUMOFCOINS - TOTALNUMOFCOINS / 3)

In [294]:
def generateLiqBucketSymbols(data:pd.DataFrame, lowLevel:Optional[int]=None, highLevel:Optional[int]=None)->Dict:
    """
    Split symbols into low / medium / high liquidity buckets.

    Liquidity of a symbol is the mean of its 'trades' column. Symbols are ranked
    ascending by that mean; the first ``lowLevel`` ranks form the low bucket,
    ranks up to and including ``highLevel`` form the medium bucket and the rest
    form the high bucket. Symbols tied with a cut-off value land in the lower
    of the two buckets.

    Parameters
    ----------
    data: a panel dataframe with at least the columns 'symbol' and 'trades'
    lowLevel: rank closing the low bucket; defaults to the module-level LOWLEVEL
    highLevel: rank closing the medium bucket; defaults to the module-level HIGHLEVEL

    Returns: a dict with keys 'low', 'medium', 'high' mapping to arrays of symbols
    -------

    Raises: ValueError if either rank is smaller than 1
    """
    if lowLevel is None:
        lowLevel = LOWLEVEL
    if highLevel is None:
        highLevel = HIGHLEVEL
    if lowLevel < 1 or highLevel < 1:
        raise ValueError("lowLevel and highLevel must both be at least 1")

    # "mean" instead of np.mean: NumPy callables in groupby.agg are deprecated.
    Liq = (
        data.groupby("symbol")
        .agg({'trades': 'mean'})
        .rename(columns={'trades': 'Liq'})
        .sort_values(by='Liq')
    )
    # Cut-off values: the largest liquidity among the first lowLevel / highLevel ranks.
    LOWLiq = Liq['Liq'].values[0:lowLevel].max()
    HIGHLiq = Liq['Liq'].values[0:highLevel].max()
    lowLiqSymbols = Liq[Liq['Liq']<=LOWLiq].reset_index()['symbol'].values
    # The symbol at rank highLevel belongs to the medium bucket. The previous
    # "< HIGHLiq" / ">= HIGHLiq" split pushed it into 'high', which made the
    # buckets uneven (e.g. 3/2/4 for nine symbols).
    mediumLiqSymbols = Liq[(Liq['Liq']>LOWLiq)&(Liq['Liq']<=HIGHLiq)].reset_index()['symbol'].values
    highLiqSymbols = Liq[Liq['Liq']>HIGHLiq].reset_index()['symbol'].values
    return {'low': lowLiqSymbols, 'medium':mediumLiqSymbols, 'high':highLiqSymbols}

# Liquidity buckets for the full panel, keyed 'low' / 'medium' / 'high'.
LiqBucketSyms = generateLiqBucketSymbols(data)

In [295]:
def getBeta(xVar:str, yVar:str, df:pd.DataFrame) -> float:
    """
    Least-squares slope of column ``yVar`` on column ``xVar``.

    Parameters
    ----------
    xVar: name of the explanatory column
    yVar: name of the response column
    df: dataframe holding both columns and a 'received_time' column

    Returns: sum of the products of the centred x and y values divided by the
    sum of the squared centred x values
    -------

    """
    ordered = df.sort_values("received_time").set_index("received_time")
    xs, ys = ordered[xVar].values, ordered[yVar].values
    # Deviations from the respective sample means.
    xCentered = xs - np.mean(xs)
    yCentered = ys - np.mean(ys)
    coMovement = np.sum(xCentered * yCentered)
    xSpread = np.sum(xCentered ** 2)
    return coMovement / xSpread


# Market beta of every coin: the slope of the coin's return regressed on the
# market return, i.e. cov(return, mktReturn) / var(mktReturn). getBeta regresses
# its second column on its first, so the market return must be passed as xVar.
# The previous argument order divided by the variance of the coin's own return,
# which is not the beta to the market described in this notebook.
Beta = data.groupby("symbol").apply(lambda x : getBeta('mktReturn', 'return', x))
Beta = pd.DataFrame({'beta':Beta})

def generateVolBucketSymbols(Beta:pd.DataFrame, lowLevel:Optional[int]=None, highLevel:Optional[int]=None) ->Dict:
    """
    Split symbols into low / medium / high buckets by their 'beta' value.

    Symbols are ranked ascending by beta; the first ``lowLevel`` ranks form the
    low bucket, ranks up to and including ``highLevel`` form the medium bucket
    and the rest form the high bucket. Symbols tied with a cut-off value land in
    the lower of the two buckets.

    Parameters
    ----------
    Beta: dataframe with a 'beta' column, indexed by 'symbol'
    lowLevel: rank closing the low bucket; defaults to the module-level LOWLEVEL
    highLevel: rank closing the medium bucket; defaults to the module-level HIGHLEVEL

    Returns: a dict with keys 'low', 'medium', 'high' mapping to arrays of symbols
    -------

    Raises: ValueError if either rank is smaller than 1
    """
    if lowLevel is None:
        lowLevel = LOWLEVEL
    if highLevel is None:
        highLevel = HIGHLEVEL
    if lowLevel < 1 or highLevel < 1:
        raise ValueError("lowLevel and highLevel must both be at least 1")

    Beta = Beta.sort_values(by='beta')
    # Cut-off values: the largest beta among the first lowLevel / highLevel ranks.
    LOWBeta = Beta['beta'].values[0:lowLevel].max()
    HIGHBeta = Beta['beta'].values[0:highLevel].max()
    lowBetaSymbols = Beta[Beta['beta']<=LOWBeta].reset_index()['symbol'].values
    # The symbol at rank highLevel belongs to the medium bucket. The previous
    # "< HIGHBeta" / ">= HIGHBeta" split pushed it into 'high', which made the
    # buckets uneven (e.g. 3/2/4 for nine symbols).
    mediumBetaSymbols = Beta[(Beta['beta']>LOWBeta)&(Beta['beta']<=HIGHBeta)].reset_index()['symbol'].values
    highBetaSymbols = Beta[Beta['beta']>HIGHBeta].reset_index()['symbol'].values
    return {'low':lowBetaSymbols, 'medium':mediumBetaSymbols, 'high':highBetaSymbols}

# Beta-based buckets for all symbols, keyed 'low' / 'medium' / 'high'.
VolBucketSyms = generateVolBucketSymbols(Beta)

In [296]:
import functools

def getMomentum(basicRet:str, df:pd.DataFrame, windows:List = [60,2*60, 3*60,6*60,12*60]) -> pd.DataFrame:
    """
    Add trailing-sum columns of a return column and their total ('momentum').

    Parameters
    ----------
    basicRet: name of the return column to accumulate; also the prefix of the
        new per-window columns (``basicRet + str(window)``)
    df: dataframe for a single symbol with the columns 'received_time' and
        ``basicRet``; it is not modified
    windows: trailing window lengths, counted in rows. With one row per minute
        the defaults are 1, 2, 3, 6 and 12 hours — TODO confirm the sampling.
        NOTE(review): the notebook prose lists 3, 6, 12 and 18 hours; confirm
        which set of horizons is intended.

    Returns: a copy of ``df`` sorted by 'received_time' with one trailing-sum
    column per window plus 'momentum', the row-wise sum of those columns (NaN
    until the longest window is full)
    -------

    Raises: ValueError if ``windows`` is empty
    """
    if len(windows) == 0:
        raise ValueError("windows must contain at least one look-back length")

    # sort_values returns a new frame, so the column assignments below do not
    # touch the caller's dataframe.
    df = df.sort_values(by='received_time')
    momentum = None
    for window in windows:
        column = basicRet+str(window)
        # Accumulate the column named by basicRet (previously 'return' was
        # hard-coded here, silently ignoring the argument).
        df[column] = df[basicRet].rolling(window).sum()
        momentum = df[column] if momentum is None else momentum + df[column]
    df['momentum'] = momentum

    return df

# Per-symbol trailing-return columns and their sum, stacked back into one frame.
momentum = data.groupby("symbol").apply(lambda x: getMomentum('return',x)).reset_index(drop=True)
# Daily average momentum per symbol. The string "mean" replaces np.mean because
# passing NumPy callables to groupby.agg is deprecated in recent pandas.
momentum = momentum.groupby(["symbol","date"]).agg({'momentum':'mean'}).reset_index()
#last day momentum
momentum = momentum[momentum['date'] == '2024-01-30']

def generateMomBucketSymbols(Mom:pd.DataFrame, date:str, lowLevel:Optional[int]=None, highLevel:Optional[int]=None)->Dict:
    """
    Split symbols into low / medium / high momentum buckets for one date.

    Rows of ``Mom`` whose 'date' equals ``date`` are ranked ascending by
    'momentum'; the first ``lowLevel`` ranks form the low bucket, ranks up to
    and including ``highLevel`` form the medium bucket and the rest form the
    high bucket. Symbols tied with a cut-off value land in the lower of the two
    buckets.

    Parameters
    ----------
    Mom: dataframe with the columns 'symbol', 'date' and 'momentum'
    date: the date to select, compared against the 'date' column with ``==``
    lowLevel: rank closing the low bucket; defaults to the module-level LOWLEVEL
    highLevel: rank closing the medium bucket; defaults to the module-level HIGHLEVEL

    Returns: a dict with keys 'low', 'medium', 'high' mapping to arrays of symbols
    -------

    Raises: ValueError if either rank is smaller than 1
    """
    if lowLevel is None:
        lowLevel = LOWLEVEL
    if highLevel is None:
        highLevel = HIGHLEVEL
    if lowLevel < 1 or highLevel < 1:
        raise ValueError("lowLevel and highLevel must both be at least 1")

    Mom = Mom[Mom['date']==date].sort_values(by='momentum')
    # Cut-off values: the largest momentum among the first lowLevel / highLevel ranks.
    LOWMom = Mom['momentum'].values[0:lowLevel].max()
    HIGHMom = Mom['momentum'].values[0:highLevel].max()
    lowMomSymbols = Mom[Mom['momentum']<=LOWMom].reset_index()['symbol'].values
    # The symbol at rank highLevel belongs to the medium bucket. The previous
    # "< HIGHMom" / ">= HIGHMom" split pushed it into 'high', which made the
    # buckets uneven (e.g. 3/2/4 for nine symbols).
    mediumMomSymbols = Mom[(Mom['momentum']>LOWMom)&(Mom['momentum']<=HIGHMom)].reset_index()['symbol'].values
    highMomSymbols = Mom[Mom['momentum']>HIGHMom].reset_index()['symbol'].values
    return {'low':lowMomSymbols, 'medium':mediumMomSymbols, 'high':highMomSymbols}

# Momentum buckets for 2024-01-30, keyed 'low' / 'medium' / 'high'.
MomBucketSyms = generateMomBucketSymbols(momentum,'2024-01-30')

In [300]:
class riskFactor(object):
    """
    An equal-weighted risk factor built from a set of component symbols.

    The factor return at each 'origin_time' is the mean 'return' of the
    component symbols at that time.
    """

    def __init__(self, name, componentSymbols, freq = '1m'):
        """
        Parameters
        ----------
        name: factor name; used as the return column name and the CSV file stem
        componentSymbols: collection of symbols that make up the factor
        freq: frequency label; stored and exposed via ``freq`` but not otherwise
            used by this class
        """
        self._factorName = name
        self._symbols = componentSymbols
        # Honour the caller's value (previously '1m' was always stored).
        self._freq = freq
        # Filled in by getFactorReturnTS; None until then.
        self._factorTS = None
        return

    @property
    def name(self):
        """Factor name."""
        return self._factorName

    @property
    def symbols(self):
        """Component symbols of the factor."""
        return self._symbols

    @property
    def freq(self):
        """Frequency label given at construction."""
        return self._freq

    @property
    def factorRetTS(self):
        """Factor return series, or None if getFactorReturnTS has not been called."""
        return self._factorTS

    def getFactorReturnTS(self, data:pd.DataFrame)->pd.DataFrame:
        """
        Calculate risk factor returns given frequency and symbols
        Parameters
        ----------
        data: a panel dataframe with all symbols and their corresponding returns

        Returns: a timeSeries table of risk factor returns and time; the same
        table is also kept on the instance and exposed via ``factorRetTS``
        -------

        """
        members = data[data['symbol'].isin(self._symbols)]
        # "mean" instead of np.mean: NumPy callables in groupby.agg are deprecated.
        factorRet = (
            members.groupby("origin_time")
            .agg({'return': 'mean'})
            .rename(columns={'return': self._factorName})
        )
        self._factorTS = factorRet
        return factorRet


    def storeData(self, storePath:Optional[str]=None):
        """
        Write the factor return series to ``<storePath><name>.csv``.

        Parameters
        ----------
        storePath: directory prefix, including the trailing separator; defaults
            to the module-level STOREPath

        The 'origin_time' index is written as the first column so the file keeps
        the time stamp of every return (it was previously dropped).

        Raises: ValueError if getFactorReturnTS has not been called yet
        """
        if self._factorTS is None:
            raise ValueError("No factor returns to store; call getFactorReturnTS first")
        if storePath is None:
            storePath = STOREPath
        self._factorTS.to_csv(storePath+self._factorName+".csv", index = True)
        

## Systematic Risk Factors

>* Liquidity: High, Medium, Low. Liquidity is defined as the average trading volume.
>* Momentum: High, Medium, Low. Momentum is defined as the crypto's return over the past 3 hours, 6 hours, 12 hours, and 18 hours.
>* Market: The average return across all crypto coins.
>* Volatility: High, Medium, Low. Volatility is defined as the beta coefficient to the market risk.

In [301]:
def _buildAndStoreFactor(factorName, componentSymbols):
    """Create a riskFactor, compute its return series from ``data`` and write it to disk."""
    factor = riskFactor(factorName, componentSymbols)
    factor.getFactorReturnTS(data)
    factor.storeData()
    return factor


# Market factor: every symbol in the panel.
marketRisk = _buildAndStoreFactor("market", list(set(data['symbol'].values)))

#Liquidity Risk Bucket
lowLiqRisk = _buildAndStoreFactor("Liq:low", LiqBucketSyms['low'])
medLiqRisk = _buildAndStoreFactor("Liq:med", LiqBucketSyms['medium'])
highLiqRisk = _buildAndStoreFactor("Liq:high", LiqBucketSyms['high'])

#Volatility Risk Bucket
lowVolRisk = _buildAndStoreFactor("Vol:low", VolBucketSyms['low'])
medVolRisk = _buildAndStoreFactor("Vol:med", VolBucketSyms['medium'])
highVolRisk = _buildAndStoreFactor("Vol:high", VolBucketSyms['high'])

#Momentum Risk Bucket
lowMomRisk = _buildAndStoreFactor("Mom:low", MomBucketSyms['low'])
medMomRisk = _buildAndStoreFactor("Mom:med", MomBucketSyms['medium'])
highMomRisk = _buildAndStoreFactor("Mom:high", MomBucketSyms['high'])

In [305]:
# Display the output directory the factor CSV files are written to.
STOREPath

'/Users/maoguan/workspace/RL-AMM/data/factorData/'

In [306]:
# Write the market factor series to marketRisk.csv; to_csv is called with its
# defaults here, so the 'origin_time' index is included in the file.
marketRisk.factorRetTS.to_csv(STOREPath+"marketRisk.csv")