# Importing libraries

In [1]:
import os
import pandas as pd

os.chdir("..")



# Functions

## Data loading and preprocessing

In [2]:
def create_global_dataset(path, min_days=150):
    df_global = pd.DataFrame()
    for file in os.listdir(path):
        if file.split(".")[1] != "csv":
            continue
        df = pd.read_csv(path + file)
        df["token"] = file.split(".")[0]
        df["timestamp"] = pd.to_datetime(df["timestamp"])
        if len(df) > min_days:
                df_global = pd.concat([df_global, df])
    df_global.to_csv(f"./data/df_global_{path.split('/')[-2]}.csv", index=False)
    return df_global


In [51]:
def compute_change(df, periods=7):
    df["change_1d"] = df.groupby("token")["close"].pct_change(periods=1).values
    df["change_7d"] = df.groupby("token")["close"].pct_change(periods=7).values
    df["change_30d"] = df.groupby("token")["close"].pct_change(periods=30).values
    df["change_90d"] = df.groupby("token")["close"].pct_change(periods=90).values
    
    df["volume_7d"] = df.groupby("token")["volume"].rolling(window=7).sum().values
    df["volume_30d"] = df.groupby("token")["volume"].rolling(window=30).sum().values
    df["volume_90d"] = df.groupby("token")["volume"].rolling(window=90).sum().values

    df["volume_change_7d"] = df.groupby("token")["volume"].pct_change(periods=7).values
    df["volume_change_30d"] = df.groupby("token")["volume"].pct_change(periods=30).values
    df["volume_change_90d"] = df.groupby("token")["volume"].pct_change(periods=90).values
    return df

In [58]:
def select_top10_asset(df):
    df = df.loc[df["timestamp"] == df["timestamp"].max()].drop_duplicates(subset=['token'], keep='first')
    df = df.sort_values(by=["volume_30d", "change_30d", "change_90d"], ascending=False)
    return df["token"].to_list()[:10]

# Computing global dataset

In [53]:
df_global = create_global_dataset("./data/1d/")


In [54]:
df_global = compute_change(df_global)
df_global.head(500)


Unnamed: 0,timestamp,open,high,low,close,volume,token,change_1d,change_7d,change_30d,change_90d,volume_7d,volume_30d,volume_90d,volume_change_7d,volume_change_30d,volume_change_90d
0,2019-09-29,0.03103,0.04400,0.03103,0.03556,25899875.5,HBAR,,,,,,,,,,
1,2019-09-30,0.03555,0.04745,0.03481,0.03780,40937695.7,HBAR,0.062992,,,,,,,,,
2,2019-10-01,0.03799,0.04000,0.03692,0.03876,16982421.3,HBAR,0.025397,,,,,,,,,
3,2019-10-02,0.03890,0.03967,0.03553,0.03750,8858024.3,HBAR,-0.032508,,,,,,,,,
4,2019-10-03,0.03750,0.04025,0.03557,0.03750,8395022.7,HBAR,0.000000,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,2021-02-05,0.08978,0.09997,0.08944,0.09950,172481167.3,HBAR,0.105310,0.141841,1.595201,2.345662,41342184.03,1.340342e+08,5.651388e+08,0.043798,-0.172462,5.159736
496,2021-02-06,0.09950,0.11369,0.09800,0.10366,245991072.0,HBAR,0.041809,0.271746,1.710774,2.395349,62658019.93,1.557697e+08,5.851424e+08,0.487313,0.381961,27.316311
497,2021-02-07,0.10349,0.10549,0.08953,0.09670,172635827.2,HBAR,-0.067143,0.171979,0.587588,2.068867,89876969.83,1.762846e+08,6.095681e+08,-0.435011,-0.841258,10.864017
498,2021-02-08,0.09653,0.10233,0.09310,0.10014,143688780.3,HBAR,0.035574,0.123527,0.973591,2.185115,94809845.60,1.857034e+08,6.182320e+08,-0.381739,-0.545657,3.633485


In [59]:
asset_selected = ["USDT", "BTC", "ETH", "BNB", "SOL", "XRP", "ADA", "AVAX", "DOT", "MATIC"] + select_top10_asset(df_global)
df_global = df_global.loc[df_global["token"].isin(asset_selected)]