In [1]:
import pandas as pd

import test_config
from src.coinmarketcap import CoinMarketCapClient
from src.ctrend import CTREND

In [2]:
coinmarketcap_client = CoinMarketCapClient()
latest_raw = coinmarketcap_client.listing_latest()
latest_raw['is_stablecoin'] = latest_raw['tags'].apply(lambda x: True if 'stablecoin' in x else False)

In [3]:
# exception conditions
cand_by_marketcap = latest_raw.loc[~latest_raw['is_stablecoin']].copy()

value_lower_bound = 1000000
cand_by_marketcap = cand_by_marketcap.loc[cand_by_marketcap['market_cap'] > value_lower_bound]

# 상위 0.5%, 하위 0.5%의 경계 값 계산
except_quantile = 0.005
quantile_lower_bound = cand_by_marketcap['market_cap'].quantile(    except_quantile)
quantile_upper_bound = cand_by_marketcap['market_cap'].quantile(1 - except_quantile)

cand_by_marketcap = cand_by_marketcap[
    cand_by_marketcap['market_cap'].between(quantile_lower_bound, quantile_upper_bound, inclusive="neither")
]

cand_by_marketcap.head(3)

Unnamed: 0,id,name,symbol,slug,num_market_pairs,date_added,tags,max_supply,circulating_supply,total_supply,...,percent_change_7d,percent_change_30d,percent_change_60d,percent_change_90d,market_cap,market_cap_dominance,fully_diluted_market_cap,tvl,last_updated,is_stablecoin
11,5805,Avalanche,AVAX,avalanche,787,2020-07-13T00:00:00.000Z,"[defi, smart-contracts, three-arrows-capital-p...",715748719.0,406616800.0,446619920.29849,...,7.402363,16.981362,35.460185,0.47984,11270280000.0,0.4874,19838550000.0,,2024-10-16T14:02:00.000Z,False
12,5994,Shiba Inu,SHIB,shiba-inu,883,2020-08-01T00:00:00.000Z,"[memes, ethereum-ecosystem, doggone-doggerel]",,589267100000000.0,589518343717612.1,...,9.132151,41.058557,39.637868,6.014476,10990100000.0,0.4749,10994790000.0,,2024-10-16T14:02:00.000Z,False
13,1831,Bitcoin Cash,BCH,bitcoin-cash,934,2017-07-23T00:00:00.000Z,"[mineable, pow, sha-256, marketplace, medium-o...",21000000.0,19775040.0,19775040.625,...,12.982005,16.77143,5.385296,-4.065391,7232133000.0,0.3128,7680125000.0,,2024-10-16T14:02:00.000Z,False


In [4]:
from src.config.env import PROJECT_ROOT
raw = pd.read_csv(
    f'{PROJECT_ROOT}/raw_20230218_20240930.csv', index_col=[0]
).rename(columns={
    'opening_price': 'open',
    'trade_price': 'close',
    'high_price': 'high',
    'low_price': 'low',
    'candle_acc_trade_volume': 'volume',
    'candle_date_time_kst': 'kst_date'
})[
    ['kst_date', 'market', 'open', 'close', 'high', 'low', 'volume']
]
raw['kst_date'] = pd.to_datetime(raw['kst_date']).dt.date

In [53]:
df = []
for _, _df in raw.groupby(by=['market']):
    ctrend = CTREND(_df, 'kst_date')
    ctrend.set_features()
    df+=[ctrend.data]
df=pd.concat(df).dropna()

df['symbol'] = df['market'].apply(lambda x: x.split('-')[1])
df = df.loc[df['symbol'].isin(latest_raw['symbol'])]

# 주가 데이터를 ticker별로 그룹화한 후, 7일 뒤 종가를 shift로 구함
df['future_close'] = df.groupby('market')['close'].shift(-7)
# 7일 뒤 상승률 계산 ((7일 뒤 종가 - 현재 종가) / 현재 종가) * 100
df['y'] = (df['future_close'] - df['close']) / df['close'] * 100
df = df.drop(columns=['future_close', 'open', 'close', 'high', 'low', 'volume']).dropna()

In [55]:
df

Unnamed: 0_level_0,market,RSI,stochK,stochD,stochRSI,TP,SMA_3,volSMA_3,SMA_5,volSMA_5,...,volMACD,MACD_diff_signal,volMACD_diff_signal,Chaikin,Boll_mid,Boll_up,Boll_low,Boll_width,symbol,y
kst_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-08-25,BTC-BNB,43.527819,84.299826,80.953900,16.086965,0.008291,0.008266,1.572600e+00,0.008265,3.604960e+00,...,0.703757,0.000001,-0.669787,1.394682,0.008180,0.008458,0.007902,0.000556,BNB,-6.587513
2023-08-26,BTC-BNB,48.553504,93.864033,84.459237,39.218430,0.008301,0.008267,5.964000e-01,0.008275,2.093840e+00,...,0.741996,-0.000006,-0.554838,-0.796623,0.008193,0.008465,0.007921,0.000544,BNB,-6.251403
2023-08-26,BTC-BNB,48.553504,100.000000,92.721286,39.218430,0.008301,0.008278,1.003900e+00,0.008275,1.508120e+00,...,0.780235,-0.000012,-0.439755,-1.704948,0.008206,0.008470,0.007943,0.000526,BNB,-17.959013
2023-08-27,BTC-BNB,55.209429,100.000000,97.954678,69.853316,0.008312,0.008312,1.500451e+00,0.008290,9.758305e-01,...,0.145529,0.000004,-0.922713,-1.371809,0.008222,0.008483,0.007961,0.000521,BNB,-16.411286
2023-08-27,BTC-BNB,55.209429,100.000000,100.000000,69.853316,0.008312,0.008336,1.589502e+00,0.008310,1.273761e+00,...,-0.489178,0.000019,-1.298338,-0.933863,0.008237,0.008492,0.007983,0.000509,BNB,-9.702217
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-09-19,KRW-ZRX,63.057325,100.000000,60.952381,100.000000,405.333333,390.333333,4.726677e+05,389.000000,3.427602e+05,...,-276955.677310,12.604701,-350086.126239,87494.892810,383.400000,409.640988,357.159012,52.481977,ZRX,11.538462
2024-09-20,KRW-ZRX,70.129870,100.000000,78.095238,100.000000,427.666667,405.333333,7.642242e+05,394.400000,5.490682e+05,...,-227451.990382,15.991453,-222586.403755,19208.715485,384.550000,415.767235,353.332765,62.434471,ZRX,12.470588
2024-09-21,KRW-ZRX,74.301676,100.000000,100.000000,100.000000,446.666667,432.000000,1.392289e+06,410.200000,9.293052e+05,...,-175336.402767,18.777778,-97949.954550,365558.518358,387.900000,432.280650,343.519350,88.761300,ZRX,3.076923
2024-09-22,KRW-ZRX,63.902439,66.666667,88.888889,79.698805,438.333333,435.666667,1.271161e+06,419.600000,9.976759e+05,...,-192399.448959,18.424501,-39530.257948,51868.605420,389.750000,437.459097,342.040903,95.418193,ZRX,9.367681


In [56]:
label_cols = ['market', 'symbol']
train_set = df.loc[df['symbol'].isin(cand_by_marketcap['symbol']) & (df.index <  df.index.max())]
test_set  = df.loc[df['symbol'].isin(cand_by_marketcap['symbol']) & (df.index == df.index.max())]

In [59]:
train_label, train_y, train_X = train_set[label_cols], train_set['y'], train_set.drop(columns=['y'] + label_cols)
test_label, test_y, test_X = test_set[label_cols], test_set['y'], test_set.drop(columns=['y'] + label_cols)

In [60]:
from sklearn.linear_model import ElasticNet
from lightgbm import LGBMRegressor
regr = ElasticNet(random_state=0)
lgbm = LGBMRegressor(random_state=0)

regr.fit(train_X, train_y)
lgbm.fit(train_X, train_y)

  model = cd_fast.enet_coordinate_descent(


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.010528 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7140
[LightGBM] [Info] Number of data points in the train set: 60513, number of used features: 28
[LightGBM] [Info] Start training from score 1.315582


In [61]:
regr_pred = regr.predict(test_X)
lgbm_pred = lgbm.predict(test_X)

Unnamed: 0_level_0,market,symbol
kst_date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-09-23,BTC-DICE,DICE
2024-09-23,BTC-KLAY,KLAY
2024-09-23,BTC-KSP,KSP
2024-09-23,BTC-ORB,ORB
2024-09-23,BTC-XRP,XRP
...,...,...
2024-09-23,KRW-XTZ,XTZ
2024-09-23,KRW-XVS,XVS
2024-09-23,KRW-YFI,YFI
2024-09-23,KRW-ZIL,ZIL


In [73]:
result = pd.concat([
    pd.Series(regr_pred).rename('elastic_net'),
    pd.Series(regr_pred).rename('lightgbm'),], 
    axis=1
)
result['pred_mean'] = (result['elastic_net'] + result['lightgbm']) / 2

In [80]:
result = pd.concat([
    test_label.reset_index(),
    test_y.rename('real').reset_index(),
    result
], axis=1)


In [89]:
result.loc[result['pred_mean'] >= result['pred_mean'].quantile(1-0.2)]

Unnamed: 0,kst_date,market,symbol,kst_date.1,real,kst_date.2,real.1,elastic_net,lightgbm,pred_mean
6,2024-09-23,KRW-AAVE,AAVE,2024-09-23,-11.456982,2024-09-23,-11.456982,3.229563,3.229563,3.229563
12,2024-09-23,KRW-ALICE,ALICE,2024-09-23,-2.032787,2024-09-23,-2.032787,3.360065,3.360065,3.360065
14,2024-09-23,KRW-ANKR,ANKR,2024-09-23,-6.966512,2024-09-23,-6.966512,3.592662,3.592662,3.592662
16,2024-09-23,KRW-APT,APT,2024-09-23,-0.094967,2024-09-23,-0.094967,3.359946,3.359946,3.359946
17,2024-09-23,KRW-AQT,AQT,2024-09-23,-2.530675,2024-09-23,-2.530675,3.901447,3.901447,3.901447
19,2024-09-23,KRW-ASM,ASM,2024-09-23,6.646751,2024-09-23,6.646751,3.396235,3.396235,3.396235
20,2024-09-23,KRW-ATOM,ATOM,2024-09-23,3.908795,2024-09-23,3.908795,3.269853,3.269853,3.269853
24,2024-09-23,KRW-BAL,BAL,2024-09-23,56.111929,2024-09-23,56.111929,3.24674,3.24674,3.24674
25,2024-09-23,KRW-BAT,BAT,2024-09-23,-4.296875,2024-09-23,-4.296875,3.146141,3.146141,3.146141
27,2024-09-23,KRW-BEL,BEL,2024-09-23,-6.682028,2024-09-23,-6.682028,3.272791,3.272791,3.272791
