# get_data: 필요한 부분만 추출

# Imports

In [1]:
import platform
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import networkx as nx  # used for graph-based pair matching
import numpy as np
import pandas as pd
import statsmodels.api as sm
from backtesting import Backtest, Strategy
# from backtesting.lib import crossover
# from backtesting.test import SMA
from joblib import Parallel, delayed
from matplotlib import font_manager, rc
from scipy import stats
from sqlalchemy import create_engine # , MetaData, Table
from statsmodels.tsa.stattools import adfuller # coint

# 운영체제에 따라 적절한 한글 폰트 설정
if platform.system() == 'Darwin':  # macOS의 경우
    rc('font', family='AppleGothic')

plt.rcParams['axes.unicode_minus'] = False

  from .autonotebook import tqdm as notebook_tqdm


# Data

In [2]:
# Path of the SQLite database file and the engine used to read from it.
db_path = 'crypto_resampled.db'
engine = create_engine(f'sqlite:///{db_path}')

# Load the price_30min table, parse the Date column as datetimes,
# use it as the index and sort the rows by that index.
price_30_min = (
    pd.read_sql_table('price_30min', con=engine)
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
    .sort_index()
)

# Preview the loaded frame.
print('price_30_min.head()')
display(price_30_min.head())

# Column labels of the loaded table.
top_200 = price_30_min.columns.tolist()
print('top_200')
print(top_200)

ValueError: Table price_30min not found

In [3]:
# Drop the last four labels, then keep only those that are columns of price_30_min.
top_200_new = top_200[:-4]
existing_stocks = [ticker for ticker in top_200_new if ticker in price_30_min.columns]
price_30_min_filltered = price_30_min.loc[:, existing_stocks]
display(price_30_min_filltered)

Unnamed: 0_level_0,CVC,RLC,RAD,UTK,AERGO,BTC,ETH,XRP,SOL,BNB,...,CELO,BEL,DASH,CVX,AGLD,DEXE,SCRT,ZEC,FARM,ACA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-06-01 00:00:00,0.0843,1.523,1.893,0.09260,0.1087,27281.27,1886.73,0.5129,20.97,307.30,...,0.5050,0.7900,42.04,4.188,0.445,2.419,0.3940,32.30,27.08,0.0487
2023-06-01 00:30:00,0.0836,1.512,1.910,0.09200,0.1082,27065.77,1871.27,0.5082,20.78,306.10,...,0.5010,0.7788,41.84,4.156,0.441,2.412,0.3900,32.10,26.99,0.0493
2023-06-01 01:00:00,0.0839,1.523,1.930,0.09220,0.1086,27140.90,1877.97,0.5088,20.82,306.70,...,0.5030,0.7866,42.02,4.169,0.441,2.408,0.3900,32.10,27.04,0.0487
2023-06-01 01:30:00,0.0838,1.518,1.914,0.09180,0.1085,27078.56,1874.17,0.5088,20.77,306.70,...,0.5020,0.7827,41.87,4.161,0.439,2.409,0.3910,32.10,27.09,0.0486
2023-06-01 02:00:00,0.0834,1.513,1.930,0.09160,0.1082,27076.68,1872.26,0.5075,20.71,306.00,...,0.5030,0.7815,41.62,4.148,0.439,2.410,0.3910,32.10,27.06,0.0484
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-03-18 22:00:00,0.0987,1.272,0.804,0.03869,0.0783,82274.99,1911.70,2.2663,124.65,630.06,...,0.3498,0.6263,23.26,2.040,0.966,18.320,0.2022,33.34,28.12,0.0429
2025-03-18 22:30:00,0.0997,1.277,0.806,0.03867,0.0778,82395.74,1917.93,2.2809,125.02,629.57,...,0.3512,0.6313,23.64,2.052,0.975,18.260,0.2042,33.38,28.28,0.0432
2025-03-18 23:00:00,0.1002,1.284,0.808,0.03884,0.0784,82777.36,1933.76,2.2903,125.38,627.39,...,0.3537,0.6313,23.98,2.056,0.988,18.310,0.2047,33.43,28.40,0.0433
2025-03-18 23:30:00,0.1005,1.280,0.809,0.03880,0.0787,82715.03,1931.54,2.2851,125.33,627.81,...,0.3537,0.6312,24.12,2.069,0.984,18.365,0.2041,33.14,28.41,0.0434


# Defs

In [4]:
class PairSelector:
    """Screen ordered ticker pairs with correlation and spread-based filters.

    Every ordered pair ``(a, b)`` with ``a != b`` built from the columns of
    ``prices`` is a candidate. ``run()`` applies the correlation filter and then
    the ADF filter; the normality and relative-volatility filters are available
    as methods but are not part of ``run()``.

    Attributes:
        prices: DataFrame whose columns are tickers.
        pairs: list of all ordered candidate pairs.
        filtered_pairs_1: pairs that passed the correlation filter in the last ``run()``.
        filtered_pairs_2: pairs that passed the ADF filter in the last ``run()``.
    """

    def __init__(self, prices):
        """Store ``prices`` and build the candidate pair list."""
        self.prices = prices
        self.pairs = self.generate_pairs()
        self.filtered_pairs_1 = []
        self.filtered_pairs_2 = []
        print(f"[PairSelector] 총 후보 페어 개수: {len(self.pairs)}")

    def generate_pairs(self):
        """Return every ordered pair of distinct column labels (both directions)."""
        tickers = self.prices.columns.tolist()
        return [
            (first, second)
            for first in tickers
            for second in tickers
            if first != second
        ]

    def _fit_spread(self, pair):
        """Regress ``pair[0]`` on ``pair[1]`` (with intercept) and return the residual spread."""
        price1 = self.prices[pair[0]]
        price2 = self.prices[pair[1]]
        model = sm.OLS(price1, sm.add_constant(price2)).fit()
        # add_constant prepends the constant, so params are (intercept, slope).
        alpha, beta = model.params
        return price1 - beta * price2 - alpha

    def filter_pairs(self, pair, min_abs_corr=0.95):
        """Correlation filter.

        Returns ``pair`` when the absolute correlation between its two columns
        is at least ``min_abs_corr``; otherwise ``None``. Any exception is
        printed and treated as a rejection.
        """
        try:
            corr = self.prices[pair[0]].corr(self.prices[pair[1]])
            return pair if abs(corr) >= min_abs_corr else None
        except Exception as e:
            print(e)
            return None

    def filter_pairs3(self, pair, min_p_value=0.05):
        """Normality filter on the OLS spread (``scipy.stats.normaltest``).

        Returns ``pair`` when the test p-value exceeds ``min_p_value`` (normality
        is not rejected); otherwise ``None``. Any exception is printed and
        treated as a rejection.
        """
        try:
            spread = self._fit_spread(pair)
            _, p_value = stats.normaltest(spread.dropna())
            return pair if p_value > min_p_value else None
        except Exception as e:
            print(e)
            return None

    def filter_pairs4(self, pair, max_relative_volatility=0.125):
        """Relative-volatility filter on the OLS spread.

        Relative volatility is the spread's standard deviation divided by its
        range (max - min). Returns ``pair`` when that ratio is at most
        ``max_relative_volatility``; a zero or NaN range rejects the pair.
        Any exception is printed and treated as a rejection.
        """
        try:
            spread = self._fit_spread(pair)
            price_range = spread.max() - spread.min()
            if price_range == 0:
                # Ratio is undefined for a flat spread.
                return None
            relative_volatility = spread.std() / price_range
            # A NaN ratio compares False and therefore rejects the pair.
            return pair if relative_volatility <= max_relative_volatility else None
        except Exception as e:
            print(e)
            return None

    def filter_pairs2(self, pair, max_p_value=0.05):
        """Stationarity filter: ADF test on the OLS spread.

        Returns ``pair`` when the ADF p-value is below ``max_p_value``; otherwise
        ``None``. Any exception is printed and treated as a rejection.
        """
        try:
            spread = self._fit_spread(pair)
            adf_result = adfuller(spread)
            return pair if adf_result[1] < max_p_value else None
        except Exception as e:
            print(e)
            return None

    def _apply_filter(self, pair_filter, pairs):
        """Run ``pair_filter`` over ``pairs`` in parallel and drop the rejected (``None``) results."""
        outcomes = Parallel(n_jobs=-1, verbose=0)(
            delayed(pair_filter)(pair) for pair in pairs
        )
        return [outcome for outcome in outcomes if outcome is not None]

    def run(self):
        """Apply the correlation filter, then the ADF filter, and return the survivors.

        The intermediate and final survivor lists are also stored on
        ``filtered_pairs_1`` and ``filtered_pairs_2``. ``filter_pairs3`` and
        ``filter_pairs4`` are intentionally not applied here.
        """
        results = self._apply_filter(self.filter_pairs, self.pairs)
        self.filtered_pairs_1 = results
        print(f'첫 번째 필터에서 살아남은 페어의 개수는 {len(results)}개입니다---------')

        results2 = self._apply_filter(self.filter_pairs2, results)
        self.filtered_pairs_2 = results2
        print(f'두 번째 필터에서 살아남은 페어의 개수는 {len(results2)}개입니다---------')

        return results2

class HalfLifeEstimator:
    """Rank candidate pairs by the mean-reversion half-life of their OLS spread.

    Attributes:
        prices: DataFrame whose columns are tickers.
        pairs: iterable of ordered ``(ticker1, ticker2)`` pairs to evaluate.
        pair_stats: ``{pair: {'halflife', 'alpha', 'beta', 'spread'}}`` filled by
            ``select_pairs`` / ``select_pairs_graph``.
        half_lives, alphas, betas: per-pair values filled by ``select_pair``.
    """

    def __init__(self, prices, pairs):
        """Store the inputs and create empty result containers."""
        self.prices = prices
        self.pairs = pairs
        self.pair_stats = {}
        # Used by select_pair(); they must exist before that method runs.
        self.half_lives = {}
        self.alphas = {}
        self.betas = {}

    def estimate_half_life(self, spread):
        """Estimate the half-life of ``spread`` from the regression dS_t = a + b * S_{t-1}.

        theta is ``-b`` (the negated slope on the lagged spread, not the
        intercept) and the half-life is ``ln(2) / theta``, measured in rows of
        ``spread``.

        Returns:
            The half-life as a float, or ``np.nan`` when theta is not positive
            or there are too few usable observations to fit a slope.
        """
        delta_spread = spread.diff().dropna()   # dS_t = S_t - S_{t-1}
        spread_lag = spread.shift(1).dropna()   # S_{t-1}
        spread_lag, delta_spread = spread_lag.align(delta_spread, join='inner')

        x = spread_lag.to_numpy(dtype=float)
        y = delta_spread.to_numpy(dtype=float)
        if x.size < 2:
            return np.nan

        # Closed-form OLS slope of y on x with an intercept.
        x_centered = x - x.mean()
        denominator = float(x_centered @ x_centered)
        if denominator == 0:
            return np.nan
        slope = float(x_centered @ (y - y.mean())) / denominator

        theta = -slope
        if theta > 0:
            return np.log(2) / theta
        return np.nan

    def _fit_pair(self, pair):
        """Fit ``pair[0] ~ const + pair[1]`` and return the pair's statistics.

        Returns a dict with ``halflife``, ``alpha``, ``beta`` and ``spread``, or
        ``None`` when the half-life is not finite and positive.
        """
        price1 = self.prices[pair[0]]
        price2 = self.prices[pair[1]]
        model = sm.OLS(price1, sm.add_constant(price2)).fit()
        # add_constant prepends the constant, so params are (intercept, slope).
        alpha, beta = model.params
        spread = price1 - beta * price2 - alpha
        halflife = self.estimate_half_life(spread)
        if not (np.isfinite(halflife) and halflife > 0):
            return None
        return {'halflife': halflife, 'alpha': alpha, 'beta': beta, 'spread': spread}

    def select_pair(self):
        """Return the single pair with the smallest half-life.

        Returns:
            ``(pair, halflife, alpha, beta)``.

        Raises:
            ValueError: when no pair has a finite, positive half-life.
        """
        for pair in self.pairs:
            fitted = self._fit_pair(pair)
            if fitted is not None:
                self.half_lives[pair] = fitted['halflife']
                self.alphas[pair] = fitted['alpha']
                self.betas[pair] = fitted['beta']

        if not self.half_lives:
            raise ValueError("no pair with a finite, positive half-life")

        selected_pair = min(self.half_lives, key=self.half_lives.get)
        smallest_value = self.half_lives[selected_pair]
        print(f"[HalfLifeEstimator.select_pair] 선택된 페어: {selected_pair} (halflife: {smallest_value:.4f})")
        return selected_pair, self.half_lives[selected_pair], self.alphas[selected_pair], self.betas[selected_pair]

    def select_pairs(self, k=3):
        """Return up to ``k`` ``(pair, stats_dict)`` items with the smallest half-lives."""
        for pair in self.pairs:
            fitted = self._fit_pair(pair)
            if fitted is not None:
                self.pair_stats[pair] = fitted

        ranked = sorted(self.pair_stats.items(), key=lambda item: item[1]['halflife'])
        return ranked[:k]

    def select_pairs_graph(self, k=10):
        """Pick up to ``k`` pairs that share no ticker, favouring short half-lives.

        Tickers are graph nodes and candidate pairs are edges weighted by
        half-life. ``networkx`` ``min_weight_matching`` selects the edges, which
        are then sorted by half-life and truncated to ``k``.

        Each returned pair is the ordered pair whose alpha/beta are stored in the
        accompanying stats dict. When both ``(a, b)`` and ``(b, a)`` are
        candidates, the ordering with the shorter half-life is used.

        Returns:
            list of ``(pair, stats_dict)``.
        """
        for pair in self.pairs:
            try:
                fitted = self._fit_pair(pair)
            except Exception as e:
                print(f"Error for pair {pair}: {e}")
                continue
            if fitted is not None:
                self.pair_stats[pair] = fitted

        G = nx.Graph()
        for pair, stats_dict in self.pair_stats.items():
            u, v = pair
            halflife = stats_dict['halflife']
            # The graph is undirected: keep only the better of (u, v) and (v, u)
            # and remember which ordered pair the statistics belong to.
            if G.has_edge(u, v) and G[u][v]['weight'] <= halflife:
                continue
            G.add_edge(u, v, weight=halflife, stats=stats_dict, pair=pair)

        matching = nx.algorithms.matching.min_weight_matching(G, weight='weight')

        matching_list = []
        for edge in matching:
            u, v = tuple(edge)
            edge_data = G[u][v]
            # Use the stored ordered pair; the matching's own orientation is arbitrary.
            matching_list.append((edge_data['pair'], edge_data['weight'], edge_data['stats']))

        matching_list.sort(key=lambda item: item[1])
        selected_pairs = [(pair, stats_dict) for pair, _weight, stats_dict in matching_list[:k]]
        print(f"[HalfLifeEstimator.select_pairs_graph] 선택된 페어 수: {len(selected_pairs)}")
        return selected_pairs
    
class TradingStrategy:
    """Z-score band strategy on the spread of one pair.

    Attributes:
        prices: DataFrame containing both legs of ``pair``.
        pair: ``(ticker1, ticker2)``; the spread is ``ticker1 - beta * ticker2 - alpha``.
        alpha, beta: regression intercept and slope used for the spread.
        lookback: EWMA span used for the z-score and its bands.
        test_index: index labels of the rows to trade on.
        transaction_cost: stored for callers; not used inside this class.
        spread, zscore, zscore_st, positions: filled by the ``calculate_*`` /
            ``generate_signals`` methods, in that order.
    """

    def __init__(self, prices, pair, alpha, beta, lookback, test_index, transaction_cost):
        """Store the inputs; nothing is computed here."""
        self.prices = prices
        self.pair = pair
        self.lookback = lookback
        self.alpha = alpha
        self.beta = beta
        self.test_index = test_index
        self.transaction_cost = transaction_cost
        self.spread = None
        self.zscore = None
        self.positions = None
        self.zscore_st = None

    def _span(self):
        """EWMA span to use: ``lookback`` clamped to at least 1 (pandas rejects smaller spans)."""
        return max(1, self.lookback)

    def calculate_spread(self):
        """Compute ``spread = price1 - beta * price2 - alpha`` for the configured pair."""
        price1 = self.prices[self.pair[0]]
        price2 = self.prices[self.pair[1]]
        self.spread = price1 - self.beta * price2 - self.alpha

    def calculate_zscore(self):
        """Compute the EWMA z-score of the spread, restricted to ``test_index``.

        The EWMA mean and standard deviation are computed over the whole spread
        and only then sliced to the test rows.

        Returns:
            The z-score Series (also stored on ``self.zscore``).
        """
        span = self._span()
        spread_mean = self.spread.ewm(span=span, adjust=False).mean()
        spread_std = self.spread.ewm(span=span, adjust=False).std()
        zscore = (self.spread - spread_mean) / spread_std
        self.zscore = zscore.loc[self.test_index]
        return self.zscore

    def generate_signals(self):
        """Build ``self.positions['Position']`` (1 long, -1 short, 0 flat).

        Bands are multiples of the EWMA standard deviation of the z-score
        (``zscore_st``): entries at +/-2 and exits at +/-1. The decision for row
        ``t`` only looks at rows ``t-1`` and ``t-2`` (a band crossing between
        them). The first two rows are flat and the last row is forced flat. A
        z-score with fewer than two rows yields an all-flat result of the same
        length.
        """
        self.positions = pd.DataFrame(index=self.zscore.index)
        self.zscore_st = self.zscore.ewm(span=self._span(), adjust=False).std()

        z = self.zscore.to_numpy(dtype=float)
        band = self.zscore_st.to_numpy(dtype=float)
        upper_entry = band * 2
        lower_entry = -upper_entry
        upper_exit = band
        lower_exit = -upper_exit

        n_rows = len(z)
        # Two leading flat rows, truncated so very short inputs still fit the index.
        position_list = [0] * min(n_rows, 2)

        for t in range(2, n_rows):
            if t == n_rows - 1:
                # Always close at the end of the window.
                position_list.append(0)
                continue

            z_t1 = z[t - 1]
            z_t2 = z[t - 2]

            if z_t1 < lower_entry[t - 1] and z_t2 >= lower_entry[t - 2]:
                new_signal = 1      # long entry: crossed below the lower entry band
            elif z_t1 > lower_exit[t - 1] and z_t2 <= lower_exit[t - 2]:
                new_signal = 0      # long exit: crossed back above the lower exit band
            elif z_t1 > upper_entry[t - 1] and z_t2 <= upper_entry[t - 2]:
                new_signal = -1     # short entry: crossed above the upper entry band
            elif z_t1 < upper_exit[t - 1] and z_t2 >= upper_exit[t - 2]:
                new_signal = 0      # short exit: crossed back below the upper exit band
            else:
                new_signal = position_list[-1]  # no crossing: hold

            position_list.append(new_signal)

        self.positions['Position'] = position_list

class Backtester:
    """Walk-forward backtests of pair-trading strategies over ``prices``.

    Attributes:
        prices: DataFrame of prices indexed by timestamp, one column per ticker.
        start_date, end_date: ``"%Y-%m-%d"`` strings bounding the backtest.
        transaction_cost: passed through to ``record_trades``.
        results: empty DataFrame placeholder.
        trade_logs: empty list placeholder.
    """

    def __init__(self, prices, start_date, end_date, transaction_cost=0.0005):
        """Store the configuration and print a short summary."""
        self.prices = prices
        self.start_date = start_date
        self.end_date = end_date
        self.transaction_cost = transaction_cost
        self.results = pd.DataFrame()
        self.trade_logs = []
        print(f"[Backtester] 초기화: {start_date} ~ {end_date}, 수수료: {transaction_cost}")

    def run_backtest(self, ref_days=180, trade_days=7, initial_capital=10000):
        """Walk-forward backtest using PairSelector + HalfLifeEstimator graph matching.

        For each ``trade_days`` window, pairs are selected on the preceding
        ``ref_days`` of log prices, up to three matched pairs are traded with an
        equal share of the current capital, and the portfolio capital is carried
        into the next window. Windows with no selected pair are skipped.

        Relies on ``record_trades`` and ``plot_trade_details`` being defined in
        the notebook namespace.

        Args:
            ref_days: length of the look-back (training) window in days.
            trade_days: length of each trading window in days.
            initial_capital: starting portfolio capital.

        Returns:
            DataFrame indexed by ``Exit Time`` with a ``Cumulative Return (%)``
            column. The column stores a fraction; it is multiplied by 100 only
            for the plot.
        """
        start_dt = datetime.strptime(self.start_date, "%Y-%m-%d")
        end_dt = datetime.strptime(self.end_date, "%Y-%m-%d")
        backtest_start = pd.Timestamp(start_dt) + pd.Timedelta(days=ref_days)
        date_range = pd.date_range(start=backtest_start, end=end_dt, freq=f'{trade_days}D')

        fixed_capital = initial_capital        # capital at the very start
        portfolio_capital = initial_capital    # running capital, updated after each traded window
        cum_returns = None                     # last cumulative return reported by record_trades
        portfolio_rows = [{'Exit Time': backtest_start, 'Cumulative Return (%)': 0}]

        for i in range(len(date_range) - 1):
            test_start = date_range[i]
            test_end = date_range[i + 1]
            train_end = test_start
            train_start = train_end - pd.Timedelta(days=ref_days)
            print(f"\n[Backtester.run_backtest] LOOKBACK: {train_start.date()} ~ {train_end.date()} | Trade: {test_start.date()} ~ {test_end.date()}")

            train_data = self.prices.loc[train_start:train_end]
            test_data = self.prices.loc[test_start:test_end]
            new_train_data = np.log(train_data)
            new_test_data = np.log(test_data)
            # Both slices include the boundary timestamp; drop the first test row
            # so it is not duplicated in the combined frame.
            combined_data = pd.concat([new_train_data, new_test_data.iloc[1:]])
            print(f"[Backtester.run_backtest] 학습+테스트 데이터 준비 완료, 길이: {len(combined_data)}")

            # Pair selection on the training window only.
            pair_selector = PairSelector(new_train_data)
            filtered_pairs = pair_selector.run()
            print(f"[Backtester.run_backtest] 필터 통과 페어 수: {len(filtered_pairs)}")

            half_life_estimator = HalfLifeEstimator(new_train_data, filtered_pairs)
            sorted_pairs = half_life_estimator.select_pairs_graph(k=3)

            n_pairs = len(sorted_pairs)
            if n_pairs == 0:
                print(f"[Backtester.run_backtest] 선정된 페어 없음; 구간 스킵")
                continue

            pair_capitals = {pair: portfolio_capital / n_pairs for pair, _ in sorted_pairs}
            print(f"[Backtester.run_backtest] 여러 페어 선택: {n_pairs}개, 전체 시작 자본: {portfolio_capital}, 각 페어 시작 자본: {portfolio_capital/n_pairs:.2f}")

            for pair, stats_dict in sorted_pairs:
                # EWMA span must be at least 1; a half-life below 0.5 would round to 0.
                lookback_bars = max(1, int(round(stats_dict['halflife'])))
                print(f"[Backtester.run_backtest] 페어 {pair} 거래 구간 시작, Lookback = {lookback_bars}")
                current_capital = pair_capitals[pair]
                strategy = TradingStrategy(
                    combined_data, pair, stats_dict['alpha'], stats_dict['beta'],
                    lookback_bars, new_test_data.iloc[1:].index, self.transaction_cost,
                )
                strategy.calculate_spread()
                strategy.calculate_zscore()
                strategy.generate_signals()
                signals = strategy.positions

                # The previous cumulative return is only forwarded once capital has
                # moved away from its starting value and a value actually exists.
                if portfolio_capital == fixed_capital or cum_returns is None:
                    trade_df = record_trades(signals, test_data, pair, stats_dict['beta'], self.transaction_cost, current_capital)
                else:
                    trade_df = record_trades(signals, test_data, pair, stats_dict['beta'], self.transaction_cost, current_capital, cum_returns)

                plot_trade_details(test_data, strategy, signals, trade_df, pair)

                if not trade_df.empty:
                    print("\n=== Trade Records ===")
                    print(trade_df)
                    cum_returns = trade_df['Cumulative Return (%)'].iloc[-1]
                    updated_capital = trade_df['Capital'].iloc[-1]
                    pair_capitals[pair] = updated_capital
                    print(f"  페어 {pair}: 업데이트된 자본 = {updated_capital:.2f}")
                    print(f"[Backtester.run_backtest] 페어: {pair} 해당 구간 누적 수익률: {trade_df['Cumulative Return (%)'].iloc[-1]:.4f}%")
                else:
                    print(f"  페어 {pair}: 거래 기록 없음, 자본 유지")

            total_capital = sum(pair_capitals.values())
            cumulative_return = (total_capital / fixed_capital) - 1
            print(f"[Backtester.run_backtest] 구간 {i+1} 후 전체 포트폴리오 자본: {total_capital:.2f}, 누적 수익률: {cumulative_return*100:.2f}%")
            portfolio_rows.append({'Exit Time': test_end, 'Cumulative Return (%)': cumulative_return})
            portfolio_capital = total_capital

        df_portfolio = pd.DataFrame(portfolio_rows).set_index('Exit Time')

        plt.figure(figsize=(12, 6))
        plt.plot(df_portfolio.index, df_portfolio['Cumulative Return (%)']*100, label='Cumulative Return (%)', lw=2)
        plt.xlabel('Time')
        plt.ylabel('Cumulative Return (%)')
        plt.title('Cumulative Return (%) Over All Trade Periods')
        plt.legend()
        plt.grid(True)
        plt.show()

        # Use the running capital so a skipped (or missing) final window does not
        # zero out or break the summary.
        overall_capital = portfolio_capital
        overall_return = (overall_capital / fixed_capital) - 1
        print(f"\n=== 전체 포트폴리오 최종 자본: {overall_capital:.2f}, 누적 수익률: {overall_return*100:.2f}% ===")
        return df_portfolio

    def run_backtest_multiobjective_rolling(self, lookback=30, trade_days=7, initial_capital=10000.0):
        """Rolling backtest with multi-objective pair selection.

        For every window:
          1. Candidate pairs are those with |correlation| > 0.7 on the log prices
             of the ``lookback``-day training window.
          2. Candidates whose OLS spread fails the normality test
             (``stats.normaltest`` p-value <= 0.01) are dropped.
          3. For the rest, the ADF t-statistic, spread variance and half-life are
             computed; the Pareto frontier (smaller is better on all three) is kept.
          4. The frontier is clustered with KMeans (cluster count from an elbow
             heuristic) and the lowest-score pair of each cluster is kept.
          5. The first representative pair is traded for ``trade_days`` and the
             capital is updated from ``record_trades``.
        Windows in which no usable pair remains are skipped.

        NOTE(review): ``MinMaxScaler`` and ``KMeans`` are not imported in the
        visible part of the notebook; they must be in scope (presumably from
        scikit-learn) before this method is called. ``record_trades`` must also
        be defined in the notebook namespace.

        Args:
            lookback: training window length in days; its rounded value is also
                used as the EWMA span of the strategy.
            trade_days: trading window length in days.
            initial_capital: starting capital.

        Returns:
            Overall cumulative return as a fraction of ``initial_capital``.
        """
        print("=== Rolling Multiobjective Backtest 시작 ===")
        start_dt = datetime.strptime(self.start_date, "%Y-%m-%d")
        end_dt = datetime.strptime(self.end_date, "%Y-%m-%d")

        overall_capital = initial_capital
        # Kept separate so the window arithmetic below always uses the value passed in.
        ewm_span = int(round(lookback))

        def normal_filter(data, pair):
            """Return ``pair`` when its OLS spread passes the normality test, else ``None``."""
            try:
                price1 = data[pair[0]]
                price2 = data[pair[1]]
                model = sm.OLS(price1, sm.add_constant(price2)).fit()
                alpha, beta = model.params
                spread = price1 - beta * price2 - alpha
                _, p_val = stats.normaltest(spread.dropna())
                return pair if p_val > 0.01 else None
            except Exception as e:
                print(e)
                return None

        def compute_metrics(pair, data):
            """Return ADF t-stat, spread variance and half-life for ``pair``, or ``None``."""
            try:
                price1 = data[pair[0]]
                price2 = data[pair[1]]
                model = sm.OLS(price1, sm.add_constant(price2)).fit()
                alpha, beta = model.params
                spread = price1 - beta * price2 - alpha
                t_stat = adfuller(spread.dropna())[0]
                var_spread = spread.var()
                delta_spread = spread.diff().dropna()
                spread_lag = spread.shift(1).dropna()
                spread_lag, delta_spread = spread_lag.align(delta_spread, join='inner')
                theta_model = sm.OLS(delta_spread, sm.add_constant(spread_lag)).fit()
                # params are (intercept, slope); theta is the negated slope.
                theta = -theta_model.params.iloc[1]
                if not theta > 0:
                    return None
                halflife = np.log(2) / theta
                return {'pair': pair, 't_stat': t_stat, 'variance': var_spread, 'halflife': halflife}
            except Exception as e:
                print(f"Error computing metrics for {pair}: {e}")
                return None

        def is_dominated(row, df):
            """True when some row of ``df`` is <= ``row`` on all metrics and < on at least one."""
            no_worse = (
                (df['t_stat'] <= row['t_stat'])
                & (df['variance'] <= row['variance'])
                & (df['halflife'] <= row['halflife'])
            )
            strictly_better = (
                (df['t_stat'] < row['t_stat'])
                | (df['variance'] < row['variance'])
                | (df['halflife'] < row['halflife'])
            )
            return (no_worse & strictly_better).any()

        # Training-window start dates, one every ``trade_days``.
        training_starts = pd.date_range(start=start_dt, end=end_dt - pd.Timedelta(days=(lookback+trade_days)), freq=f'{trade_days}D')
        print(f"전체 거래 구간: {len(training_starts)} (각 training window 길이: {lookback}일, 거래 기간: {trade_days}일)")

        for ts in training_starts:
            train_start = ts
            train_end = ts + pd.Timedelta(days=lookback)
            trade_start = train_end
            trade_end = trade_start + pd.Timedelta(days=trade_days)
            if trade_end > end_dt:
                break
            print(f"\n[롤링 구간] 학습: {train_start.date()} ~ {train_end.date()}, 거래: {trade_start.date()} ~ {trade_end.date()}")

            # 1. Candidate pairs from the correlation matrix of log prices.
            train_data = self.prices.loc[train_start:train_end]
            new_train_data = np.log(train_data)
            tickers = new_train_data.columns.tolist()

            corr_matrix = new_train_data.corr()
            candidate_pairs = [
                (tickers[i], tickers[j])
                for i in range(len(tickers))
                for j in range(i + 1, len(tickers))
                if abs(corr_matrix.iloc[i, j]) > 0.7
            ]
            print(f"  초기 후보 페어 수 (상관계수 > 0.7): {len(candidate_pairs)}")

            # Normality filter, in parallel.
            candidate_pairs_filtered = Parallel(n_jobs=-1, verbose=10)(
                delayed(normal_filter)(new_train_data, pair) for pair in candidate_pairs
            )
            candidate_pairs_filtered = [r for r in candidate_pairs_filtered if r is not None]
            print(f"  후보 페어 수 after D’Agostino’s K²: {len(candidate_pairs_filtered)}")

            # 2. Metrics per candidate, in parallel.
            metrics = Parallel(n_jobs=-1, verbose=10)(
                delayed(compute_metrics)(pair, new_train_data) for pair in candidate_pairs_filtered
            )
            metrics = [m for m in metrics if m is not None]
            df_metrics = pd.DataFrame(metrics)
            print(f"  Metrics computed for {len(df_metrics)} candidate pairs.")
            if df_metrics.empty:
                print("  No candidate pair with valid metrics; skipping this window.")
                continue

            # 3. Pareto frontier.
            pareto_mask = [
                not is_dominated(row, df_metrics.drop(i))
                for i, row in df_metrics.iterrows()
            ]
            # Copy so the columns added below do not write into a slice of df_metrics.
            df_pareto = df_metrics[pareto_mask].copy()
            print(f"  Pareto frontier pairs: {len(df_pareto)}")

            # 4. Elbow heuristic + clustering to pick representative pairs.
            if len(df_pareto) > 1:
                scaler = MinMaxScaler()
                norm_data = scaler.fit_transform(df_pareto[['t_stat', 'variance', 'halflife']])
                inertias = []
                K_range = range(1, min(10, len(norm_data)) + 1)
                for k in K_range:
                    kmeans = KMeans(n_clusters=k, random_state=42).fit(norm_data)
                    inertias.append(kmeans.inertia_)
                deltas = np.diff(inertias)
                if len(deltas) > 1:
                    elbow_k = np.argmin(deltas[1:]) + 2
                else:
                    elbow_k = 1
                print(f"  Elbow method selected clusters: {elbow_k}")
                kmeans = KMeans(n_clusters=elbow_k, random_state=42).fit(norm_data)
                df_pareto['cluster'] = kmeans.labels_
                df_pareto['score'] = df_pareto[['t_stat', 'variance', 'halflife']].sum(axis=1)
                final_pairs = []
                for cluster in df_pareto['cluster'].unique():
                    cluster_df = df_pareto[df_pareto['cluster'] == cluster]
                    best_pair = cluster_df.loc[cluster_df['score'].idxmin(), 'pair']
                    final_pairs.append(best_pair)
                print(f"  Final representative pairs: {final_pairs}")
            else:
                final_pairs = df_pareto['pair'].tolist()
                print("  No clustering applied; using all Pareto frontier pairs.")

            if not final_pairs:
                print("  No representative pair selected; skipping this window.")
                continue

            # 5. Trade the first representative pair over the trading window.
            selected_pair = final_pairs[0]
            print(f"  Selected pair for trade: {selected_pair}")
            test_data_period = self.prices.loc[trade_start:trade_end, list(selected_pair)]
            new_train_period = np.log(self.prices.loc[train_start:train_end, list(selected_pair)])
            new_test_period = np.log(test_data_period)
            combined_data = pd.concat([new_train_period, new_test_period.iloc[1:]])
            X = sm.add_constant(new_train_period[selected_pair[1]])
            model = sm.OLS(new_train_period[selected_pair[0]], X).fit()
            alpha, beta = model.params
            strategy = TradingStrategy(combined_data, selected_pair, alpha, beta, ewm_span, new_test_period.iloc[1:].index, self.transaction_cost)
            strategy.calculate_spread()
            strategy.calculate_zscore()
            strategy.generate_signals()
            signals = strategy.positions
            trade_df = record_trades(signals, test_data_period, selected_pair, beta, self.transaction_cost, capital=overall_capital)
            if not trade_df.empty:
                overall_capital = trade_df['Capital'].iloc[-1]
                print(f"  Updated overall capital: {overall_capital:.2f}")
            else:
                print("  No trades executed in this period.")

        print(f"=== Overall final capital: {overall_capital:.2f} ===")
        overall_return = (overall_capital / initial_capital) - 1
        print(f"=== Overall cumulative return: {overall_return*100:.2f}% ===")
        return overall_return

    def calculate_performance_metrics(self, returns_series):
        """Compute summary metrics from a datetime-indexed series.

        NOTE(review): total return, CAGR and Sharpe treat ``returns_series`` as a
        cumulative wealth index (last value, ``pct_change``), while the drawdown
        compounds ``1 + returns_series`` as if it held per-period returns.
        Confirm which one the caller passes. The annualisation uses 252 with a
        calendar-day span.

        Returns:
            dict with ``Total Return``, ``CAGR``, ``Sharpe Ratio`` and ``Max Drawdown``.
        """
        final_value = returns_series.iloc[-1]
        total_return = final_value - 1
        trading_days = (returns_series.index[-1] - returns_series.index[0]).days
        annual_factor = 252 / trading_days if trading_days > 0 else 1
        cagr = final_value ** annual_factor - 1

        period_changes = returns_series.pct_change()
        sharpe = period_changes.mean() / period_changes.std() * np.sqrt(252)

        # Maximum drawdown of the compounded series.
        cumulative = (1 + returns_series).cumprod()
        peak = cumulative.cummax()
        drawdown = (cumulative - peak) / peak
        max_drawdown = drawdown.min()
        return {'Total Return': total_return, 'CAGR': cagr, 'Sharpe Ratio': sharpe, 'Max Drawdown': max_drawdown}

    def plot_performance(self, all_returns):
        """Plot ``all_returns['Overall_Cumulative']`` and print metrics for ``all_returns['Returns']``."""
        plt.figure(figsize=(12, 6))
        plt.plot(all_returns.index, all_returns['Overall_Cumulative'], label='Overall Cumulative Return', lw=2)
        plt.xlabel('Time')
        plt.ylabel('Cumulative Returns')
        plt.title('Overall Cumulative Returns')
        plt.legend()
        plt.grid(True)
        plt.show()
        metrics = self.calculate_performance_metrics(all_returns['Returns'])
        print("\n=== Performance Metrics ===")
        for k, v in metrics.items():
            print(f"{k}: {v:.4f}")

def plot_trade_details(test_data, strategy, signals, trade_df, pair):
    """Plot the z-score with its threshold bands and mark trade entries/exits.

    Args:
        test_data: Not used by this function; kept so existing calls still work.
        strategy: Object exposing ``zscore`` (plotted against its own index)
            and ``zscore_st`` (scaled by +/-3 for the entry bands and +/-1 for
            the exit bands). NOTE(review): ``zscore_st`` is presumably a
            series aligned with ``zscore`` - confirm against TradingStrategy.
        signals: DataFrame with a 'Position' column where 0 means flat. Its
            index labels must exist in ``strategy.zscore``.
        trade_df: Not used by this function; kept so existing calls still work.
        pair: Shown in the figure title only.
    """
    zscore = strategy.zscore
    band = strategy.zscore_st

    # (values, legend label, line style, colour) for each threshold band.
    threshold_specs = [
        (band * 3, "Long Entry Threshold", '--', 'red'),
        (-band * 3, "Short Entry Threshold", '--', 'green'),
        (band, "Long Exit Threshold", '-.', 'blue'),
        (-band, "Short Exit Threshold", '-.', 'purple'),
    ]

    fig, ax = plt.subplots(figsize=(14, 7))
    ax.plot(zscore.index, zscore, label="Z-Score", color='black', lw=2)
    for values, label, linestyle, color in threshold_specs:
        ax.plot(zscore.index, values, label=label, linestyle=linestyle, color=color)

    # Entries are flat -> non-flat transitions, exits the reverse.
    # The bar before the first observation is treated as flat (fill_value=0).
    # A plain shift leaves NaN there, and since NaN != 0 is True a flat first
    # bar would be drawn as an exit while a position held on the first bar
    # would never be drawn as an entry.
    current_signal = signals['Position']
    prev_signal = current_signal.shift(1, fill_value=0)
    entry_idx = signals.index[(prev_signal == 0) & (current_signal != 0)]
    exit_idx = signals.index[(prev_signal != 0) & (current_signal == 0)]

    # One artist per marker kind keeps the legend free of duplicates and
    # avoids creating a separate line object for every single trade point.
    if len(entry_idx) > 0:
        ax.plot(entry_idx, zscore.loc[entry_idx], linestyle='None', marker='^',
                color='magenta', markersize=10, label="Entry")
    if len(exit_idx) > 0:
        ax.plot(exit_idx, zscore.loc[exit_idx], linestyle='None', marker='v',
                color='cyan', markersize=10, label="Exit")

    ax.legend()
    ax.set_xlabel("Time")
    ax.set_ylabel("Z-Score")
    ax.set_title(f"Spread Z-Score, Thresholds, and Trade Points for Pair {pair}")
    plt.tight_layout()
    plt.show()

def record_trades(signals, test_data, pair, hedge_ratio, transaction_cost, capital=10000.0, cumulative_return=1.0):
    """Turn a position series into a per-trade profit-and-loss ledger.

    A trade opens on the first bar whose position is non-zero and closes on
    the next bar whose position is 0. A direct reversal (the position switches
    to the exact negative of the open trade's entry position without passing
    through 0) closes the open trade and opens a new one on that same bar, so
    each leg is priced in its own direction. A trade still open after the last
    bar is closed at the last bar.

    Args:
        signals: DataFrame with a 'Position' column (1 = long A / short B,
            -1 = short A / long B, 0 = flat).
        test_data: Price DataFrame containing both pair columns at every
            timestamp where a trade opens or closes.
        pair: Two-item sequence of column names ``(asset A, asset B)``.
        hedge_ratio: Its absolute value sets the capital split between the
            legs: ``1 / (1 + |h|)`` to A and ``|h| / (1 + |h|)`` to B.
        transaction_cost: Fee rate charged on the whole allocation, once at
            entry and once at exit of every trade.
        capital: Capital at the start of the period. The per-leg allocation is
            derived from this value once and is not resized between trades.
        cumulative_return: Accepted for call compatibility; its incoming value
            is never read.

    Returns:
        DataFrame with one row per trade and the columns 'Entry Time',
        'Exit Time', 'Entry Price A', 'Exit Price A', 'Entry Price B',
        'Exit Price B', 'Trade Profit', 'Trade Return (%)', 'Capital' and
        'Cumulative Return (%)'. It is an empty DataFrame when no trade occurs.
    """
    asset_a, asset_b = pair[0], pair[1]
    positions = signals['Position']

    def _open_trade(t, pos):
        # Snapshot both prices at entry.
        return {
            'Entry Time': t,
            'Entry Position': pos,
            'Entry Price A': test_data.loc[t, asset_a],
            'Entry Price B': test_data.loc[t, asset_b],
        }

    def _close_trade(trade, t):
        # Stamp the exit prices onto an open trade and hand it back.
        trade['Exit Time'] = t
        trade['Exit Price A'] = test_data.loc[t, asset_a]
        trade['Exit Price B'] = test_data.loc[t, asset_b]
        return trade

    trades = []
    current_trade = None
    # items() yields (timestamp, position) directly, avoiding a label lookup
    # on every bar; prices are only fetched on bars where a trade changes.
    for t, pos in positions.items():
        if current_trade is None:
            if pos != 0:
                current_trade = _open_trade(t, pos)
        elif pos == 0:
            trades.append(_close_trade(current_trade, t))
            current_trade = None
        elif pos == -current_trade['Entry Position']:
            # Direct long <-> short reversal: without this the reversed leg
            # would be priced as if the original direction were still held.
            trades.append(_close_trade(current_trade, t))
            current_trade = _open_trade(t, pos)

    # Force-close whatever is still open at the final bar.
    if current_trade is not None:
        trades.append(_close_trade(current_trade, positions.index[-1]))

    # Capital split between the two legs: total weight = 1 + |hedge_ratio|.
    total_weight = 1 + abs(hedge_ratio)
    capital_A = capital * (1 / total_weight)
    capital_B = capital * (abs(hedge_ratio) / total_weight)

    # Round-trip fee: the rate applied to the full allocation at entry and exit.
    fee = transaction_cost * (capital_A + capital_B) * 2

    initial_capital = capital
    current_capital = capital
    trade_records = []
    for trade in trades:
        entry_pos = trade['Entry Position']
        if entry_pos == 1:
            # Long spread: asset A long, asset B short.
            ret_A = (trade['Exit Price A'] - trade['Entry Price A']) / trade['Entry Price A']
            ret_B = (trade['Entry Price B'] - trade['Exit Price B']) / trade['Entry Price B']
            trade_profit = capital_A * ret_A + capital_B * ret_B
        elif entry_pos == -1:
            # Short spread: asset A short, asset B long.
            ret_A = (trade['Entry Price A'] - trade['Exit Price A']) / trade['Entry Price A']
            ret_B = (trade['Exit Price B'] - trade['Entry Price B']) / trade['Entry Price B']
            trade_profit = capital_A * ret_A + capital_B * ret_B
        else:
            # Unrecognised position value: no price P&L, the fee still applies.
            trade_profit = 0

        trade_profit -= fee
        current_capital += trade_profit
        cumulative_return = (current_capital / initial_capital) - 1

        trade_records.append({
            'Entry Time': trade['Entry Time'],
            'Exit Time': trade['Exit Time'],
            'Entry Price A': trade['Entry Price A'],
            'Exit Price A': trade['Exit Price A'],
            'Entry Price B': trade['Entry Price B'],
            'Exit Price B': trade['Exit Price B'],
            'Trade Profit': trade_profit,
            'Trade Return (%)': trade_profit / initial_capital * 100,
            'Capital': current_capital,
            'Cumulative Return (%)': cumulative_return * 100
        })

    return pd.DataFrame(trade_records)

# 실행 코드 부분 아래 셀로 분리
'''
if __name__ == '__main__':
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
    start_date = "2023-10-01"
    end_date = "2025-01-01"
    backtester = Backtester(price_30_min_filltered, start_date, end_date) # 원본코드 수정 price_5min_filter -> price_30_min_filltered

    # 싱글 페어 백테스트 (반감기 기준)
    results_single = backtester.run_backtest(90, 30, 10000)
'''

#====================================================================================================
#                              전체 페어에 대해서 벤치마크 구하는 함수 시작
#====================================================================================================
def simulate_pair_backtest(pair, prices, ref_days, trade_days, capital_per_pair, transaction_cost):
    """Walk-forward backtest of one pair, carrying capital across windows.

    Window boundaries are spaced ``trade_days`` days apart, starting
    ``ref_days`` after the first timestamp in ``prices``. For each pair of
    consecutive boundaries, an OLS regression of log(pair[0]) on log(pair[1])
    is fitted on the ``ref_days`` preceding the window. A ``TradingStrategy``
    then produces positions for the window, and ``record_trades`` prices them
    on the raw (non-log) prices.

    Args:
        pair: Two column names ``(asset A, asset B)``.
        prices: Price DataFrame that contains both columns. NOTE(review):
            assumes a sorted DatetimeIndex - confirm at the call site.
        ref_days: Length of the fitting window in days; its rounded integer
            value is also passed to ``TradingStrategy`` as the lookback.
        trade_days: Length of each trading window in days.
        capital_per_pair: Capital available at the first window.
        transaction_cost: Fee rate forwarded to the strategy and the ledger.

    Returns:
        Tuple ``(pair, final capital, trade ledger)``. The ledger is the
        concatenation of every window's trades, or an empty DataFrame when no
        window produced a trade.
    """
    columns = list(pair)
    ref_span = pd.Timedelta(days=ref_days)

    # Full span covered by the price data.
    data_start = prices.index.min()
    data_end = prices.index.max()

    # The first boundary sits ref_days after the start of the data.
    boundaries = pd.date_range(
        start=data_start + ref_span,
        end=data_end,
        freq=f'{int(trade_days)}d',
    )

    running_capital = capital_per_pair
    window_ledgers = []

    for test_start, test_end in zip(boundaries[:-1], boundaries[1:]):
        train_start = test_start - ref_span
        if train_start < data_start or test_end > data_end:
            continue

        # Raw prices for the fitting window and the trading window.
        train_prices = prices.loc[train_start:test_start, columns]
        test_prices = prices.loc[test_start:test_end, columns]
        if len(train_prices) < 5 or len(test_prices) < 5:
            continue

        log_train = np.log(train_prices)
        log_test = np.log(test_prices)
        # Label slices include both end points, so when the row at test_start
        # exists it appears in both frames; the first test row is dropped
        # before the two are stitched together.
        log_test_tail = log_test.iloc[1:]
        combined_logs = pd.concat([log_train, log_test_tail])

        # Intercept and slope from OLS on the fitting window only.
        exog = sm.add_constant(log_train[pair[1]])
        fitted = sm.OLS(log_train[pair[0]], exog).fit()
        alpha, beta = fitted.params

        # NOTE(review): how TradingStrategy interprets the lookback (rows vs
        # days) is not visible from here - confirm.
        lookback = int(round(ref_days))
        strategy = TradingStrategy(
            combined_logs, pair, alpha, beta, lookback, log_test_tail.index, transaction_cost
        )
        strategy.calculate_spread()
        strategy.calculate_zscore()
        strategy.generate_signals()

        window_ledger = record_trades(
            strategy.positions, test_prices, pair, beta, transaction_cost, capital=running_capital
        )
        if window_ledger.empty:
            continue

        # The capital after the window's last trade funds the next window.
        running_capital = window_ledger['Capital'].iloc[-1]
        window_ledgers.append(window_ledger)

    pair_trade_df = pd.concat(window_ledgers) if window_ledgers else pd.DataFrame()
    return pair, running_capital, pair_trade_df

# 전체 백테스트를 모든 페어에 대해 병렬 실행하는 함수
def benchmark_all_pairs_parallel(prices, ref_days, trade_days, initial_capital=10000.0, transaction_cost=0.0005):
    """Backtest every unordered pair of columns in parallel and pool the result.

    The initial capital is divided equally among all pairs, each pair is run
    through ``simulate_pair_backtest`` in a joblib worker, and the final
    capitals are summed to obtain the portfolio return.

    Args:
        prices: Price DataFrame; each column is one asset.
        ref_days: Fitting-window length in days, forwarded to each simulation.
        trade_days: Trading-window length in days, forwarded to each simulation.
        initial_capital: Total capital shared equally by all pairs.
        transaction_cost: Fee rate forwarded to each simulation.

    Returns:
        Tuple ``(overall_return, pair_trade_records)`` where ``overall_return``
        is the fractional portfolio return and ``pair_trade_records`` maps
        each pair that traded to its trade DataFrame. When ``prices`` has
        fewer than two columns there is nothing to trade and ``(0.0, {})`` is
        returned.
    """
    from itertools import combinations

    print("=== 전체 페어 백테스트 시작 ===")
    # Unique unordered pairs, in the same order as a nested i < j loop.
    tickers = prices.columns.tolist()
    pairs = list(combinations(tickers, 2))
    n_pairs = len(pairs)
    print(f"전체 unique 페어 개수: {n_pairs}")

    if n_pairs == 0:
        # Without this guard the per-pair allocation below divides by zero.
        print("페어를 구성하려면 최소 2개의 종목이 필요합니다. 백테스트를 건너뜁니다.")
        print("=== 전체 페어 백테스트 종료 ===")
        return 0.0, {}

    # Equal capital allocation per pair.
    capital_per_pair = initial_capital / n_pairs
    print(f"각 페어당 할당 자본: {capital_per_pair:.2f}")

    # One simulation per pair, spread over all available cores.
    results = Parallel(n_jobs=-1, verbose=10)(
        delayed(simulate_pair_backtest)(pair, prices, ref_days, trade_days, capital_per_pair, transaction_cost)
        for pair in pairs
    )

    # Portfolio capital is the sum of every pair's final capital.
    overall_capital = sum(final_capital for _, final_capital, _ in results)
    overall_return = (overall_capital / initial_capital) - 1
    print(f"전체 포트폴리오 최종 자본: {overall_capital:.2f}")
    print(f"전체 포트폴리오 누적 수익률: {overall_return*100:.2f}%")

    # Keep the ledgers of pairs that actually traded.
    pair_trade_records = {pair: ledger for pair, _, ledger in results if not ledger.empty}
    print("=== 전체 페어 백테스트 종료 ===")
    return overall_return, pair_trade_records


"""
#====================================================================================================  
#                              과거 참조 기간, 거래 기간 baysian optimization
#====================================================================================================

# from bayes_opt import BayesianOptimization

# def optimize_run_backtest():
#     # Backtester 인스턴스 생성 (이미 price_5min, start_date, end_date 등이 정의되어 있다고 가정)
#     backtester = Backtester(price_5min, "2023-06-01", "2025-01-01", transaction_cost=0.0005)
    
#     def objective(ref_days, trade_days):
#         # 파라미터를 int로 변환
#         ref_days_int = int(round(ref_days))
#         trade_days_int = int(round(trade_days))
#         print(f"\n[Optimization] Testing with ref_days={ref_days_int}, trade_days={trade_days_int}")
        
#         # run_backtest 실행 (여기서는 초기 자본 10000 사용)
#         df_portfolio = backtester.run_backtest(ref_days_int, trade_days_int, 10000)
        
#         # 최종 누적 수익률(%)을 불러와서 소수 형태로 변환 (예: 0.15는 15% 누적 수익률)
#         final_return_pct = df_portfolio['Cumulative Return (%)'].iloc[-1]
#         final_return = final_return_pct / 100.0
#         print(f"[Opti smization] Final return: {final_return:.4f}")
        
#         # Bayesian Optimization에서는 최대화 문제로 설정하므로, 최종 누적 수익률이 클수록 좋은 값으로 간주
#         return final_return

#     # 각 파라미터 범위: ref_days는 1~180, trade_days는 1~30
#     optimizer = BayesianOptimization(
#         f=objective,
#         pbounds={'ref_days': (1, 180), 'trade_days': (1, 30)},
#         random_state=42,
#     )

#     # 초기 탐색 포인트와 최적화 반복 횟수를 조절할 수 있음 (실행 시간이 길어질 수 있으므로 적절히 조절)
#     optimizer.maximize(init_points=3, n_iter=5)

#     print("\n[Optimization] Best parameters found:")
#     print(optimizer.max)
    
# # 최적화 실행
# optimize_run_backtest()"
"""

print(' ')

 


# Run

In [6]:
# Run the backtester on the column-filtered 30-minute price frame over a fixed date window.
# NOTE(review): the saved output of this cell records an IndentationError at its
# fourth line; every statement here must stay flush with the left margin.
data = price_30_min_filltered
start_date = "2023-10-01"
end_date = "2025-03-19"
backtester = Backtester(data, start_date, end_date)
# Single-pair backtest (half-life based).
# NOTE(review): the arguments are presumably (reference days, trade days,
# initial capital) - confirm against the run_backtest signature.
results_single = backtester.run_backtest(90, 30, 10000)

IndentationError: unexpected indent (1901391357.py, line 4)

In [9]:
display(price_30_min_filltered.head())
price_30_min_filltered.tail()

Unnamed: 0_level_0,CVC,RLC,RAD,UTK,AERGO,BTC,ETH,XRP,SOL,BNB,...,CELO,BEL,DASH,CVX,AGLD,DEXE,SCRT,ZEC,FARM,ACA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-06-01 00:00:00,0.0843,1.523,1.893,0.0926,0.1087,27281.27,1886.73,0.5129,20.97,307.3,...,0.505,0.79,42.04,4.188,0.445,2.419,0.394,32.3,27.08,0.0487
2023-06-01 00:30:00,0.0836,1.512,1.91,0.092,0.1082,27065.77,1871.27,0.5082,20.78,306.1,...,0.501,0.7788,41.84,4.156,0.441,2.412,0.39,32.1,26.99,0.0493
2023-06-01 01:00:00,0.0839,1.523,1.93,0.0922,0.1086,27140.9,1877.97,0.5088,20.82,306.7,...,0.503,0.7866,42.02,4.169,0.441,2.408,0.39,32.1,27.04,0.0487
2023-06-01 01:30:00,0.0838,1.518,1.914,0.0918,0.1085,27078.56,1874.17,0.5088,20.77,306.7,...,0.502,0.7827,41.87,4.161,0.439,2.409,0.391,32.1,27.09,0.0486
2023-06-01 02:00:00,0.0834,1.513,1.93,0.0916,0.1082,27076.68,1872.26,0.5075,20.71,306.0,...,0.503,0.7815,41.62,4.148,0.439,2.41,0.391,32.1,27.06,0.0484


Unnamed: 0_level_0,CVC,RLC,RAD,UTK,AERGO,BTC,ETH,XRP,SOL,BNB,...,CELO,BEL,DASH,CVX,AGLD,DEXE,SCRT,ZEC,FARM,ACA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-03-18 22:00:00,0.0987,1.272,0.804,0.03869,0.0783,82274.99,1911.7,2.2663,124.65,630.06,...,0.3498,0.6263,23.26,2.04,0.966,18.32,0.2022,33.34,28.12,0.0429
2025-03-18 22:30:00,0.0997,1.277,0.806,0.03867,0.0778,82395.74,1917.93,2.2809,125.02,629.57,...,0.3512,0.6313,23.64,2.052,0.975,18.26,0.2042,33.38,28.28,0.0432
2025-03-18 23:00:00,0.1002,1.284,0.808,0.03884,0.0784,82777.36,1933.76,2.2903,125.38,627.39,...,0.3537,0.6313,23.98,2.056,0.988,18.31,0.2047,33.43,28.4,0.0433
2025-03-18 23:30:00,0.1005,1.28,0.809,0.0388,0.0787,82715.03,1931.54,2.2851,125.33,627.81,...,0.3537,0.6312,24.12,2.069,0.984,18.365,0.2041,33.14,28.41,0.0434
2025-03-19 00:00:00,0.0999,1.28,0.809,0.0388,0.0779,82684.35,1931.17,2.2838,125.25,627.97,...,0.3539,0.6299,24.07,2.067,0.979,18.334,0.2037,33.13,28.41,0.0433


In [10]:
price_30_min_filltered.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 31537 entries, 2023-06-01 00:00:00 to 2025-03-19 00:00:00
Columns: 137 entries, CVC to ACA
dtypes: float64(137)
memory usage: 33.2 MB


In [13]:
price_30_min_filltered.describe()

Unnamed: 0,CVC,RLC,RAD,UTK,AERGO,BTC,ETH,XRP,SOL,BNB,...,CELO,BEL,DASH,CVX,AGLD,DEXE,SCRT,ZEC,FARM,ACA
count,31537.0,31537.0,31537.0,31537.0,31537.0,31537.0,31537.0,31537.0,31537.0,31537.0,...,31537.0,31537.0,31537.0,31537.0,31537.0,31537.0,31537.0,31537.0,31537.0,31537.0
mean,0.118728,1.88275,1.493454,0.067362,0.118467,58099.255911,2628.659955,0.8987,119.900833,455.19891,...,0.632326,0.681608,30.06163,3.132588,1.076374,8.082052,0.352036,31.861462,41.440692,0.077804
std,0.036979,0.8131,0.426973,0.022318,0.028573,23641.599949,697.144881,0.739686,68.840273,177.693252,...,0.211037,0.200832,6.208061,0.986084,0.419535,5.392939,0.130674,10.178253,15.214974,0.032269
min,0.0645,0.919,0.69,0.02929,0.059,24914.73,1525.96,0.3979,13.93,203.7,...,0.2993,0.305,19.21,1.434,0.387,1.885,0.1629,16.26,20.46,0.031
25%,0.0901,1.279,1.193,0.05135,0.0956,36420.09,1932.15,0.5184,54.31,245.3,...,0.4649,0.5632,25.67,2.395,0.775,2.827,0.2487,25.38,27.62,0.056
50%,0.1087,1.59,1.447,0.06653,0.1101,60778.59,2561.53,0.5735,137.11,537.4,...,0.5816,0.6368,28.73,2.932,1.025,7.933,0.3361,29.44,40.95,0.0672
75%,0.1428,2.248,1.712,0.0804,0.1389,69303.99,3269.75,0.6509,169.9,597.1,...,0.7466,0.751,32.9,3.706,1.342,12.071,0.4123,33.59,48.55,0.1014
max,0.3086,4.667,3.535,0.16999,0.2617,108706.15,4086.66,3.3805,291.18,792.4,...,1.7994,2.5619,70.94,8.113,3.038,23.976,0.8543,77.71,111.31,0.23
