In [1]:
import os
import mplfinance as mpf
import pandas as pd
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
import adtk
from adtk.detector import LevelShiftAD
from adtk.visualization import plot
import datetime
import matplotlib.pyplot as plt

In [2]:
# Work from the 2022 data folder; the CSV in the next cell is opened relative to it.
# NOTE(review): this path spells "FinxTech" while later cells use "FinXTech" -
# harmless on a case-insensitive filesystem, but confirm which spelling is correct.
os.chdir("/Users/sisishao/Downloads/FinxTech/ETHUSD/ETH_data/2022")
# Strip stray whitespace and zero-pad, so that "5" and "05" both produce the
# two-digit form that the file name built from these values expects.
month = input("Please input month in double digit format ").strip().zfill(2)
day = input("Please input day in double digit format ").strip().zfill(2)

Please input month in double digit format 05
Please input day in double digit format 23


In [3]:
# Build the per-day file name from the month/day entered above, then load it.
price_file = "ETHUSD2022-{}-{}_index_price.csv".format(month, day)
df = pd.read_csv(price_file)

In [4]:
# Convert the epoch-second "start_at" column into pandas timestamps.
df = df.assign(start_at=pd.to_datetime(df["start_at"], unit='s'))

In [5]:
# Use the bar start time as the index (needed for the time-based resampling below).
df = df.set_index("start_at")

In [6]:
# Display the frame after it has been indexed by "start_at".
df

Unnamed: 0_level_0,symbol,period,open,high,low,close
start_at,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-05-23 00:00:00,ETHUSD,1,2040.93,2041.05,2038.13,2038.14
2022-05-23 00:01:00,ETHUSD,1,2038.14,2038.51,2035.73,2035.92
2022-05-23 00:02:00,ETHUSD,1,2035.92,2038.51,2035.14,2038.51
2022-05-23 00:03:00,ETHUSD,1,2038.51,2040.83,2037.79,2037.79
2022-05-23 00:04:00,ETHUSD,1,2037.79,2039.86,2037.77,2039.86
...,...,...,...,...,...,...
2022-05-23 23:55:00,ETHUSD,1,1971.86,1972.61,1971.25,1971.35
2022-05-23 23:56:00,ETHUSD,1,1971.35,1971.36,1970.77,1970.80
2022-05-23 23:57:00,ETHUSD,1,1970.80,1971.23,1970.00,1970.83
2022-05-23 23:58:00,ETHUSD,1,1970.83,1971.99,1970.04,1970.59


In [7]:
class AnomalyDectector:
    """Flag unusually wide OHLC bars and plot them next to ADTK level-shift labels.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``open``, ``high``, ``low`` and ``close`` columns. Resampling
        requires a datetime-like index; the plotting method expects that index
        to be named ``start_at``.
    freq : str
        Bar frequency passed to ``DataFrame.resample`` (e.g. ``'15min'``).
        The value ``'1min'`` skips resampling and uses ``df`` as given.
    backward_window : int
        Number of preceding bars combined into the weighted estimate.
    threshold : float
        Margin by which the current value must exceed the weighted estimate
        of the preceding ``backward_window`` values to be flagged.
    """

    # How finer bars are combined into one coarser OHLC bar.
    _OHLC_AGG = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last'}

    def __init__(self, df=None,freq = '15min', backward_window=4, threshold=0.45):
        self.df = df
        self.freq = freq
        self.backward_window = backward_window
        self.threshold = threshold

    def _resample(self):
        """Return ``self.df`` aggregated to ``self.freq`` bars ('1min' returns it unchanged)."""
        if self.freq == "1min":
            return self.df
        return self.df.resample(self.freq).agg(self._OHLC_AGG)

    def _flag_spikes(self, series):
        """Return positions ``i`` where ``series[i]`` >= weighted sum of the previous window + threshold."""
        # Work on a plain float array: integer indexing of a datetime-indexed
        # Series is label-based in recent pandas, positions are what we need.
        values = series.to_numpy(dtype=float)
        window = self.backward_window
        # Exponentially decaying weights exp(-0.5 * k); the most recent bar
        # (k = 1) carries the largest weight, the oldest (k = window) the smallest.
        weights = np.exp(-0.5 * (window - np.arange(window)))
        flagged = []
        for i in range(window, len(values)):
            estimate = float(np.dot(values[i - window:i], weights))
            # A NaN estimate (e.g. the warm-up of a rolling statistic) compares
            # False and is therefore never flagged.
            if estimate + self.threshold <= values[i]:
                flagged.append(i)
        return flagged

    def freq_desired(self):
        """'smoothing' kernel: flag bars whose high-low range spikes.

        Returns
        -------
        (DataFrame, list[int])
            The (possibly resampled) bars and the positional indices of the
            flagged bars within them.
        """
        df_resampled = self._resample()
        bar_range = df_resampled['high'] - df_resampled['low']
        return df_resampled, self._flag_spikes(bar_range)

    def freq_desired2(self):
        """'vol' kernel: flag bars where the rolling std of the high-low range spikes.

        The rolling standard deviation uses a window of ``backward_window``
        bars. Returns the same ``(bars, positions)`` pair as ``freq_desired``.
        """
        df_resampled = self._resample()
        bar_range = df_resampled['high'] - df_resampled['low']
        rolling_vol = bar_range.rolling(self.backward_window).std()
        return df_resampled, self._flag_spikes(rolling_vol)

    def anomaly_dynamic_smoothing(self,kernel='vol'):
        '''
        Detect anomalies with the chosen kernel and plot them on a candlestick chart.

        kernel: a string selecting the estimator, either "vol" (rolling standard
            deviation of the high-low range) or "smoothing" (the high-low range itself).

        The figure has five stacked rows sharing the x axis: candlesticks in row 1,
        the kernel's flagged closes in row 3 and the closes flagged by ADTK's
        LevelShiftAD (c=5, side='both', window=5) in row 5.

        Returns the number of bars flagged by the kernel.
        Raises ValueError for an unknown kernel.
        '''
        if kernel == 'vol':
            df_resampled, anomaly_indices = self.freq_desired2()
        elif kernel == 'smoothing':
            df_resampled, anomaly_indices = self.freq_desired()
        else:
            raise ValueError("kernel must be 'vol' or 'smoothing', got {!r}".format(kernel))

        level_shift_ad = LevelShiftAD(c=5, side='both', window=5)
        anomalies = level_shift_ad.fit_detect(df_resampled["close"])
        # Keep the positions labelled True (anomalous). The previous code selected
        # the False labels, i.e. it marked the normal points. Unlabelled (NaN)
        # entries compare unequal to True and are skipped.
        adtk_indices = np.flatnonzero(anomalies.eq(True).to_numpy()).tolist()

        # Non-inplace reset: with freq == '1min' the bars ARE self.df, and the
        # caller's frame must not lose its index as a side effect of plotting.
        bars = df_resampled.reset_index()

        # Create the subplot grid
        fig = make_subplots(rows=5, cols=1, shared_xaxes=True)

        # Add candlestick trace
        fig.add_trace(go.Candlestick(x=bars['start_at'], open=bars['open'], high=bars['high'],
                                     low=bars['low'], close=bars['close']), row=1, col=1)

        # Kernel-flagged bars: one marker per flagged position at its close price.
        fig.add_trace(go.Scatter(x=bars['start_at'].iloc[anomaly_indices].tolist(),
                                 y=bars['close'].iloc[anomaly_indices].tolist(),
                                 mode='markers', marker=dict(color='red', size=3), name='Marked Points'),row=3,col=1)

        # ADTK-flagged bars, drawn the same way for comparison.
        fig.add_trace(go.Scatter(x=bars['start_at'].iloc[adtk_indices].tolist(),
                                 y=bars['close'].iloc[adtk_indices].tolist(),
                                 mode='markers', marker=dict(color='red', size=3), name='Marked Points'),row=5,col=1)

        fig.update_layout(
            title='Candlestick Chart',
            yaxis_title='Price'
        )

        fig.show()

        return len(anomaly_indices)




In [9]:
# Run the detector on 30-minute bars with the rolling-volatility kernel;
# the cell output is the number of bars that kernel flagged.
a = AnomalyDectector(freq='30min', df=df)
a.anomaly_dynamic_smoothing(kernel='vol')

6

In [None]:
def _ceil_to_five_minutes(dt):
    """Move ``dt`` forward to the next minute that is a multiple of five.

    Timestamps whose minute is already a multiple of five are returned
    unchanged. Otherwise the seconds are kept, sub-second precision is dropped
    (the earlier string round-trip through '%S' dropped it as well) and the
    minute is advanced; hour and day roll over naturally, so 23:58 becomes
    00:00 of the next day instead of failing.
    """
    remainder = dt.minute % 5
    if remainder == 0:
        return dt
    # Timedelta arithmetic replaces the format/parse round-trip, which called
    # `datetime.strptime` on the `datetime` module (an AttributeError) and hid
    # the failure behind a bare `except`.
    return dt.replace(microsecond=0) + pd.Timedelta(minutes=5 - remainder)


# NOTE(review): `quantile_Method` is not defined in any visible cell; it is
# expected to be an iterable of datetime-like values - confirm where it comes from.
mod = [_ceil_to_five_minutes(dt) for dt in quantile_Method]


In [11]:
# Switch the working directory to the 2023 data folder.
DATA_DIR_2023 = "/Users/sisishao/Downloads/FinXTech/ETHUSD/ETH_data/2023"
os.chdir(DATA_DIR_2023)


In [176]:
def df_creation(start_month,start_day,directory_path="/Users/sisishao/Downloads/FinXTech/ETHUSD/ETH_data/2023",start_hour='00',start_minute='00',start_second='00'):
    """Load every CSV in ``directory_path`` into one time-sorted frame.

    Parameters
    ----------
    start_month, start_day : str
        Two-digit month and day of the first timestamp to keep (year is 2023).
    directory_path : str
        Folder holding the CSV files; each must have a ``start_at`` column
        of epoch seconds.
    start_hour, start_minute, start_second : str
        Two-digit time of day of the first timestamp to keep.

    Returns
    -------
    pandas.DataFrame
        Rows at or after the requested start, sorted by ``start_at``, with
        ``start_at`` as a regular datetime column and a fresh 0..n-1 index.

    Raises
    ------
    FileNotFoundError
        If ``directory_path`` contains no ``.csv`` file.
    """
    # Join each name with directory_path so the function no longer depends on
    # the current working directory, and skip non-CSV entries such as the
    # .DS_Store files macOS may leave in a data folder.
    csv_paths = sorted(
        os.path.join(directory_path, name)
        for name in os.listdir(directory_path)
        if name.lower().endswith(".csv")
    )
    if not csv_paths:
        raise FileNotFoundError("no .csv files found in {!r}".format(directory_path))

    # One concat over all files instead of growing the frame inside the loop.
    df_2023 = pd.concat((pd.read_csv(path) for path in csv_paths), axis=0)
    df_2023["start_at"] = pd.to_datetime(df_2023["start_at"], unit='s')
    df_2023 = df_2023.sort_values("start_at").set_index("start_at")

    start = "2023-{}-{} {}:{}:{}".format(start_month,start_day,start_hour,start_minute,start_second)
    # Label slice on the sorted datetime index keeps rows from `start` onwards.
    return df_2023.loc[start:].reset_index()

In [177]:
# Load the 2023 data from 1 January, 00:00:00 onwards.
df = df_creation(
    start_month='01',
    start_day='01',
    start_hour='00',
    start_minute='00',
    start_second='00',
)

In [178]:
class Quantile:
    """Flag closing returns in a test window that fall outside training-set quantiles.

    Parameters
    ----------
    df : pandas.DataFrame
        OHLC data with a datetime ``start_at`` column (or already indexed by it).
        The default is the module-level ``df`` as it exists when this class is
        defined, so re-run this cell after rebuilding ``df``.
    quantile : float
        Upper quantile of the training returns; ``1 - quantile`` is the lower one.
    freq : str
        Bar frequency passed to ``DataFrame.resample``; ``'1min'`` skips resampling.
    date_threshold : str
        Rows strictly before this timestamp form the training set.
    prediction_horizon : int
        Length, in minutes, of the test window that starts at ``date_threshold``
        (both ends inclusive).
    """

    def __init__(self,df = df,quantile=0.9,freq='5min',date_threshold="2023-05-31 00:00:00",prediction_horizon = 5):
        self.df = df
        self.quantile = quantile
        self.date_threshold = date_threshold
        self.prediction_horizon = prediction_horizon
        self.freq = freq

    def df_creation(self):
        """Resample the data and split it into ``(training, testing)`` frames.

        Both frames carry ``start_at`` as a regular column and keep the row
        labels of the resampled frame.
        """
        # Index a copy instead of mutating self.df in place: the frame is shared
        # with the caller (and with the class default), and the in-place version
        # left "start_at" as both index and column, which made the '1min' path
        # fail in reset_index.
        indexed = self.df
        if "start_at" in indexed.columns:
            indexed = indexed.set_index("start_at")
        # Otherwise the frame is assumed to be indexed by "start_at" already.

        if self.freq != "1min":
            indexed = indexed.resample(self.freq).agg(
                {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last'})
        df_resampled = indexed.reset_index()

        window_start = pd.to_datetime(self.date_threshold)
        window_end = window_start + pd.Timedelta(minutes=self.prediction_horizon)
        stamps = df_resampled['start_at']
        training = df_resampled[stamps < window_start]
        testing = df_resampled[(stamps >= window_start) & (stamps <= window_end)]
        return training,testing

    def _flag_returns(self, training, testing):
        """Return the row labels of ``testing`` whose close-to-close return is extreme."""
        train_returns = training["close"].pct_change()
        # NOTE(review): returns are computed within the test window only, so its
        # first bar has no return and can never be flagged - confirm this is intended.
        test_returns = testing["close"].pct_change()
        upper = train_returns.quantile(self.quantile)
        lower = train_returns.quantile(1-self.quantile)
        # A single OR-ed mask yields each label at most once, in row order.
        extreme = (test_returns >= upper) | (test_returns <= lower)
        return extreme[extreme].index.tolist()

    def quantile_anomaly(self):
        """Return the row labels (in the test frame) of the flagged bars."""
        training,testing = self.df_creation()
        return self._flag_returns(training, testing)

    def plot(self):
        """Show the test-window candlesticks (row 1) and the flagged closes (row 3)."""
        # Split once and reuse it for both the flags and the chart.
        training,testing = self.df_creation()
        anomaly_indices = self._flag_returns(training, testing)

        # Create the subplot grid
        fig = make_subplots(rows=5, cols=1, shared_xaxes=True)

        # Add candlestick trace
        fig.add_trace(go.Candlestick(x=testing['start_at'], open=testing['open'], high=testing['high'],
                                     low=testing['low'], close=testing['close']), row=1, col=1)

        # anomaly_indices are row labels of `testing`, hence .loc rather than .iloc.
        fig.add_trace(go.Scatter(x=testing.loc[anomaly_indices, 'start_at'].tolist(),
                                 y=testing.loc[anomaly_indices, 'close'].tolist(),
                                 mode='markers', marker=dict(color='red', size=3), name='Marked Points'),row=3,col=1)

        fig.update_layout(
            title='Candlestick Chart',
            yaxis_title='Price'
        )

        fig.show()

    def return_plot(self):
        """Show a histogram of the training-set closing returns."""
        training = self.df_creation()[0]
        # Plot the distribution
        training["close"].pct_change().hist()
        # Add labels and title
        plt.xlabel('Value')
        plt.ylabel('Frequency')
        plt.title('Distribution of Closing Returns')
        # Display the plot
        plt.show()




In [188]:
# Flag extreme 15-minute returns over a 10-hour window after the default
# date threshold, using the 90% / 10% training quantiles, and plot them.
test2 = Quantile(
    quantile=0.9,
    freq='15min',
    prediction_horizon=60*10,
)
test2.plot()