In [None]:
## kalman 2.0

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from math import sqrt
import os
from numpy import dot
from numpy import sum, tile, linalg, log, pi, exp
from glob import glob
from numpy.linalg import inv, det
import math as m

# Section 1: Basic Functions


In [5]:
def read_file(filename):
    """Load a CSV into a DataFrame with pandas' default parsing options.

    :param filename: anything ``pd.read_csv`` accepts (path or file-like object)
    :return: the parsed DataFrame
    """
    frame = pd.read_csv(filename)
    return frame

def get_data(timeframe, data_dir=r"C:\Users\ayush\Desktop\IITB\ZeltaLabPS\BackTesting\data\data"):
    """Read the ``btcusdt_<timeframe>_val.csv`` file from ``data_dir``.

    :param timeframe: string spliced into the file name, e.g. ``"1h"``
    :param data_dir: directory holding the CSV files. The default is the
        original author's absolute local path; pass your own directory to run
        the notebook on another machine.
    :return: DataFrame returned by ``read_file``
    """
    file_name = "btcusdt_" + timeframe + "_val.csv"
    return read_file(os.path.join(data_dir, file_name))

# Load btcusdt_1h_val.csv and preview its first five rows.
time_frame = "1h"
df = get_data(timeframe=time_frame)
df.head(5)

Unnamed: 0,datetime,open,high,low,close,volume
0,2020-07-24 23:30:00,9557.66,9589.0,9556.0,9580.01,1927.603117
1,2020-07-25 00:30:00,9580.0,9583.8,9560.0,9581.76,1236.182213
2,2020-07-25 01:30:00,9581.76,9620.0,9567.23,9615.29,2119.244392
3,2020-07-25 02:30:00,9615.32,9637.0,9588.08,9591.55,2088.164018
4,2020-07-25 03:30:00,9591.55,9593.46,9522.0,9533.92,2069.404213


In [6]:
def print_buckets(df, alphas, rets = ['ret_10', 'ret_30', 'ret_60', 'ret_300', 'tick_10'], aggfunc = ['mean', 'median', 'count'], buckets = 10):
    """Print, for each alpha column, a table of return stats per quantile bucket.

    :param df: frame holding the alpha and return columns
    :param alphas: names of the columns to bucket on
    :param rets: names of the columns to aggregate inside each bucket
    :param aggfunc: aggregation(s) handed to ``DataFrame.pivot_table``
    :param buckets: number of quantile bins requested from ``pd.qcut``
        (duplicate bin edges are dropped, so fewer bins may come back)
    :return: None; one pivot table is printed per alpha
    """
    for alpha in alphas:
        quantile_bins = pd.qcut(df[alpha], buckets, duplicates='drop')
        table = df.pivot_table(index=quantile_bins, values=rets, aggfunc=aggfunc)
        print(table)

In [7]:
def print_corrs(df, alphas, rets = ['ret_10', 'ret_30', 'ret_60', 'ret_300']):
    """Print a table of correlations (in percent) between alphas and returns.

    :param df: frame holding the alpha and return columns
    :param alphas: column names printed one per row
    :param rets: column names printed one per column
    :return: None; the table is printed, followed by a blank line
    """
    # 38 leading spaces so the 8-wide header cells line up with the value cells below.
    header = " " * 38 + "".join(f"{ret:>8s}" for ret in rets)
    print(header)
    for alpha in alphas:
        cells = "".join(f"{df[alpha].corr(df[ret])*100:7.2f} " for ret in rets)
        print(f"{alpha:30s} corr -> " + cells)
    print()

In [8]:
def exponential_smoothing(series, alpha):
    """Simple exponential smoothing of a sequence.

    ``result[0] = series[0]`` and, for ``i >= 1``,
    ``result[i] = alpha * series[i] + (1 - alpha) * result[i - 1]``.

    :param series: iterable of numbers (list, array or pandas Series); values
        are read in iteration order, so the Series index labels do not matter
    :param alpha: weight given to the newest observation
    :return: list of smoothed values, same length as ``series`` (empty input
        gives an empty list)
    """
    values = list(series)
    if not values:
        return []

    # Seed with the first observation (the original seeded with the second one).
    result = [values[0]]
    for value in values[1:]:
        result.append(alpha * value + (1 - alpha) * result[-1])

    return result
# Assuming you have a dataframe called "df" with a column named "close"
# alpha = 0.5
# df['smoothed_close'] = exponential_smoothing(df['close'], alpha)
def time_smooth(dataframe, col_name, halflife=15):
    """Add a time-weighted exponential moving average of ``close`` to the frame.

    The frame is modified in place: ``datetime`` is parsed with the format
    ``%Y-%m-%d %H:%M:%S`` and the smoothed series is stored under ``col_name``.

    :param dataframe: frame with ``datetime`` and ``close`` columns
    :param col_name: name of the output column (converted with ``str``)
    :param halflife: passed to ``ewm`` as ``str(halflife)``.
        NOTE(review): the string carries no time unit - confirm how pandas
        interprets a bare number here and which unit was intended.
    :return: the same ``dataframe`` object
    """
    timestamps = pd.to_datetime(dataframe['datetime'], format='%Y-%m-%d %H:%M:%S')
    dataframe['datetime'] = timestamps

    weighted = dataframe['close'].ewm(halflife=str(halflife), times=dataframe['datetime'], adjust=True)
    dataframe[str(col_name)] = weighted.mean()
    return dataframe


In [9]:
# Exponentially smoothed copies of `close`, one column per (suffix, alpha) pair.
# alpha is the weight exponential_smoothing gives to the newest close.
for _suffix, _alpha in (
    ('1', 0.99),
    ('999', 0.999),
    ('998', 0.998),
    ('995', 0.995),
    ('5', 0.95),
    ('10', 0.9),
    ('15', 0.85),
    ('30', 0.7),
    ('60', 0.5),
):
    df[f'close_{_suffix}'] = exponential_smoothing(df['close'], _alpha)

In [10]:
# change_5: one-step percentage change of the 5-row rolling mean of close, scaled by 1e4.
df['change_5'] = df['close'].rolling(window=5).mean().pct_change() * 1e4

# ret_<w>: the same quantity for a w-row rolling mean, shifted back by w rows so
# that each row holds the value observed w rows later (forward-looking target).
for _window in (5, 1, 10, 30):
    _change = df['close'].rolling(window=_window).mean().pct_change() * 1e4
    df[f'ret_{_window}'] = _change.shift(-_window)

# Drop the rows left incomplete by the rolling windows and the shifts, then
# renumber the index from 0.
df = df.dropna().reset_index(drop=True)

# Section 2: Kalman Filter

In [17]:
class KalmanFilter:
    """Linear Kalman filter stepped once per row of a DataFrame.

    For every row, the columns listed in ``MEASUREMENT_COLUMNS`` are stacked
    into a measurement vector ``Y``; a predict step and an update step are run;
    and :meth:`kalman_filter_run` writes the updated state plus the measurement
    likelihood back into the frame.

    Attributes (named as in the constructor):
        X: state mean, used as an ``(n, 1)`` column vector.
        P: state covariance, ``(n, n)``.
        A: state transition matrix.
        Q: process noise covariance.
        B, U: control matrix and control vector.
        R: measurement noise covariance.
        H: observation matrix, fixed to the ``n x n`` identity.
        K: Kalman gain of the most recent update.
        Y: most recent measurement vector (``None`` if the row was missing).
        LH: ``(density, negative log-density)`` of the most recent update.
        counter: index label of the row processed next.
        flag: set to ``True`` once a row label was not found in ``df``.
    """

    # Columns of ``df`` that form the measurement vector, in this order.
    MEASUREMENT_COLUMNS = ('close', 'close_1', 'close_995', 'close_998', 'close_999')

    def __init__(self, df, tbs, initial_state_mean, initial_state_covariance, X, P, A, Q, B, U, R):
        """
        :param df: dataframe holding the ``MEASUREMENT_COLUMNS``; it is modified
            in place by :meth:`kalman_filter_run`
        :param tbs: time between samples (stored only; no method reads it)
        :param initial_state_mean: stored only; no method reads it
        :param initial_state_covariance: stored only; no method reads it
        :param X: initial state mean
        :param P: initial state covariance
        :param A: state transition matrix
        :param Q: process noise covariance
        :param B: control matrix
        :param U: control vector
        :param R: measurement noise covariance
        """
        self.df = df
        self.tbs = tbs
        self.initial_state_mean = initial_state_mean
        self.initial_state_covariance = initial_state_covariance
        self.counter = 0
        # Initialised up front so the attribute exists even if no row is ever missing.
        self.flag = False

        self.X = X
        self.P = P
        self.A = A
        self.Q = Q
        self.B = B
        self.U = U
        self.R = R
        self.H = np.eye(self.X.shape[0])
        self.K = np.zeros((self.X.shape[0], self.X.shape[0]))
        self.Y = self.get_measurements()
        self.LH = [0, 0]

    def kalman_init(self):
        """Reset P, A, Q, B, U and R to built-in defaults (X is left untouched).

        P gets a unit diagonal and random off-diagonal entries in ``[0, 0.1)``
        drawn from the global NumPy random state. A is a fixed 5x5 matrix, so
        this only makes sense for a 5-dimensional state.
        NOTE(review): the off-diagonal entries are drawn independently, so P
        (and R) are not symmetric - confirm this is intended for a covariance.
        """
        n_state = self.X.shape[0]

        # Unit diagonal. The original built this identity in a discarded local
        # variable, which left the diagonal of self.P at zero.
        P = np.eye(n_state)

        # Small random values off the diagonal; the scale (0.1) can be adjusted.
        for i in range(n_state):
            for j in range(n_state):
                if i != j:
                    P[i, j] = np.random.rand() * 0.1
        self.P = P

        self.A = np.array([[0.5, 1, -0.5, 0, 0.1],
                           [0.5, 1, -0.5, 0, 0.05],
                           [0.3, 1.5, -0.8, 0, 0.10],
                           [0, 0, 0, 1, 0],
                           [0, 0, 0, 0, 1]])
        self.Q = np.eye(n_state) * 0.1
        self.B = self.A * 0
        self.U = self.X * 0
        # Same values as P, but an independent array so the two never alias.
        self.R = self.P.copy()

    def kf_predict(self):
        r"""Calculate the mean and covariance of :math:`P(x_{t+1} | z_{0:t})`

        Using the mean and covariance of :math:`P(x_t | z_{0:t})`, calculate the
        mean and covariance of :math:`P(x_{t+1} | z_{0:t})`:
        ``X <- A X + B U`` and ``P <- A P A^T + Q``.

        Returns
        -------
        predicted_state_mean : array
            mean of state at time t+1 given observations from times [0...t]
        predicted_state_covariance : array
            covariance of state at time t+1 given observations from times
            [0...t]
        """
        self.X = np.dot(self.A, self.X) + np.dot(self.B, self.U)
        self.P = np.dot(self.A, np.dot(self.P, self.A.T)) + self.Q

        return (self.X, self.P)

    def kf_pdf(self, M, S):
        """Gaussian density of the current measurement ``self.Y``.

        :param M: mean; either a single column (tiled across the columns of
            ``self.Y``) or a matrix with the same shape as ``self.Y``
        :param S: covariance matrix
        :return: ``(P, E)`` where ``E`` is the negative log-density and
            ``P = exp(-E)``; when they are arrays only their first element
            is returned
        """
        log_norm = 0.5 * M.shape[0] * np.log(2 * np.pi) + 0.5 * np.log(np.linalg.det(S))
        S_inv = np.linalg.inv(S)

        if M.shape[1] == 1:
            # Tile M to match the shape of Y.
            DX = self.Y - np.tile(M, (1, self.Y.shape[1]))
            # Quadratic form, evaluated per column of Y.
            E = 0.5 * np.sum(DX * np.dot(S_inv, DX), axis=0)
        else:
            # M has several columns: compare it with the measurement directly
            # (the original subtracted it from the state self.X here).
            DX = self.Y - M
            E = 0.5 * np.dot(DX.T, np.dot(S_inv, DX))
        E = E + log_norm

        # np.exp handles arrays of any size and underflows/overflows to 0/inf
        # instead of raising, unlike math.exp.
        P = np.exp(-E)

        # Return the first element of P and E if they are arrays.
        return (P[0] if isinstance(P, np.ndarray) else P, E[0] if isinstance(E, np.ndarray) else E)

    def get_measurements(self):
        """Read the measurement vector for row label ``self.counter``.

        :return: ``(len(MEASUREMENT_COLUMNS), 1)`` float array (also stored in
            ``self.Y``), or ``None`` if ``self.counter`` is not a label of
            ``df.index``; in that case a warning is printed and ``self.flag``
            is set to ``True``
        """
        if self.counter not in self.df.index:
            print(f"Warning: Index {self.counter} not found in DataFrame.")
            self.flag = True
            return None

        row = self.df.loc[self.counter, list(self.MEASUREMENT_COLUMNS)]
        self.Y = row.to_numpy(dtype=float).reshape(-1, 1)
        return self.Y

    def kf_update(self):
        """Correct the predicted state with the current measurement ``self.Y``.

        Computes the predicted measurement ``IM = H X`` and its covariance
        ``IS = R + H P H^T``, the gain ``K = P H^T IS^-1``, then
        ``X <- X + K (Y - IM)`` and ``P <- P - K IS K^T``. ``LH`` is the
        density of ``Y`` under ``N(IM, IS)``.

        :return: ``(X, P, K, LH)``
        """
        IM = np.dot(self.H, self.X)
        IS = self.R + np.dot(self.H, np.dot(self.P, self.H.T))
        self.K = np.dot(self.P, np.dot(self.H.T, np.linalg.inv(IS)))
        self.X = self.X + np.dot(self.K, (self.Y - IM))
        self.P = self.P - np.dot(self.K, np.dot(IS, self.K.T))
        self.LH = self.kf_pdf(IM, IS)
        return (self.X, self.P, self.K, self.LH)

    def kalman_filter_run(self):
        """Run predict + update for every remaining row and store the results.

        Row labels ``self.counter .. len(df) - 1`` are processed in order;
        labels missing from ``df.index`` are skipped. For each processed row
        the updated state is written to ``kalman1 .. kalman<n>``, the
        measurement density to ``kalman_conf`` and its negative log to
        ``kalman_err``, all as plain floats.

        :return: ``self.df`` (the frame passed to the constructor, modified in
            place)
        """
        n = len(self.df)
        print("n:", n)

        out_cols = [f'kalman{i + 1}' for i in range(self.X.shape[0])]
        out_cols += ['kalman_conf', 'kalman_err']

        labels = []
        rows = []
        while self.counter < n:
            self.Y = self.get_measurements()

            if self.Y is None:
                self.counter += 1
                continue

            self.kf_predict()
            self.kf_update()

            labels.append(self.counter)
            rows.append([*self.X[:, 0], self.LH[0], self.LH[1]])

            self.counter += 1

        # One assignment per column instead of seven scalar writes per row;
        # values are stored as floats rather than 1-element arrays.
        if labels:
            values = np.asarray(rows, dtype=float)
            for j, col in enumerate(out_cols):
                if col not in self.df.columns:
                    self.df[col] = np.nan
                self.df.loc[labels, col] = values[:, j]

        return self.df

In [12]:
def kalman_init(seed=None):
    """Build the initial matrices for a 5-state ``KalmanFilter``.

    :param seed: optional seed for the random off-diagonal entries of P. With
        the default ``None`` the global NumPy random state is used, exactly as
        before; pass an integer to get reproducible matrices.
    :return: ``(X, P, A, Q, B, U, R)`` where
        X is a ``(5, 1)`` zero state,
        P has a unit diagonal and random off-diagonal entries in ``[0, 0.1)``,
        A is the identity with its first row replaced by the feature importances,
        Q is ``0.1 * I``,
        B and U are all zeros,
        R holds the same values as P in an independent array.
    NOTE(review): the off-diagonal entries of P are drawn independently, so P
    and R are not symmetric - confirm this is intended for covariances.
    """
    n_state = 5
    X = np.zeros((n_state, 1))

    # Weights for the first row of A. Presumably taken from a fitted model;
    # their provenance is not shown in this notebook.
    feature_importances = np.array([4.40332102916679e-05, 3.566010909677725e-05, 3.093611319433873e-05, 0.9998415780908758, 4.779247654146511e-05])

    rng = np.random if seed is None else np.random.RandomState(seed)

    # Unit diagonal, small random values off the diagonal (scale 0.1 is adjustable).
    P = np.eye(n_state)
    for i in range(n_state):
        for j in range(n_state):
            if i != j:
                P[i, j] = rng.rand() * 0.1

    # First state is a weighted mix of all states (dominant weight first);
    # the remaining states carry over unchanged.
    A = np.eye(n_state)
    A[0] = feature_importances[[3, 0, 1, 2, 4]]

    Q = np.eye(n_state) * 0.1
    B = A * 0
    U = X * 0
    # Independent copy so that P and R never share memory.
    R = P.copy()
    return X, P, A, Q, B, U, R


X, P, A, Q, B, U, R = kalman_init()

In [40]:
def kalman_init_2(seed=None):
    """Build the initial matrices for a 5-state ``KalmanFilter`` with identity dynamics.

    :param seed: optional seed for the random off-diagonal entries of P. With
        the default ``None`` the global NumPy random state is used, exactly as
        before; pass an integer to get reproducible matrices.
    :return: ``(X, P, A, Q, B, U, R)`` where
        X is a ``(5, 1)`` zero state,
        P has a unit diagonal and random off-diagonal entries in ``[0, 0.1)``,
        A is the 5x5 identity (every state carries over unchanged),
        Q is ``0.1 * I``,
        B and U are all zeros,
        R holds the same values as P in an independent array.
    NOTE(review): the off-diagonal entries of P are drawn independently, so P
    and R are not symmetric - confirm this is intended for covariances.
    """
    n_state = 5
    X = np.zeros((n_state, 1))

    rng = np.random if seed is None else np.random.RandomState(seed)

    # Unit diagonal, small random values off the diagonal (scale 0.1 is adjustable).
    P = np.eye(n_state)
    for i in range(n_state):
        for j in range(n_state):
            if i != j:
                P[i, j] = rng.rand() * 0.1

    # The original assigned a first-row-weighted matrix and then overwrote it
    # with the identity; only the identity ever took effect.
    A = np.eye(n_state)

    Q = np.eye(n_state) * 0.1
    B = A * 0
    U = X * 0
    # Independent copy so that P and R never share memory.
    R = P.copy()
    return X, P, A, Q, B, U, R


# X, P, A, Q, B, U, R = kalman_init_2()

In [41]:
# tbs, initial_state_mean and initial_state_covariance are only stored by the
# filter; the matrices from kalman_init() drive the actual computation.
kf = KalmanFilter(
    df=df,
    tbs=30,
    initial_state_mean=0,
    initial_state_covariance=0,
    X=X,
    P=P,
    A=A,
    Q=Q,
    B=B,
    U=U,
    R=R,
)

In [42]:
# Run the filter over every row. The returned frame is the one handed to
# KalmanFilter, now carrying kalman1..kalman5, kalman_conf and kalman_err.
df = kf.kalman_filter_run()

n: 7417


In [43]:
# k_alpha<i>: filtered state i minus the raw close, coerced to numeric
# (anything that cannot be parsed as a number becomes NaN).
for _state in range(1, 6):
    _spread = df[f'kalman{_state}'] - df['close']
    df[f'k_alpha{_state}'] = pd.to_numeric(_spread, errors='coerce')

In [44]:
# Columns and aggregation used by the bucket / correlation reports below.
rets = [f'ret_{horizon}' for horizon in (1, 5, 10)]
# Alternative: ['mean', 'median', 'count']
aggfunc = ['mean']
alphas = [f'k_alpha{state}' for state in range(1, 6)]

In [45]:
# Quantile-bucket tables first, then the correlation table, for every alpha.
print_buckets(df, alphas, rets=rets, aggfunc=aggfunc)
print_corrs(df, alphas, rets=rets)

                           mean                    
                          ret_1    ret_10     ret_5
k_alpha1                                           
(-4587.015, -411.627] -0.775841  2.240344  1.006899
(-411.627, -205.976]   4.775646  2.656551  3.219985
(-205.976, -92.992]    0.222323  2.577467  1.621953
(-92.992, -39.678]     3.568704  3.386525  2.056854
(-39.678, -13.568]     3.094617  1.649724  1.696762
(-13.568, 9.183]       3.263390  2.077629  2.022224
(9.183, 45.785]        3.760109  2.044079  0.529323
(45.785, 132.174]     -1.143231  1.999906  1.052640
(132.174, 373.194]    -3.325708 -0.469933 -0.315309
(373.194, 3732.257]    8.366570 -0.028180  5.571742
                                    mean                    
                                   ret_1    ret_10     ret_5
k_alpha2                                                    
(-4578.463000000001, -368.072] -1.531105  2.070715 -0.015491
(-368.072, -178.947]            5.058008  2.902924  3.329171
(-178.947, -79.658]

In [38]:
# flag1 is 1 where the first Kalman alpha exceeds 300 and 0 otherwise
# (NaN compares False, so it maps to 0).
df['flag1'] = np.where(df['k_alpha1'] > 300, 1, 0)

# signal is the row-to-row change of flag1, with the flag before the first row
# taken as 0:
#   +1 on the row where the flag switches 0 -> 1,
#   -1 on the row where the flag switches 1 -> 0,
#    0 on every other row.
# This matches the former row-by-row loop, but it no longer depends on the
# index being 0..n-1 and it always creates the column, even when no row is
# flagged (the loop then never created `signal` and the fillna raised KeyError).
n = len(df)
_previous_flag = df['flag1'].shift(1, fill_value=0)
df['signal'] = (df['flag1'] - _previous_flag).astype(float)

In [39]:
# Write the final frame, including the flag1 and signal columns, to CSV.
# NOTE(review): absolute, machine-specific output path - consider making the
# log directory configurable.
df.to_csv(r'C:\Users\ayush\Desktop\IITB\ZeltaLabPS\BackTesting\src\logs\k2_1.csv')