In [1]:
# Standard Library Imports
import os
import datetime
import random
import warnings

# Data Manipulation and Numerical Operations
import pandas as pd
import numpy as np
import scipy as sp
from scipy import stats
from scipy.stats import genpareto, skew, kurtosis, t
from scipy.signal import periodogram

# Plotting Libraries
import matplotlib.pyplot as plt
import seaborn as sns
import plotnine as p9
from plotnine import (
    ggplot, aes, geom_line, geom_histogram, geom_boxplot,
    labs, theme_minimal, theme, element_text
)

# Time Series and Statistical Analysis
import statsmodels.api as sm
from statsmodels.tsa.stattools import acf, pacf, adfuller
from statsmodels.graphics.gofplots import qqplot
from statsmodels.tsa.arima.model import ARIMA
import statsmodels.tsa.stattools as ts

# Machine Learning
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity

# Financial Time Series Modeling
from arch import arch_model

# Wavelet Transform and Segmentation
import pywt
import ruptures as rpt

# Hidden Markov Models
from hmmlearn.hmm import GaussianHMM

# API for Financial Data
import quandl

# Ignore Warnings
warnings.filterwarnings('ignore')

In [2]:
def grab_quandl_table(
    table_path,
    avoid_download=False,
    replace_existing=False,
    date_override=None,
    allow_old_file=False,
    **kwargs,
):
    """Download a Quandl table export to a dated zip file and return a path to it.

    Files are stored under ``~/quandl_data_table_downloads`` as
    ``<table_path>_<YYYYMMDD>.zip``; after a successful download without
    ``date_override`` a ``<table_path>_latest.zip`` symlink is pointed at the
    new file.

    Parameters
    ----------
    table_path : str
        Table code passed to ``quandl.export_table`` (e.g. ``'AR/IVM'``). It is
        also used as a relative path, so ``/`` creates a sub-directory.
    avoid_download : bool
        If True and the ``_latest`` symlink resolves to an existing file,
        return the symlink immediately without touching the network.
    replace_existing : bool
        If True, an existing dated file is deleted and downloaded again.
    date_override : str or None
        Date tag to use in the file name instead of today's ``%Y%m%d``. When
        given, the ``_latest`` symlink is not updated.
    allow_old_file : bool
        Only affects the return value when ``date_override`` is given (see
        Returns).
    **kwargs
        Forwarded to ``quandl.export_table``. An ``api_key`` entry takes
        precedence over the ``QUANDL_API_KEY`` environment variable.

    Returns
    -------
    str or None
        * the ``_latest`` symlink path when ``avoid_download`` short-circuits,
        * the dated file path when a non-empty dated file already exists,
        * after a successful download: the symlink path if ``date_override``
          is None or ``allow_old_file`` is true, otherwise the literal string
          ``"NoFileAvailable"``,
        * ``None`` when the download produced no (or an empty) file.

    Notes
    -----
    The API key is no longer embedded in the source: supply it through
    ``api_key=...`` or the ``QUANDL_API_KEY`` environment variable. If neither
    is set, no key is passed and quandl falls back to its own configuration.
    """
    # expanduser honours $HOME when it is set and still works when it is not.
    root_data_dir = os.path.join(os.path.expanduser("~"), "quandl_data_table_downloads")
    data_symlink = os.path.join(root_data_dir, f"{table_path}_latest.zip")
    if avoid_download and os.path.exists(data_symlink):
        print(f"Skipping any possible download of {table_path}")
        return data_symlink

    table_dir = os.path.dirname(data_symlink)
    if not os.path.isdir(table_dir):
        print(f'Creating new data dir {table_dir}')
        # makedirs: table paths such as 'AR/IVM' need the parent directories
        # too, which a plain mkdir cannot create on a first run.
        os.makedirs(table_dir, exist_ok=True)

    if date_override is None:
        my_date = datetime.datetime.now().strftime("%Y%m%d")
    else:
        my_date = date_override
    data_file = os.path.join(root_data_dir, f"{table_path}_{my_date}.zip")

    if os.path.exists(data_file):
        file_size = os.path.getsize(data_file)
        if replace_existing or not file_size > 0:
            print(f"Removing old file {data_file} size {file_size}")
            # Actually delete it, so a failed download below cannot be mistaken
            # for a fresh file by the size check.
            os.remove(data_file)
        else:
            print(
                f"Data file {data_file} size {file_size} exists already, no need to download"
            )
            return data_file

    # Never hard-code credentials: take the key from the caller or the environment.
    api_key = kwargs.pop("api_key", None) or os.environ.get("QUANDL_API_KEY")
    if api_key:
        kwargs["api_key"] = api_key
    quandl.export_table(table_path, filename=data_file, **kwargs)

    # Check existence before asking for the size; a missing file is a failed
    # download, not an exception.
    file_size = os.path.getsize(data_file) if os.path.exists(data_file) else 0
    if file_size > 0:
        print(f"Download finished: {file_size} bytes")
        if not date_override:
            # lexists also detects a dangling symlink (target deleted), which
            # would otherwise make os.symlink fail with FileExistsError.
            if os.path.lexists(data_symlink):
                print("Removing old symlink")
                os.unlink(data_symlink)
            print(f"Creating symlink: {data_file} -> {data_symlink}")
            os.symlink(data_file, data_symlink)
    else:
        print(f"Data file {data_file} failed download")
        return None
    # NOTE(review): with date_override set and allow_old_file false this returns
    # the placeholder string even though the dated file was just downloaded;
    # kept as-is for compatibility -- confirm the intended contract.
    return data_symlink if (date_override is None or allow_old_file) else "NoFileAvailable"


def fetch_quandl_table(table_path, avoid_download=True, **kwargs):
    """Return the Quandl table ``table_path`` as a pandas DataFrame.

    The path is resolved by ``grab_quandl_table`` (all extra keyword arguments
    are forwarded to it) and then handed to ``pd.read_csv``. Note that this
    wrapper defaults to ``avoid_download=True``.
    """
    zip_path = grab_quandl_table(table_path, avoid_download=avoid_download, **kwargs)
    return pd.read_csv(zip_path)

In [3]:
# Load the AR/IVM table; avoid_download=False makes the helper look for the
# dated export instead of returning the "_latest" symlink straight away.
t_ivm = fetch_quandl_table(table_path='AR/IVM', avoid_download=False)
t_ivm.head(5)

Data file /Users/anandtheerthanakhate/quandl_data_table_downloads/AR/IVM_20250115.zip size 323894877 exists already, no need to download


Unnamed: 0,exchange_code,futures_code,option_code,expiration,date,futures,atm,rr25,rr10,fly25,...,beta1,beta2,beta3,beta4,beta5,beta6,min_money,max_money,days_expiration,days_termination
0,CBT,BO,BO,1M,2024-12-16,41.731864,0.30264,0.017717,0.028022,0.008861,...,0.136273,2.253584,-4.503889,-12.900171,33.798866,123.711572,-0.208212,0.237649,31.0,31.0
1,CBT,BO,BO,1W,2024-12-16,41.589492,0.275612,-0.009755,-0.026915,0.008703,...,-0.184783,12.724505,-9.970579,-380.836794,172.546492,3543.441557,-0.105488,0.089992,7.0,7.0
2,CBT,BO,BO,1Y,2024-12-16,42.360909,0.280757,0.027657,0.056929,0.003687,...,0.080256,-0.029207,-0.342699,1.571995,0.678517,-3.644149,-0.521268,0.535857,365.0,365.0
3,CBT,BO,BO,2M,2024-12-16,41.915763,0.329431,0.043624,0.072584,0.009547,...,0.234473,0.732446,-3.166721,3.916127,14.57911,-26.63962,-0.291922,0.40686,62.0,62.0
4,CBT,BO,BO,3M,2024-12-16,42.079836,0.33162,0.046527,0.078083,0.00952,...,0.207442,0.439917,-1.951898,2.573566,6.612833,-11.690214,-0.344729,0.488955,90.0,90.0


In [4]:
# Show the available columns, parse the date column, then keep only rows
# inside the [start_date, end_date] window (both ends inclusive).
print(t_ivm.columns)
t_ivm['date'] = pd.to_datetime(t_ivm['date'])
start_date = pd.Timestamp('2021-12-03')
end_date = pd.Timestamp('2024-08-31')
t_ivm = t_ivm.loc[t_ivm['date'].between(start_date, end_date)].reset_index(drop=True)
t_ivm.head()

Index(['exchange_code', 'futures_code', 'option_code', 'expiration', 'date',
       'futures', 'atm', 'rr25', 'rr10', 'fly25', 'fly10', 'beta1', 'beta2',
       'beta3', 'beta4', 'beta5', 'beta6', 'min_money', 'max_money',
       'days_expiration', 'days_termination'],
      dtype='object')


Unnamed: 0,exchange_code,futures_code,option_code,expiration,date,futures,atm,rr25,rr10,fly25,...,beta1,beta2,beta3,beta4,beta5,beta6,min_money,max_money,days_expiration,days_termination
0,CBT,BO,BO,V2024,2023-07-27,58.04,0.303074,-0.004934,,-0.005185,...,-0.013514,-0.210955,0.27414,1.697856,-1.023394,-4.179139,-0.549587,0.444217,421.24,445.0
1,ICE,KC,KC,H2026,2023-10-02,158.55,0.288991,0.015911,,2.2e-05,...,0.025836,-0.177754,0.101765,1.43522,-0.445698,-2.374568,-0.684043,0.532352,863.15,899.0
2,NYM,NG,NG,F2029,2023-07-27,4.703,0.334631,,,,...,0.159299,0.133878,-1.147995,-1.006677,1.590494,1.4777,-0.73727,0.754384,1979.0,1980.0
3,NYX,C,C,U2025,2023-10-02,2666.0,0.231555,,,,...,0.12061,-2.579713,1.159924,594.907525,-91.601577,-52350.067091,-0.074339,0.075473,696.8,714.0
4,CBT,FF,FF,H2026,2024-07-08,96.26,0.407694,,,,...,0.142212,-0.810909,-0.064047,9.675144,1.422305,-31.649977,-0.385662,0.454088,632.08,632.0


In [5]:
def select_second_month(df, min_days=30):
    """Pick, for every date, the row with the shortest qualifying expiration.

    Rows whose ``days_expiration`` is not strictly greater than ``min_days``
    are discarded; among the remaining rows the one with the smallest
    ``days_expiration`` is kept for each ``date``.

    The whole row is selected as a unit. (``groupby('date').first()`` is not
    used because it returns the first *non-null* value of each column
    separately, which can stitch together values from different expirations
    when the nearest row has missing fields.)

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``date`` and ``days_expiration`` columns.
    min_days : float, default 30
        Exclusive lower bound on ``days_expiration``.

    Returns
    -------
    pandas.DataFrame
        One row per date, sorted by date, with ``date`` as the first column
        followed by the remaining columns in their original order, and a
        fresh 0..n-1 index.
    """
    eligible = df[df['days_expiration'] > min_days]
    # Rows without a date cannot be assigned to a group; drop them, as the
    # previous groupby-based implementation implicitly did.
    ordered = (
        eligible
        .dropna(subset=['date'])
        .sort_values(['date', 'days_expiration'])
    )
    nearest = ordered.drop_duplicates(subset='date', keep='first')
    # Keep the historical column layout: the grouping key comes first.
    column_order = ['date'] + [col for col in nearest.columns if col != 'date']
    return nearest[column_order].reset_index(drop=True)

### Pair 1: 0. ICE.TFM versus NYM.NG × 13.9239

In [6]:
# W leg of the pair: rows for futures code TFM on exchange ICE, reduced to one
# row per date by select_second_month.
df_W = t_ivm.loc[t_ivm['exchange_code'].eq('ICE') & t_ivm['futures_code'].eq('TFM')]
df_W_second = select_second_month(df_W)
df_W_second.head()

Unnamed: 0,date,exchange_code,futures_code,option_code,expiration,futures,atm,rr25,rr10,fly25,...,beta1,beta2,beta3,beta4,beta5,beta6,min_money,max_money,days_expiration,days_termination
0,2021-12-03,ICE,TFM,TFM,1M,89.465172,1.319037,0.376558,0.000734,0.114331,...,0.539308,0.611756,-0.572148,-0.20479,0.769922,-0.256666,-0.638413,0.970686,31.0,31.0
1,2021-12-06,ICE,TFM,TFM,1M,89.906034,1.366206,0.366281,,0.105988,...,0.5354,0.503298,-0.665416,0.003996,0.899471,-0.478176,-0.653545,0.989814,31.0,31.0
2,2021-12-07,ICE,TFM,TFM,1M,95.856517,1.349121,0.349928,,0.102696,...,0.488888,0.437723,-0.331826,0.205791,0.466401,-0.436356,-0.640064,0.926889,31.0,31.0
3,2021-12-08,ICE,TFM,TFM,1M,101.475,1.422079,0.376503,,0.11608,...,0.47511,0.460816,-0.141279,-0.063249,0.034089,-0.055042,-0.68396,0.915476,31.0,31.0
4,2021-12-09,ICE,TFM,TFM,1M,100.438448,1.400012,0.403893,,0.109052,...,0.560443,0.263245,-0.166715,0.3743,-0.069076,-0.230129,-0.637622,0.938439,31.0,31.0


In [7]:
# X leg of the pair: rows for futures code NG on exchange NYM, one row per date.
# futures_adj scales the futures price by the 13.9239 multiplier from the pair
# definition (NYM.NG x 13.9239).
df_X = t_ivm.loc[t_ivm['exchange_code'].eq('NYM') & t_ivm['futures_code'].eq('NG')]
df_X_second = select_second_month(df_X).assign(
    futures_adj=lambda frame: frame['futures'] * 13.9239
)
df_X_second.head()

Unnamed: 0,date,exchange_code,futures_code,option_code,expiration,futures,atm,rr25,rr10,fly25,...,beta2,beta3,beta4,beta5,beta6,min_money,max_money,days_expiration,days_termination,futures_adj
0,2021-12-03,NYM,NG,NG,1M,4.121828,0.724024,0.214463,0.39908,0.038595,...,0.427835,-2.059052,2.16198,1.607154,-2.317277,-0.335525,0.792773,31.0,31.0,57.391915
1,2021-12-06,NYM,NG,NG,1M,3.648172,0.681706,0.155498,0.291336,0.038884,...,1.122779,-2.42759,-0.697706,7.756482,-6.06643,-0.358451,0.722753,31.0,31.0,50.796788
2,2021-12-07,NYM,NG,NG,1M,3.697448,0.698561,0.109787,0.24115,0.0218,...,0.439699,0.010424,1.568657,-4.923368,3.993275,-0.359257,0.729975,31.0,31.0,51.4829
3,2021-12-08,NYM,NG,NG,1M,3.800862,0.74133,0.110508,0.244537,0.021498,...,0.352477,0.308783,0.93313,-4.626956,4.12158,-0.389523,0.730843,31.0,31.0,52.922823
4,2021-12-09,NYM,NG,NG,1M,3.802621,0.685,0.125825,0.279933,0.020226,...,0.175057,0.784971,3.686666,-15.410017,12.795293,-0.362717,0.664367,31.0,31.0,52.94731


Unnamed: 0_level_0,W,X,s1
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-12-03,89.465172,57.391915,-32.073257
2021-12-06,89.906034,50.796788,-39.109247
2021-12-07,95.856517,51.4829,-44.373617
2021-12-08,101.475,52.922823,-48.552177
2021-12-09,100.438448,52.94731,-47.491138
