In [None]:
#THIS SCRIPT IS A COINTEGRATED AUGMENTED DICKEY FULLER TEST
"""
The Cointegrated Augmented Dickey-Fuller (CADF) test estimates the optimal hedge ratio by regressing one time series
on the other, and then tests the residuals of that regression (the hedged linear combination) for stationarity.

"""

In [None]:
#Import Necessary Modules
from __future__ import print_function

import statsmodels.tsa.stattools as ts 

from datetime import datetime, timedelta

import MetaTrader5 as mt5
from config import *
import pandas as pd
import plotly.express as px
from matplotlib import pyplot as plt
import matplotlib.dates as mdates
import statsmodels.formula.api as smf

In [None]:
#Check connection to MT5
result = mt5.initialize()

# Only query the account once the terminal is initialised; account_info() can
# come back as None, so it must be checked before reading .login.
account = mt5.account_info() if result else None

if account is not None and account.login == login_number:
    print("Connection to MetaTrader5 established")
else:
    print("Failed to connect at account #{}, error code: {}".format(login_number, mt5.last_error()))
    mt5.shutdown()

In [None]:
#GRAPH PLOTTING FUNCTIONS

def plot_price_series(df, ts1, ts2):
    """
    Draw two columns of a DataFrame as lines on one set of axes and show the figure.

    Args:
        df: DataFrame whose index supplies the x-values and which contains
            the columns named by ts1 and ts2.
        ts1: Name of the first column to plot; also used as its legend label.
        ts2: Name of the second column to plot; also used as its legend label.
    """
    fig, ax = plt.subplots()

    # One line per requested column, labelled with the column name.
    for column in (ts1, ts2):
        ax.plot(df.index, df[column], label=column)

    # Month-based major ticks, rendered as abbreviated month plus year.
    ax.xaxis.set_major_locator(mdates.MonthLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    ax.grid(True)
    fig.autofmt_xdate()

    ax.set_xlabel('Month/Year')
    ax.set_ylabel('Price ($)')
    ax.set_title('%s and %s Daily Prices' % (ts1, ts2))
    ax.legend()
    plt.show()
    

def plot_scatter_series(df, ts1, ts2):
    """
    Show a scatter plot of column ts2 (y-axis) against column ts1 (x-axis).

    Args:
        df: DataFrame containing the columns named by ts1 and ts2.
        ts1: Name of the column plotted on the x-axis.
        ts2: Name of the column plotted on the y-axis.
    """
    # Create a dedicated figure/axes so the points can never be drawn onto a
    # figure that happens to still be current from earlier plotting.
    _, ax = plt.subplots()
    ax.scatter(df[ts1], df[ts2])
    ax.set_xlabel('%s Price ($)' % ts1)
    ax.set_ylabel('%s Price ($)' % ts2)
    ax.set_title('%s and %s Price Scatterplot' % (ts1, ts2))
    plt.show()
    

def plot_residuals(df: pd.DataFrame):
    """
    Show the 'res' column of df as a line plotted against the DataFrame index.

    A dashed red horizontal line at y=0 is added as a visual reference.

    Args:
        df (pd.DataFrame): Frame whose index supplies the x-values (formatted
            as dates on the axis) and which contains a 'res' column.
    """
    fig, ax = plt.subplots(figsize=(12, 6))

    # Residual line plus the zero reference line.
    ax.plot(df.index, df["res"], label="Residuals")
    ax.axhline(0, color='red', linestyle='--', linewidth=1)

    # Month-based major ticks, rendered as abbreviated month plus year,
    # with the tick labels auto-rotated.
    ax.xaxis.set_major_locator(mdates.MonthLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    fig.autofmt_xdate()

    # Title and axis labels applied in a single call on the axes object.
    ax.set(
        title='Time Series of Residuals',
        xlabel='Date',
        ylabel='Residual Value',
    )
    ax.legend()
    ax.grid(True)

    plt.show()
    

In [None]:
#Get Nvidia and AMD OHLC Data from today back 365days
#OHLC DATA
LOOKBACK_DAYS = 365         # length of the daily history window, in calendar days
SIGNIFICANCE_LEVEL = 0.05   # p-value threshold used for the cointegration conclusion


def _rates_to_ohlc_frame(rates, symbol):
    """
    Convert the result of mt5.copy_rates_range into a date-indexed DataFrame.

    Args:
        rates: Value returned by mt5.copy_rates_range for one symbol.
        symbol: Symbol name, used only in the error message.

    Returns:
        pd.DataFrame of the rates, indexed by a 'date' datetime index.

    Raises:
        RuntimeError: If no rates were returned, so the failure is reported
            here instead of as a KeyError on the missing 'time' column.
    """
    if rates is None or len(rates) == 0:
        raise RuntimeError(
            "No rates returned for {}, error code: {}".format(symbol, mt5.last_error())
        )
    ohlc = pd.DataFrame(rates)
    # 'time' is in seconds since the epoch; convert it so matplotlib can format the axis as dates
    ohlc['date'] = pd.to_datetime(ohlc['time'], unit='s')
    return ohlc.set_index('date')


# Take a single timestamp so both symbols are requested over exactly the same window
range_end = datetime.now()
range_start = range_end - timedelta(days=LOOKBACK_DAYS)
# NOTE(review): datetime.now() is naive local time; the MetaTrader5 documentation
# describes bar times as UTC - confirm whether the range should be built in UTC.

nvda_rates = mt5.copy_rates_range("NVDA.NAS", mt5.TIMEFRAME_D1, range_start, range_end)
amd_rates = mt5.copy_rates_range("AMD.NAS", mt5.TIMEFRAME_D1, range_start, range_end)

# Convert to pandas DataFrames indexed by date
nvda_ohlc_data = _rates_to_ohlc_frame(nvda_rates, "NVDA.NAS")
amd_ohlc_data = _rates_to_ohlc_frame(amd_rates, "AMD.NAS")

# Align the two close series on date and keep only dates present in both
df = pd.DataFrame({
    'NVDA': nvda_ohlc_data['close'],
    'AMD': amd_ohlc_data['close']
}).dropna()

if df.empty:
    raise RuntimeError("NVDA and AMD have no overlapping dates with close prices")

# Plot the two time series
plot_price_series(df, "NVDA", "AMD")

# Create a Scatter Plot of the two time series
plot_scatter_series(df, "NVDA", "AMD")

# Calculate optimal hedge ratio "Beta"
results = smf.ols(formula='NVDA ~ AMD', data=df).fit()

beta_hr = results.params['AMD']
print(f"Hedge Ratio (beta): {beta_hr:.4f}")

# Calculate the residuals of the linear Combination
df['res'] = results.resid

# Plot the residuals
plot_residuals(df)

# Raw ADF test on the residuals, kept for reference.
# Its p-value assumes an observed series; these residuals come from an
# estimated regression, so that p-value overstates the evidence for
# cointegration and is not used for the conclusion below.
cadf = ts.adfuller(df["res"])
print(cadf, '\n')

# Engle-Granger two-step test: same regression-then-unit-root idea, but with
# p-value and critical values appropriate for regression residuals.
coint_t, coint_pvalue, coint_crit = ts.coint(df['NVDA'], df['AMD'])
print(
    f"Engle-Granger test: t-stat={coint_t:.4f}, p-value={coint_pvalue:.4f}, "
    f"critical values (1%, 5%, 10%)={coint_crit}"
)

if coint_pvalue < SIGNIFICANCE_LEVEL:
    print(f"Conclusion: The series are likely cointegrated (p-value < {SIGNIFICANCE_LEVEL}).")
else:
    print(f"Conclusion: The series are likely not cointegrated (p-value >= {SIGNIFICANCE_LEVEL}).")

 