In [1]:
# check in two batches, first if the US, GB, JPY, and GER 10yr treasury yields are all I(1) integrated 
# US is already checked in the ADF_test.py

import numpy as np
import pandas as pd

import statsmodels
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint, adfuller

import matplotlib.pyplot as plt


In [2]:
gb = pd.read_csv("Data/csv/GB_10yr_2000.csv")
jpy = pd.read_csv("Data/csv/JPY_10yr_2000.csv")
ger = pd.read_csv("Data/csv/GER_10yr_2000.csv")
usd = pd.read_csv("Data/csv/US10_yr_2000.csv")
print(gb)

            Date  PX_LAST  YLD_CNV_LAST
0      11/2/2023   4.3796         4.380
1      11/1/2023   4.4958         4.496
2     10/31/2023   4.5094         4.509
3     10/30/2023   4.5576         4.558
4     10/27/2023   4.5422         4.542
...          ...      ...           ...
5981  11/13/2000   5.0964         5.096
5982  11/10/2000   5.1322         5.132
5983   11/9/2000   5.1550         5.155
5984   11/8/2000   5.1713         5.171
5985   11/7/2000   5.1676         5.168

[5986 rows x 3 columns]


In [3]:
# check for stationarity for all 3
def check_for_stationarity(X, cutoff=0.01):
    # H_0 in adfuller is unit root exists (non-stationary)
    # We must observe significant p-value to convince ourselves that the series is stationary
    # adfuller (augmented dicky fuller test) is a statistically significant test for stationary dataset
    pvalue = adfuller(X)[1]
    if pvalue < cutoff:
        print('p-value = ' + str(pvalue) + ' The series ' + X.name +' is likely stationary.')
        return True
    else:
        print('p-value = ' + str(pvalue) + ' The series ' + X.name +' is likely non-stationary.')
        return False


In [4]:
gb_series = gb['PX_LAST']
jpy_series = jpy['PX_LAST']
ger_series = ger['PX_LAST']
usd_series = usd['PX_LAST']

In [5]:
check_for_stationarity(gb_series)
check_for_stationarity(jpy_series)
check_for_stationarity(ger_series)
check_for_stationarity(usd_series)

p-value = 0.7188404905250849 The series PX_LAST is likely non-stationary.
p-value = 0.8301394157255471 The series PX_LAST is likely non-stationary.
p-value = 0.9349103498995713 The series PX_LAST is likely non-stationary.
p-value = 0.5840302941898874 The series PX_LAST is likely non-stationary.


False

In [6]:
# take the diff, I(1)
gb1 = gb_series.diff()[1:]
gb1.name = 'GB 10yr'
jpy1 = jpy_series.diff()[1:]
jpy1.name = 'JPY 10yr'
ger1 = ger_series.diff()[1:]
ger1.name = 'GER 10yr'
usd1 = usd_series.diff()[1:]
usd1.name = 'US 10yr'

In [7]:
check_for_stationarity(gb1)
check_for_stationarity(jpy1)
check_for_stationarity(ger1)
check_for_stationarity(usd1)

p-value = 0.0 The series GB 10yr is likely stationary.
p-value = 7.294296317804492e-30 The series JPY 10yr is likely stationary.
p-value = 0.0 The series GER 10yr is likely stationary.
p-value = 7.865516065070602e-30 The series US 10yr is likely stationary.


True

In [8]:
print(gb_series[:5911])

0       4.3796
1       4.4958
2       4.5094
3       4.5576
4       4.5422
         ...  
5906    4.9044
5907    4.9768
5908    4.9779
5909    4.9854
5910    4.9611
Name: PX_LAST, Length: 5911, dtype: float64


In [9]:
print(jpy1.shape)

(5911,)


In [10]:
coint(gb_series[:5911], jpy_series[:5911])
# cointegrated by .05 

(-3.40602114581449,
 0.04168609362817977,
 array([-3.89829407, -3.33716405, -3.04516771]))

In [11]:
coint(ger_series[:5911], jpy_series[:5911])

(-3.046975084959263,
 0.09945719386187479,
 array([-3.89829407, -3.33716405, -3.04516771]))

In [12]:
coint(ger_series[:5911], gb_series[:5911])
# cointegrated by .05 

(-3.6857647529792343,
 0.019111892333026227,
 array([-3.89829407, -3.33716405, -3.04516771]))

In [13]:
coint(usd_series[:5911], jpy_series[:5911])

(-3.2052900788536816,
 0.06904757319983224,
 array([-3.89829407, -3.33716405, -3.04516771]))

In [14]:
coint(usd_series[:5911], gb_series[:5911])

(-3.246063888519483,
 0.0625562395840645,
 array([-3.89829407, -3.33716405, -3.04516771]))

In [15]:
coint(usd_series[:5911], ger_series[:5911])
# cointegrated by .05 

(-3.339649223647702,
 0.04950968583600541,
 array([-3.89829407, -3.33716405, -3.04516771]))

In [16]:
# Of the 6 pairs: (GER:JPY, GER: GB, GB:JPY, US:GER, US:JPY, US:GB), only US:GER, GER:GB, and GB:JPY are co-integrated under threshold of .05.
# GER:JPY has p-value of .09, highest among the 6 pairs.
# The other two pairs US:JPY, US:GB are around .6, so close to threshold. 
# Interesting to note that while GER:GB and GB:JPY are individually co-integrated, GER:JPY is not cointegrated and while US:GER and GER:GB are 
# individually cointegrated, US:GB is not individually co-integrated although within plus/minus .02.
# repesenting < 5% chance that the null hypothesis (no cointegration) could generate such as distribution

### cleaning up:

- turn everything into functions/classes
- display graphs showing the movement of asset prices / returns

### next steps:

- find beta (regression coefficient) 
- find cointegated sets

arima : predict future returns using previous moving averages, returns, etc...
idea of arima is to predict future returns using past returns -> why does the series need to be stationary?

beta * A = B -> IS THE STATIONARITY
beta * A - B = 0 --> beta * A - B is stationary
if at t0, beta * A > B, either B is underpriced or beta * A is overpriced

