In [15]:
import numpy as np
import pandas as pd

import statsmodels
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint, adfuller

import matplotlib.pyplot as plt

In [16]:
file_path = "US10_yr_2022.csv"
#  test for stationarity for past year US 10 yr Treasury bond, 2022-2023

In [17]:
df = pd.read_csv(file_path)

In [18]:
print(df)

           Date  PX_LAST  PX_BID
0     11/3/2023   4.5724  4.5744
1     11/2/2023   4.6590  4.6611
2     11/1/2023   4.7341  4.7362
3    10/31/2023   4.9307  4.9328
4    10/30/2023   4.8942  4.8964
..          ...      ...     ...
255  11/11/2022   3.8125  3.8162
256  11/10/2022   3.8125  3.8162
257   11/9/2022   4.0923  4.0986
258   11/8/2022   4.1234  4.1297
259   11/7/2022   4.2135  4.2177

[260 rows x 3 columns]


In [23]:
T_series = df['PX_LAST']
print(T_series)

0      4.5724
1      4.6590
2      4.7341
3      4.9307
4      4.8942
        ...  
255    3.8125
256    3.8125
257    4.0923
258    4.1234
259    4.2135
Name: PX_LAST, Length: 260, dtype: float64


In [24]:
def check_for_stationarity(X, cutoff=0.01):
    # H_0 in adfuller is unit root exists (non-stationary)
    # We must observe significant p-value to convince ourselves that the series is stationary
    # adfuller (augmented dicky fuller test) is a statistically significant test for stationary dataset
    pvalue = adfuller(X)[1]
    if pvalue < cutoff:
        print('p-value = ' + str(pvalue) + ' The series ' + X.name +' is likely stationary.')
        return True
    else:
        print('p-value = ' + str(pvalue) + ' The series ' + X.name +' is likely non-stationary.')
        return False

In [25]:
check_for_stationarity(T_series)

p-value = 0.3485143592625197 The series PX_LAST is likely non-stationary.


False

In [29]:
# check if 1st diff is stationary
T1_series = T_series.diff()[1:]
print(T1_series)

1      0.0866
2      0.0751
3      0.1966
4     -0.0365
5     -0.0596
        ...  
255   -0.0411
256    0.0000
257    0.2798
258    0.0311
259    0.0901
Name: PX_LAST, Length: 259, dtype: float64


In [30]:
check_for_stationarity(T1_series)

p-value = 1.2173411737041147e-23 The series PX_LAST is likely stationary.


True

In [38]:
# now check for stationarity over longer time-frame i.e. 2000-2023 for US 10 year treasurty
df2 = pd.read_csv("US10_yr_2000.csv")
print(df2)

            Date  PX_LAST  PX_BID
0      11/3/2023   4.5724  4.5744
1      11/2/2023   4.6590  4.6611
2      11/1/2023   4.7341  4.7362
3     10/31/2023   4.9307  4.9328
4     10/30/2023   4.8942  4.8964
...          ...      ...     ...
5993  11/10/2000   5.7830  5.7830
5994   11/9/2000   5.8250  5.8250
5995   11/8/2000   5.8550  5.8550
5996   11/7/2000   5.8660  5.8660
5997   11/6/2000   5.8550  5.8550

[5998 rows x 3 columns]


In [39]:
T_series2 = df2['PX_LAST']
check_for_stationarity(T_series2)

p-value = 0.5840302941898874 The series PX_LAST is likely non-stationary.


False

In [40]:
T1_series2 = T_series2.diff()[1:]
check_for_stationarity(T1_series2)

p-value = 7.865516065070602e-30 The series PX_LAST is likely stationary.


True

# Conclusion
US Treasury bond **yield** data is I(1) integrated, and likely stationary