In [3]:
import pandas as pd
from pandas import read_csv
from statsmodels.tsa.stattools import adfuller

In [23]:
# Load the UK/US exchange-rate workbook; the data is read from the sheet named "1".
df = pd.read_excel(
    "exchange-rate-uk-us-data.xlsx",
    sheet_name="1",
)

In [24]:
# Preview the first ten rows of the loaded frame.
df.head(n=10)

Unnamed: 0,observation_date,GBPUSD
0,2016-09-28,1.2996
1,2016-09-29,1.2991
2,2016-09-30,1.3015
3,2016-10-03,1.284
4,2016-10-04,1.2746
5,2016-10-05,1.2745
6,2016-10-06,1.2647
7,2016-10-07,1.2434
8,2016-10-11,1.2218
9,2016-10-12,1.2195


In [25]:
# Keep only the GBPUSD column; this is the series handed to the ADF test below.
series = df.loc[:, 'GBPUSD']

### The Augmented Dickey-Fuller test is a type of statistical test called a unit root test.

The intuition behind a unit root test is that it determines how strongly a time series is defined by a trend.

There are a number of unit root tests and the Augmented Dickey-Fuller may be one of the more widely used. It uses an autoregressive model and optimizes an information criterion across multiple different lag values.

The null hypothesis of the test is that the time series can be represented by a unit root, that it is not stationary (has some time-dependent structure). The alternate hypothesis (rejecting the null hypothesis) is that the time series is stationary.

- **Null Hypothesis (H0):** If it fails to be rejected, this suggests the time series has a unit root, meaning it is non-stationary. It has some time-dependent structure.
- **Alternate Hypothesis (H1):** The null hypothesis is rejected; this suggests the time series does not have a unit root, meaning it is stationary. It does not have time-dependent structure.

We interpret this result using the p-value from the test. A p-value below a threshold (such as 5% or 1%) suggests we reject the null hypothesis (the series is stationary); otherwise, a p-value above the threshold suggests we fail to reject the null hypothesis (the series is non-stationary).

### p-value > 0.05: Fail to reject the null hypothesis (H0), the data has a unit root and is non-stationary.
### p-value <= 0.05: Reject the null hypothesis (H0), the data does not have a unit root and is stationary.
Below, the Augmented Dickey-Fuller test is calculated on the GBP/USD exchange-rate series loaded above, and then on two further datasets (UK Covid-19 case counts and air-passenger numbers). The statsmodels library provides the adfuller() function that implements the test.

https://machinelearningmastery.com/time-series-data-stationary-python/

In [26]:
# Augmented Dickey-Fuller test on the raw values of the GBPUSD series.
X = series.values
result = adfuller(X)

# Of the returned tuple, only three fields are reported: index 0 (test statistic),
# index 1 (p-value) and index 4 (dict of critical values keyed by significance level).
adf_statistic, p_value, critical_values = result[0], result[1], result[4]

print('ADF Statistic: %f' % adf_statistic)
print('p-value: %f' % p_value)
print('Critical Values:')
for level, threshold in critical_values.items():
    print('\t%s: %.3f' % (level, threshold))

ADF Statistic: -2.435778
p-value: 0.131892
Critical Values:
	1%: -3.437
	5%: -2.865
	10%: -2.568


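**Result:** the p-value (0.132) is above 0.05 and the ADF statistic (-2.436) is above the 5% critical value (-2.865), so we fail to reject H0: the GBP/USD (FOREX) series is non-stationary.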

In [37]:
# Rebind `df` to the UK Covid-19 workbook (this replaces the exchange-rate frame).
df = pd.read_excel(io="Covid19_uk.xlsx")

In [32]:
# Summary statistics of the frame currently bound to `df`, as a sanity check on value ranges.
df.describe()

Unnamed: 0,day,month,year,cases,deaths,popData2018
count,166.0,166.0,166.0,166.0,166.0,166.0
mean,15.018072,3.259036,2020.0,1773.343373,250.975904,66488991.0
std,8.819038,1.59898,0.0,2064.965853,328.086277,0.0
min,1.0,1.0,2020.0,-525.0,0.0,66488991.0
25%,7.25,2.0,2020.0,0.0,0.0,66488991.0
50%,14.0,3.0,2020.0,836.5,55.5,66488991.0
75%,22.75,5.0,2020.0,3615.5,405.0,66488991.0
max,31.0,6.0,2020.0,8719.0,1172.0,66488991.0


In [34]:
# Restrict the frame to rows whose dateRep is 2020-03-01 or later.
# NOTE(review): assumes `dateRep` is datetime-like so the string bound is compared as a
# date; if the column holds plain strings this is a lexicographic comparison — confirm.
# NOTE(review): the recorded execution counts show this cell (In [34]) ran before the
# workbook was reloaded (In [37]), so the saved ADF output for `cases` may come from
# the unfiltered data — re-run the notebook top to bottom to confirm.
df = df.loc[df['dateRep'] >= '2020-03-01']

In [38]:
# Select the `cases` column as the series for the next ADF test.
series = df.loc[:, 'cases']

In [39]:
# Augmented Dickey-Fuller test on the raw values of the `cases` series.
X = series.values
result = adfuller(X)

# Of the returned tuple, only three fields are reported: index 0 (test statistic),
# index 1 (p-value) and index 4 (dict of critical values keyed by significance level).
adf_statistic, p_value, critical_values = result[0], result[1], result[4]

print('ADF Statistic: %f' % adf_statistic)
print('p-value: %f' % p_value)
print('Critical Values:')
for level, threshold in critical_values.items():
    print('\t%s: %.3f' % (level, threshold))

ADF Statistic: -1.814167
p-value: 0.373451
Critical Values:
	1%: -3.474
	5%: -2.881
	10%: -2.577


In [40]:
# Rebind `df` to the air-passengers workbook (this replaces the previous frame).
df = pd.read_excel(io="Air-passengersdata2.xlsx")

In [41]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,Year,#Passengers
0,1949-01-01,112
1,1949-02-01,118
2,1949-03-01,132
3,1949-04-01,129
4,1949-05-01,121


In [42]:
# Select the `#Passengers` column as the series for the next ADF test.
series = df.loc[:, '#Passengers']

In [43]:
# Augmented Dickey-Fuller test on the raw values of the `#Passengers` series.
X = series.values
result = adfuller(X)

# Of the returned tuple, only three fields are reported: index 0 (test statistic),
# index 1 (p-value) and index 4 (dict of critical values keyed by significance level).
adf_statistic, p_value, critical_values = result[0], result[1], result[4]

print('ADF Statistic: %f' % adf_statistic)
print('p-value: %f' % p_value)
print('Critical Values:')
for level, threshold in critical_values.items():
    print('\t%s: %.3f' % (level, threshold))

ADF Statistic: 0.815369
p-value: 0.991880
Critical Values:
	1%: -3.482
	5%: -2.884
	10%: -2.579
