#### Import relevant libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import adfuller

#### Load dataset

In [2]:
# Path is relative to the notebook's working directory.
passenger_stats_csv = "data/SF_Air_Traffic_Passenger_Statistics_Transformed.csv"
air_traffic_data = pd.read_csv(passenger_stats_csv)

#### Inspect the first 5 rows, shape, and data types of the dataset

In [3]:
# Preview the first five rows to sanity-check the loaded columns.
air_traffic_data.head(n=5)

Unnamed: 0,Date,Total Passenger Count
0,200601,2448889
1,200602,2223024
2,200603,2708778
3,200604,2773293
4,200605,2829000


In [4]:
# (rows, columns) of the frame as loaded.
air_traffic_data.shape

(132, 2)

In [5]:
# Check how pandas typed each column on load, before any conversion.
air_traffic_data.dtypes

Date                     int64
Total Passenger Count    int64
dtype: object

#### Convert the integer `Date` column (YYYYMM) to datetime

In [6]:
# Date is stored as an integer shaped like YYYYMM (e.g. 200601); parse it with
# an explicit format so it becomes a real timestamp column.
parsed_dates = pd.to_datetime(air_traffic_data['Date'], format="%Y%m")
air_traffic_data['Date'] = parsed_dates

In [7]:
# Re-check the dtypes to confirm the Date conversion took effect.
air_traffic_data.dtypes

Date                     datetime64[ns]
Total Passenger Count             int64
dtype: object

#### Set date as index

In [8]:
# Promote Date from a column to the index so the frame is time-indexed.
# Reassigning (instead of inplace=True) and guarding on the column's presence
# keeps this cell re-runnable: once Date is the index it is no longer a column,
# so an unguarded second set_index('Date') would raise KeyError.
if 'Date' in air_traffic_data.columns:
    air_traffic_data = air_traffic_data.set_index('Date')
air_traffic_data.shape

(132, 1)

#### Check stationarity with the Augmented Dickey-Fuller (ADF) test

In [9]:
# Augmented Dickey-Fuller test. Null hypothesis: the series has a unit root,
# i.e. it is non-stationary.
# adfuller works on a 1-D series, so select the passenger-count column
# explicitly instead of passing the whole DataFrame; that only worked because
# the frame happens to have a single column.
# With default arguments the result is a tuple:
#   (test statistic, p-value, lags used, observations used,
#    critical values dict, best information criterion)
adf_result = adfuller(air_traffic_data['Total Passenger Count'])
adf_result

(0.7015289287377346,
 0.9898683326442054,
 13,
 118,
 {'1%': -3.4870216863700767,
  '5%': -2.8863625166643136,
  '10%': -2.580009026141913},
 3039.0876643475)

In [10]:
# Pull out the fields of the ADF result tuple that are reported below.
test_statistic = adf_result[0]
p_value = adf_result[1]
critical_values = adf_result[4]

print('ADF Test Statistic: %f' % test_statistic)
print('p-value: %f' % p_value)
print('Critical Values:')
print(critical_values)

# Decision at the 5% level: a test statistic below (more negative than) the
# critical value rejects the unit-root null hypothesis.
is_stationary = test_statistic < critical_values["5%"]
verdict = (
    "Reject Null Hypothesis - Time Series is Stationary"
    if is_stationary
    else "Failed to Reject Null Hypothesis - Time Series is Non-Stationary"
)
print(verdict)

ADF Test Statistic: 0.701529
p-value: 0.989868
Critical Values:
{'1%': -3.4870216863700767, '5%': -2.8863625166643136, '10%': -2.580009026141913}
Failed to Reject Null Hypothesis - Time Series is Non-Stationary
