In [2]:
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller, grangercausalitytests
from itertools import combinations

Load and preprocess the dataset

In [None]:
path = 'C:/Users/natha/OneDrive/Bureau/Interview trainings/Coding/Aquatic/Weather_Forecast_Ideas/data/chicago_beach_weather.csv'
df = pd.read_csv(path)

df['Measurement Timestamp'] = pd.to_datetime(df['Measurement Timestamp'], format='%m/%d/%Y %I:%M:%S %p')
stations = df['Station Name'].unique()
station_data = {station: df[df['Station Name'] == station].copy() for station in stations}

for station, data in station_data.items():
    data.set_index('Measurement Timestamp', inplace=True)
    data = data.asfreq('H')
    data['Air Temperature'].fillna(method='ffill', inplace=True)
    station_data[station] = data['Air Temperature']

Perform ADF test for stationarity

In [4]:
for station, series in station_data.items():
    adf_result = adfuller(series.dropna())
    print(f'Station: {station}')
    print(f'ADF Statistic: {adf_result[0]}')
    print(f'p-value: {adf_result[1]}')
    for key, value in adf_result[4].items():
        print(f'Critical Value ({key}): {value}')
    
    if adf_result[1] < 0.01:
        print(f"The time series for {station} is likely stationary (p-value < 0.01).\n")
    else:
        print(f"The time series for {station} is likely non-stationary (p-value >= 0.01).\n")

Station: 63rd Street Weather Station
ADF Statistic: -3.555596741943997
p-value: 0.006668917739446431
Critical Value (1%): -3.43109790993778
Critical Value (5%): -2.861870526484161
Critical Value (10%): -2.566945934275714
The time series for 63rd Street Weather Station is likely stationary (p-value < 0.01).

Station: Oak Street Weather Station
ADF Statistic: -3.523785179153752
p-value: 0.007396843807650962
Critical Value (1%): -3.43109790993778
Critical Value (5%): -2.861870526484161
Critical Value (10%): -2.566945934275714
The time series for Oak Street Weather Station is likely stationary (p-value < 0.01).

Station: Foster Weather Station
ADF Statistic: -3.518918752716674
p-value: 0.00751430787694948
Critical Value (1%): -3.43109790993778
Critical Value (5%): -2.861870526484161
Critical Value (10%): -2.566945934275714
The time series for Foster Weather Station is likely stationary (p-value < 0.01).



Calculate correlation between each pair of stations

In [6]:
for station1, station2 in combinations(station_data.keys(), 2):
    series1 = station_data[station1]
    series2 = station_data[station2]
    
    correlation = series1.corr(series2)
    print(f"Correlation between {station1} and {station2}: {correlation:.4f}")

Correlation between 63rd Street Weather Station and Oak Street Weather Station: 0.9936
Correlation between 63rd Street Weather Station and Foster Weather Station: 0.9943
Correlation between Oak Street Weather Station and Foster Weather Station: 0.9963


Perform Granger causality tests

In [7]:
max_lag = 5

for station1, station2 in combinations(station_data.keys(), 2):
    data = pd.concat([station_data[station1], station_data[station2]], axis=1)
    data.columns = [station1, station2]
    data.dropna(inplace=True)

    print(f"Testing Granger causality between {station1} and {station2}")
    try:
        granger_test = grangercausalitytests(data, max_lag, verbose=False)
        
        for lag in range(1, max_lag + 1):
            f_test_pvalue = granger_test[lag][0]['ssr_ftest'][1]
            print(f"Lag {lag}: p-value = {f_test_pvalue}")
            if f_test_pvalue < 0.01:
                print(f"Granger causality found at lag {lag} (p-value < 0.01) for {station1} causing {station2}\n")
            else:
                print(f"No Granger causality at lag {lag} for {station1} causing {station2}\n")
    except Exception as e:
        print(f"Granger causality test failed between {station1} and {station2} due to: {e}\n")

Testing Granger causality between 63rd Street Weather Station and Oak Street Weather Station
Lag 1: p-value = 4.367481381522929e-83
Granger causality found at lag 1 (p-value < 0.01) for 63rd Street Weather Station causing Oak Street Weather Station

Lag 2: p-value = 1.2882992559624966e-215
Granger causality found at lag 2 (p-value < 0.01) for 63rd Street Weather Station causing Oak Street Weather Station

Lag 3: p-value = 2.0667821887004174e-211
Granger causality found at lag 3 (p-value < 0.01) for 63rd Street Weather Station causing Oak Street Weather Station

Lag 4: p-value = 8.0982410741597e-211
Granger causality found at lag 4 (p-value < 0.01) for 63rd Street Weather Station causing Oak Street Weather Station

Lag 5: p-value = 2.3526604698100863e-209
Granger causality found at lag 5 (p-value < 0.01) for 63rd Street Weather Station causing Oak Street Weather Station

Testing Granger causality between 63rd Street Weather Station and Foster Weather Station
Lag 1: p-value = 2.610773442

