<a href="https://colab.research.google.com/github/RyuichiSaito1/inflation-reddit-usa/blob/main/src/granger_causality_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive (Colab only) so the data files read later from
# /content/drive/MyDrive/... are accessible.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
pip install statsmodels

Original Data Value

In [None]:
import pandas as pd
from statsmodels.tsa.stattools import grangercausalitytests
import matplotlib.pyplot as plt

# Load the two tab-separated series from Google Drive.
cpi_data = pd.read_csv('/content/drive/MyDrive/world-inflation/data/statistics/cpi-u-2012-2022.tsv', sep='\t')
inflation_score_data = pd.read_csv('/content/drive/MyDrive/world-inflation/data/statistics/inflation-score-2012-2022.tsv', sep='\t')

# CPI: derive the month number from 'Period' (everything after its first
# character), combine it with 'Year' into a timestamp index, and keep rows
# from March 2012 onwards.
cpi_data = (
    cpi_data
    .assign(Month=lambda frame: frame['Period'].str[1:].astype(int))
    .assign(Date=lambda frame: pd.to_datetime(
        frame['Year'].astype(str) + '-' + frame['Month'].astype(str), format='%Y-%m'))
    .set_index('Date')
    .loc[lambda frame: frame.index >= '2012-03-01']
)

# Inflation score: parse the 'month' column (YYYY-MM) into the same kind of
# timestamp index and apply the same March 2012 cut-off.
inflation_score_data = (
    inflation_score_data
    .assign(Date=lambda frame: pd.to_datetime(frame['month'], format='%Y-%m'))
    .set_index('Date')
    .loc[lambda frame: frame.index >= '2012-03-01']
)

# Inner-join the two series on their shared Date index.
merged_data = cpi_data[['Value']].merge(
    inflation_score_data[['moiving_average']],
    left_index=True,
    right_index=True,
)

merged_data

In [None]:
# Twin-axis overlay of the two merged series against the shared date index.
def _plot_series(axis, column, legend_label, axis_label, colour):
    """Draw one merged_data column on `axis` and colour its y-axis to match."""
    axis.plot(merged_data.index, merged_data[column], label=legend_label, color=colour, linewidth=2)
    axis.set_ylabel(axis_label, color=colour)
    axis.tick_params(axis='y', labelcolor=colour)


fig, ax1 = plt.subplots(figsize=(12, 6))

# Left axis: CPI in blue.
_plot_series(ax1, 'Value', 'CPI', 'CPI Value', 'blue')
ax1.set_xlabel('Date')

# Right axis (twin sharing the x-axis): inflation score in red.
ax2 = ax1.twinx()
_plot_series(ax2, 'moiving_average', 'Inflation Score', 'Inflation Score', 'red')

# Title and grid belong to the primary axis.
ax1.set_title('CPI and Inflation Score Trend (2012-2022)', fontweight='bold')
ax1.grid(True)

# One legend per axis, placed in opposite corners so they do not overlap.
for axis, corner in ((ax1, 'upper left'), (ax2, 'upper right')):
    axis.legend(loc=corner)

plt.tight_layout()
plt.show()

In [None]:
from scipy.stats import pearsonr
import numpy as np
import scipy.stats as stats

# Pearson correlation between the two merged series, with its two-sided p-value.
# NOTE(review): the p-value assumes independent observations; both inputs are
# time series, so treat it as indicative only.
corr, p_value = pearsonr(merged_data['Value'], merged_data['moiving_average'])

# Degrees of freedom of the correlation t-test: n - 2.
n_obs = len(merged_data)
df = n_obs - 2

# t-statistic corresponding to the correlation (not printed; kept for inspection).
t_stat = corr * np.sqrt(df / (1 - corr**2))

# Two-tailed confidence level (95% confidence).
alpha = 0.05

# Confidence interval via the Fisher z-transformation: arctanh(r) is
# approximately normal with standard error 1 / sqrt(n - 3), and mapping the
# bounds back with tanh keeps the interval inside [-1, 1] even when |r| is
# large (a symmetric "r +/- margin" interval does not).
z_crit = stats.norm.ppf(1 - alpha / 2)
fisher_z = np.arctanh(corr)
fisher_se = 1 / np.sqrt(n_obs - 3)
CI_lower = np.tanh(fisher_z - z_crit * fisher_se)
CI_upper = np.tanh(fisher_z + z_crit * fisher_se)

# Output the correlation, p-value, degrees of freedom, and confidence interval
print(f"Pearson correlation coefficient: {corr:.4f}")
print(f"P-value: {p_value:.4f}")
print(f"Degrees of freedom: {df}")
print(f"95% Confidence interval for the correlation: ({CI_lower:.4f}, {CI_upper:.4f})")

In [None]:
# Granger causality test function
def granger_causality_analysis(data, maxlag=12):
    """Run Granger causality tests in both directions for lags 1..maxlag.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'Value' (the CPI series) and
        'moiving_average' (the inflation score series).
    maxlag : int, default 12
        Largest lag to test. A value below 1 yields an empty frame.

    Returns
    -------
    pandas.DataFrame
        One row per lag with the columns 'Lag',
        'InflationScore_to_CPI_pvalue' and 'CPI_to_InflationScore_pvalue',
        holding the p-value of the SSR-based F test for each direction.
    """
    if maxlag < 1:
        # Nothing to test; mirror the empty result of an empty lag range.
        return pd.DataFrame([])

    # grangercausalitytests checks whether the SECOND column Granger-causes
    # the FIRST column, so the hypothesised cause must be listed last.
    # A single call already returns results keyed by every lag 1..maxlag.

    # Inflation score → CPI
    inf_to_cpi = grangercausalitytests(
        data[['Value', 'moiving_average']],
        maxlag=maxlag
    )

    # CPI → Inflation score
    cpi_to_inf = grangercausalitytests(
        data[['moiving_average', 'Value']],
        maxlag=maxlag
    )

    # Each per-lag entry is (test_results, fitted_models); 'ssr_ftest' is a
    # tuple whose second element is the p-value.
    results = [
        {
            'Lag': lag,
            'InflationScore_to_CPI_pvalue': inf_to_cpi[lag][0]['ssr_ftest'][1],
            'CPI_to_InflationScore_pvalue': cpi_to_inf[lag][0]['ssr_ftest'][1]
        }
        for lag in range(1, maxlag + 1)
    ]

    return pd.DataFrame(results)

# Calculate the results of the Granger causality test
granger_results = granger_causality_analysis(merged_data)

# Visualization
fig, ax = plt.subplots(figsize=(10, 6))

# Plot Granger causality test results (one line per direction)
ax.plot(granger_results['Lag'], granger_results['InflationScore_to_CPI_pvalue'],
         marker='o', label='Inflation Score → CPI', color='green')
ax.plot(granger_results['Lag'], granger_results['CPI_to_InflationScore_pvalue'],
         marker='s', label='CPI → Inflation Score', color='purple')
# Dashed reference line at the 0.05 significance threshold
ax.axhline(y=0.05, color='red', linestyle='--', label='Significance Level (0.05)')
ax.set_title('Granger Causality Test: P-values', fontweight='bold')
ax.set_xlabel('Lag (Months)')
ax.set_ylabel('P-value')
# Log scale keeps very small p-values distinguishable from each other
ax.set_yscale('log')
ax.legend()

plt.tight_layout()
# Section-specific file name: the 12-month percent change section below saves
# to 'granger_causality_analysis.png', which would otherwise overwrite this figure.
plt.savefig('granger_causality_analysis_original_value.png', dpi=300, bbox_inches='tight')
plt.show()

# Output the results in table format
print("\nGranger Causality Test Results:")
print(granger_results.to_string(index=False))

12-Month Percent Change

In [None]:
import pandas as pd
from statsmodels.tsa.stattools import grangercausalitytests
import matplotlib.pyplot as plt

# Load the two tab-separated series from Google Drive.
cpi_data = pd.read_csv('/content/drive/MyDrive/world-inflation/data/statistics/cpi-12-2012-2022.tsv', sep='\t')
inflation_score_data = pd.read_csv('/content/drive/MyDrive/world-inflation/data/statistics/inflation-score-2012-2022.tsv', sep='\t')

# CPI: derive the month number from 'Period' (everything after its first
# character), combine it with 'Year' into a timestamp index, and keep rows
# from March 2012 onwards.
cpi_data = (
    cpi_data
    .assign(Month=lambda frame: frame['Period'].str[1:].astype(int))
    .assign(Date=lambda frame: pd.to_datetime(
        frame['Year'].astype(str) + '-' + frame['Month'].astype(str), format='%Y-%m'))
    .set_index('Date')
    .loc[lambda frame: frame.index >= '2012-03-01']
)

# Inflation score: parse the 'month' column (YYYY-MM) into the same kind of
# timestamp index and apply the same March 2012 cut-off.
inflation_score_data = (
    inflation_score_data
    .assign(Date=lambda frame: pd.to_datetime(frame['month'], format='%Y-%m'))
    .set_index('Date')
    .loc[lambda frame: frame.index >= '2012-03-01']
)

# Inner-join the two series on their shared Date index.
merged_data = cpi_data[['Value']].merge(
    inflation_score_data[['moiving_average']],
    left_index=True,
    right_index=True,
)

merged_data

In [None]:
# Visualization: Overlay the CPI 12-month percent change and the inflation
# score on the same graph. The 'Value' column in this section comes from the
# 12-month percent change file, so the labels say so instead of "CPI Value".
fig, ax1 = plt.subplots(figsize=(12, 6))

# Plot CPI 12-month percent change (blue)
ax1.plot(merged_data.index, merged_data['Value'], label='CPI 12-Month % Change', color='blue', linewidth=2)
ax1.set_xlabel('Date')
ax1.set_ylabel('CPI 12-Month Percent Change', color='blue')
ax1.tick_params(axis='y', labelcolor='blue')

# Create a second y-axis to plot Inflation Score
ax2 = ax1.twinx()
# Plot Inflation Score (moiving_average) in red
ax2.plot(merged_data.index, merged_data['moiving_average'], label='Inflation Score', color='red', linewidth=2)
ax2.set_ylabel('Inflation Score', color='red')
ax2.tick_params(axis='y', labelcolor='red')

# Add title and grid
ax1.set_title('CPI 12-Month Percent Change and Inflation Score Trend (2012-2022)', fontweight='bold')
ax1.grid(True)

# Add legends (one per axis, in opposite corners)
ax1.legend(loc='upper left')
ax2.legend(loc='upper right')

# Show the plot
plt.tight_layout()
plt.show()

In [None]:
from scipy.stats import pearsonr
import numpy as np
import scipy.stats as stats

# Pearson correlation between the two merged series, with its two-sided p-value.
# NOTE(review): the p-value assumes independent observations; both inputs are
# time series, so treat it as indicative only.
corr, p_value = pearsonr(merged_data['Value'], merged_data['moiving_average'])

# Degrees of freedom of the correlation t-test: n - 2.
n_obs = len(merged_data)
df = n_obs - 2

# t-statistic corresponding to the correlation (not printed; kept for inspection).
t_stat = corr * np.sqrt(df / (1 - corr**2))

# Two-tailed confidence level (95% confidence).
alpha = 0.05

# Confidence interval via the Fisher z-transformation: arctanh(r) is
# approximately normal with standard error 1 / sqrt(n - 3), and mapping the
# bounds back with tanh keeps the interval inside [-1, 1] even when |r| is
# large (a symmetric "r +/- margin" interval does not).
z_crit = stats.norm.ppf(1 - alpha / 2)
fisher_z = np.arctanh(corr)
fisher_se = 1 / np.sqrt(n_obs - 3)
CI_lower = np.tanh(fisher_z - z_crit * fisher_se)
CI_upper = np.tanh(fisher_z + z_crit * fisher_se)

# Output the correlation, p-value, degrees of freedom, and confidence interval
print(f"Pearson correlation coefficient: {corr:.4f}")
print(f"P-value: {p_value:.4f}")
print(f"Degrees of freedom: {df}")
print(f"95% Confidence interval for the correlation: ({CI_lower:.4f}, {CI_upper:.4f})")

In [None]:
# Granger causality test function
def granger_causality_analysis(data, maxlag=12):
    """Run Granger causality tests in both directions for lags 1..maxlag.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'Value' (the CPI series) and
        'moiving_average' (the inflation score series).
    maxlag : int, default 12
        Largest lag to test. A value below 1 yields an empty frame.

    Returns
    -------
    pandas.DataFrame
        One row per lag with the columns 'Lag',
        'InflationScore_to_CPI_pvalue' and 'CPI_to_InflationScore_pvalue',
        holding the p-value of the SSR-based F test for each direction.
    """
    if maxlag < 1:
        # Nothing to test; mirror the empty result of an empty lag range.
        return pd.DataFrame([])

    # grangercausalitytests checks whether the SECOND column Granger-causes
    # the FIRST column, so the hypothesised cause must be listed last.
    # A single call already returns results keyed by every lag 1..maxlag.

    # Inflation score → CPI
    inf_to_cpi = grangercausalitytests(
        data[['Value', 'moiving_average']],
        maxlag=maxlag
    )

    # CPI → Inflation score
    cpi_to_inf = grangercausalitytests(
        data[['moiving_average', 'Value']],
        maxlag=maxlag
    )

    # Each per-lag entry is (test_results, fitted_models); 'ssr_ftest' is a
    # tuple whose second element is the p-value.
    results = [
        {
            'Lag': lag,
            'InflationScore_to_CPI_pvalue': inf_to_cpi[lag][0]['ssr_ftest'][1],
            'CPI_to_InflationScore_pvalue': cpi_to_inf[lag][0]['ssr_ftest'][1]
        }
        for lag in range(1, maxlag + 1)
    ]

    return pd.DataFrame(results)

# Run the bidirectional Granger tests on the merged series.
granger_results = granger_causality_analysis(merged_data)

# P-value per lag for each direction.
fig, ax = plt.subplots(figsize=(10, 6))
direction_styles = (
    ('InflationScore_to_CPI_pvalue', 'o', 'Inflation Score → CPI', 'green'),
    ('CPI_to_InflationScore_pvalue', 's', 'CPI → Inflation Score', 'purple'),
)
for pvalue_column, marker_shape, legend_label, line_colour in direction_styles:
    ax.plot(granger_results['Lag'], granger_results[pvalue_column],
            marker=marker_shape, label=legend_label, color=line_colour)

# Dashed reference line at the 0.05 significance threshold.
ax.axhline(y=0.05, color='red', linestyle='--', label='Significance Level (0.05)')
ax.set_title('Granger Causality Test: P-values', fontweight='bold')
# Axis labels plus a logarithmic y-axis for the p-values.
ax.set(xlabel='Lag (Months)', ylabel='P-value', yscale='log')
ax.legend()

# Lay out, write the figure to disk, then render it in the notebook.
plt.tight_layout()
plt.savefig('granger_causality_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

# Plain-text table of the per-lag p-values.
print("\nGranger Causality Test Results:")
print(granger_results.to_string(index=False))