In [None]:
import pandas as pd
import numpy as np
import os
import warnings
from statsmodels.stats.outliers_influence import variance_inflation_factor

In [None]:
# Silence warnings whose message starts with "overflow encountered in exp"
# so they do not clutter the printed output.
warnings.filterwarnings(
    action="ignore",
    message="overflow encountered in exp",
)

In [None]:
# --- 1. Data Availability Note ---
# NOTE: 'all_responses_coded.csv' is the raw, respondent-level survey file.
# It is not publicly available because of IRB-19-00210 privacy restrictions;
# this script is shared for methodological transparency only.

# Path is relative so the script works in a local checkout.
SURVEY_FILEPATH = '../data/all_responses_coded.csv'


# --- 2. Load Data and Define Variables ---
try:
    df_raw = pd.read_csv(SURVEY_FILEPATH)
except FileNotFoundError:
    print(f"File not found: {SURVEY_FILEPATH}")
    print("As noted in the README, raw survey data is restricted to protect participant privacy.")
    # Fall back to an empty DataFrame so that `df_raw` is always defined.
    # The frame has no columns, so later code must not assume any exist.
    df_raw = pd.DataFrame()
else:
    print("--> Raw survey data loaded successfully.")

# Survey item columns used by the analyses that follow.
policy_vars = [
    'q2_4', 'q4_8', 'q1_2',
    'q4_6', 'q4_13', 'q3_1',
    'q3_2', 'q4_7', 'q1_4',
]

# 3. CALCULATE VIF (Multicollinearity Test)
# We do this for each city separately as the model is city-specific
for city in ['Jakarta', 'Phnom Penh']:
    print(f"\n--- Robustness Test: VIF for {city} ---")

    # When the restricted CSV is absent, df_raw is an empty DataFrame with no
    # columns; indexing df_raw['City'] would raise KeyError, so skip instead.
    missing_cols = [col for col in ['City', *policy_vars] if col not in df_raw.columns]
    if missing_cols:
        print(f"Skipped: required columns not available: {missing_cols}")
        continue

    # Keep only this city's respondents with complete answers on every item.
    city_data = df_raw.loc[df_raw['City'] == city, policy_vars].dropna()
    if city_data.empty:
        print(f"Skipped: no complete responses for {city}.")
        continue

    # Add a constant for VIF calculation. It is appended as the LAST column,
    # so column positions 0..len(policy_vars)-1 still match policy_vars.
    X = city_data.assign(const=1)
    design_matrix = X.values  # build the ndarray once, not once per variable

    vif_data = pd.DataFrame({
        "Variable": policy_vars,
        "VIF": [
            variance_inflation_factor(design_matrix, col_idx)
            for col_idx, _ in enumerate(policy_vars)
        ],
    })
    print(vif_data.round(2))

# 4. CALCULATE CRONBACH'S ALPHA (Internal Consistency/Reliability)
def cronbach_alpha(df):
    """Return the standardized Cronbach's alpha for the columns of *df*.

    Each column is treated as one scale item. The value is computed from the
    mean pairwise correlation ``r`` between items and the number of items
    ``k`` as ``k * r / (1 + (k - 1) * r)``. This is the correlation-based
    (standardized) form of alpha, not the variance/covariance-based form.

    Args:
        df: DataFrame with one column per item and one row per respondent.

    Returns:
        The alpha coefficient as a float (NaN if the correlations are NaN).
    """
    k = df.shape[1]
    pairwise = df.corr().to_numpy()

    # Strict upper triangle = every distinct item pair exactly once,
    # excluding the diagonal of self-correlations.
    upper_rows, upper_cols = np.triu_indices(pairwise.shape[0], k=1)
    mean_r = pairwise[upper_rows, upper_cols].mean()

    numerator = k * mean_r
    denominator = 1 + (k - 1) * mean_r
    return numerator / denominator

print("\n--- Reliability Test: Cronbach's Alpha ---")
for city in ['Jakarta', 'Phnom Penh']:
    # When the restricted CSV is absent, df_raw is an empty DataFrame with no
    # columns; indexing df_raw['City'] would raise KeyError, so skip instead.
    missing_cols = [col for col in ['City', *policy_vars] if col not in df_raw.columns]
    if missing_cols:
        print(f"{city}: skipped, required columns not available: {missing_cols}")
        continue

    # Keep only this city's respondents with complete answers on every item.
    city_items = df_raw.loc[df_raw['City'] == city, policy_vars].dropna()
    if city_items.empty:
        print(f"{city}: skipped, no complete responses.")
        continue

    alpha_val = cronbach_alpha(city_items)
    print(f"{city} Internal Consistency (Alpha): {alpha_val:.3f}")


--- Robustness Test: VIF for Jakarta ---
  Variable   VIF
0     q2_4  1.03
1     q4_8  1.03
2     q1_2  1.06
3     q4_6  1.04
4    q4_13  1.01
5     q3_1  1.17
6     q3_2  1.22
7     q4_7  1.03
8     q1_4  1.05

--- Robustness Test: VIF for Phnom Penh ---
  Variable   VIF
0     q2_4  1.14
1     q4_8  1.03
2     q1_2  1.14
3     q4_6  1.06
4    q4_13  1.11
5     q3_1  2.13
6     q3_2  2.13
7     q4_7  1.06
8     q1_4  1.17

--- Reliability Test: Cronbach's Alpha ---
Jakarta Internal Consistency (Alpha): 0.299
Phnom Penh Internal Consistency (Alpha): 0.547
