In [1]:
! pip install gurobipy

Defaulting to user installation because normal site-packages is not writeable


In [4]:
import time
import numpy as np
import pandas as pd
import gurobipy as gb
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from itertools import product

# Load data
df = pd.read_csv('GA_features.csv')

# Define constants
SOCIAL_CATEGORIES = ['A', 'B', 'C', 'D', 'E', 'F', 'G']
BUDGET = 130
TAU_VALUES = [0.43, 0.5, 0.65, 0.75, 0.85, None]

# Extract relevant features
X_poll = df['n_poll'].values.reshape(-1, 1)  # Local polling stations
A_frac = df[[f'frac_registered_{c}' for c in SOCIAL_CATEGORIES]].values
y_train = df['frac_votes'].values

# Load neighborhood matrices
NEIGHBOR_INDEX_MATRIX = np.load('index_matrix.npy')
NEIGHBOR_DISTANCE_MATRIX = np.load('distance_matrix.npy')

# Compute **distance-weighted** neighbor polling impact
def compute_weighted_neighborhood_polling(n_poll, neighbor_index_matrix, neighbor_distance_matrix):
    num_counties = n_poll.shape[0]
    P_N_weighted = np.zeros(num_counties)

    for i in range(num_counties):
        neighbors = neighbor_index_matrix[i, :]
        distances = neighbor_distance_matrix[i, neighbors]

        # Compute weighted sum (closer neighbors contribute more)
        weights = np.exp(-distances)  # Exponential decay for distance
        P_N_weighted[i] = np.sum(weights * n_poll[neighbors])

    return P_N_weighted

# Compute P_N(i)
P_N_weighted = compute_weighted_neighborhood_polling(X_poll.flatten(), NEIGHBOR_INDEX_MATRIX, NEIGHBOR_DISTANCE_MATRIX)

# Create full and ablated datasets
X_full = np.column_stack((X_poll, P_N_weighted))  # Full model: local + weighted neighbor effect
X_ablated = X_poll.reshape(-1, 1)                 # Ablated model: only local effect

# Train both models
model_full = LinearRegression(fit_intercept=True).fit(X_full, y_train)
model_ablated = LinearRegression(fit_intercept=True).fit(X_ablated, y_train)

# Predictions
y_pred_full = model_full.predict(X_full)
y_pred_ablated = model_ablated.predict(X_ablated)

# Compute performance metrics
mse_full = mean_squared_error(y_train, y_pred_full)
mse_ablated = mean_squared_error(y_train, y_pred_ablated)
r2_full = r2_score(y_train, y_pred_full)
r2_ablated = r2_score(y_train, y_pred_ablated)

print(f"Model A (Full) - MSE: {mse_full:.4f}, R²: {r2_full:.4f}")
print(f"Model B (Ablated) - MSE: {mse_ablated:.4f}, R²: {r2_ablated:.4f}")

# Find counties where interventions changed significantly
prediction_difference = np.abs(y_pred_full - y_pred_ablated)
threshold = 0.01  # **Lowered threshold for meaningful changes**
changed_interventions = np.where(prediction_difference > threshold)[0]
print(f"Number of counties where intervention changed: {len(changed_interventions)}")
print(f"Changed counties: {changed_interventions}")

# Optimization function (now accepts either y_pred_full or y_pred_ablated)
def optimize_interventions(y_pred, tau_value, A_frac):
    print(f'Running optimization for tau={tau_value}')
    model = gb.Model()

    # Decision variables
    interventions = model.addVars(X_poll.shape[0], vtype=gb.GRB.BINARY, name="interventions")
    impact_vars = model.addVars(X_poll.shape[0], vtype=gb.GRB.CONTINUOUS, name="impact")

    # Budget constraint
    model.addConstr(sum(interventions[i] for i in range(X_poll.shape[0])) <= BUDGET, "budget_constraint")

    # Apply factual impact
    for index in range(X_poll.shape[0]):
        factual_impact = y_pred[index]  # **Now allows Full or Ablated predictions**
        model.addConstr(impact_vars[index] == factual_impact * interventions[index])

        # Apply fairness constraint if tau_value is specified
        if tau_value is not None:
            for group_idx in range(A_frac.shape[1]):
                model.addConstr(impact_vars[index] <= tau_value)

    # Maximize the total impact
    model.setObjective(sum(impact_vars[i] for i in range(X_poll.shape[0])), gb.GRB.MAXIMIZE)
    model.optimize()

    if model.status == gb.GRB.OPTIMAL:
        return np.array([interventions[i].X for i in range(X_poll.shape[0])]).astype(bool)
    else:
        raise RuntimeError("Optimization failed.")

# Run optimization for Full and Ablated models
for tau_value in TAU_VALUES:
    try:
        optimal_interventions_full = optimize_interventions(y_pred_full, tau_value, A_frac)
        optimal_interventions_ablated = optimize_interventions(y_pred_ablated, tau_value, A_frac)

        print(f"tau = {tau_value}")
        print(f"Optimal interventions (Full): {np.where(optimal_interventions_full)}")
        print(f"Optimal interventions (Ablated): {np.where(optimal_interventions_ablated)}")

        # Compare differences
        diff = np.where(optimal_interventions_full != optimal_interventions_ablated)[0]
        print(f"Counties where intervention changed due to ablation: {diff}")

    except RuntimeError:
        print(f"Optimization failed for tau<{tau_value}")


Model A (Full) - MSE: 0.0028, R²: 0.0230
Model B (Ablated) - MSE: 0.0028, R²: 0.0178
Number of counties where intervention changed: 5
Changed counties: [ 42  47  70 114 121]
Running optimization for tau=0.43
Gurobi Optimizer version 12.0.1 build v12.0.1rc0 (linux64 - "Ubuntu 22.04.4 LTS")

CPU model: AMD EPYC 7662 64-Core Processor, instruction set [SSE2|AVX|AVX2]
Thread count: 128 physical cores, 256 logical processors, using up to 32 threads

Optimize a model with 1273 rows, 318 columns and 1590 nonzeros
Model fingerprint: 0x491c92c0
Variable types: 159 continuous, 159 integer (159 binary)
Coefficient statistics:
  Matrix range     [6e-01, 1e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [4e-01, 1e+02]
Found heuristic solution: objective -0.0000000
Presolve removed 1273 rows and 318 columns
Presolve time: 0.00s
Presolve: All rows and columns removed

Explored 0 nodes (0 simplex iterations) in 0.01 seconds (0.00 work units)
Thread count was