In [4]:
import numpy as np
import pandas as pd
from pulp import LpMinimize, LpProblem, LpStatus, LpStatusOptimal, LpVariable, lpSum
from scipy.linalg import inv
from scipy.spatial.distance import mahalanobis

# Sample data: create a synthetic, reproducible cohort of treated and control patients
SEED = 42
N_PATIENTS = 10
N_TREATED = 4  # must not exceed the number of controls (N_PATIENTS - N_TREATED)
COVARIATES = ['pain', 'urgency', 'frequency']

# A seeded generator makes every "Restart & Run All" produce the same cohort.
rng = np.random.default_rng(SEED)

# Fix HOW MANY patients are treated and only randomise WHO is treated.
# Drawing each flag independently can produce more treated than control
# patients, in which case one-to-one matching without replacement is infeasible.
treated_flags = np.zeros(N_PATIENTS, dtype=int)
treated_flags[:N_TREATED] = 1

data = pd.DataFrame({
    'id': range(1, N_PATIENTS + 1),
    'pain': rng.integers(1, 10, N_PATIENTS),       # integer scores in 1..9
    'urgency': rng.integers(1, 10, N_PATIENTS),
    'frequency': rng.integers(1, 10, N_PATIENTS),
    'treated': rng.permutation(treated_flags)      # 1 = treated, 0 = control
})

# Separate treated and control groups (positional 0..n-1 index in each group)
treated = data[data['treated'] == 1].reset_index(drop=True)
control = data[data['treated'] == 0].reset_index(drop=True)

# Compute the covariance matrix of the covariates over the whole cohort and
# its inverse, which is what the Mahalanobis distance needs.
cov_matrix = np.cov(data[COVARIATES].T)
cov_inv = inv(cov_matrix)

def compute_distance_matrix(treated, control,
                            covariates=('pain', 'urgency', 'frequency'),
                            VI=None):
    """Compute pairwise Mahalanobis distances between treated and control rows.

    Parameters
    ----------
    treated, control : pandas.DataFrame
        Frames that both contain every column named in ``covariates``.
    covariates : sequence of str, optional
        Columns used as the matching covariates. Defaults to the three
        columns this notebook matches on.
    VI : array-like, optional
        Inverse covariance matrix, one row/column per covariate. When omitted,
        the notebook-level ``cov_inv`` is used, so existing two-argument calls
        behave as before.

    Returns
    -------
    dict
        Maps ``(treated_index_label, control_index_label)`` to the Mahalanobis
        distance between those two rows.
    """
    if VI is None:
        VI = cov_inv  # fall back to the notebook-level inverse covariance

    columns = list(covariates)
    # Pull the covariates out once as float arrays instead of re-selecting the
    # columns from a row Series for every (treated, control) pair.
    treated_values = treated[columns].to_numpy(dtype=float)
    control_values = control[columns].to_numpy(dtype=float)

    distances = {}
    for i, t in zip(treated.index, treated_values):
        for j, c in zip(control.index, control_values):
            distances[(i, j)] = mahalanobis(t, c, VI)
    return distances

# Compute distance matrix: {(treated_idx, control_idx): Mahalanobis distance}
distance_matrix = compute_distance_matrix(treated, control)

# One-to-one matching without replacement needs at least as many controls as
# treated patients; fail early with a clear message instead of handing the
# solver a problem that is infeasible by construction.
if len(treated) > len(control):
    raise ValueError(
        f"Cannot match {len(treated)} treated patients to only "
        f"{len(control)} controls without replacement."
    )

# Optimization problem setup
prob = LpProblem("Balanced_Matching", LpMinimize)

# Decision variables: x[(i, j)] == 1 iff treated i is matched to control j
x = {pair: LpVariable(f"x_{pair[0]}_{pair[1]}", cat='Binary') for pair in distance_matrix}

# Objective function: minimize total distance over the selected pairs.
# float() keeps NumPy scalars out of PuLP's expression arithmetic.
prob += lpSum(float(dist) * x[pair] for pair, dist in distance_matrix.items())

# Constraints: each treated patient is matched to exactly one control.
# Iterate over the index labels, which are the keys of distance_matrix.
for i in treated.index:
    prob += lpSum(x[(i, j)] for j in control.index) == 1

# Each control patient is matched at most once
for j in control.index:
    prob += lpSum(x[(i, j)] for i in treated.index) <= 1

# Solve and refuse to report matches unless the solver proved optimality
prob.solve()
if prob.status != LpStatusOptimal:
    raise RuntimeError(
        f"Matching did not reach an optimal solution (status: {LpStatus[prob.status]})"
    )

# Extract matched pairs. Solver output is floating point, so threshold at 0.5
# rather than testing for exact equality with 1.
matches = [
    pair for pair, var in x.items()
    if var.value() is not None and var.value() > 0.5
]
print("Matched Pairs:", matches)

ModuleNotFoundError: No module named 'scipy'

In [5]:
!pip install scipy



Collecting scipy
  Obtaining dependency information for scipy from https://files.pythonhosted.org/packages/ff/ba/31c7a8131152822b3a2cdeba76398ffb404d81d640de98287d236da90c49/scipy-1.15.1-cp312-cp312-win_amd64.whl.metadata
  Using cached scipy-1.15.1-cp312-cp312-win_amd64.whl.metadata (60 kB)
Using cached scipy-1.15.1-cp312-cp312-win_amd64.whl (43.6 MB)
Installing collected packages: scipy
Successfully installed scipy-1.15.1



[notice] A new release of pip is available: 23.2.1 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip
