In [2]:
import numpy as np
import pandas as pd
from scipy import stats

def load_mi_data(file_path):
    """Read the MI table from a CSV file and keep only the columns used here.

    Parameters
    ----------
    file_path
        Passed unchanged to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The columns ``l``, ``r``, ``w``, ``z`` and ``p``, in that order.
        Any other column present in the file is dropped.

    Raises
    ------
    KeyError
        If any of the five columns is missing from the file.
    """
    wanted_columns = ['l', 'r', 'w', 'z', 'p']
    table = pd.read_csv(file_path)
    return table[wanted_columns]

def impute_rectangles(mi_rectangles, probabilities, num_imputations):
    """Sample rows of ``mi_rectangles`` according to ``probabilities``.

    Row indices are drawn with ``numpy.random.choice`` (NumPy's global random
    state, default sampling with replacement) and then used to index
    ``mi_rectangles``.

    Parameters
    ----------
    mi_rectangles : numpy.ndarray
        Candidate rectangles, one per row. It is indexed with an integer
        array, so it must support NumPy-style fancy indexing.
    probabilities : array-like
        Selection probability of each row; forwarded as ``p``.
    num_imputations : int or tuple of int
        Number of draws; forwarded as ``size``.

    Returns
    -------
    numpy.ndarray
        The sampled rows.
    """
    candidate_count = len(mi_rectangles)
    drawn_rows = np.random.choice(
        candidate_count,
        size=num_imputations,
        p=probabilities,
    )
    return mi_rectangles[drawn_rows]

def is_concordant(rect1, rect2):
    """Score a pair of rectangles as concordant (1), discordant (-1) or neither (0).

    Each rectangle is indexed at positions 0-3. Positions 0 and 1 are compared
    as the lower/upper ends on one coordinate and positions 2 and 3 as the
    lower/upper ends on the other. All comparisons are strict, so rectangles
    that merely touch or overlap on either coordinate score 0.

    Parameters
    ----------
    rect1, rect2 : sequence
        Indexable objects with at least four comparable entries.

    Returns
    -------
    int
        ``1`` when one rectangle lies strictly before the other on both
        coordinates, ``-1`` when it lies strictly before on the first
        coordinate and strictly after on the second, ``0`` otherwise.
    """
    first_leads = rect1[1] < rect2[0]
    second_leads = rect2[1] < rect1[0]

    # Same ordering on both coordinates -> concordant.
    if first_leads and rect1[3] < rect2[2]:
        return 1
    if second_leads and rect2[3] < rect1[2]:
        return 1

    # Opposite ordering on the second coordinate -> discordant.
    if first_leads and rect1[2] > rect2[3]:
        return -1
    if second_leads and rect2[2] > rect1[3]:
        return -1

    return 0

def kendall_tau_for_imputation(imputed_rectangles, t):
    """Average the pairwise concordance score over rectangles selected by ``t``.

    A rectangle takes part only if ``rect[1] <= t < rect[3]``. Every unordered
    pair of such rectangles is scored with ``is_concordant`` (1, -1 or 0) and
    the scores are averaged over all those pairs, including the ones that
    score 0.

    Parameters
    ----------
    imputed_rectangles : sequence
        Rectangles, each indexable at positions 0-3 (a 2-D array iterated by
        row works as well as a list of tuples).
    t : number
        Threshold used to select the rectangles.

    Returns
    -------
    float or int
        The mean score, or the integer ``0`` when fewer than two rectangles
        are selected.
    """
    # Selecting first is equivalent to testing both members of every pair.
    eligible = [rect for rect in imputed_rectangles if rect[1] <= t < rect[3]]

    score = 0
    pair_count = 0
    for position, first in enumerate(eligible):
        for second in eligible[position + 1:]:
            score += is_concordant(first, second)
            pair_count += 1

    if not pair_count:
        return 0

    return score / pair_count

def test_markov_property(mi_data, s_values, num_imputations=1000):
    """Test, for each ``s``, whether the imputation-averaged tau differs from zero.

    For every ``s`` the rectangles are resampled ``num_imputations`` times with
    ``impute_rectangles`` (each draw has as many rows as ``mi_data``). Within a
    draw only rows with ``r <= s < z`` are kept, and draws leaving fewer than
    two such rows are skipped. ``kendall_tau_for_imputation`` is evaluated on
    the remaining draws and the values are combined into a mean, a total
    variance, a z statistic and a two-sided normal p-value.

    Parameters
    ----------
    mi_data : pandas.DataFrame
        Must provide the columns ``l``, ``r``, ``w``, ``z`` and ``p``.
    s_values : iterable
        Values of ``s`` to evaluate.
    num_imputations : int, default 1000
        Number of resampled data sets drawn per ``s``.

    Returns
    -------
    pandas.DataFrame
        One row per ``s`` that had at least one usable draw, with the columns
        ``s``, ``tau``, ``std_dev``, ``z_statistic``, ``p_value`` and
        ``avg_subsample_size``. The columns are present even when the frame
        has no rows. If only one draw was usable for some ``s``, the
        between-imputation variance is undefined and ``std_dev``,
        ``z_statistic`` and ``p_value`` are NaN for that row.
    """
    result_columns = [
        's', 'tau', 'std_dev', 'z_statistic', 'p_value', 'avg_subsample_size',
    ]

    mi_rectangles = mi_data[['l', 'r', 'w', 'z']].values
    probabilities = mi_data['p'].values
    draw_size = len(mi_rectangles)

    results = []
    for s in s_values:
        tau_values = []
        subsample_sizes = []

        for _ in range(num_imputations):
            imputed_data = impute_rectangles(mi_rectangles, probabilities, draw_size)
            covers_s = (imputed_data[:, 1] <= s) & (s < imputed_data[:, 3])
            relevant_data = imputed_data[covers_s]

            # A pairwise statistic needs at least two rectangles.
            if len(relevant_data) < 2:
                continue

            tau_values.append(kendall_tau_for_imputation(relevant_data, s))
            subsample_sizes.append(len(relevant_data))

        if not tau_values:
            continue

        tau_mean = np.mean(tau_values)
        # The ddof=1 variance of a single value is undefined; return NaN
        # directly instead of letting NumPy emit a RuntimeWarning for it.
        if len(tau_values) > 1:
            tau_variance = np.var(tau_values, ddof=1)
        else:
            tau_variance = np.nan

        # Compute the variance as in equation (11)
        within_imputation_var = np.mean([2 / (n * (n - 1)) for n in subsample_sizes])
        between_imputation_var = (1 + 1 / num_imputations) * tau_variance
        total_variance = within_imputation_var + between_imputation_var

        std_dev = np.sqrt(total_variance)
        z_statistic = tau_mean / std_dev
        # The survival function avoids the cancellation in 1 - cdf(|z|),
        # which rounds to exactly 0 once |z| is large.
        p_value = 2 * stats.norm.sf(abs(z_statistic))

        results.append({
            's': s,
            'tau': tau_mean,
            'std_dev': std_dev,
            'z_statistic': z_statistic,
            'p_value': p_value,
            'avg_subsample_size': np.mean(subsample_sizes),
        })

    # Passing the columns explicitly keeps the schema when `results` is empty.
    return pd.DataFrame(results, columns=result_columns)

# Main execution: runs only when the file is executed directly, not on import.
if __name__ == "__main__":
    # Load the l, r, w, z and p columns from MI.csv (a relative path).
    mi_data = load_mi_data('MI.csv')
    s_values = range(9, 22)  # As in Table 3; i.e. s = 9, ..., 21 (22 is excluded)
    # num_imputations is left at its default of 1000 draws per s value.
    results = test_markov_property(mi_data, s_values)
    print(results)

     s       tau   std_dev  z_statistic   p_value  avg_subsample_size
0    9  0.144362  0.641765     0.224946  0.822021            3.861798
1   10  0.191787  0.469289     0.408675  0.682778            5.539007
2   11  0.140896  0.207395     0.679359  0.496910           11.514000
3   12  0.080190  0.179602     0.446485  0.655247           12.649000
4   13  0.095230  0.124251     0.766433  0.443418           16.754000
5   14  0.018045  0.139632     0.129235  0.897172           18.326000
6   15  0.052735  0.116860     0.451268  0.651797           21.738000
7   16  0.025679  0.106653     0.240777  0.809728           22.163000
8   17 -0.001052  0.098763    -0.010656  0.991498           21.562000
9   18 -0.004602  0.093363    -0.049295  0.960684           20.139000
10  19 -0.022592  0.084222    -0.268242  0.788513           19.811000
11  20 -0.014395  0.083683    -0.172013  0.863427           19.403000
12  21  0.006453  0.080981     0.079691  0.936483           18.738000
