# In [9]:
import numpy as np
import pandas as pd
from scipy import stats

def load_mi_data(file_path):
    """Read the MI table from a CSV source and keep the five columns used here.

    Parameters
    ----------
    file_path
        Anything ``pandas.read_csv`` accepts as its first argument.

    Returns
    -------
    pandas.DataFrame
        The columns ``l``, ``r``, ``w``, ``z`` and ``p``, in that order; any
        other column in the file is dropped.  A missing column raises the
        ``KeyError`` produced by the column selection.
    """
    wanted_columns = ['l', 'r', 'w', 'z', 'p']
    frame = pd.read_csv(file_path)
    return frame[wanted_columns]

def impute_rectangles(mi_rectangles, probabilities, num_imputations, rng=None):
    """Draw rows of ``mi_rectangles`` at random, with replacement.

    Parameters
    ----------
    mi_rectangles : array-like
        One rectangle per row.  Converted with ``np.asarray`` so that plain
        nested lists are accepted as well as NumPy arrays.
    probabilities : array-like
        Selection probability of each row; passed through unchanged as the
        ``p`` argument of ``choice``, which requires it to sum to 1.
    num_imputations : int or tuple of int
        Number of rows to draw (the ``size`` argument of ``choice``).
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness.  When omitted the global ``np.random`` state is
        used, so ``np.random.seed`` keeps controlling the draws as before.

    Returns
    -------
    numpy.ndarray
        The sampled rows, in the order they were drawn.
    """
    rectangles = np.asarray(mi_rectangles)
    sampler = np.random if rng is None else rng
    # Sample row indices rather than rows: ``choice`` only samples 1-D input.
    picked = sampler.choice(len(rectangles), size=num_imputations, p=probabilities)
    return rectangles[picked]

def is_concordant(rect1, rect2):
    """Score a pair of rectangles as concordant (1), discordant (-1) or neither (0).

    Each rectangle is indexed at positions 0-3; elsewhere in this file the
    rows passed in come from the columns ``l, r, w, z`` (presumably the
    bounds of two intervals, ``[l, r]`` and ``[w, z]`` - confirm against the
    data description).

    The pair is ordered on the first coordinate when one rectangle's index-1
    value is strictly below the other's index-0 value.  Given such an
    ordering:

    * 1 is returned when the same rectangle is also strictly below on the
      second coordinate (its index-3 value is less than the other's index-2
      value);
    * -1 is returned when it is strictly above on the second coordinate (its
      index-2 value exceeds the other's index-3 value);
    * 0 is returned otherwise, including whenever the first-coordinate
      comparison does not order the pair.
    """
    one_before_two = rect1[1] < rect2[0]
    two_before_one = rect2[1] < rect1[0]

    # Without a strict ordering on the first coordinate nothing can match.
    if not (one_before_two or two_before_one):
        return 0

    same_direction = (one_before_two and rect1[3] < rect2[2]) or \
                     (two_before_one and rect2[3] < rect1[2])
    if same_direction:
        return 1

    opposite_direction = (one_before_two and rect1[2] > rect2[3]) or \
                         (two_before_one and rect2[2] > rect1[3])
    if opposite_direction:
        return -1

    return 0

def kendall_tau_for_imputation(imputed_rectangles, t):
    """Average the ``is_concordant`` score over all eligible pairs of rectangles.

    A rectangle is eligible when ``rect[1] <= t < rect[3]``.  Every unordered
    pair of eligible rectangles contributes its ``is_concordant`` score
    (1, -1 or 0) to a running sum, and the sum is divided by the number of
    such pairs.

    Parameters
    ----------
    imputed_rectangles : sequence of rectangles
        Rows that are indexable at positions 0-3.
    t : number
        Threshold used in the eligibility check.

    Returns
    -------
    float or int
        Net score divided by the number of eligible pairs, or ``0`` when
        there is no eligible pair.
    """
    # Keep only the rows that pass the threshold check, preserving order.
    eligible = [rect for rect in imputed_rectangles if rect[1] <= t < rect[3]]

    net_score = 0
    pair_count = 0
    for position, first in enumerate(eligible):
        for second in eligible[position + 1:]:
            net_score += is_concordant(first, second)
            pair_count += 1

    return net_score / pair_count if pair_count else 0

def test_markov_property(mi_data, s_values, num_imputations=1000):
    """Run the tau-based test at each time point using resampled rectangles.

    For every ``s`` in ``s_values`` the rectangles are resampled
    ``num_imputations`` times with ``impute_rectangles`` (each draw has as
    many rows as ``mi_data``).  Within a draw, the rows with ``r <= s < z``
    form the subsample; draws whose subsample has fewer than two rows are
    skipped.  The per-draw taus from ``kendall_tau_for_imputation`` are then
    pooled into a mean, a total variance, a z statistic and a two-sided
    normal p-value.

    Parameters
    ----------
    mi_data : pandas.DataFrame
        Must provide the columns ``l``, ``r``, ``w``, ``z`` and ``p``.
    s_values : iterable
        Time points to evaluate.
    num_imputations : int, default 1000
        Number of resampled data sets per time point.

    Returns
    -------
    pandas.DataFrame
        One row per ``s`` that had at least one usable draw, with columns
        ``s``, ``tau``, ``std_dev``, ``z_statistic``, ``p_value`` and
        ``avg_subsample_size``.  The columns are present even when no row is.
        When only a single draw is usable the between-draw variance cannot be
        estimated and ``std_dev``, ``z_statistic`` and ``p_value`` are NaN.
    """
    result_columns = ['s', 'tau', 'std_dev', 'z_statistic', 'p_value',
                      'avg_subsample_size']

    mi_rectangles = mi_data[['l', 'r', 'w', 'z']].values
    probabilities = mi_data['p'].values
    n_rectangles = len(mi_rectangles)

    results = []
    for s in s_values:
        tau_values = []
        subsample_sizes = []

        for _ in range(num_imputations):
            imputed_data = impute_rectangles(mi_rectangles, probabilities, n_rectangles)
            in_window = (imputed_data[:, 1] <= s) & (s < imputed_data[:, 3])
            relevant_data = imputed_data[in_window]

            # A tau needs at least one pair of rows.
            if len(relevant_data) > 1:
                tau_values.append(kendall_tau_for_imputation(relevant_data, s))
                subsample_sizes.append(len(relevant_data))

        if not tau_values:
            continue

        tau_mean = np.mean(tau_values)
        # The sample variance is undefined for a single value; produce NaN
        # directly instead of letting np.var emit a RuntimeWarning.
        if len(tau_values) > 1:
            tau_variance = np.var(tau_values, ddof=1)
        else:
            tau_variance = float('nan')

        # Compute the variance as in equation (11)
        sizes = np.asarray(subsample_sizes, dtype=float)
        within_imputation_var = np.mean(2.0 / (sizes * (sizes - 1.0)))
        # NOTE(review): the factor uses the requested number of imputations,
        # not the number of draws that actually contributed a tau; confirm
        # which one equation (11) intends.
        between_imputation_var = (1 + 1/num_imputations) * tau_variance
        total_variance = within_imputation_var + between_imputation_var

        std_dev = np.sqrt(total_variance)
        z_statistic = tau_mean / std_dev
        # The survival function keeps precision in the far tail, where
        # 1 - cdf(...) would round to exactly 0.
        p_value = 2 * stats.norm.sf(abs(z_statistic))

        results.append({
            's': s,
            'tau': tau_mean,
            'std_dev': std_dev,
            'z_statistic': z_statistic,
            'p_value': p_value,
            'avg_subsample_size': np.mean(subsample_sizes)
        })

    return pd.DataFrame(results, columns=result_columns)

# Main execution: load the MI table, run the test at every time point and
# print the resulting table.
if __name__ == "__main__":
    # 'MI.csv' is resolved relative to the current working directory.
    mi_data = load_mi_data('MI.csv')
    s_values = range(9, 22)  # As in Table 3 (integers 9 through 21 inclusive)
    # Uses the default number of imputations (1000) per time point.
    results = test_markov_property(mi_data, s_values)
    print(results)

#      s       tau   std_dev  z_statistic   p_value  avg_subsample_size
# 0    9  0.162481  0.649494     0.250165  0.802459            3.825254
# 1   10  0.216063  0.469287     0.460408  0.645224            5.548519
# 2   11  0.148926  0.212636     0.700378  0.483692           11.565000
# 3   12  0.067389  0.176733     0.381302  0.702979           12.851000
# 4   13  0.095988  0.128833     0.745057  0.456238           16.692000
# 5   14  0.015427  0.134395     0.114788  0.908613           18.277000
# 6   15  0.054019  0.117559     0.459502  0.645874           21.614000
# 7   16  0.029078  0.106984     0.271795  0.785780           22.101000
# 8   17  0.005975  0.098520     0.060647  0.951640           21.659000
# 9   18 -0.002833  0.093364    -0.030342  0.975795           20.082000
# 10  19 -0.024394  0.085391    -0.285679  0.775124           19.729000
# 11  20 -0.014461  0.084845    -0.170438  0.864665           19.275000
# 12  21  0.006429  0.080824     0.079539  0.936604           18.742000
