In [1]:
import numpy as np
from scipy import stats

# Data for each file
data = {
    'elliptic': {
        'attr': {
            'vanilla_star': [0.4356, 0.4347, 0.4348, 0.4355, 0.4357, 0.4359, 0.4366, 0.4346, 0.4351, 0.4357],
            'prior': [0.6266, 0.6239, 0.6220, 0.6583, 0.6332, 0.6540, 0.6326, 0.5899, 0.5760, 0.6185],
            'prior_star': [0.5286, 0.5228, 0.5352, 0.5495, 0.5409, 0.5522, 0.5360, 0.5129, 0.5122, 0.5269]
        },
        'no_attr': {
            'vanilla_star': [0.4340, 0.4336, 0.4346, 0.4358, 0.4351, 0.4385, 0.4350, 0.4345, 0.4342, 0.4340],
            'prior': [0.4713, 0.4935, 0.4707, 0.4753, 0.4950, 0.4756, 0.4735, 0.4530, 0.5251, 0.4930],
            'prior_star': [0.4340, 0.4334, 0.4345, 0.4357, 0.4351, 0.4384, 0.4350, 0.4345, 0.4342, 0.4340]
        }
    },
    'photo': {
        'attr': {
            'vanilla_star': [0.5744, 0.5851, 0.6045, 0.5639, 0.5704, 0.5772, 0.5845, 0.5500, 0.5260, 0.5911],
            'prior': [0.4347, 0.4369, 0.4341, 0.4267, 0.4123, 0.4313, 0.4177, 0.4257, 0.4292, 0.4031],
            'prior_star': [0.5297, 0.5401, 0.5576, 0.5214, 0.5239, 0.5320, 0.5396, 0.5100, 0.4911, 0.5435]
        },
        'no_attr': {
            'vanilla_star': [0.5760, 0.5709, 0.5816, 0.5502, 0.5691, 0.5668, 0.5622, 0.5801, 0.5761, 0.5627],
            'prior': [0.5945, 0.4951, 0.6422, 0.6548, 0.3430, 0.4692, 0.3468, 0.4683, 0.5329, 0.4301],
            'prior_star': [0.5762, 0.5711, 0.5821, 0.5511, 0.5687, 0.5669, 0.5617, 0.5802, 0.5766, 0.5627]
        }
    },
    'reddit': {
        'attr': {
            'vanilla_star': [0.6315, 0.6334, 0.6365, 0.6443, 0.6300, 0.6282, 0.6143, 0.6333, 0.6389, 0.6292],
            'prior': [0.5543, 0.5289, 0.4977, 0.4732, 0.4917, 0.4759, 0.4802, 0.5627, 0.5263, 0.4980],
            'prior_star': [0.6332, 0.6342, 0.6369, 0.6448, 0.6312, 0.6291, 0.6157, 0.6346, 0.6399, 0.6296]
        },
        'no_attr': {
            'vanilla_star': [0.6313, 0.6325, 0.6331, 0.6354, 0.6328, 0.6368, 0.6371, 0.6370, 0.6335, 0.6366],
            'prior': [0.4914, 0.4510, 0.4505, 0.4729, 0.4327, 0.4253, 0.4599, 0.4391, 0.4714, 0.4360],
            'prior_star': [0.6318, 0.6330, 0.6338, 0.6356, 0.6330, 0.6372, 0.6374, 0.6377, 0.6341, 0.6370]
        }
    }
}

# Function to calculate mean, standard deviation, and perform t-tests
def analyze_data(data):
    results = {}
    
    for dataset_name, versions in data.items():
        results[dataset_name] = {}
        for version_name, lists in versions.items():
            results[dataset_name][version_name] = {}
            for list_name, values in lists.items():
                mean = np.mean(values)
                std_dev = np.std(values)
                results[dataset_name][version_name][list_name] = {'mean': mean, 'std_dev': std_dev}

        # Perform t-tests between 'attr' and 'no_attr' versions
        results[dataset_name]['t_tests'] = {}
        for list_name in versions['attr']:
            t_stat, p_value = stats.ttest_ind(versions['attr'][list_name], versions['no_attr'][list_name])
            results[dataset_name]['t_tests'][list_name] = {'t_stat': t_stat, 'p_value': p_value}
    
    return results

# Run the analysis
analysis_results = analyze_data(data)

# Display the results
for dataset_name, dataset_results in analysis_results.items():
    print(f"\nDataset: {dataset_name}")
    for version_name, version_results in dataset_results.items():
        if version_name != 't_tests':
            print(f"  Version: {version_name}")
            for list_name, stats in version_results.items():
                print(f"    {list_name} - Mean: {stats['mean']:.4f}, Std Dev: {stats['std_dev']:.4f}")
        else:
            print(f"  T-Tests:")
            for list_name, t_test_results in version_results.items():
                print(f"    {list_name} - t_stat: {t_test_results['t_stat']:.4f}, p_value: {t_test_results['p_value']:.4f}")




Dataset: elliptic
  Version: attr
    vanilla_star - Mean: 0.4354, Std Dev: 0.0006
    prior - Mean: 0.6235, Std Dev: 0.0240
    prior_star - Mean: 0.5317, Std Dev: 0.0130
  Version: no_attr
    vanilla_star - Mean: 0.4349, Std Dev: 0.0013
    prior - Mean: 0.4826, Std Dev: 0.0188
    prior_star - Mean: 0.4349, Std Dev: 0.0013
  T-Tests:
    vanilla_star - t_stat: 1.0059, p_value: 0.3278
    prior - t_stat: 13.8923, p_value: 0.0000
    prior_star - t_stat: 22.1801, p_value: 0.0000

Dataset: photo
  Version: attr
    vanilla_star - Mean: 0.5727, Std Dev: 0.0211
    prior - Mean: 0.4252, Std Dev: 0.0104
    prior_star - Mean: 0.5289, Std Dev: 0.0178
  Version: no_attr
    vanilla_star - Mean: 0.5696, Std Dev: 0.0091
    prior - Mean: 0.4977, Std Dev: 0.1045
    prior_star - Mean: 0.5697, Std Dev: 0.0091
  T-Tests:
    vanilla_star - t_stat: 0.4103, p_value: 0.6864
    prior - t_stat: -2.0725, p_value: 0.0529
    prior_star - t_stat: -6.1430, p_value: 0.0000

Dataset: reddit
  Version: a

In [2]:
import numpy as np
from scipy import stats

# Data for each file
data = {
    'elliptic': {
        'attr': {
            'vanilla_star': [0.4356, 0.4347, 0.4348, 0.4355, 0.4357, 0.4366, 0.4346, 0.4351, 0.4357],
            'prior': [0.6266, 0.6239, 0.6220, 0.6583, 0.6332, 0.6326, 0.5899, 0.5760, 0.6185],
            'prior_star': [0.5286, 0.5228, 0.5352, 0.5495, 0.5409, 0.5360, 0.5129, 0.5122, 0.5269]
        },
        'no_attr': {
            'vanilla_star': [0.4340, 0.4336, 0.4346, 0.4358, 0.4351, 0.4385, 0.4350, 0.4345, 0.4342, 0.4340],
            'prior': [0.4713, 0.4935, 0.4707, 0.4753, 0.4950, 0.4756, 0.4735, 0.4530, 0.5251, 0.4930],
            'prior_star': [0.4340, 0.4334, 0.4345, 0.4357, 0.4351, 0.4384, 0.4350, 0.4345, 0.4342, 0.4340]
        }
    },
    'photo': {
        'attr': {
            'vanilla_star': [0.5744, 0.5851, 0.6045, 0.5639, 0.5704, 0.5772, 0.5845, 0.5500, 0.5260, 0.5911],
            'prior': [0.4347, 0.4369, 0.4341, 0.4267, 0.4123, 0.4313, 0.4177, 0.4257, 0.4292, 0.4031],
            'prior_star': [0.5297, 0.5401, 0.5576, 0.5214, 0.5239, 0.5320, 0.5396, 0.5100, 0.4911, 0.5435]
        },
        'no_attr': {
            'vanilla_star': [0.5760, 0.5709, 0.5816, 0.5502, 0.5691, 0.5668, 0.5622, 0.5801, 0.5761, 0.5627],
            'prior': [0.5945, 0.4951, 0.6422, 0.6548, 0.3430, 0.4692, 0.3468, 0.4683, 0.5329, 0.4301],
            'prior_star': [0.5762, 0.5711, 0.5821, 0.5511, 0.5687, 0.5669, 0.5617, 0.5802, 0.5766, 0.5627]
        }
    },
    'reddit': {
        'attr': {
            'vanilla_star': [0.6315, 0.6334, 0.6365, 0.6443, 0.6300, 0.6282, 0.6143, 0.6333, 0.6389, 0.6292],
            'prior': [0.5543, 0.5289, 0.4977, 0.4732, 0.4917, 0.4759, 0.4802, 0.5627, 0.5263, 0.4980],
            'prior_star': [0.6332, 0.6342, 0.6369, 0.6448, 0.6312, 0.6291, 0.6157, 0.6346, 0.6399, 0.6296]
        },
        'no_attr': {
            'vanilla_star': [0.6313, 0.6325, 0.6331, 0.6354, 0.6328, 0.6368, 0.6371, 0.6370, 0.6335, 0.6366],
            'prior': [0.4914, 0.4510, 0.4505, 0.4729, 0.4327, 0.4253, 0.4599, 0.4391, 0.4714, 0.4360],
            'prior_star': [0.6318, 0.6330, 0.6338, 0.6356, 0.6330, 0.6372, 0.6374, 0.6377, 0.6341, 0.6370]
        }
    }
}

# Function to calculate mean, standard deviation, and confidence interval
def calculate_statistics(data):
    results = {}
    confidence_level = 0.95
    
    for dataset_name, versions in data.items():
        results[dataset_name] = {}
        for version_name, lists in versions.items():
            results[dataset_name][version_name] = {}
            for list_name, values in lists.items():
                mean = np.mean(values)
                std_dev = np.std(values)
                n = len(values)
                t_value = stats.t.ppf((1 + confidence_level) / 2., n-1)
                margin_of_error = t_value * (std_dev / np.sqrt(n))
                confidence_interval = (mean - margin_of_error, mean + margin_of_error)
                results[dataset_name][version_name][list_name] = {
                    'mean': mean,
                    'std_dev': std_dev,
                    'confidence_interval': confidence_interval
                }
    
    return results

# Run the analysis
analysis_results = calculate_statistics(data)

# Display the results
for dataset_name, dataset_results in analysis_results.items():
    print(f"\nDataset: {dataset_name}")
    for version_name, version_results in dataset_results.items():
        print(f"  Version: {version_name}")
        for list_name, stats in version_results.items():
            print(f"    {list_name} - Mean: {stats['mean']:.4f}, Std Dev: {stats['std_dev']:.4f}, "
                  f"95% CI: ({stats['confidence_interval'][0]:.4f}, {stats['confidence_interval'][1]:.4f})")



Dataset: elliptic
  Version: attr
    vanilla_star - Mean: 0.4354, Std Dev: 0.0006, 95% CI: (0.4349, 0.4358)
    prior - Mean: 0.6201, Std Dev: 0.0229, 95% CI: (0.6025, 0.6377)
    prior_star - Mean: 0.5294, Std Dev: 0.0117, 95% CI: (0.5205, 0.5384)
  Version: no_attr
    vanilla_star - Mean: 0.4349, Std Dev: 0.0013, 95% CI: (0.4340, 0.4359)
    prior - Mean: 0.4826, Std Dev: 0.0188, 95% CI: (0.4692, 0.4960)
    prior_star - Mean: 0.4349, Std Dev: 0.0013, 95% CI: (0.4339, 0.4358)

Dataset: photo
  Version: attr
    vanilla_star - Mean: 0.5727, Std Dev: 0.0211, 95% CI: (0.5576, 0.5878)
    prior - Mean: 0.4252, Std Dev: 0.0104, 95% CI: (0.4178, 0.4326)
    prior_star - Mean: 0.5289, Std Dev: 0.0178, 95% CI: (0.5162, 0.5416)
  Version: no_attr
    vanilla_star - Mean: 0.5696, Std Dev: 0.0091, 95% CI: (0.5631, 0.5761)
    prior - Mean: 0.4977, Std Dev: 0.1045, 95% CI: (0.4230, 0.5724)
    prior_star - Mean: 0.5697, Std Dev: 0.0091, 95% CI: (0.5632, 0.5762)

Dataset: reddit
  Version: att