# Calculate statistics


In [23]:
import json
import pandas as pd
import numpy as np
from scipy import stats

In [72]:
# Significance level: the p-value of the final t-test is compared against it.
alpha = 0.05

# One entry per group to compare. Both groups read the same metrics file and
# share model and sample count; only the feature set differs.
# "metric_file_path" is the file that gets opened; the remaining three keys
# are used as filter patterns on the equally named columns of the metrics.
configs = [
    dict(
        metric_file_path="../runs/DDI_malignant-v1-metrics.txt",
        model_name="lr",
        feature_identifier=feature_identifier,
        number_of_samples="100",
    )
    for feature_identifier in (
        "DDI_malignant-ViT_T16-ImageNet",
        "DDI_malignant-ViT_T16-Derma",
    )
]

In [73]:
# Sanity check: Welch's t-test computed from summary statistics
# (ttest_ind_from_stats) must agree with the one computed from the raw
# samples (ttest_ind). This justifies feeding only mean/std/nobs to the
# test on the real metrics.
a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
b = np.array([2, 3, 4, 5, 6, 7, 8, 9, 10, 11])

# ddof=1 gives the sample standard deviation, matching what ttest_ind
# derives internally from the raw samples.
mean1 = np.mean(a)
std1 = np.std(a, ddof=1)
nobs1 = np.size(a)

mean2 = np.mean(b)
std2 = np.std(b, ddof=1)
nobs2 = np.size(b)

print(f"mean1: {mean1}, std1: {std1}, nobs1: {nobs1}, mean2: {mean2}, std2: {std2}, nobs2: {nobs2}")
t_stat1, p_value1 = stats.ttest_ind_from_stats(mean1, std1, nobs1, mean2, std2, nobs2, equal_var=False)

print(f"t_stat1: {t_stat1}, p_value1: {p_value1}")

t_stat2, p_value2 = stats.ttest_ind(a, b, equal_var=False)
print(f"t_stat2: {t_stat2}, p_value2: {p_value2}")

# Compare with a tolerance: the two code paths do the arithmetic in a
# different order, so bit-for-bit float equality is not guaranteed across
# library versions or platforms.
np.testing.assert_allclose(t_stat1, t_stat2, rtol=1e-9)
np.testing.assert_allclose(p_value1, p_value2, rtol=1e-9)


mean1: 5.5, std1: 3.0276503540974917, nobs1: 10, mean2: 6.5, std2: 3.0276503540974917, nobs2: 10
t_stat1: -0.7385489458759963, p_value1: 0.4697020728008008
t_stat2: -0.7385489458759963, p_value2: 0.4697020728008008


In [74]:
def number_to_string(value):
    """Render a numeric sample count as text.

    A value that does not compare equal to itself (i.e. NaN) is rendered as
    "All"; any other value is truncated to an int and converted to its
    decimal string.
    """
    if not (value == value):  # self-inequality is the NaN test
        return "All"
    return str(int(value))

# Collect the summary statistics of the "f1_macro" metric for every config.
# The values are appended flat, in config order: mean, std, nobs per config.
stat_values = []
for config in configs:
    # Read inside a context manager so the file handle is always closed,
    # even when parsing below raises.
    with open(config["metric_file_path"], "r") as metric_file:
        content = metric_file.read()

    # The file holds JSON objects written back to back; strip newlines and
    # insert commas between adjacent objects so it parses as one JSON array.
    content = content.replace("\n", "").replace("}{", "},{")
    entries = json.loads("[" + content + "]")
    print(f"Read {len(entries)} entries loaded")
    df = pd.DataFrame.from_records(entries)
    # Normalise the sample count to text (NaN becomes "All") so it can be
    # filtered with the string matcher like the other columns.
    df["number_of_samples"] = df["number_of_samples"].apply(number_to_string)
    for filter_name in ["model_name", "feature_identifier", "number_of_samples"]:
        # str.contains does a substring search and treats the config value
        # as a regular expression (pandas default), so a value may match
        # more rows than intended; the assertion below guards against that.
        df = df[df[filter_name].str.contains(config[filter_name])]
        distinct_values = df[filter_name].unique()
        print(f"{filter_name}: {distinct_values}")
        assert 1 == len(distinct_values), (
            f"filter {filter_name!r}={config[filter_name]!r} must select exactly "
            f"one distinct value, got {len(distinct_values)}"
        )

    mean = df["f1_macro"].mean()
    std = df["f1_macro"].std(ddof=1)
    # count() excludes NaN, exactly like mean() and std() above; size would
    # also count missing values and make nobs inconsistent with them.
    nobs = df["f1_macro"].count()
    print(f"mean: {mean}, std: {std}, nobs: {nobs}")
    stat_values.append(mean)
    stat_values.append(std)
    stat_values.append(nobs)


Read 20040 entries loaded
model_name: ['lr']
feature_identifier: ['../datasets/intermediate-features/DDI_malignant-ViT_T16-ImageNet.csv']
number_of_samples: ['100']
mean: 0.6065437529305175, std: 0.0594727895711101, nobs: 100
Read 20040 entries loaded
model_name: ['lr']
feature_identifier: ['../datasets/intermediate-features/DDI_malignant-ViT_T16-Derma.csv']
number_of_samples: ['100']
mean: 0.48802751014885665, std: 0.06765575955127678, nobs: 100


In [75]:
# Welch's t-test (equal_var=False) from the collected summary statistics.
# stat_values is unpacked positionally as mean1, std1, nobs1, mean2, std2,
# nobs2, so it must hold exactly two groups.
welch_result = stats.ttest_ind_from_stats(*stat_values, equal_var=False)
t_stat, p_value = welch_result
print(f"t_stat: {t_stat}, p_value: {p_value}")

# The null hypothesis is kept only when the p-value exceeds alpha.
verdict = (
    "Fail to reject the null hypothesis. There is no significant difference between the two sets"
    if alpha < p_value
    else "Reject the null hypothesis. There is a significant difference between the two sets"
)
print(verdict)


t_stat: 13.156851012275656, p_value: 1.0507193653781663e-28
Reject the null hypothesis. There is a significant difference between the two sets
