In [None]:
# Install matplotlib through the shell; pip's stdout/stderr are discarded and only "ok" or "fail" is echoed.
!pip install matplotlib > /dev/null 2>&1 && echo "ok" || echo "fail"; 


In [None]:

import os
import numpy as np
import __init__
from algorithm import inter_class_distance, intra_class_distance, visualize_inter_intra_class_distances, cohens_d
from utils import load_features

# Directory (relative to the working directory) that receives the saved figures.
analysis_output_dir = os.path.join("output")
# exist_ok=True makes the cell idempotent and avoids the check-then-create race.
os.makedirs(analysis_output_dir, exist_ok=True)

# When True and METHOD == "euclidean", distances are divided by the per-cluster shape[1].
VECTOR_FEATURE_LENGTH_NORMALIZED = True
# Distance method name forwarded to inter_class_distance / intra_class_distance.
METHOD = "cosine"

In [None]:

def get_clustered_features(feature_vectors: np.ndarray, feature_labels: np.ndarray):
    """
    Group feature vectors by their label.

    Labels are mapped to cluster positions through their sorted unique values,
    so they do not have to be the contiguous integers 0..k-1. For labels that
    already are 0..k-1 the cluster order is unchanged.

    Args:
        feature_vectors (np.ndarray): Array whose first axis indexes samples.
        feature_labels (np.ndarray | list): One label per sample, in the same order.
    Returns:
        np.ndarray: Array whose first axis indexes clusters (sorted by label).
            When every cluster holds the same number of samples this is a
            regular array of shape (clusters, samples, ...); otherwise it is a
            1-D object array whose elements are the per-cluster arrays.
    Raises:
        ValueError: If the number of labels differs from the number of samples.
    """

    vectors = np.asarray(feature_vectors)
    labels = np.asarray(feature_labels).ravel()
    if labels.shape[0] != vectors.shape[0]:
        raise ValueError(
            f"Got {labels.shape[0]} labels for {vectors.shape[0]} feature vectors."
        )

    # return_inverse yields, per sample, the position of its label in the sorted
    # unique labels; that position is a safe cluster index for any label values.
    unique_labels, cluster_index = np.unique(labels, return_inverse=True)
    grouped = [vectors[cluster_index == k] for k in range(unique_labels.shape[0])]

    # Equal-sized clusters stack into a regular array.
    if len({group.shape[0] for group in grouped}) <= 1:
        return np.array(grouped)

    # Ragged clusters cannot be stacked; keep each one as its own array.
    ragged = np.empty(len(grouped), dtype=object)
    for k, group in enumerate(grouped):
        ragged[k] = group
    return ragged


def get_all_inter_classes(clustered_feature_vectors: np.ndarray):
    """
    Compute one averaged inter-class distance per cluster.

    For cluster i, the values of inter_class_distance(cluster_i, cluster_j, METHOD)
    are summed over every j != i and divided by (number of clusters - 1). When
    VECTOR_FEATURE_LENGTH_NORMALIZED is set and METHOD is "euclidean", the
    result is additionally divided by clustered_feature_vectors[0].shape[1].

    Also prints clustered_feature_vectors[0].shape[1] once before returning.

    Args:
        clustered_feature_vectors (np.ndarray): Clusters indexed along the first axis.
    Returns:
        list: One averaged distance per cluster, in cluster order.
    """
    num_clusters = clustered_feature_vectors.shape[0]
    rescale = VECTOR_FEATURE_LENGTH_NORMALIZED and METHOD == "euclidean"

    averaged = []
    for anchor in range(num_clusters):
        # Accumulate the distance from the anchor cluster to all other clusters.
        total = sum(
            inter_class_distance(
                clustered_feature_vectors[anchor],
                clustered_feature_vectors[other],
                METHOD,
            )
            for other in range(num_clusters)
            if other != anchor
        )
        mean_distance = total / (num_clusters - 1)
        if rescale:
            mean_distance = mean_distance / clustered_feature_vectors[0].shape[1]
        averaged.append(mean_distance)

    print("clustered_feature_vectors[0].shape[1]",clustered_feature_vectors[0].shape[1])
    return averaged

def get_all_intra_classes(clustered_feature_vectors: np.ndarray):
    """
    Compute one intra-class distance per cluster.

    Each entry is intra_class_distance(cluster_i, METHOD). When
    VECTOR_FEATURE_LENGTH_NORMALIZED is set and METHOD is "euclidean", the
    value is additionally divided by clustered_feature_vectors[0].shape[1].

    Also prints clustered_feature_vectors[0].shape[1] once before returning.

    Args:
        clustered_feature_vectors (np.ndarray): Clusters indexed along the first axis.
    Returns:
        list: One distance per cluster, in cluster order.
    """
    num_clusters = clustered_feature_vectors.shape[0]
    rescale = VECTOR_FEATURE_LENGTH_NORMALIZED and METHOD == "euclidean"

    per_cluster = []
    for cluster_idx in range(num_clusters):
        spread = intra_class_distance(clustered_feature_vectors[cluster_idx], METHOD)
        if rescale:
            spread = spread / clustered_feature_vectors[0].shape[1]
        per_cluster.append(spread)

    print("clustered_feature_vectors[0].shape[1]",clustered_feature_vectors[0].shape[1])
    return per_cluster
    
def apply_zscore_normalization(feature_vectors: np.ndarray):
    """
    Standardize each feature column to zero mean and unit (population) std.

    Columns with zero standard deviation (constant features) are mapped to 0
    instead of producing NaN/inf from a division by zero.

    Args:
        feature_vectors (np.ndarray): Array whose first axis indexes samples.
    Returns:
        np.ndarray: Array of the same shape holding the z-scored values.
    """
    feature_vectors = np.asarray(feature_vectors)
    mean = feature_vectors.mean(axis=0)
    std = np.asarray(feature_vectors.std(axis=0))
    # A constant column has std == 0; dividing its (all-zero) centered values by 1
    # keeps them at 0 rather than turning them into NaN.
    safe_std = np.where(std == 0, np.ones_like(std), std)
    return (feature_vectors - mean) / safe_std

## Resnet 50

In [None]:

# Load the stored features for this model keyword.
model_keyword = "resnet50_224_21_fs(1, 2048)"
features = load_features(keyword=model_keyword)

# Split every loaded record into its feature vector and its label id.
feature_vectors = [record["feature"] for record in features]
feature_labels = [record["label_id"] for record in features]

np_feature_labels = np.array(feature_labels)
# Drop the singleton axis 1 of the stacked features, then z-score them.
np_feature_vectors = np.array(feature_vectors).squeeze(axis=1)
np_normalized_feature_vectors = apply_zscore_normalization(np_feature_vectors)

clustered_feature_vectors = get_clustered_features(
    feature_vectors=np_normalized_feature_vectors,
    feature_labels=feature_labels,
)

inter_class_distances = get_all_inter_classes(clustered_feature_vectors)
intra_class_distances = get_all_intra_classes(clustered_feature_vectors)
print(f"inter_class_distances: {inter_class_distances}")
print(f"intra_class_distances: {intra_class_distances}")

# Build the comparison figure and save it as a PNG in the output directory.
fig = visualize_inter_intra_class_distances(
    inter_class_distances=inter_class_distances,
    intra_class_distances=intra_class_distances,
    keywords=[model_keyword, METHOD],
)
saved_path = os.path.join(
    analysis_output_dir,
    f"{model_keyword}_k_{len(intra_class_distances)}.png",
)
fig.savefig(saved_path)

d = cohens_d(
    np.array(inter_class_distances),
    np.array(intra_class_distances),
)
print(f"Cohen's d: {d}")





## RESNET 50 FINE TUNING

In [None]:
# Load the stored features for this model keyword.
model_keyword = "resnet50_ft_224_21_AIT90k_fs(1, 2048)"
features = load_features(keyword=model_keyword)

# Split every loaded record into its feature vector and its label id.
feature_vectors = [record["feature"] for record in features]
feature_labels = [record["label_id"] for record in features]

np_feature_labels = np.array(feature_labels)
# Drop the singleton axis 1 of the stacked features, then z-score them.
np_feature_vectors = np.array(feature_vectors).squeeze(axis=1)
np_normalized_feature_vectors = apply_zscore_normalization(np_feature_vectors)

clustered_feature_vectors = get_clustered_features(
    feature_vectors=np_normalized_feature_vectors,
    feature_labels=feature_labels,
)

inter_class_distances = get_all_inter_classes(clustered_feature_vectors)
intra_class_distances = get_all_intra_classes(clustered_feature_vectors)
print(f"inter_class_distances: {inter_class_distances}")
print(f"intra_class_distances: {intra_class_distances}")

# Build the comparison figure and save it as a PNG in the output directory.
fig = visualize_inter_intra_class_distances(
    inter_class_distances=inter_class_distances,
    intra_class_distances=intra_class_distances,
    keywords=[model_keyword, METHOD],
)
saved_path = os.path.join(
    analysis_output_dir,
    f"{model_keyword}_k_{len(intra_class_distances)}.png",
)
fig.savefig(saved_path)

d = cohens_d(
    np.array(inter_class_distances),
    np.array(intra_class_distances),
)
print(f"Cohen's d: {d}")

## RESNET 50 FINE TUNING (26x100 training dataset)

In [None]:
# Load the stored features for this model keyword.
model_keyword = "resnet50_ft_224_21_AIT2-6k_fs(1, 2048)"
features = load_features(keyword=model_keyword)

# Split every loaded record into its feature vector and its label id.
feature_vectors = [record["feature"] for record in features]
feature_labels = [record["label_id"] for record in features]

np_feature_labels = np.array(feature_labels)
# Drop the singleton axis 1 of the stacked features, then z-score them.
np_feature_vectors = np.array(feature_vectors).squeeze(axis=1)
np_normalized_feature_vectors = apply_zscore_normalization(np_feature_vectors)

clustered_feature_vectors = get_clustered_features(
    feature_vectors=np_normalized_feature_vectors,
    feature_labels=feature_labels,
)

inter_class_distances = get_all_inter_classes(clustered_feature_vectors)
intra_class_distances = get_all_intra_classes(clustered_feature_vectors)
print(f"inter_class_distances: {inter_class_distances}")
print(f"intra_class_distances: {intra_class_distances}")

# Build the comparison figure and save it as a PNG in the output directory.
fig = visualize_inter_intra_class_distances(
    inter_class_distances=inter_class_distances,
    intra_class_distances=intra_class_distances,
    keywords=[model_keyword, METHOD],
)
saved_path = os.path.join(
    analysis_output_dir,
    f"{model_keyword}_k_{len(intra_class_distances)}.png",
)
fig.savefig(saved_path)

d = cohens_d(
    np.array(inter_class_distances),
    np.array(intra_class_distances),
)
print(f"Cohen's d: {d}")

## VIT B16

In [None]:
# Load the stored features for this model keyword.
model_keyword = "vit_b_16"
features = load_features(keyword=model_keyword)

# Split every loaded record into its feature vector and its label id.
feature_vectors = [record["feature"] for record in features]
feature_labels = [record["label_id"] for record in features]

np_feature_labels = np.array(feature_labels)
# Drop the singleton axis 1 of the stacked features, then z-score them.
np_feature_vectors = np.array(feature_vectors).squeeze(axis=1)
np_normalized_feature_vectors = apply_zscore_normalization(np_feature_vectors)

clustered_feature_vectors = get_clustered_features(
    feature_vectors=np_normalized_feature_vectors,
    feature_labels=feature_labels,
)

inter_class_distances = get_all_inter_classes(clustered_feature_vectors)
intra_class_distances = get_all_intra_classes(clustered_feature_vectors)
print(f"inter_class_distances: {inter_class_distances}")
print(f"intra_class_distances: {intra_class_distances}")

# Build the comparison figure and save it as a PNG in the output directory.
fig = visualize_inter_intra_class_distances(
    inter_class_distances=inter_class_distances,
    intra_class_distances=intra_class_distances,
    keywords=[model_keyword, METHOD],
)
saved_path = os.path.join(
    analysis_output_dir,
    f"{model_keyword}_k_{len(intra_class_distances)}.png",
)
fig.savefig(saved_path)

d = cohens_d(
    np.array(inter_class_distances),
    np.array(intra_class_distances),
)
print(f"Cohen's d: {d}")

## VIT L16

In [None]:
# Load the stored features for this model keyword.
model_keyword = "vit_l_16"
features = load_features(keyword=model_keyword)

# Split every loaded record into its feature vector and its label id.
feature_vectors = [record["feature"] for record in features]
feature_labels = [record["label_id"] for record in features]

np_feature_labels = np.array(feature_labels)
# Drop the singleton axis 1 of the stacked features, then z-score them.
np_feature_vectors = np.array(feature_vectors).squeeze(axis=1)
np_normalized_feature_vectors = apply_zscore_normalization(np_feature_vectors)

clustered_feature_vectors = get_clustered_features(
    feature_vectors=np_normalized_feature_vectors,
    feature_labels=feature_labels,
)

inter_class_distances = get_all_inter_classes(clustered_feature_vectors)
intra_class_distances = get_all_intra_classes(clustered_feature_vectors)
print(f"inter_class_distances: {inter_class_distances}")
print(f"intra_class_distances: {intra_class_distances}")

# Build the comparison figure and save it as a PNG in the output directory.
fig = visualize_inter_intra_class_distances(
    inter_class_distances=inter_class_distances,
    intra_class_distances=intra_class_distances,
    keywords=[model_keyword, METHOD],
)
saved_path = os.path.join(
    analysis_output_dir,
    f"{model_keyword}_k_{len(intra_class_distances)}.png",
)
fig.savefig(saved_path)

d = cohens_d(
    np.array(inter_class_distances),
    np.array(intra_class_distances),
)
print(f"Cohen's d: {d}")

## VIT L32

In [None]:
# Load the stored features for this model keyword.
model_keyword = "vit_l_32_pc_224_21_fs(1, 1024)"
features = load_features(keyword=model_keyword)

# Split every loaded record into its feature vector and its label id.
feature_vectors = [record["feature"] for record in features]
feature_labels = [record["label_id"] for record in features]

np_feature_labels = np.array(feature_labels)
# Drop the singleton axis 1 of the stacked features, then z-score them.
np_feature_vectors = np.array(feature_vectors).squeeze(axis=1)
np_normalized_feature_vectors = apply_zscore_normalization(np_feature_vectors)

clustered_feature_vectors = get_clustered_features(
    feature_vectors=np_normalized_feature_vectors,
    feature_labels=feature_labels,
)

inter_class_distances = get_all_inter_classes(clustered_feature_vectors)
intra_class_distances = get_all_intra_classes(clustered_feature_vectors)
print(f"inter_class_distances: {inter_class_distances}")
print(f"intra_class_distances: {intra_class_distances}")

# Build the comparison figure and save it as a PNG in the output directory.
fig = visualize_inter_intra_class_distances(
    inter_class_distances=inter_class_distances,
    intra_class_distances=intra_class_distances,
    keywords=[model_keyword, METHOD],
)
saved_path = os.path.join(
    analysis_output_dir,
    f"{model_keyword}_k_{len(intra_class_distances)}.png",
)
fig.savefig(saved_path)

d = cohens_d(
    np.array(inter_class_distances),
    np.array(intra_class_distances),
)
print(f"Cohen's d: {d}")

## VIT L32 (26x100 training dataset)

In [None]:
# Load the stored features for this model keyword.
model_keyword = "vit_l_32_pc_224_21_AIT2-6k_fs(1, 1024)"
features = load_features(keyword=model_keyword)

# Split every loaded record into its feature vector and its label id.
feature_vectors = [record["feature"] for record in features]
feature_labels = [record["label_id"] for record in features]

np_feature_labels = np.array(feature_labels)
# Drop the singleton axis 1 of the stacked features, then z-score them.
np_feature_vectors = np.array(feature_vectors).squeeze(axis=1)
np_normalized_feature_vectors = apply_zscore_normalization(np_feature_vectors)

clustered_feature_vectors = get_clustered_features(
    feature_vectors=np_normalized_feature_vectors,
    feature_labels=feature_labels,
)

inter_class_distances = get_all_inter_classes(clustered_feature_vectors)
intra_class_distances = get_all_intra_classes(clustered_feature_vectors)
print(f"inter_class_distances: {inter_class_distances}")
print(f"intra_class_distances: {intra_class_distances}")

# Build the comparison figure and save it as a PNG in the output directory.
fig = visualize_inter_intra_class_distances(
    inter_class_distances=inter_class_distances,
    intra_class_distances=intra_class_distances,
    keywords=[model_keyword, METHOD],
)
saved_path = os.path.join(
    analysis_output_dir,
    f"{model_keyword}_k_{len(intra_class_distances)}.png",
)
fig.savefig(saved_path)

d = cohens_d(
    np.array(inter_class_distances),
    np.array(intra_class_distances),
)
print(f"Cohen's d: {d}")

## VIT H14

In [None]:
# Load the stored features for this model keyword.
model_keyword = "vit_h_14"
features = load_features(keyword=model_keyword)

# Split every loaded record into its feature vector and its label id.
feature_vectors = [record["feature"] for record in features]
feature_labels = [record["label_id"] for record in features]

np_feature_labels = np.array(feature_labels)
# Drop the singleton axis 1 of the stacked features, then z-score them.
np_feature_vectors = np.array(feature_vectors).squeeze(axis=1)
np_normalized_feature_vectors = apply_zscore_normalization(np_feature_vectors)

clustered_feature_vectors = get_clustered_features(
    feature_vectors=np_normalized_feature_vectors,
    feature_labels=feature_labels,
)

inter_class_distances = get_all_inter_classes(clustered_feature_vectors)
intra_class_distances = get_all_intra_classes(clustered_feature_vectors)
print(f"inter_class_distances: {inter_class_distances}")
print(f"intra_class_distances: {intra_class_distances}")

# Build the comparison figure and save it as a PNG in the output directory.
fig = visualize_inter_intra_class_distances(
    inter_class_distances=inter_class_distances,
    intra_class_distances=intra_class_distances,
    keywords=[model_keyword, METHOD],
)
saved_path = os.path.join(
    analysis_output_dir,
    f"{model_keyword}_k_{len(intra_class_distances)}.png",
)
fig.savefig(saved_path)

d = cohens_d(
    np.array(inter_class_distances),
    np.array(intra_class_distances),
)
print(f"Cohen's d: {d}")

## SWIN_B

In [None]:
# Load the stored features for this model keyword.
model_keyword = "swin_b"
features = load_features(keyword=model_keyword)

# Split every loaded record into its feature vector and its label id.
feature_vectors = [record["feature"] for record in features]
feature_labels = [record["label_id"] for record in features]

np_feature_labels = np.array(feature_labels)
# Drop the singleton axis 1 of the stacked features, then z-score them.
np_feature_vectors = np.array(feature_vectors).squeeze(axis=1)
np_normalized_feature_vectors = apply_zscore_normalization(np_feature_vectors)

clustered_feature_vectors = get_clustered_features(
    feature_vectors=np_normalized_feature_vectors,
    feature_labels=feature_labels,
)

inter_class_distances = get_all_inter_classes(clustered_feature_vectors)
intra_class_distances = get_all_intra_classes(clustered_feature_vectors)
print(f"inter_class_distances: {inter_class_distances}")
print(f"intra_class_distances: {intra_class_distances}")

# Build the comparison figure and save it as a PNG in the output directory.
fig = visualize_inter_intra_class_distances(
    inter_class_distances=inter_class_distances,
    intra_class_distances=intra_class_distances,
    keywords=[model_keyword, METHOD],
)
saved_path = os.path.join(
    analysis_output_dir,
    f"{model_keyword}_k_{len(intra_class_distances)}.png",
)
fig.savefig(saved_path)

d = cohens_d(
    np.array(inter_class_distances),
    np.array(intra_class_distances),
)
print(f"Cohen's d: {d}")

## SWIN_T

In [None]:
# Load the stored features for this model keyword.
model_keyword = "swin_t"
features = load_features(keyword=model_keyword)

# Split every loaded record into its feature vector and its label id.
feature_vectors = [record["feature"] for record in features]
feature_labels = [record["label_id"] for record in features]

np_feature_labels = np.array(feature_labels)
# Drop the singleton axis 1 of the stacked features, then z-score them.
np_feature_vectors = np.array(feature_vectors).squeeze(axis=1)
np_normalized_feature_vectors = apply_zscore_normalization(np_feature_vectors)

clustered_feature_vectors = get_clustered_features(
    feature_vectors=np_normalized_feature_vectors,
    feature_labels=feature_labels,
)

inter_class_distances = get_all_inter_classes(clustered_feature_vectors)
intra_class_distances = get_all_intra_classes(clustered_feature_vectors)
print(f"inter_class_distances: {inter_class_distances}")
print(f"intra_class_distances: {intra_class_distances}")

# Build the comparison figure and save it as a PNG in the output directory.
fig = visualize_inter_intra_class_distances(
    inter_class_distances=inter_class_distances,
    intra_class_distances=intra_class_distances,
    keywords=[model_keyword, METHOD],
)
saved_path = os.path.join(
    analysis_output_dir,
    f"{model_keyword}_k_{len(intra_class_distances)}.png",
)
fig.savefig(saved_path)

d = cohens_d(
    np.array(inter_class_distances),
    np.array(intra_class_distances),
)
print(f"Cohen's d: {d}")

## SWIN_S

In [None]:
# Load the stored features for this model keyword.
model_keyword = "swin_s"
features = load_features(keyword=model_keyword)

# Split every loaded record into its feature vector and its label id.
feature_vectors = [record["feature"] for record in features]
feature_labels = [record["label_id"] for record in features]

np_feature_labels = np.array(feature_labels)
# Drop the singleton axis 1 of the stacked features, then z-score them.
np_feature_vectors = np.array(feature_vectors).squeeze(axis=1)
np_normalized_feature_vectors = apply_zscore_normalization(np_feature_vectors)

clustered_feature_vectors = get_clustered_features(
    feature_vectors=np_normalized_feature_vectors,
    feature_labels=feature_labels,
)

inter_class_distances = get_all_inter_classes(clustered_feature_vectors)
intra_class_distances = get_all_intra_classes(clustered_feature_vectors)
print(f"inter_class_distances: {inter_class_distances}")
print(f"intra_class_distances: {intra_class_distances}")

# Build the comparison figure and save it as a PNG in the output directory.
fig = visualize_inter_intra_class_distances(
    inter_class_distances=inter_class_distances,
    intra_class_distances=intra_class_distances,
    keywords=[model_keyword, METHOD],
)
saved_path = os.path.join(
    analysis_output_dir,
    f"{model_keyword}_k_{len(intra_class_distances)}.png",
)
fig.savefig(saved_path)

d = cohens_d(
    np.array(inter_class_distances),
    np.array(intra_class_distances),
)
print(f"Cohen's d: {d}")

## SWIN_B V2

In [None]:
# Load the stored features for this model keyword.
model_keyword = "swin_v2_b"
features = load_features(keyword=model_keyword)

# Split every loaded record into its feature vector and its label id.
feature_vectors = [record["feature"] for record in features]
feature_labels = [record["label_id"] for record in features]

np_feature_labels = np.array(feature_labels)
# Drop the singleton axis 1 of the stacked features, then z-score them.
np_feature_vectors = np.array(feature_vectors).squeeze(axis=1)
np_normalized_feature_vectors = apply_zscore_normalization(np_feature_vectors)

clustered_feature_vectors = get_clustered_features(
    feature_vectors=np_normalized_feature_vectors,
    feature_labels=feature_labels,
)

inter_class_distances = get_all_inter_classes(clustered_feature_vectors)
intra_class_distances = get_all_intra_classes(clustered_feature_vectors)
print(f"inter_class_distances: {inter_class_distances}")
print(f"intra_class_distances: {intra_class_distances}")

# Build the comparison figure and save it as a PNG in the output directory.
fig = visualize_inter_intra_class_distances(
    inter_class_distances=inter_class_distances,
    intra_class_distances=intra_class_distances,
    keywords=[model_keyword, METHOD],
)
saved_path = os.path.join(
    analysis_output_dir,
    f"{model_keyword}_k_{len(intra_class_distances)}.png",
)
fig.savefig(saved_path)

d = cohens_d(
    np.array(inter_class_distances),
    np.array(intra_class_distances),
)
print(f"Cohen's d: {d}")

## SWIN_T V2

In [None]:
# Load the stored features for this model keyword.
model_keyword = "swin_v2_t"
features = load_features(keyword=model_keyword)

# Split every loaded record into its feature vector and its label id.
feature_vectors = [record["feature"] for record in features]
feature_labels = [record["label_id"] for record in features]

np_feature_labels = np.array(feature_labels)
# Drop the singleton axis 1 of the stacked features, then z-score them.
np_feature_vectors = np.array(feature_vectors).squeeze(axis=1)
np_normalized_feature_vectors = apply_zscore_normalization(np_feature_vectors)

clustered_feature_vectors = get_clustered_features(
    feature_vectors=np_normalized_feature_vectors,
    feature_labels=feature_labels,
)

inter_class_distances = get_all_inter_classes(clustered_feature_vectors)
intra_class_distances = get_all_intra_classes(clustered_feature_vectors)
print(f"inter_class_distances: {inter_class_distances}")
print(f"intra_class_distances: {intra_class_distances}")

# Build the comparison figure and save it as a PNG in the output directory.
fig = visualize_inter_intra_class_distances(
    inter_class_distances=inter_class_distances,
    intra_class_distances=intra_class_distances,
    keywords=[model_keyword, METHOD],
)
saved_path = os.path.join(
    analysis_output_dir,
    f"{model_keyword}_k_{len(intra_class_distances)}.png",
)
fig.savefig(saved_path)

d = cohens_d(
    np.array(inter_class_distances),
    np.array(intra_class_distances),
)
print(f"Cohen's d: {d}")

## SWIN_S V2

In [None]:
# Load the stored features for this model keyword.
model_keyword = "swin_v2_s"
features = load_features(keyword=model_keyword)

# Split every loaded record into its feature vector and its label id.
feature_vectors = [record["feature"] for record in features]
feature_labels = [record["label_id"] for record in features]

np_feature_labels = np.array(feature_labels)
# Drop the singleton axis 1 of the stacked features, then z-score them.
np_feature_vectors = np.array(feature_vectors).squeeze(axis=1)
np_normalized_feature_vectors = apply_zscore_normalization(np_feature_vectors)

clustered_feature_vectors = get_clustered_features(
    feature_vectors=np_normalized_feature_vectors,
    feature_labels=feature_labels,
)

inter_class_distances = get_all_inter_classes(clustered_feature_vectors)
intra_class_distances = get_all_intra_classes(clustered_feature_vectors)
print(f"inter_class_distances: {inter_class_distances}")
print(f"intra_class_distances: {intra_class_distances}")

# Build the comparison figure and save it as a PNG in the output directory.
fig = visualize_inter_intra_class_distances(
    inter_class_distances=inter_class_distances,
    intra_class_distances=intra_class_distances,
    keywords=[model_keyword, METHOD],
)
saved_path = os.path.join(
    analysis_output_dir,
    f"{model_keyword}_k_{len(intra_class_distances)}.png",
)
fig.savefig(saved_path)

d = cohens_d(
    np.array(inter_class_distances),
    np.array(intra_class_distances),
)
print(f"Cohen's d: {d}")