In [1]:
import torch
from sklearn.metrics import accuracy_score
from scipy.spatial.distance import cdist
import numpy as np
import os

# Pseudo-labeling with confidence
def assign_pseudo_labels_with_confidence(features, class_means):
    distances = cdist(features, class_means, metric='euclidean')  # Compute distance to each class mean
    closest_class_indices = np.argmin(distances, axis=1)  # Assign to closest class
    closest_distances = distances[np.arange(len(distances)), closest_class_indices]
    total_distances = distances.sum(axis=1)

    # Confidence: Inverse of normalized closest distance
    confidences = 1 - (closest_distances / total_distances)
    return closest_class_indices, confidences

# Compute class statistics
def compute_class_statistics(features, labels):
    class_means = []
    for cls in np.unique(labels):
        cls_features = features[labels == cls]
        mean = np.mean(cls_features, axis=0)
        class_means.append(mean)
    return np.array(class_means)

# Update memory buffer
def update_memory_buffer(memory_buffer, features, labels, budget=100):
    """Update memory buffer with representative samples."""
    new_samples = list(zip(features, labels))
    memory_buffer.extend(new_samples)
    if len(memory_buffer) > budget:
        memory_buffer = memory_buffer[:budget]  # Trim to budget size
    return memory_buffer

# Consolidate class means
def consolidate_internal_distribution(class_means_old, class_means_new, alpha=0.5):
    """Blend old and new class means using exponential moving average."""
    consolidated_means = []
    for i in range(len(class_means_new)):
        if i < len(class_means_old):
            consolidated_mean = alpha * class_means_old[i] + (1 - alpha) * class_means_new[i]
        else:
            consolidated_mean = class_means_new[i]
        consolidated_means.append(consolidated_mean)
    return np.array(consolidated_means)

# Load precomputed features
def load_precomputed_features(dataset_type, dataset_index):
    features_file = f"extracted_feature/part_two_dataset/{dataset_type}/D{dataset_index}_{dataset_type}_features.npy"
    features = np.load(features_file)
    return features

# Task 2: Training and evaluation
def task_two_model(initial_class_means, confidence_threshold=0.9):
    class_means = initial_class_means
    memory_buffer = []  # Memory buffer for representative samples
    accuracies = np.zeros((10, 20))  # 10 rows for d1-d10, 20 columns for held-out datasets (D1-D20)

    # Load features for Task 1 evaluation datasets (D1-D10)
    task1_eval_features = []
    for j in range(1, 11):  # D1 to D10
        eval_features = np.load(f"extracted_feature/part_one_dataset/eval/D{j}_eval_features.npy")
        task1_eval_features.append(eval_features)

    # Load features for Task 2 datasets (d1-d10)
    task2_train_features = []
    task2_eval_features = []
    for i in range(1, 11):  # d1 to d10
        # Load training dataset features
        train_features = load_precomputed_features("train", i)
        task2_train_features.append(train_features)

        # Load evaluation dataset features
        eval_features = load_precomputed_features("eval", i)
        task2_eval_features.append(eval_features)

    # Training on Task 2 datasets
    for i in range(1, 11):  # d1 to d10
        X_di_features = task2_train_features[i - 1]

        # Generate pseudo-labels with confidence
        pseudo_labels, confidences = assign_pseudo_labels_with_confidence(X_di_features, class_means)

        # Filter high-confidence samples
        high_confidence_mask = confidences >= confidence_threshold
        X_di_features = X_di_features[high_confidence_mask]
        pseudo_labels = pseudo_labels[high_confidence_mask]

        # Update class means
        updated_means = compute_class_statistics(X_di_features, pseudo_labels)
        class_means = consolidate_internal_distribution(class_means, updated_means)

        # Update memory buffer
        memory_buffer = update_memory_buffer(memory_buffer, X_di_features, pseudo_labels, budget=100)

        # Evaluate on Task 1 datasets (D1-D10)
        for j in range(1, 11):  # D1 to D10
            X_eval_features = task1_eval_features[j - 1]
            predictions, _ = assign_pseudo_labels_with_confidence(X_eval_features, class_means)

            # If evaluation labels are known for testing purposes, compute accuracy
            eval_labels_file = f"extracted_feature/part_one_dataset/eval/D{j}_eval_labels.npy"
            if os.path.exists(eval_labels_file):
                y_eval = np.load(eval_labels_file)
                accuracies[i - 1, j - 1] = accuracy_score(y_eval, predictions)

        # Evaluate on Task 2 datasets (d1-di)
        for j in range(1, i + 1):  # d1 to di
            X_eval_features = task2_eval_features[j - 1]
            predictions, _ = assign_pseudo_labels_with_confidence(X_eval_features, class_means)

            # If evaluation labels are known for testing purposes, compute accuracy
            eval_labels_file = f"extracted_feature/part_two_dataset/eval/D{j}_eval_labels.npy"
            if os.path.exists(eval_labels_file):
                y_eval = np.load(eval_labels_file)
                accuracies[i - 1, j + 9] = accuracy_score(y_eval, predictions)

        # Print progress
        print(f"Task 2: After processing task_two_Dataset D{i}, Accuracy Matrix Row {i - 1}: {accuracies[i - 1, :i + 9]}")

    return accuracies

# Main execution
if __name__ == "__main__":
    # Load the initial class means from Task 1
    initial_class_means = torch.load('f10_class_means_task1.npy')
    print("Loaded f10 class means for Task 2.")

    # Task 2: Proceed with d1-d10
    accuracies_task_2 = task_two_model(initial_class_means, confidence_threshold=0.9)

    print("Accuracy Matrix for Task 2:")
    print(accuracies_task_2)


Loaded f10 class means for Task 2.
Task 2: After processing task_two_Dataset D1, Accuracy Matrix Row 0: [0.8924 0.8928 0.8944 0.8964 0.892  0.898  0.8948 0.8948 0.8936 0.8964]
Task 2: After processing task_two_Dataset D2, Accuracy Matrix Row 1: [0.8972 0.8988 0.898  0.8956 0.8988 0.9012 0.8992 0.896  0.8992 0.8988
 0.7136]
Task 2: After processing task_two_Dataset D3, Accuracy Matrix Row 2: [0.8988 0.8984 0.9004 0.8988 0.9    0.902  0.9012 0.8952 0.9    0.898
 0.7128 0.5936]
Task 2: After processing task_two_Dataset D4, Accuracy Matrix Row 3: [0.896  0.8968 0.8988 0.8988 0.8996 0.9032 0.8992 0.8964 0.8996 0.8988
 0.7112 0.5848 0.7724]
Task 2: After processing task_two_Dataset D5, Accuracy Matrix Row 4: [0.8936 0.8964 0.8968 0.8976 0.8964 0.9    0.9004 0.8948 0.8992 0.9
 0.7064 0.574  0.7652 0.812 ]
Task 2: After processing task_two_Dataset D6, Accuracy Matrix Row 5: [0.8968 0.8988 0.8992 0.8992 0.8996 0.8988 0.902  0.8964 0.8996 0.8988
 0.7096 0.578  0.7744 0.8136 0.8712]
Task 2: After