In [1]:
import torch
from sklearn.metrics import accuracy_score
from scipy.spatial.distance import cdist
import numpy as np
import os

# Pseudo-labeling based on closest class mean
def assign_pseudo_labels(features, class_means):
    """Assign each feature vector to the row of ``class_means`` nearest to it.

    Args:
        features: 2-D array-like with one sample per row.
        class_means: 2-D array-like with one class mean per row.

    Returns:
        1-D integer array with one entry per sample: the row index into
        ``class_means`` with the smallest Euclidean distance. Note that this
        is a row index, not necessarily the original class label value.
    """
    # Pairwise sample-to-mean distances; one row per sample, one column per mean.
    sample_to_mean = cdist(features, class_means, metric='euclidean')
    return np.argmin(sample_to_mean, axis=1)

# Compute class statistics
def compute_class_statistics(features, labels):
    """Compute the mean feature vector of every class present in ``labels``.

    Args:
        features: NumPy array with one sample per row.
        labels: NumPy array of per-sample labels, aligned with ``features``.

    Returns:
        Array with one row per distinct label, ordered like
        ``np.unique(labels)`` (ascending). Labels that never occur produce no
        row, so row ``k`` corresponds to the k-th *present* label.
    """
    per_class_means = [
        np.mean(features[labels == class_id], axis=0)  # boolean mask selects the class's rows
        for class_id in np.unique(labels)
    ]
    return np.array(per_class_means)

# Load precomputed features
def load_precomputed_features(dataset_type, dataset_index):
    """Load one dataset's precomputed feature array from disk.

    Args:
        dataset_type: Split name used both as the sub-directory and inside the
            file name (the callers in this file pass "train" or "eval").
        dataset_index: Dataset number ``i`` of the ``D{i}`` file to read.

    Returns:
        Whatever ``np.load`` returns for
        ``extracted_feature/part_one_dataset/{type}/D{i}_{type}_features.npy``
        (path is relative to the current working directory).
    """
    split_dir = f"extracted_feature/part_one_dataset/{dataset_type}"
    file_name = f"D{dataset_index}_{dataset_type}_features.npy"
    return np.load(f"{split_dir}/{file_name}")

# Training with pseudo-labeling
def task_one_model():
    """Run the Task-1 nearest-class-mean pipeline with pseudo-labeling on D1..D10.

    Class means are initialised from the labelled D1 training features. For
    each later dataset D_i, its training features are pseudo-labelled with the
    current means and the means are then recomputed from those features only.
    After every step the current means are evaluated on the eval splits of
    D1..D_i.

    Returns:
        A (10, 10) array where entry ``[i-1, j-1]`` is the accuracy on D_j's
        eval split after the update with D_i. Entries with ``j > i``, and
        entries whose eval label file does not exist, stay 0.

    Side effects:
        Prints one accuracy row per dataset and writes the final class means
        to ``f10_class_means_task1.npy`` in the current working directory.
    """
    num_datasets = 10  # D1 to D10

    task1_train_features = []
    task1_eval_features = []
    eval_labels = []  # ground-truth eval labels per dataset, or None when the file is absent

    # Load everything once up front; the eval label files used to be re-read
    # from disk on every (i, j) evaluation pass.
    for i in range(1, num_datasets + 1):
        task1_train_features.append(load_precomputed_features("train", i))
        task1_eval_features.append(load_precomputed_features("eval", i))

        eval_labels_file = f"extracted_feature/part_one_dataset/eval/D{i}_eval_labels.npy"
        eval_labels.append(np.load(eval_labels_file) if os.path.exists(eval_labels_file) else None)

    # Start with D1 (Dataset 1), the only dataset whose training labels are used.
    X_d1_features = task1_train_features[0]
    y_d1 = np.load("extracted_feature/part_one_dataset/train/D1_train_labels.npy")
    class_means = compute_class_statistics(X_d1_features, y_d1)

    # compute_class_statistics orders its rows like np.unique(labels), so row k
    # of class_means belongs to class_ids[k]. Keeping this lookup lets the
    # predicted row indices be mapped back to real label values even when the
    # labels are not exactly 0..K-1.
    class_ids = np.unique(y_d1)

    accuracies = np.zeros((num_datasets, num_datasets))

    for i in range(1, num_datasets + 1):
        if i > 1:
            X_di_features = task1_train_features[i - 1]
            pseudo_labels = assign_pseudo_labels(X_di_features, class_means)
            refreshed_means = compute_class_statistics(X_di_features, pseudo_labels)
            assigned_rows = np.unique(pseudo_labels)

            if assigned_rows.size == len(class_means):
                # Every class received at least one sample: plain replacement.
                class_means = refreshed_means
            elif assigned_rows.size:
                # Some class received no pseudo-labels. refreshed_means then has
                # fewer rows than classes, and using it directly would shift the
                # row -> class mapping for all later steps. Keep the previous
                # mean for the classes that were not assigned.
                merged_means = class_means.astype(
                    np.result_type(class_means, refreshed_means), copy=True
                )
                merged_means[assigned_rows] = refreshed_means
                class_means = merged_means
            # else: D_i contributed no samples at all; keep the current means.

        # Evaluate on D1 .. D{i}
        for j in range(1, i + 1):
            y_eval = eval_labels[j - 1]
            if y_eval is None:
                continue  # no ground truth available; the matrix entry stays 0

            nearest_rows = assign_pseudo_labels(task1_eval_features[j - 1], class_means)
            predictions = class_ids[nearest_rows]
            accuracies[i - 1, j - 1] = accuracy_score(y_eval, predictions)

        # Print iteration-level accuracy
        print(f"After updating with Dataset {i}, Accuracy Matrix Row {i}: {accuracies[i - 1, :i]}")

    # NOTE(review): torch.save writes torch's own serialization format, not a
    # NumPy .npy file, despite the extension; np.load will not read it back as
    # an array. Left unchanged because downstream code presumably reads it with
    # torch.load -- confirm, then consider np.save or a .pt extension.
    torch.save(class_means, 'f10_class_means_task1.npy')
    print("Class means of f10 saved to 'f10_class_means_task1.npy'.")

    return accuracies

# Main execution
if __name__ == "__main__":
    # Run the full Task-1 pipeline, then show the resulting accuracy matrix
    # (row i = state after updating with D_i, column j = eval split of D_j).
    accuracies = task_one_model()
    print("Accuracy Matrix for Task-1:", accuracies, sep="\n")


After updating with Dataset 1, Accuracy Matrix Row 1: [0.9436]
After updating with Dataset 2, Accuracy Matrix Row 2: [0.9124 0.9108]
After updating with Dataset 3, Accuracy Matrix Row 3: [0.8972 0.8972 0.9004]
After updating with Dataset 4, Accuracy Matrix Row 4: [0.8964 0.8968 0.8996 0.9016]
After updating with Dataset 5, Accuracy Matrix Row 5: [0.8944 0.8912 0.8948 0.8976 0.896 ]
After updating with Dataset 6, Accuracy Matrix Row 6: [0.892  0.8912 0.8956 0.894  0.8948 0.8972]
After updating with Dataset 7, Accuracy Matrix Row 7: [0.8912 0.8932 0.8952 0.898  0.8936 0.898  0.8964]
After updating with Dataset 8, Accuracy Matrix Row 8: [0.8908 0.892  0.8944 0.8972 0.894  0.8984 0.8976 0.8928]
After updating with Dataset 9, Accuracy Matrix Row 9: [0.8912 0.8888 0.892  0.896  0.896  0.8976 0.8936 0.8916 0.8904]
After updating with Dataset 10, Accuracy Matrix Row 10: [0.8892 0.886  0.8896 0.8892 0.8876 0.892  0.8888 0.8872 0.8852 0.8928]
Class means of f10 saved to 'f10_class_means_task1.np