In [1]:
# Final Model


import os
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from transformers import ViTModel
from PIL import Image
import warnings

# Suppress Warnings for Transformer Models
warnings.filterwarnings("ignore", category=UserWarning)

# Custom Dataset Class
class CustomCIFARDataset(Dataset):
    """Dataset backed by a ``torch.save``-d dictionary of images and optional labels.

    The file must deserialize to a mapping with a ``'data'`` entry (an
    indexable collection of images accepted by ``Image.fromarray``) and, when
    ``labeled`` is true, a ``'targets'`` entry indexed the same way.

    Args:
        file_path: Path of the serialized dictionary.
        transform: Optional callable applied to each PIL image.
        labeled: When false, ``'targets'`` is never read and items are bare
            images instead of ``(image, label)`` pairs.

    Raises:
        KeyError: If a required entry is missing from the loaded dictionary.
    """

    def __init__(self, file_path, transform=None, labeled=True):
        # SECURITY: full unpickling can execute arbitrary code; only load
        # files from a trusted source.
        try:
            # Ask for full unpickling explicitly: the payload is a generic
            # pickled dict (presumably holding NumPy arrays), which the
            # restricted weights-only loader may refuse.
            data_dict = torch.load(file_path, weights_only=False)
        except TypeError:
            # This torch.load rejected the `weights_only` keyword; retry
            # with the bare call.
            print("Warning: Falling back to non-weighted mode. Ensure the file is trusted.")
            data_dict = torch.load(file_path)

        self.images = data_dict['data']
        self.labels = data_dict['targets'] if labeled else None
        self.transform = transform

    def __len__(self):
        """Return the number of stored images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the transformed image at ``idx``, paired with its label when labeled."""
        image = Image.fromarray(self.images[idx])  # Convert to PIL Image
        if self.transform is not None:
            image = self.transform(image)

        if self.labels is None:
            return image
        return image, self.labels[idx]

# Feature Extractor Using Vision Transformer
class ViTFeatureExtractorModel(nn.Module):
    """Wrap a ViT backbone and expose its first-token ([CLS]) embedding as features.

    Args:
        pretrained: When true, every backbone parameter is frozen
            (``requires_grad=False``). The checkpoint weights are loaded
            regardless of this flag.
    """

    def __init__(self, pretrained=True):
        super().__init__()
        backbone = ViTModel.from_pretrained("google/vit-base-patch16-224-in21k")
        self.vit = backbone
        # Width of each token embedding produced by the backbone.
        self.feature_dim = backbone.config.hidden_size

        if pretrained:
            # Freeze the whole backbone in one call.
            backbone.requires_grad_(False)

    def forward(self, x):
        """Return the hidden state of token 0 for every item in the batch ``x``."""
        hidden_states = self.vit(pixel_values=x, return_dict=True).last_hidden_state
        return hidden_states[:, 0, :]

# Bayesian Classifier
class BayesianClassifier(nn.Module):
    """Per-class Gaussian prototypes with a conjugate mean update and cosine scoring.

    Each class keeps a mean vector and a covariance matrix, both stored as
    non-trainable parameters. Means start at zero and covariances at identity.

    Args:
        input_dim: Length of each feature vector.
        num_classes: Number of classes (one prototype per class).
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.input_dim = input_dim
        self.num_classes = num_classes

        zero_means = torch.zeros(num_classes, input_dim)
        identity_covs = torch.eye(input_dim).repeat(num_classes, 1, 1)
        self.prior_mean = nn.Parameter(zero_means, requires_grad=False)
        self.prior_cov = nn.Parameter(identity_covs, requires_grad=False)

    def update_posteriors(self, features, labels):
        """Fold ``features`` into the stored mean/covariance of each class in ``labels``.

        For every class with at least one sample, the stored values act as the
        prior and are overwritten in place by the posterior. The likelihood
        precision is taken as ``n_c * I``, where ``n_c`` is the sample count.
        Classes with no samples are left untouched.
        """
        for class_id in range(self.num_classes):
            members = features[labels == class_id]
            if len(members) == 0:
                continue

            count = len(members)
            batch_mean = members.mean(dim=0)
            prior_precision = torch.linalg.inv(self.prior_cov[class_id])
            data_precision = torch.eye(self.input_dim, device=features.device) * count

            new_cov = torch.linalg.inv(prior_precision + data_precision)
            weighted_sum = (
                prior_precision @ self.prior_mean[class_id] + data_precision @ batch_mean
            )
            new_mean = new_cov @ weighted_sum

            self.prior_mean.data[class_id] = new_mean
            self.prior_cov.data[class_id] = new_cov

    def forward(self, features):
        """Return the cosine similarity between each feature row and each class mean."""
        eps = 1e-6
        feature_norms = torch.norm(features, dim=1, keepdim=True)
        prototype_norms = torch.norm(self.prior_mean, dim=1, keepdim=True)
        unit_features = features / (feature_norms + eps)
        unit_prototypes = self.prior_mean / (prototype_norms + eps)
        return torch.mm(unit_features, unit_prototypes.T)

# Save Model Parameters
def save_model(class_means, class_covs, file_path):
    """Serialize the class means and covariances to ``file_path`` via ``torch.save``.

    The checkpoint is a dictionary with the keys ``"class_means"`` and
    ``"class_covs"``; the values are stored exactly as passed in.
    """
    checkpoint = dict(class_means=class_means, class_covs=class_covs)
    torch.save(checkpoint, file_path)

# Load Model Parameters
def load_model(file_path, map_location=None):
    """Load the class means and covariances stored in a checkpoint file.

    Args:
        file_path: Path of a checkpoint holding a dictionary with the keys
            ``"class_means"`` and ``"class_covs"``.
        map_location: Forwarded to ``torch.load``. The default ``None`` keeps
            torch's own device placement; pass ``"cpu"`` or a ``torch.device``
            to open a checkpoint saved on a GPU from a host without one.

    Returns:
        Tuple ``(class_means, class_covs)``.

    Raises:
        KeyError: If either key is missing from the checkpoint.
    """
    checkpoint = torch.load(file_path, map_location=map_location)
    return checkpoint["class_means"], checkpoint["class_covs"]

# Classifier for Non-Gaussian Updates
class MeanUpdateClassifier(nn.Module):
    """Prototype classifier whose prototypes are plain per-class feature means.

    Args:
        input_dim: Length of each feature vector.
        num_classes: Number of classes (one mean vector per class).
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.input_dim = input_dim
        self.num_classes = num_classes
        initial_means = torch.zeros(num_classes, input_dim)
        self.class_means = nn.Parameter(initial_means, requires_grad=False)

    def update_means(self, features, labels):
        """Overwrite the stored mean of each class present in ``labels``.

        The new value is the mean of that class's rows in ``features``; the
        previous value is replaced, not averaged in. Classes with no rows are
        left unchanged.
        """
        for class_id in range(self.num_classes):
            members = features[labels == class_id]
            if len(members) == 0:
                continue
            self.class_means.data[class_id] = members.mean(dim=0)

    def forward(self, features):
        """Return the cosine similarity between each feature row and each class mean."""
        eps = 1e-6
        feature_norms = torch.norm(features, dim=1, keepdim=True)
        mean_norms = torch.norm(self.class_means, dim=1, keepdim=True)
        unit_features = features / (feature_norms + eps)
        unit_means = self.class_means / (mean_norms + eps)
        return torch.mm(unit_features, unit_means.T)

# Main Training and Evaluation Function
def train_and_evaluate():
    """Fit the Gaussian prototype classifier on part-one datasets D1-D10 and save it.

    D1 is loaded with labels and used directly. D2-D10 are loaded without
    labels: each sample is scored by the current classifier, and only samples
    whose top cosine score exceeds the confidence threshold are kept, with the
    predicted class as pseudo-label. The final class means and covariances are
    written to ``gaussian_model.pth`` in the working directory.

    Despite its name, this function performs no evaluation.
    """
    train_data_path = '/content/drive/MyDrive/cs771/dataset/dataset/part_one_dataset/train_data'

    transform = transforms.Compose([
        transforms.Resize((224, 224)),  # Resize for ViT input
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])  # Normalize for ViT
    ])

    batch_size = 64
    confidence_threshold = 0.8
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    feature_extractor = ViTFeatureExtractorModel(pretrained=True).to(device)
    gaussian_classifier = BayesianClassifier(input_dim=feature_extractor.feature_dim, num_classes=10).to(device)

    # Phase 1: Train with Gaussian Method
    for i in range(1, 11):
        print(f"\n=== Processing Dataset D{i} (Gaussian Method) ===")
        is_labeled = i == 1  # only D1 is read with its labels

        train_dataset = CustomCIFARDataset(
            os.path.join(train_data_path, f'{i}_train_data.tar.pth'),
            transform=transform,
            labeled=is_labeled
        )
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        features_list, labels_list = [], []
        # Pure inference: no autograd bookkeeping is needed for the backbone.
        with torch.no_grad():
            for batch in train_loader:
                if is_labeled:
                    images, labels = batch
                    labels_list.append(labels.to(device))
                else:
                    images = batch
                features_list.append(feature_extractor(images.to(device)))

        features = torch.cat(features_list)

        if is_labeled:
            gaussian_classifier.update_posteriors(features, torch.cat(labels_list))
        else:
            with torch.no_grad():
                logits = gaussian_classifier(features)
                predictions = logits.argmax(dim=1)
                confidences = logits.max(dim=1).values
            # Keep only confident predictions as pseudo-labels; an empty
            # selection is a no-op inside update_posteriors.
            high_conf_indices = confidences > confidence_threshold
            gaussian_classifier.update_posteriors(
                features[high_conf_indices], predictions[high_conf_indices]
            )

    # Save plain tensors (not nn.Parameter wrappers), matching how the model
    # is re-saved after the later D11-D20 updates.
    save_model(gaussian_classifier.prior_mean.data, gaussian_classifier.prior_cov.data, "gaussian_model.pth")

# Run the Gaussian training pass when this cell executes; it writes "gaussian_model.pth".
train_and_evaluate()


config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]


=== Processing Dataset D1 (Gaussian Method) ===


  data_dict = torch.load(file_path)



=== Processing Dataset D2 (Gaussian Method) ===

=== Processing Dataset D3 (Gaussian Method) ===

=== Processing Dataset D4 (Gaussian Method) ===

=== Processing Dataset D5 (Gaussian Method) ===

=== Processing Dataset D6 (Gaussian Method) ===

=== Processing Dataset D7 (Gaussian Method) ===

=== Processing Dataset D8 (Gaussian Method) ===

=== Processing Dataset D9 (Gaussian Method) ===

=== Processing Dataset D10 (Gaussian Method) ===


In [8]:
def update_and_evaluate():
    """Fit the mean-update classifier on D1-D10, then continue the Gaussian model on D11-D20.

    Phase 1 reads every part-one training set with labels, overwrites the
    per-class means of ``MeanUpdateClassifier`` after each set, and saves its
    state dict to ``mean_update_classifier.pth``.

    Phase 2 restores the Gaussian classifier from ``gaussian_model.pth``
    (both means and covariances). It then updates the classifier on each
    part-two training set using confident pseudo-labels, and writes the
    result back to ``gaussian_model.pth``.

    Despite its name, this function performs no evaluation.
    """
    # Paths for train datasets
    path_one_train = '/content/drive/MyDrive/cs771/dataset/dataset/part_one_dataset/train_data'
    path_two_train = '/content/drive/MyDrive/cs771/dataset/dataset/part_two_dataset/train_data'

    transform = transforms.Compose([
        transforms.Resize((224, 224)),  # Resize for ViT input
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])  # Normalize for ViT
    ])

    batch_size = 64
    confidence_threshold = 0.8
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    feature_extractor = ViTFeatureExtractorModel(pretrained=True).to(device)
    gaussian_classifier = BayesianClassifier(input_dim=feature_extractor.feature_dim, num_classes=10).to(device)
    mean_update_classifier = MeanUpdateClassifier(input_dim=feature_extractor.feature_dim, num_classes=10).to(device)

    # Phase 1: Train with Mean Update Method (D1-D10)
    for i in range(1, 11):
        print(f"\n=== Processing Dataset D{i} (Mean Update Method) ===")

        # NOTE(review): every part-one set is read as labeled here, which
        # requires a 'targets' entry in each file — confirm for D2-D10.
        train_dataset = CustomCIFARDataset(
            os.path.join(path_one_train, f'{i}_train_data.tar.pth'),
            transform=transform,
            labeled=True
        )
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        features_list, labels_list = [], []
        # Pure inference: no autograd bookkeeping is needed for the backbone.
        with torch.no_grad():
            for images, labels in train_loader:
                features_list.append(feature_extractor(images.to(device)))
                labels_list.append(labels.to(device))

        features = torch.cat(features_list)
        labels = torch.cat(labels_list)

        # Update class means using direct mean method (replaces the stored
        # mean of every class present in this dataset).
        mean_update_classifier.update_means(features, labels)

    # Save the updated model after training D1-D10
    print("\n--- Saving Mean Update Classifier Model ---")
    torch.save(mean_update_classifier.state_dict(), 'mean_update_classifier.pth')

    # Phase 2: Update and Evaluate with Gaussian Method (D11-D20)
    # Load the saved Gaussian model. Both the means AND the covariances are
    # restored: leaving the covariance at identity would discard the
    # precision accumulated so far and over-weight every later update.
    class_means, class_covs = load_model("gaussian_model.pth")
    with torch.no_grad():
        gaussian_classifier.prior_mean.copy_(class_means.to(device))
        gaussian_classifier.prior_cov.copy_(class_covs.to(device))

    for i in range(11, 21):
        print(f"\n=== Updating on Path Two: D{i} ===")
        train_dataset = CustomCIFARDataset(
            os.path.join(path_two_train, f'{i-10}_train_data.tar.pth'),
            transform=transform,
            labeled=False
        )
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

        features_list = []
        with torch.no_grad():
            for batch in train_loader:
                features_list.append(feature_extractor(batch.to(device)))

        features = torch.cat(features_list)

        # Update class means and covariances using Gaussian method
        with torch.no_grad():
            logits = gaussian_classifier(features)
            predictions = logits.argmax(dim=1)
            confidences = logits.max(dim=1).values
        high_conf_indices = confidences > confidence_threshold
        gaussian_classifier.update_posteriors(
            features[high_conf_indices], predictions[high_conf_indices]
        )

    # Save updated Gaussian model
    print("\n--- Saving Gaussian Classifier Model ---")
    save_model(gaussian_classifier.prior_mean.data, gaussian_classifier.prior_cov.data, 'gaussian_model.pth')

  checkpoint = torch.load(file_path)
  data_dict = torch.load(file_path)



=== Updating on Path Two: D11 ===

=== Evaluating on Path One: D1 ===
Accuracy on D1 (Path One): 0.9504

=== Evaluating on Path One: D2 ===
Accuracy on D2 (Path One): 0.9524

=== Evaluating on Path One: D3 ===
Accuracy on D3 (Path One): 0.9504

=== Evaluating on Path One: D4 ===
Accuracy on D4 (Path One): 0.9500

=== Evaluating on Path One: D5 ===
Accuracy on D5 (Path One): 0.9524

=== Evaluating on Path One: D6 ===
Accuracy on D6 (Path One): 0.9532

=== Evaluating on Path One: D7 ===
Accuracy on D7 (Path One): 0.9496

=== Evaluating on Path One: D8 ===
Accuracy on D8 (Path One): 0.9496

=== Evaluating on Path One: D9 ===
Accuracy on D9 (Path One): 0.9544

=== Evaluating on Path One: D10 ===
Accuracy on D10 (Path One): 0.9604

=== Evaluating on Path Two: D11 ===
Accuracy on D11 (Path Two): 0.8372

=== Evaluating on Path Two: D12 ===
Accuracy on D12 (Path Two): 0.7604

=== Evaluating on Path Two: D13 ===
Accuracy on D13 (Path Two): 0.8856

=== Evaluating on Path Two: D14 ===
Accuracy o

Using the Gaussian approach for all datasets (D1 to D20)

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from transformers import ViTModel, ViTFeatureExtractor
from PIL import Image
import os
import numpy as np

# Dataset Class
class CustomCIFARDataset(Dataset):
    """Dataset backed by a ``torch.save``-d dictionary of images and optional labels.

    The file must deserialize to a mapping with a ``'data'`` entry (an
    indexable collection of images accepted by ``Image.fromarray``) and, when
    ``labeled`` is true, a ``'targets'`` entry indexed the same way.

    Args:
        file_path: Path of the serialized dictionary.
        transform: Optional callable applied to each PIL image.
        labeled: When false, ``'targets'`` is never read and items are bare
            images instead of ``(image, label)`` pairs.

    Raises:
        KeyError: If a required entry is missing from the loaded dictionary.
    """

    def __init__(self, file_path, transform=None, labeled=True):
        # SECURITY: full unpickling can execute arbitrary code; only load
        # files from a trusted source.
        try:
            # Ask for full unpickling explicitly: the payload is a generic
            # pickled dict (presumably holding NumPy arrays), which the
            # restricted weights-only loader may refuse.
            data_dict = torch.load(file_path, weights_only=False)
        except TypeError:
            # This torch.load rejected the `weights_only` keyword; retry
            # with the bare call.
            print("Warning: Falling back to non-weighted mode. Ensure the file is trusted.")
            data_dict = torch.load(file_path)

        self.images = data_dict['data']
        self.labels = data_dict['targets'] if labeled else None
        self.transform = transform

    def __len__(self):
        """Return the number of stored images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the transformed image at ``idx``, paired with its label when labeled."""
        image = Image.fromarray(self.images[idx])  # Convert to PIL Image
        if self.transform is not None:
            image = self.transform(image)

        if self.labels is None:
            return image
        return image, self.labels[idx]

# Feature Extractor using Vision Transformer
class ViTFeatureExtractorModel(nn.Module):
    """Expose the first-token ([CLS]) embedding of a ViT backbone as a feature vector.

    Args:
        pretrained: When true, all backbone parameters are frozen
            (``requires_grad=False``). The checkpoint weights are loaded
            whether or not this flag is set.
    """

    def __init__(self, pretrained=True):
        super().__init__()
        backbone = ViTModel.from_pretrained("google/vit-base-patch16-224-in21k")
        self.vit = backbone
        # Width of each token embedding produced by the backbone.
        self.feature_dim = backbone.config.hidden_size

        if pretrained:
            # Freeze the whole backbone in one call.
            backbone.requires_grad_(False)

    def forward(self, x):
        """Return the hidden state of token 0 for every item in the batch ``x``."""
        hidden_states = self.vit(pixel_values=x, return_dict=True).last_hidden_state
        return hidden_states[:, 0, :]

# Bayesian Classifier
class BayesianClassifier(nn.Module):
    """Per-class Gaussian prototypes with a conjugate mean update and cosine scoring.

    Each class keeps a mean vector and a covariance matrix, both stored as
    non-trainable parameters. Means start at zero and covariances at identity.

    Args:
        input_dim: Length of each feature vector.
        num_classes: Number of classes (one prototype per class).
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.input_dim = input_dim
        self.num_classes = num_classes

        zero_means = torch.zeros(num_classes, input_dim)
        identity_covs = torch.eye(input_dim).repeat(num_classes, 1, 1)
        self.prior_mean = nn.Parameter(zero_means, requires_grad=False)
        self.prior_cov = nn.Parameter(identity_covs, requires_grad=False)

    def update_posteriors(self, features, labels):
        """Fold ``features`` into the stored mean/covariance of each class in ``labels``.

        For every class with at least one sample, the stored values act as the
        prior and are overwritten in place by the posterior. The likelihood
        precision is taken as ``n_c * I``, where ``n_c`` is the sample count.
        Classes with no samples are left untouched.
        """
        for class_id in range(self.num_classes):
            members = features[labels == class_id]
            if len(members) == 0:
                continue

            count = len(members)
            batch_mean = members.mean(dim=0)
            prior_precision = torch.linalg.inv(self.prior_cov[class_id])
            data_precision = torch.eye(self.input_dim, device=features.device) * count

            new_cov = torch.linalg.inv(prior_precision + data_precision)
            weighted_sum = (
                prior_precision @ self.prior_mean[class_id] + data_precision @ batch_mean
            )
            new_mean = new_cov @ weighted_sum

            self.prior_mean.data[class_id] = new_mean
            self.prior_cov.data[class_id] = new_cov

    def forward(self, features):
        """Return the cosine similarity between each feature row and each class mean."""
        eps = 1e-6
        feature_norms = torch.norm(features, dim=1, keepdim=True)
        prototype_norms = torch.norm(self.prior_mean, dim=1, keepdim=True)
        unit_features = features / (feature_norms + eps)
        unit_prototypes = self.prior_mean / (prototype_norms + eps)
        return torch.mm(unit_features, unit_prototypes.T)

# Main Functionality
def train_and_evaluate():
    """Train the Gaussian classifier over D1-D20 and evaluate after every dataset.

    Datasets D1-D10 come from the part-one directory and D11-D20 from the
    part-two directory. Only D1 is read with labels; every later dataset is
    pseudo-labeled by the current classifier, and only samples whose top
    cosine score exceeds the confidence threshold update the posteriors.

    After processing dataset Dk, model f_k is evaluated on held-out sets
    D̂1..D̂k. Accuracies (in percent) go into a 20x20 matrix: row ``k-1`` is
    model f_k and column ``c-1`` is held-out set D̂c, with part-two sets in
    columns 10-19. Cells above the diagonal stay zero. The matrix is printed
    at the end.
    """
    train_data_path1 = '/content/drive/MyDrive/cs771/dataset/dataset/part_one_dataset/train_data'
    eval_data_path1 = '/content/drive/MyDrive/cs771/dataset/dataset/part_one_dataset/eval_data'

    train_data_path2 = '/content/drive/MyDrive/cs771/dataset/dataset/part_two_dataset/train_data'
    eval_data_path2 = '/content/drive/MyDrive/cs771/dataset/dataset/part_two_dataset/eval_data'

    transform = transforms.Compose([
        transforms.Resize((224, 224)),  # Resize for ViT input
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])  # Normalize for ViT
    ])

    batch_size = 64
    confidence_threshold = 0.8
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Initialize Feature Extractor
    feature_extractor = ViTFeatureExtractorModel(pretrained=True).to(device)

    accuracy_matrix = np.zeros((20, 20))  # Adjusted for datasets from two directories

    # Function to load datasets from a given path
    def load_datasets(base_path, is_labeled):
        for i in range(1, 11):
            train_dataset = CustomCIFARDataset(
                os.path.join(base_path, f'{i}_train_data.tar.pth'),
                transform=transform,
                labeled=is_labeled and i == 1  # Only the first dataset from the first directory is labeled
            )
            yield train_dataset

    # Held-out features never change because the extractor is not updated
    # anywhere in this function, so each eval set is pushed through the
    # backbone once and reused for every later model. This trades memory for
    # skipping repeated backbone passes.
    eval_cache = {}

    def load_eval_features(eval_index):
        """Return (features, labels) for held-out set D̂{eval_index}, 1-based over both directories."""
        if eval_index not in eval_cache:
            if eval_index <= 10:
                base_path, local_index = eval_data_path1, eval_index
            else:
                base_path, local_index = eval_data_path2, eval_index - 10
            eval_dataset = CustomCIFARDataset(
                os.path.join(base_path, f'{local_index}_eval_data.tar.pth'),
                transform=transform,
                labeled=True
            )
            eval_loader = DataLoader(eval_dataset, batch_size=batch_size, shuffle=False)

            eval_features, eval_labels = [], []
            with torch.no_grad():
                for images, labels in eval_loader:
                    eval_features.append(feature_extractor(images.to(device)))
                    eval_labels.append(labels.to(device))
            eval_cache[eval_index] = (torch.cat(eval_features), torch.cat(eval_labels))
        return eval_cache[eval_index]

    for idx, (train_data_path, eval_data_path, is_labeled) in enumerate(
        [(train_data_path1, eval_data_path1, True), (train_data_path2, eval_data_path2, False)]
    ):
        dataset_offset = idx * 10  # Offset for indexing datasets from the second directory
        print(f"\n=== Processing Dataset Group {idx + 1} ===")

        for i, train_dataset in enumerate(load_datasets(train_data_path, is_labeled), start=1):
            model_index = i + dataset_offset  # global 1-based index of this dataset/model
            print(f"\n=== Processing Dataset D{model_index} ===")
            train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
            has_labels = is_labeled and i == 1  # D1 from the first directory is labeled

            features_list, labels_list = [], []
            # Pure inference: no autograd bookkeeping is needed for the backbone.
            with torch.no_grad():
                for batch in train_loader:
                    if has_labels:
                        images, labels = batch
                        labels_list.append(labels.to(device))
                    else:  # D2 onward or all datasets from the second directory
                        images = batch
                    features_list.append(feature_extractor(images.to(device)))  # Extract features

            features = torch.cat(features_list)  # Combine all features
            if labels_list:
                labels = torch.cat(labels_list)
                num_classes = len(torch.unique(labels))
            else:
                labels = None
                num_classes = 10  # Default for unlabeled datasets

            # The classifier is created once, on D1 of the first directory,
            # and then carried through every later dataset.
            if idx == 0 and i == 1:
                classifier = BayesianClassifier(input_dim=feature_extractor.feature_dim, num_classes=num_classes).to(device)

            if labels is not None:  # Update posterior using true labels
                classifier.update_posteriors(features, labels)
            else:  # Pseudo-labeling and posterior update
                with torch.no_grad():
                    logits = classifier(features)  # Predict pseudo-labels
                    predictions = logits.argmax(dim=1)
                    confidences = logits.max(dim=1).values  # Confidence scores

                # Filter samples based on confidence threshold
                high_confidence_indices = confidences > confidence_threshold
                high_confidence_features = features[high_confidence_indices]
                high_confidence_labels = predictions[high_confidence_indices]

                # Update posterior using high-confidence pseudo-labeled samples
                if len(high_confidence_features) > 0:
                    classifier.update_posteriors(high_confidence_features, high_confidence_labels)

            print(f"\nEvaluating model f{model_index} on held-out datasets D̂1 to D̂{model_index}")

            # Evaluate f_k on D̂1..D̂k only. Each held-out set has its own
            # column, so part-two results (columns 10-19) no longer
            # overwrite part-one results (columns 0-9).
            for eval_index in range(1, model_index + 1):
                eval_features, eval_labels = load_eval_features(eval_index)
                with torch.no_grad():
                    eval_predictions = classifier(eval_features).argmax(dim=1)

                correct = (eval_predictions == eval_labels).sum().item()
                total = eval_labels.size(0)
                accuracy = 100 * correct / total
                accuracy_matrix[model_index - 1, eval_index - 1] = accuracy

                if eval_index <= 10:
                    print(f"Accuracy of model f{model_index} on D̂{eval_index} from first directory: {accuracy:.2f}%")
                else:
                    print(f"Accuracy of model f{model_index} on D̂{eval_index - 10} from second directory: {accuracy:.2f}%")

    print("\nAccuracy Matrix:")
    for row in accuracy_matrix:
        print(" ".join(f"{val:6.2f}" for val in row))

# Run
if __name__ == "__main__":
    # Run the full D1-D20 pipeline only when executed directly, not when imported.
    train_and_evaluate()
