In [8]:

import tensorflow as tf
import numpy as np
import torch
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from scipy.spatial.distance import cdist

import pickle

def save_weights(classifier: LwPClassifier, feature_reducer: FeatureReducer, filename: str = "weights.pkl"):
    """Save classifier prototypes and PCA components."""
    weights = {
        "prototypes": classifier.prototypes,
        "pca_components": feature_reducer.pca.components_,
        "pca_mean": feature_reducer.pca.mean_
    }
    with open(filename, "wb") as f:
        pickle.dump(weights, f)
    print(f"Weights saved to {filename}")

def load_weights(classifier: LwPClassifier, feature_reducer: FeatureReducer, filename: str = "weights.pkl"):
    """Load classifier prototypes and PCA components."""
    with open(filename, "rb") as f:
        weights = pickle.load(f)
    classifier.prototypes = weights["prototypes"]
    feature_reducer.pca.components_ = weights["pca_components"]
    feature_reducer.pca.mean_ = weights["pca_mean"]
    print(f"Weights loaded from {filename}")
    
# Step 1: Load the dataset
def load_dataset(filepath: str):
    """Loads a dataset from a given file path."""
    t = torch.load(filepath)
    data, targets = t['data'], t.get('targets')  # Targets may be None for unlabeled data
    return data, targets

# Step 2: Feature Extraction with MobileNet
class MobileNetFeatureExtractor:
    def __init__(self, input_shape=(224, 224, 3)):
        # Load MobileNet without the top classification layer
        self.model = tf.keras.applications.MobileNet(
            weights='imagenet', include_top=False, pooling='avg', input_shape=input_shape
        )

    def preprocess(self, data: np.ndarray):
        """Preprocess raw image data to match MobileNet input requirements."""
        data = data.astype(np.float32)
        resized_data = np.array([tf.image.resize(img, (224, 224)).numpy() for img in data])
        preprocessed_data = tf.keras.applications.mobilenet.preprocess_input(resized_data)
        return preprocessed_data

    def extract(self, data: np.ndarray):
        """Extract features using MobileNet."""
        preprocessed_data = self.preprocess(data)
        features = self.model.predict(preprocessed_data, batch_size=32, verbose=1)
        return features

# Step 3: Dimensionality Reduction
class FeatureReducer:
    def __init__(self, n_components=256):
        self.pca = PCA(n_components=n_components)

    def fit_transform(self, data: np.ndarray):
        return self.pca.fit_transform(data)

    def transform(self, data: np.ndarray):
        return self.pca.transform(data)

# Step 4: LwP Classifier
class LwPClassifier:
    def __init__(self, num_classes: int):
        self.num_classes = num_classes
        self.prototypes = None

    def fit(self, data: np.ndarray, labels: np.ndarray):
        """Initialize prototypes based on labeled data."""
        self.prototypes = []
        for cls in range(self.num_classes):
            cls_data = data[labels == cls]
            if len(cls_data) > 0:
                cls_prototype = cls_data.mean(axis=0)
                self.prototypes.append(cls_prototype)
        self.prototypes = np.array(self.prototypes)

    def predict(self, data: np.ndarray):
        """Predict labels for the given data."""
        distances = cdist(data, self.prototypes)
        return np.argmin(distances, axis=1)

    def update(self, data: np.ndarray, pseudo_labels: np.ndarray, alpha=0.7):
        """Update prototypes using pseudo-labeled data."""
        for cls in range(self.num_classes):
            cls_data = data[pseudo_labels == cls]
            if len(cls_data) > 0:
                cls_mean = cls_data.mean(axis=0)
                self.prototypes[cls] = alpha * self.prototypes[cls] + (1 - alpha) * cls_mean

# Step 5: Training and Evaluation
def train_and_evaluate(train_files: list, eval_files: list, num_classes=10, alpha=0.7, confidence_threshold=0.9):
    """Train models f1, ..., f10 and evaluate on held-out datasets."""
    accuracies = np.zeros((len(train_files), len(eval_files)))
    feature_extractor = MobileNetFeatureExtractor()
    feature_reducer = FeatureReducer()
    classifier = LwPClassifier(num_classes=num_classes)

    # Load and prepare D1
    data, targets = load_dataset(train_files[0])
    features = feature_extractor.extract(data)
    reduced_features = feature_reducer.fit_transform(features)
    classifier.fit(reduced_features, np.array(targets))

    for i in range(1, len(train_files) + 1):
        print(f"Training model f{i}...")

        # Evaluate on held-out datasets
        for j in range(i):  # Evaluate only on \hat{D}_1 to \hat{D}_i
            eval_data, eval_targets = load_dataset(eval_files[j])
            eval_features = feature_extractor.extract(eval_data)
            eval_reduced_features = feature_reducer.transform(eval_features)
            predictions = classifier.predict(eval_reduced_features)
            accuracies[i - 1, j] = accuracy_score(eval_targets, predictions)

        # Stop after f10
        if i == len(train_files):
            break

        # Load next unlabeled dataset (D2, ..., D10)
        next_data, _ = load_dataset(train_files[i])
        next_features = feature_extractor.extract(next_data)
        next_reduced_features = feature_reducer.transform(next_features)

        # Predict labels for next dataset
        pseudo_labels = classifier.predict(next_reduced_features)

        # Confidence filtering
        if confidence_threshold:
            distances = cdist(next_reduced_features, classifier.prototypes)
            confidence = 1 - (distances.min(axis=1) / distances.max(axis=1))
            mask = confidence >= confidence_threshold
            next_reduced_features = next_reduced_features[mask]
            pseudo_labels = pseudo_labels[mask]

        # Update classifier using pseudo-labeled data
        classifier.update(next_reduced_features, pseudo_labels, alpha=alpha)
        # Save weights after training
        save_weights(classifier, feature_reducer, filename="trained_weights.pkl")
    return accuracies

# Example usage
# /content/1_train_data.tar.pth
train_files = [f"dataset/part_one_dataset/train_data/{i}_train_data.tar.pth" for i in range(1, 11)]  # Replace with actual paths
eval_files = [f"dataset/part_one_dataset/eval_data/{i}_eval_data.tar.pth" for i in range(1, 11)]  # Replace with actual paths
accuracies = train_and_evaluate(train_files, eval_files)
print("Accuracy matrix:")
print(accuracies)

[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 376ms/step
Training model f1...
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 335ms/step
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 341ms/step
Weights saved to trained_weights.pkl
Training model f2...
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 346ms/step
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 345ms/step
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 352ms/step
Weights saved to trained_weights.pkl
Training model f3...
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 356ms/step
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 365ms/step
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 358ms/step
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 361ms/step
Weights saved to trained_weights.pkl
Training model f4...
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[