In [39]:
import numpy as np
import copy
import joblib
import torch
from tensorflow.keras.applications import ResNet152
from keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing import image
from scipy.spatial.distance import cdist


In [40]:
# Containers for the ten train / eval splits of the part-two dataset.
train_data = []
# NOTE: nothing in this cell fills train_labels; only t['data'] is read from the train files.
train_labels = []
eval_data = []
eval_labels = []

# Maps a dataset name (e.g. 'D11') to the labels predicted for it by the previous model.
predicted_labels = {}

# NOTE(review): torch.load unpickles the file contents; only load dataset files from a trusted source.
for i in range(10):
    t = torch.load('./dataset/part_two_dataset/train_data/' + str(i+1) + '_train_data.tar.pth')
    train_data.append(t['data'])

# Each eval file is read exactly once. The previous version re-loaded the train and eval
# file at the end of every iteration and threw the result away.
for i in range(10):
    t = torch.load('./dataset/part_two_dataset/eval_data/' + str(i+1) + '_eval_data.tar.pth')
    eval_data.append(t['data'])
    eval_labels.append(t['targets'])


In [6]:
# Placeholders for the per-split feature arrays.
train_data_extracted, eval_data_extracted = [], []

# ImageNet-pretrained ResNet152 without its classification head; pooling='avg' selects
# global average pooling of the final feature map.
model = ResNet152(
    weights='imagenet',
    include_top=False,
    pooling='avg',
)

def process_and_extract_features(data, model):
    """Run every split in `data` through `model` and collect the resulting features.

    Args:
        data: iterable of splits; each split is an iterable of image arrays.
        model: object exposing `predict(batch, batch_size=...)`.

    Returns:
        A list with one `model.predict` result per split, in the same order as `data`.
    """
    def _to_224(sample):
        # array -> PIL image (pixel values left unscaled) -> 224x224 -> array
        pil_img = image.array_to_img(sample, scale=False)
        return image.img_to_array(pil_img.resize((224, 224)))

    per_split_features = []
    for split in data:
        resized = np.array([_to_224(sample) for sample in split])
        prepared = preprocess_input(resized)
        per_split_features.append(model.predict(prepared, batch_size=32))
    return per_split_features


# Extract features for the train splits first, then for the eval splits.
train_data_extracted, eval_data_extracted = (
    process_and_extract_features(splits, model)
    for splits in (train_data, eval_data)
)


2024-11-26 16:27:55.435708: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Pro
2024-11-26 16:27:55.435768: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2024-11-26 16:27:55.435781: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2024-11-26 16:27:55.435824: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-11-26 16:27:55.435848: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2024-11-26 16:28:04.045961: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 448ms/step
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 357ms/step
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 360ms/step
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 355ms/step
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 356ms/step
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 360ms/step
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 354ms/step
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 358ms/step
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 359ms/step
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 358ms/step
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 365ms/step
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 363ms/step
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 359ms/step
[1m79/79[0

In [7]:
# Sanity check: compare the raw shape of the first train split with its feature shape.
print(
    f"Train data shape: {train_data[0].shape}",
    f"Extracted features shape: {train_data_extracted[0].shape}",
    sep="\n",
)

Train data shape: (2500, 32, 32, 3)
Extracted features shape: (2500, 2048)


In [8]:
# Write both feature collections to disk so the extraction step can be skipped later.
for feature_path, feature_sets in (
    ('./features/Part-2/train_data_extracted.npy', train_data_extracted),
    ('./features/Part-2/eval_data_extracted.npy', eval_data_extracted),
):
    np.save(feature_path, feature_sets)


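## Run from here if features are already extracted (the import and dataset-loading cells above must still be run first: `eval_labels` is needed in the Testing section)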

In [41]:
# Reload the cached feature arrays written by the save cell.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects; only use it on
# files you created yourself.
train_data_extracted, eval_data_extracted = (
    np.load(feature_path, allow_pickle=True)
    for feature_path in (
        './features/Part-2/train_data_extracted.npy',
        './features/Part-2/eval_data_extracted.npy',
    )
)

print(
    f"Length of Datastructure: {len(train_data_extracted)}",
    f"Extracted features shape: {train_data_extracted[0].shape}",
    sep="\n",
)

Length of Datastructure: 10
Extracted features shape: (2500, 2048)


In [42]:
# Short aliases used by the cells below.
# NOTE: y_train is bound to train_labels, which the loading cell leaves empty.
X_train, y_train, X_test = train_data_extracted, train_labels, eval_data_extracted

In [43]:
class LwPClassifier:
    """Nearest-prototype classifier: each class is represented by one prototype vector.

    Attributes:
        metric: distance metric name passed to `scipy.spatial.distance.cdist`.
        class_prototypes: dict mapping class label -> prototype vector.
        classes_: array of class labels used by `predict`; set by `fit` and kept in
            sync by `retrain` when new classes appear.
    """

    def __init__(self, metric='euclidean'):
        self.metric = metric
        self.class_prototypes = {}

    def fit(self, X, y, weights=None, alpha=0.01):
        """Compute prototypes (mean vectors) for each class, with optional weighting and regularization.

        Args:
            X: samples; anything with more than two dimensions is flattened to (n_samples, -1).
            y: class label per sample.
            weights: optional per-sample weights for the class average (defaults to all ones).
            alpha: fraction of the global mean of X subtracted from every prototype.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim > 2:
            X = X.reshape(X.shape[0], -1)

        self.classes_ = np.unique(y)
        if weights is None:
            weights = np.ones_like(y, dtype=float)
        else:
            weights = np.asarray(weights)

        # The global-mean shift is identical for every class, so compute it once.
        global_shift = alpha * np.mean(X, axis=0)
        self.class_prototypes = {
            cls: np.average(X[y == cls], axis=0, weights=weights[y == cls]) - global_shift
            for cls in self.classes_
        }

    def predict(self, X):
        """Predict class for each sample in X (label of the closest prototype)."""
        X = np.asarray(X)
        if X.ndim > 2:
            X = X.reshape(X.shape[0], -1)
        prototypes = np.array([self.class_prototypes[cls] for cls in self.classes_])
        distances = cdist(X, prototypes, metric=self.metric)
        closest_prototype_idx = distances.argmin(axis=1)
        return self.classes_[closest_prototype_idx]

    def retrain(self, X, y, momentum=0.9):
        """Update prototypes with new labeled data using a momentum (moving-average) rule.

        For a known class the prototype becomes
        `momentum * old + (1 - momentum) * mean(new samples of that class)`.
        For an unseen class the prototype is the mean of its samples.

        Args:
            X: samples; anything with more than two dimensions is flattened.
            y: class label per sample.
            momentum: weight kept on the existing prototype.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim > 2:
            X = X.reshape(X.shape[0], -1)

        added_new_class = False
        for cls in np.unique(y):
            class_mean = X[y == cls].mean(axis=0)
            if cls in self.class_prototypes:
                old_prototype = self.class_prototypes[cls]
                self.class_prototypes[cls] = momentum * old_prototype + (1 - momentum) * class_mean
            else:
                self.class_prototypes[cls] = class_mean
                added_new_class = True

        # predict() iterates over classes_, so a prototype that is missing from classes_
        # could never be returned. Re-sync when the set of classes changed, or when
        # classes_ was never set because fit() was not called.
        if added_new_class or not hasattr(self, 'classes_'):
            self.classes_ = np.unique(list(self.class_prototypes.keys()))

    def retrain_with_unlabeled_data(self, X, threshold=0.8):
        """Adapt model to unlabeled data by generating pseudo-labels based on current prototypes.

        Each sample is assigned the label of its nearest prototype. Only samples whose
        distance to that prototype is below `threshold` are passed to `retrain`.
        Note that `threshold` is compared against a distance, not a probability.

        Raises:
            ValueError: if the model has no prototypes yet.
        """
        X = np.asarray(X)
        if X.ndim > 2:
            X = X.reshape(X.shape[0], -1)

        if not self.class_prototypes:
            raise ValueError("Cannot pseudo-label data: the model has no prototypes yet.")

        # Keep the labels alongside the prototype rows: argmin yields row positions,
        # which equal class labels only when the labels happen to be 0..k-1 in dict order.
        class_keys = list(self.class_prototypes.keys())
        prototypes = np.array([self.class_prototypes[key] for key in class_keys])

        distances = cdist(X, prototypes, metric=self.metric)
        nearest_idx = np.argmin(distances, axis=1)
        nearest_distance = np.min(distances, axis=1)

        confident_indices = nearest_distance < threshold
        pseudo_labels = np.asarray(class_keys)[nearest_idx[confident_indices]]
        X_confident = X[confident_indices]

        self.retrain(X_confident, pseudo_labels)

    def score(self, X, y):
        """Calculate accuracy of the model (fraction of predictions equal to y)."""
        predictions = self.predict(X)
        return np.mean(predictions == np.asarray(y))


In [44]:
# Load the last Part-1 model and adapt it to the first Part-2 split (D11).
# NOTE(review): joblib.load unpickles the file; only load model files you trust.
f10 = joblib.load('./models/Part-1/f10.joblib')
d11_features = X_train[0]
f10.retrain_with_unlabeled_data(d11_features)

# Keep f10's predictions for D11; the next cell reuses them as labels.
d11_labels = f10.predict(d11_features)
predicted_labels['D11'] = d11_labels
print(d11_labels)

[3 8 8 ... 8 3 2]


In [45]:
# f11: copy of f10, adapted on D11 (pseudo-label pass, then the stored D11 predictions).
d11_features, d11_labels = X_train[0], predicted_labels['D11']
f11 = copy.deepcopy(f10)
f11.retrain_with_unlabeled_data(d11_features)
f11.retrain(d11_features, d11_labels)

# Label the next split (D12) with f11.
d12_labels = f11.predict(X_train[1])
predicted_labels['D12'] = d12_labels
print(d12_labels)

[8 8 8 ... 8 3 3]


In [46]:
# f12: copy of f11, adapted on D12 (pseudo-label pass, then the stored D12 predictions).
d12_features, d12_labels = X_train[1], predicted_labels['D12']
f12 = copy.deepcopy(f11)
f12.retrain_with_unlabeled_data(d12_features)
f12.retrain(d12_features, d12_labels)

# Label the next split (D13) with f12.
d13_labels = f12.predict(X_train[2])
predicted_labels['D13'] = d13_labels
print(d13_labels)

[3 8 8 ... 8 3 6]


In [47]:
# f13: copy of f12, adapted on D13 (pseudo-label pass, then the stored D13 predictions).
d13_features, d13_labels = X_train[2], predicted_labels['D13']
f13 = copy.deepcopy(f12)
f13.retrain_with_unlabeled_data(d13_features)
f13.retrain(d13_features, d13_labels)

# Label the next split (D14) with f13.
d14_labels = f13.predict(X_train[3])
predicted_labels['D14'] = d14_labels
print(d14_labels)

[3 8 8 ... 8 3 2]


In [48]:
# f14: copy of f13, adapted on D14 (pseudo-label pass, then the stored D14 predictions).
d14_features, d14_labels = X_train[3], predicted_labels['D14']
f14 = copy.deepcopy(f13)
f14.retrain_with_unlabeled_data(d14_features)
f14.retrain(d14_features, d14_labels)

# Label the next split (D15) with f14.
d15_labels = f14.predict(X_train[4])
predicted_labels['D15'] = d15_labels
print(d15_labels)

[3 8 8 ... 8 3 2]


In [49]:
# f15: copy of f14, adapted on D15 (pseudo-label pass, then the stored D15 predictions).
d15_features, d15_labels = X_train[4], predicted_labels['D15']
f15 = copy.deepcopy(f14)
f15.retrain_with_unlabeled_data(d15_features)
f15.retrain(d15_features, d15_labels)

# Label the next split (D16) with f15.
d16_labels = f15.predict(X_train[5])
predicted_labels['D16'] = d16_labels
print(d16_labels)

[3 8 8 ... 8 3 6]


In [50]:
# f16: copy of f15, adapted on D16 (pseudo-label pass, then the stored D16 predictions).
d16_features, d16_labels = X_train[5], predicted_labels['D16']
f16 = copy.deepcopy(f15)
f16.retrain_with_unlabeled_data(d16_features)
f16.retrain(d16_features, d16_labels)

# Label the next split (D17) with f16.
d17_labels = f16.predict(X_train[6])
predicted_labels['D17'] = d17_labels
print(d17_labels)

[3 8 8 ... 8 3 2]


In [51]:
# f17: copy of f16, adapted on D17 (pseudo-label pass, then the stored D17 predictions).
d17_features, d17_labels = X_train[6], predicted_labels['D17']
f17 = copy.deepcopy(f16)
f17.retrain_with_unlabeled_data(d17_features)
f17.retrain(d17_features, d17_labels)

# Label the next split (D18) with f17.
d18_labels = f17.predict(X_train[7])
predicted_labels['D18'] = d18_labels
print(d18_labels)

[6 8 8 ... 8 3 0]


In [52]:
# f18: copy of f17, adapted on D18 (pseudo-label pass, then the stored D18 predictions).
d18_features, d18_labels = X_train[7], predicted_labels['D18']
f18 = copy.deepcopy(f17)
f18.retrain_with_unlabeled_data(d18_features)
f18.retrain(d18_features, d18_labels)

# Label the next split (D19) with f18.
d19_labels = f18.predict(X_train[8])
predicted_labels['D19'] = d19_labels
print(d19_labels)

[3 8 8 ... 8 3 9]


In [53]:
# f19: copy of f18, adapted on D19 (pseudo-label pass, then the stored D19 predictions).
d19_features, d19_labels = X_train[8], predicted_labels['D19']
f19 = copy.deepcopy(f18)
f19.retrain_with_unlabeled_data(d19_features)
f19.retrain(d19_features, d19_labels)

# Label the next split (D20) with f19.
d20_labels = f19.predict(X_train[9])
predicted_labels['D20'] = d20_labels
print(d20_labels)

[3 1 8 ... 8 3 2]


In [54]:
# f20: copy of f19, adapted on D20 (pseudo-label pass, then the stored D20 predictions).
d20_features, d20_labels = X_train[9], predicted_labels['D20']
f20 = copy.deepcopy(f19)
f20.retrain_with_unlabeled_data(d20_features)
f20.retrain(d20_features, d20_labels)

# Testing

In [55]:
# The ten Part-2 models in the order they were produced.
models = [
    f11, f12, f13, f14, f15,
    f16, f17, f18, f19, f20,
]
# One empty output list per model.
outputs_partial = [list() for _ in range(10)]

In [56]:
# accuracies_partial[m] collects the accuracy of models[m] on eval splits 0..m.
accuracies_partial = [[] for _ in range(10)]

for model_id in range(10):
    for data_id in range(model_id + 1):
        score = models[model_id].score(eval_data_extracted[data_id], eval_labels[data_id])
        accuracies_partial[model_id].append(score)

In [57]:
# One line per model: its accuracies on the eval splits it was scored on.
for model_accuracies in accuracies_partial[:10]:
    print(model_accuracies)

[0.7028]
[0.7004, 0.5244]
[0.6972, 0.5256, 0.7376]
[0.6932, 0.5204, 0.736, 0.806]
[0.692, 0.5188, 0.7324, 0.8044, 0.832]
[0.688, 0.5192, 0.7336, 0.7992, 0.8316, 0.7144]
[0.6908, 0.5152, 0.7352, 0.798, 0.8284, 0.7128, 0.776]
[0.6848, 0.5148, 0.7324, 0.7968, 0.8232, 0.7096, 0.772, 0.7072]
[0.6804, 0.5032, 0.7276, 0.7924, 0.82, 0.7032, 0.7648, 0.704, 0.5532]
[0.6792, 0.5012, 0.7272, 0.7936, 0.8192, 0.7012, 0.7632, 0.7008, 0.5508, 0.8024]


## Saving models

In [60]:
# Persist the classifiers as ./models/Part-2/f11.joblib ... f20.joblib.
# The loop variable is intentionally not named `model`: that name is bound to the
# ResNet feature extractor earlier in the notebook and must not be overwritten here.
for i, classifier in enumerate(models, start=1):
    filename = f'./models/Part-2/f{i+10}.joblib'
    joblib.dump(classifier, filename)