# Mount Drive

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install -U -q PyDrive
!pip install httplib2==0.15.0
import os
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from pydrive.files import GoogleDriveFileList
from google.colab import auth
from oauth2client.client import GoogleCredentials

from getpass import getpass
import urllib

# 1. Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# NOTE: this rebinds `drive`. It was the google.colab `drive` module imported in
# the mount cell; from this line on `drive` is the PyDrive client object.
drive = GoogleDrive(gauth)

# Clone the CLIPPER repository so its experiment modules can be imported below.
# NOTE(review): the URL carries no credentials; if the repository is private the
# clone fails, and check=True raises here instead of surfacing later as an
# ImportError.
import subprocess

if 'CLIPPER' not in os.listdir():
    subprocess.run(
        ['git', 'clone', 'https://github.com/PAL-ML/CLIPPER.git'],
        check=True,
    )



# Installation

## Install multi label metrics dependencies

In [None]:
! pip install scikit-learn==0.24



## Install CLIP dependencies

In [None]:
# import subprocess

# CUDA_version = [s for s in subprocess.check_output(["nvcc", "--version"]).decode("UTF-8").split(", ") if s.startswith("release")][0].split(" ")[-1]
# print("CUDA version:", CUDA_version)

# if CUDA_version == "10.0":
#     torch_version_suffix = "+cu100"
# elif CUDA_version == "10.1":
#     torch_version_suffix = "+cu101"
# elif CUDA_version == "10.2":
#     torch_version_suffix = ""
# else:
#     torch_version_suffix = "+cu110"

In [None]:
# ! pip install torch==1.7.1{torch_version_suffix} torchvision==0.8.2{torch_version_suffix} -f https://download.pytorch.org/whl/torch_stable.html ftfy regex

In [None]:
# ! pip install ftfy regex
# ! wget https://openaipublic.azureedge.net/clip/bpe_simple_vocab_16e6.txt.gz -O bpe_simple_vocab_16e6.txt.gz

In [None]:
# !pip install git+https://github.com/Sri-vatsa/CLIP # using this fork because of visualization capabilities

## Install clustering dependencies

In [None]:
!pip -q install umap-learn>=0.3.7

## Install dataset manager dependencies

In [None]:
!pip install wget

# Imports

In [None]:
# ML Libraries
import tensorflow as tf
import tensorflow_hub as hub
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
import keras

# Data processing
import PIL
import base64
import imageio
import pandas as pd
import numpy as np
import json

from PIL import Image
import cv2
from sklearn.feature_extraction.image import extract_patches_2d

# Plotting
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as patches

from IPython.core.display import display, HTML
from matplotlib import cm
import matplotlib.image as mpimg

# Models
# import clip

# Datasets
import tensorflow_datasets as tfds

# Clustering
# import umap

from sklearn import metrics
from sklearn.cluster import KMeans
#from yellowbrick.cluster import KElbowVisualizer

# Misc
import progressbar
import logging
from abc import ABC, abstractmethod
import time
import urllib.request
import os
from sklearn.metrics import jaccard_score, hamming_loss, accuracy_score, f1_score
from sklearn.preprocessing import MultiLabelBinarizer


# Modules
# from CLIPPER.code.ExperimentModules import embedding_models
from CLIPPER.code.ExperimentModules.dataset_manager import DatasetManager
from CLIPPER.code.ExperimentModules.weight_imprinting_classifier import WeightImprintingClassifier
from CLIPPER.code.ExperimentModules import simclr_data_augmentations
from CLIPPER.code.ExperimentModules.utils import (save_npy, load_npy, 
                                                       get_folder_id, 
                                                       create_expt_dir, 
                                                       save_to_drive, 
                                                       load_all_from_drive_folder, 
                                                       download_file_by_name, 
                                                       delete_file_by_name)

# Only show errors from the Google API client's discovery cache logger.
# (The logger name was previously misspelled, so this had no effect.)
logging.getLogger('googleapiclient.discovery_cache').setLevel(logging.ERROR)



# Initialization & Constants

## Dataset details

In [None]:
# Name of the experiment folder on Google Drive
folder_name = "ImagenetSketch-Embeddings-28-02-21"

# Change parentid to match that of experiments root folder in gdrive
parentid = '1bK72W-Um20EQDEyChNhNJthUNbmoSEjD'

# Filenames of the label archives
train_labels_filename = "train_labels.npz"
val_labels_filename = "val_labels.npz"

# Suffixes appended to a model name to form that model's embeddings filename
train_embeddings_filename_suffix = "_embeddings_train.npz"
val_embeddings_filename_suffix = "_embeddings_val.npz"

# Initialize specific experiment folder in drive
# (presumably returns the id of the folder named folder_name under parentid,
# creating it if needed -- confirm against create_expt_dir)
folderid = create_expt_dir(drive, parentid, folder_name)

title: ImagenetSketch-Embeddings-28-02-21, id: 1HVl_ZBLqXrDhMDlk76a2tIQ_deOFqEaF


## Few shot learning parameters

In [None]:
# Episode configuration for the few-shot evaluation below.
# Bracketed lists are presumably the other settings the author experimented with.
# NOTE(review): the saved outputs at the end of the notebook report
# "20 way 1 shot", which does not match these values -- re-run before citing them.
num_ways = 5 # [5, 20] -- number of labels sampled per episode
num_shot = 5 # [5, 1] -- samples per label used to imprint the weights
num_eval = 15 # [5, 10, 15, 19] -- samples per label held out for evaluation
num_episodes = 100 # number of episodes; each one is seeded with its own index
shuffle = False # when True, the index lists of every episode are shuffled

# Load data

In [None]:
def get_ndarray_from_drive(drive, folderid, filename):
    """Download an ``.npz`` archive from Drive and return its ``'data'`` array.

    Args:
        drive: Drive client passed through to ``download_file_by_name``.
        folderid: Id of the Drive folder to download from.
        filename: Name of the file to download; the same string is then used
            as the local path to load (presumably the helper saves the file
            under this name in the working directory -- confirm).

    Returns:
        The array stored under the ``'data'`` key of the archive.
    """
    download_file_by_name(drive, folderid, filename)
    # np.load keeps the archive's file handle open until it is closed; the
    # array is read into memory on access, so it stays valid after the
    # context manager exits.
    with np.load(filename) as archive:
        return archive['data']

# Load the label array stored in val_labels.npz from the experiment folder
val_labels = get_ndarray_from_drive(drive, folderid, val_labels_filename)

Downloading val_labels.npz from GDrive
Downloading val_labels.npz from GDrive
Downloading val_labels.npz from GDrive
Downloading val_labels.npz from GDrive


# Create label dictionary

In [None]:
# Distinct label values found in val_labels, followed by how many there are
unique_labels = np.unique(val_labels)
print(unique_labels.size)

200


In [None]:
# Map every label to the positions at which it occurs in val_labels
label_dictionary = {}
for label in unique_labels:
    label_dictionary[label] = []

for position, label in enumerate(val_labels):
    label_dictionary[label].append(position)

# Weight Imprinting models on train data embeddings

## Function definitions

In [None]:
def calculate_single_label_accuracy(pred, y, label_mapping):
    """Return the fraction of predictions whose mapped label equals the target.

    Args:
        pred: Iterable of predicted keys/indices into ``label_mapping``.
        y: Indexable collection of true labels, aligned with ``pred``.
        label_mapping: Maps each element of ``pred`` to a label comparable
            with the entries of ``y``.

    Returns:
        Number of matches divided by the number of predictions, or ``0.0``
        when ``pred`` is empty (previously this case raised because the loop
        variable was never bound).
    """
    total = 0
    correct = 0
    for i, p in enumerate(pred):
        total += 1
        if label_mapping[p] == y[i]:
            correct += 1

    if total == 0:
        return 0.0

    return correct / total

In [None]:
def start_progress_bar(bar_len):
    """Build a progress bar with ``bar_len`` as its maximum value and start it.

    The bar shows the elapsed time, a bar drawn with ``*`` and an ETA.

    Returns:
        Whatever ``ProgressBar.start()`` returns (the started bar).
    """
    elapsed = progressbar.Timer(format= 'elapsed time: %(elapsed)s')
    layout = [' [', elapsed, '] ', progressbar.Bar('*'), ' (', progressbar.ETA(), ') ']
    bar = progressbar.ProgressBar(max_value=bar_len, widgets=layout)
    return bar.start()

In [None]:
def run_evaluations(
    embeddings, 
    train_indices, 
    eval_indices, 
    wi_y, 
    eval_y, 
    num_episodes, 
    num_ways,
    verbose=True,
    metrics=None
):
    """Evaluate weight-imprinting classifiers over a series of episodes.

    For episode ``i`` the embeddings selected by ``train_indices[i]`` are used
    to compute imprinting weights, a ``WeightImprintingClassifier`` is built
    from them, and it is scored on the embeddings selected by
    ``eval_indices[i]``.

    Args:
        embeddings: Container indexed with each episode's index list
            (assumed to be an ndarray of per-sample embeddings -- TODO confirm).
        train_indices: Per-episode indices of the imprinting samples.
        eval_indices: Per-episode indices of the evaluation samples.
        wi_y: Per-episode labels of the imprinting samples.
        eval_y: Per-episode labels of the evaluation samples.
        num_episodes: Number of episodes to run.
        num_ways: Number of classes, passed to the classifier as ``num_classes``.
        verbose: When True, show a progress bar.
        metrics: Metric names to collect; ``"accuracy"`` and ``"c_f1"`` are
            the ones handled here. Defaults to both.

    Returns:
        A list holding one per-episode list of values for each requested
        metric, accuracy first and then c_f1. Callers that unpack two values
        depend on both metrics being requested.
    """
    # Avoid a shared mutable default argument.
    if metrics is None:
        metrics = ["accuracy", "c_f1"]

    accuracies = []
    f1_scores = []

    pbar = start_progress_bar(num_episodes) if verbose else None

    for i in range(num_episodes):
        wi_x = embeddings[train_indices[i]]
        wi_x = WeightImprintingClassifier.preprocess_input(wi_x)
        eval_x = embeddings[eval_indices[i]]
        eval_x = WeightImprintingClassifier.preprocess_input(eval_x)

        wi_weights, label_mapping = WeightImprintingClassifier.get_imprinting_weights(
            wi_x, wi_y[i], False
            )

        wi_parameters = {
            "num_classes": num_ways,
            "input_dims": wi_x.shape[-1],
            "scale": False,
            "dense_layer_weights": wi_weights
        }

        wi_cls = WeightImprintingClassifier(wi_parameters)

        # Evaluate the weight imprinting model
        metric_vals = wi_cls.evaluate_single_label_metrics(eval_x, eval_y[i], label_mapping, metrics=metrics)
        if "accuracy" in metrics:
            accuracies.append(metric_vals["accuracy"])  
        if "c_f1" in metrics:
            f1_scores.append(metric_vals["c_f1"])

        # Drop the per-episode objects before the next iteration allocates new ones
        del wi_x
        del eval_x
        del wi_cls

        if pbar is not None:
            pbar.update(i+1)

    # Finalise the bar so its last state is rendered and the line is terminated
    if pbar is not None:
        pbar.finish()

    metric_arrays = []
    if "accuracy" in metrics:
        metric_arrays.append(accuracies)
    if "c_f1" in metrics:
        metric_arrays.append(f1_scores)

    return metric_arrays

## Picking indices

In [None]:
# Per-episode index lists and the matching label arrays
train_indices, eval_indices = [], []
wi_y, eval_y = [], []

# Keep only the labels that have enough samples for one imprinting + evaluation draw
_samples_per_label = num_shot + num_eval
label_dictionary = {
    label: members
    for label, members in label_dictionary.items()
    if len(members) >= _samples_per_label
}
unique_labels = list(label_dictionary)

pbar = start_progress_bar(num_episodes)

for episode in range(num_episodes):
    # Seed with the episode number so every episode's draw is replicable
    np.random.seed(episode)

    episode_train = []
    episode_eval = []

    # Draw the labels of this episode, then the samples of each label
    for label in np.random.choice(unique_labels, size=num_ways, replace=False):
        drawn = np.random.choice(label_dictionary[label], size=_samples_per_label, replace=False)
        # The first num_shot draws imprint the weights; the rest are evaluated
        episode_train.extend(drawn[:num_shot])
        episode_eval.extend(drawn[num_shot:])

    if shuffle:
        np.random.shuffle(episode_train)
        np.random.shuffle(episode_eval)

    train_indices.append(episode_train)
    eval_indices.append(episode_eval)

    wi_y.append(val_labels[episode_train])
    eval_y.append(val_labels[episode_eval])

    pbar.update(episode + 1)

 [elapsed time: 0:00:00] |*********************             | (ETA:   0:00:00) 

## Inception V3

In [None]:
# Download the Inception V3 embeddings archive from Drive and load its array
inceptionv3_embeddings_val_fn = "inceptionv3" + val_embeddings_filename_suffix
inceptionv3_embeddings_val = get_ndarray_from_drive(
    drive=drive, folderid=folderid, filename=inceptionv3_embeddings_val_fn
)

Downloading inceptionv3_embeddings_val.npz from GDrive


In [None]:
# Per-episode accuracy and f1 for the Inception V3 embeddings
inceptionv3_accuracies, inceptionv3_f1_scores = run_evaluations(
    embeddings=inceptionv3_embeddings_val,
    train_indices=train_indices,
    eval_indices=eval_indices,
    wi_y=wi_y,
    eval_y=eval_y,
    num_episodes=num_episodes,
    num_ways=num_ways,
)

                                                                                [elapsed time: 0:00:00] |                                  | (ETA:  --:--:--) 



 [elapsed time: 0:00:09] |**********************************| (ETA:  00:00:00) 

In [None]:
# Average the per-episode scores, then report both
inceptionv3_mean_accuracy, inceptionv3_mean_f1_score = (
    np.mean(inceptionv3_accuracies),
    np.mean(inceptionv3_f1_scores),
)
print("Inceptionv3 Mean accuracy: ", inceptionv3_mean_accuracy)
print("Inceptionv3 Mean f1 score: ", inceptionv3_mean_f1_score)

Inceptionv3 Mean accuracy:  0.31144999999999995
Inceptionv3 Mean f1 score:  0.3015192363487129


## Resnet 50

In [None]:
# Download the Resnet 50 embeddings archive from Drive and load its array
resnet50_embeddings_val_fn = "resnet50" + val_embeddings_filename_suffix
resnet50_embeddings_val = get_ndarray_from_drive(
    drive=drive, folderid=folderid, filename=resnet50_embeddings_val_fn
)


Downloading resnet50_embeddings_val.npz from GDrive


In [None]:
# Per-episode accuracy and f1 for the Resnet 50 embeddings
resnet50_accuracies, resnet50_f1_scores = run_evaluations(
    embeddings=resnet50_embeddings_val,
    train_indices=train_indices,
    eval_indices=eval_indices,
    wi_y=wi_y,
    eval_y=eval_y,
    num_episodes=num_episodes,
    num_ways=num_ways,
)

 [elapsed time: 0:00:09] |********************************* | (ETA:   0:00:00) 

In [None]:
# Average the per-episode scores, then report both
resnet50_mean_accuracy, resnet50_mean_f1_score = (
    np.mean(resnet50_accuracies),
    np.mean(resnet50_f1_scores),
)
print("Resnet 50 Mean accuracy: ", resnet50_mean_accuracy)
print("Resnet 50 Mean f1 score: ", resnet50_mean_f1_score)

Resnet 50 Mean accuracy:  0.10399999999999998
Resnet 50 Mean f1 score:  0.09740936456885803


## MoCo Resnet

In [None]:
# Download the MoCo Resnet 50 embeddings archive from Drive and load its array
moco_resnet50_embeddings_val_fn = "moco_resnet50" + val_embeddings_filename_suffix
moco_resnet50_embeddings_val = get_ndarray_from_drive(
    drive=drive, folderid=folderid, filename=moco_resnet50_embeddings_val_fn
)

Downloading moco_resnet50_embeddings_val.npz from GDrive


In [None]:
# Per-episode accuracy and f1 for the MoCo Resnet 50 embeddings
moco_resnet50_accuracies, moco_resnet50_f1_scores = run_evaluations(
    embeddings=moco_resnet50_embeddings_val,
    train_indices=train_indices,
    eval_indices=eval_indices,
    wi_y=wi_y,
    eval_y=eval_y,
    num_episodes=num_episodes,
    num_ways=num_ways,
)

 [elapsed time: 0:00:10] |********************************* | (ETA:   0:00:00) 

In [None]:
# Average the per-episode scores, then report both
moco_resnet50_mean_accuracy, moco_resnet50_mean_f1_score = (
    np.mean(moco_resnet50_accuracies),
    np.mean(moco_resnet50_f1_scores),
)
print("Moco Resnet Mean accuracy: ", moco_resnet50_mean_accuracy)
print("Moco Resnet Mean f1 score: ", moco_resnet50_mean_f1_score)

Moco Resnet Mean accuracy:  0.21430000000000005
Moco Resnet Mean f1 score:  0.20550055299441897


## PCL Resnet

In [None]:
# Download the PCL Resnet 50 embeddings archive from Drive and load its array
pcl_resnet50_embeddings_val_fn = "pcl_resnet50" + val_embeddings_filename_suffix
pcl_resnet50_embeddings_val = get_ndarray_from_drive(
    drive=drive, folderid=folderid, filename=pcl_resnet50_embeddings_val_fn
)

Downloading pcl_resnet50_embeddings_val.npz from GDrive


In [None]:
# Per-episode accuracy and f1 for the PCL Resnet 50 embeddings
pcl_resnet50_accuracies, pcl_resnet50_f1_scores = run_evaluations(
    embeddings=pcl_resnet50_embeddings_val,
    train_indices=train_indices,
    eval_indices=eval_indices,
    wi_y=wi_y,
    eval_y=eval_y,
    num_episodes=num_episodes,
    num_ways=num_ways,
)

 [elapsed time: 0:00:09] |********************************* | (ETA:   0:00:00) 

In [None]:
# Average the per-episode scores, then report both
pcl_resnet50_mean_accuracy, pcl_resnet50_mean_f1_score = (
    np.mean(pcl_resnet50_accuracies),
    np.mean(pcl_resnet50_f1_scores),
)
print("PCL Resnet Mean accuracy: ", pcl_resnet50_mean_accuracy)
print("PCL Resnet Mean f1 score: ", pcl_resnet50_mean_f1_score)

PCL Resnet Mean accuracy:  0.14455
PCL Resnet Mean f1 score:  0.1386753621511451


## SwAV Resnet

In [None]:
# Download the SwAV Resnet 50 embeddings archive from Drive and load its array
swav_resnet50_embeddings_val_fn = "swav_resnet50" + val_embeddings_filename_suffix
swav_resnet50_embeddings_val = get_ndarray_from_drive(
    drive=drive, folderid=folderid, filename=swav_resnet50_embeddings_val_fn
)

Downloading swav_resnet50_embeddings_val.npz from GDrive


In [None]:
# Per-episode accuracy and f1 for the SwAV Resnet 50 embeddings
swav_resnet50_accuracies, swav_resnet50_f1_scores = run_evaluations(
    embeddings=swav_resnet50_embeddings_val,
    train_indices=train_indices,
    eval_indices=eval_indices,
    wi_y=wi_y,
    eval_y=eval_y,
    num_episodes=num_episodes,
    num_ways=num_ways,
)

 [elapsed time: 0:00:09] |********************************* | (ETA:   0:00:00) 

In [None]:
# Average the per-episode scores, then report both
swav_resnet50_mean_accuracy, swav_resnet50_mean_f1_score = (
    np.mean(swav_resnet50_accuracies),
    np.mean(swav_resnet50_f1_scores),
)
print("Swav Resnet Mean accuracy: ", swav_resnet50_mean_accuracy)
print("Swav Resnet Mean f1 score: ", swav_resnet50_mean_f1_score)

Swav Resnet Mean accuracy:  0.11054999999999998
Swav Resnet Mean f1 score:  0.10011777146822265


## SimCLR

In [None]:
# Download the SimCLR embeddings archive from Drive and load its array
simclr_embeddings_val_fn = "simclr" + val_embeddings_filename_suffix
simclr_embeddings_val = get_ndarray_from_drive(
    drive=drive, folderid=folderid, filename=simclr_embeddings_val_fn
)

Downloading simclr_embeddings_val.npz from GDrive


In [None]:
# Per-episode accuracy and f1 for the SimCLR embeddings
simclr_accuracies, simclr_f1_scores = run_evaluations(
    embeddings=simclr_embeddings_val,
    train_indices=train_indices,
    eval_indices=eval_indices,
    wi_y=wi_y,
    eval_y=eval_y,
    num_episodes=num_episodes,
    num_ways=num_ways,
)

 [elapsed time: 0:00:09] |********************************* | (ETA:   0:00:00) 

In [None]:
# Average the per-episode scores, then report both
simclr_mean_accuracy, simclr_mean_f1_score = (
    np.mean(simclr_accuracies),
    np.mean(simclr_f1_scores),
)
print("Simclr Mean accuracy: ", simclr_mean_accuracy)
print("Simclr Mean f1 score: ", simclr_mean_f1_score)

Simclr Mean accuracy:  0.42619999999999997
Simclr Mean f1 score:  0.41386337572224485


## VGG16

In [None]:
# Download the VGG16 embeddings archive from Drive and load its array
vgg16_embeddings_val_fn = "vgg16" + val_embeddings_filename_suffix
vgg16_embeddings_val = get_ndarray_from_drive(
    drive=drive, folderid=folderid, filename=vgg16_embeddings_val_fn
)

Downloading vgg16_embeddings_val.npz from GDrive


In [None]:
# Per-episode accuracy and f1 for the VGG16 embeddings
vgg16_accuracies, vgg16_f1_scores = run_evaluations(
    embeddings=vgg16_embeddings_val,
    train_indices=train_indices,
    eval_indices=eval_indices,
    wi_y=wi_y,
    eval_y=eval_y,
    num_episodes=num_episodes,
    num_ways=num_ways,
)

 [elapsed time: 0:00:10] |********************************* | (ETA:   0:00:00) 

In [None]:
# Average the per-episode scores, then report both
vgg16_mean_accuracy, vgg16_mean_f1_score = (
    np.mean(vgg16_accuracies),
    np.mean(vgg16_f1_scores),
)
print("VGG16 Mean accuracy: ", vgg16_mean_accuracy)
print("VGG16 Mean f1 score: ", vgg16_mean_f1_score)

VGG16 Mean accuracy:  0.39815
VGG16 Mean f1 score:  0.3875069947665369


## CLIP

In [None]:
# Download the CLIP embeddings archive from Drive and load its array
clip_embeddings_val_fn = "clip" + val_embeddings_filename_suffix
clip_embeddings_val = get_ndarray_from_drive(
    drive=drive, folderid=folderid, filename=clip_embeddings_val_fn
)

Downloading clip_embeddings_val.npz from GDrive


In [None]:
# Per-episode accuracy and f1 for the CLIP embeddings
clip_accuracies, clip_f1_scores = run_evaluations(
    embeddings=clip_embeddings_val,
    train_indices=train_indices,
    eval_indices=eval_indices,
    wi_y=wi_y,
    eval_y=eval_y,
    num_episodes=num_episodes,
    num_ways=num_ways,
)

 [elapsed time: 0:00:09] |**********************************| (ETA:  00:00:00) 

In [None]:
# Average the per-episode scores, then report both
clip_mean_accuracy, clip_mean_f1_score = (
    np.mean(clip_accuracies),
    np.mean(clip_f1_scores),
)
print("CLIP Mean accuracy: ", clip_mean_accuracy)
print("CLIP Mean f1 score: ", clip_mean_f1_score)

CLIP Mean accuracy:  0.5427
CLIP Mean f1 score:  0.5257351049338709


# Conclusion

In [None]:
# Mean accuracy of every evaluated model, paired with its display name
_mean_accuracy_by_model = [
    ("Inception V3", inceptionv3_mean_accuracy),
    ("Resnet 50", resnet50_mean_accuracy),
    ("MoCo Resnet 50", moco_resnet50_mean_accuracy),
    ("PCL Resnet 50", pcl_resnet50_mean_accuracy),
    ("SwAV Resnet 50", swav_resnet50_mean_accuracy),
    ("SimCLR", simclr_mean_accuracy),
    ("VGG 16", vgg16_mean_accuracy),
    ("CLIP", clip_mean_accuracy),
]

# The dict stays keyed by score because the reporting cell looks names up by
# value. With a plain dict literal two models with an identical score collide
# and the earlier one is silently dropped; here tied names are joined instead.
mean_accuracy_dict = {}
for _model_name, _score in _mean_accuracy_by_model:
    if _score in mean_accuracy_dict:
        mean_accuracy_dict[_score] = mean_accuracy_dict[_score] + " / " + _model_name
    else:
        mean_accuracy_dict[_score] = _model_name

# Scores from best to worst
acc_vals = sorted(mean_accuracy_dict, reverse=True)

In [None]:
# Mean f1 score of every evaluated model, paired with its display name
_mean_f1_score_by_model = [
    ("Inception V3", inceptionv3_mean_f1_score),
    ("Resnet 50", resnet50_mean_f1_score),
    ("MoCo Resnet 50", moco_resnet50_mean_f1_score),
    ("PCL Resnet 50", pcl_resnet50_mean_f1_score),
    ("SwAV Resnet 50", swav_resnet50_mean_f1_score),
    ("SimCLR", simclr_mean_f1_score),
    ("VGG 16", vgg16_mean_f1_score),
    ("CLIP", clip_mean_f1_score),
]

# The dict stays keyed by score because the reporting cell looks names up by
# value. With a plain dict literal two models with an identical score collide
# and the earlier one is silently dropped; here tied names are joined instead.
mean_f1_score_dict = {}
for _model_name, _score in _mean_f1_score_by_model:
    if _score in mean_f1_score_dict:
        mean_f1_score_dict[_score] = mean_f1_score_dict[_score] + " / " + _model_name
    else:
        mean_f1_score_dict[_score] = _model_name

# Scores from best to worst
f1_vals = sorted(mean_f1_score_dict, reverse=True)

In [None]:
# Header naming the episode configuration, then one line per score, best first
accuracy_header = "All accuracies for {} way {} shot classification on ImagenetSketch with {} random episodes- ".format(
    num_ways, num_shot, num_episodes
)
print(accuracy_header)
for accuracy in acc_vals:
    print(mean_accuracy_dict[accuracy], ": ", accuracy)

All accuracies for 20 way 1 shot classification on ImagenetSketch with 100 random episodes- 
CLIP :  0.5427
SimCLR :  0.42619999999999997
VGG 16 :  0.39815
Inception V3 :  0.31144999999999995
MoCo Resnet 50 :  0.21430000000000005
PCL Resnet 50 :  0.14455
SwAV Resnet 50 :  0.11054999999999998
Resnet 50 :  0.10399999999999998


In [None]:
# Header naming the episode configuration, then one line per score, best first.
# The dataset name previously read "Omniglot", left over from another notebook;
# this experiment uses the ImagenetSketch embeddings.
print("All f1 scores for {} way {} shot classification on ImagenetSketch with {} random episodes- ".format(
    num_ways, num_shot, num_episodes
    ))
for val in f1_vals:
    print(mean_f1_score_dict[val], ": ", val)

All f1 scores for 20 way 1 shot classification on Omniglot with 100 random episodes- 
CLIP :  0.5257351049338709
SimCLR :  0.41386337572224485
VGG 16 :  0.3875069947665369
Inception V3 :  0.3015192363487129
MoCo Resnet 50 :  0.20550055299441897
PCL Resnet 50 :  0.1386753621511451
SwAV Resnet 50 :  0.10011777146822265
Resnet 50 :  0.09740936456885803
