# Mount Drive

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# PyDrive provides the Drive client created below; httplib2 is pinned to 0.15.0.
# NOTE(review): the reason for the httplib2 pin is not recorded in this notebook.
!pip install -U -q PyDrive
!pip install httplib2==0.15.0
import os
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from pydrive.files import GoogleDriveFileList
from google.colab import auth
from oauth2client.client import GoogleCredentials

from getpass import getpass
import urllib

# 1. Authenticate and create the PyDrive client.
# The Colab user is authenticated first, then the application-default
# credentials are attached to the PyDrive GoogleAuth object.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# NOTE: this rebinds `drive` (the google.colab module imported in the mount
# cell) to the PyDrive client that the helper functions below receive.
drive = GoogleDrive(gauth)

# Clone the CLIPPER repository so its ExperimentModules package can be
# imported in the Imports section. No credentials are passed to git.
# The clone is skipped when a CLIPPER entry already exists in the working
# directory.
if 'CLIPPER' not in os.listdir():
    import subprocess

    # check=True makes a failed clone raise here instead of surfacing later
    # as an ImportError when the CLIPPER modules are imported.
    subprocess.run(
        ['git', 'clone', 'https://github.com/PAL-ML/CLIPPER.git'],
        check=True,
    )



# Installation

## Install multi label metrics dependencies

In [None]:
# scikit-learn pinned to 0.24; sklearn metrics are imported in the Imports section.
! pip install scikit-learn==0.24



## Install CLIP dependencies

In [None]:
import subprocess

# Query the CUDA compiler and keep the last word of the comma-separated
# field that starts with "release" (e.g. "release 10.1" -> "10.1").
_nvcc_banner = subprocess.check_output(["nvcc", "--version"]).decode("UTF-8")
_release_fields = [
    field for field in _nvcc_banner.split(", ") if field.startswith("release")
]
CUDA_version = _release_fields[0].split(" ")[-1]
print("CUDA version:", CUDA_version)

# Wheel suffix appended to the torch/torchvision versions in the install
# cell; any CUDA version not listed here falls back to the cu110 build.
_SUFFIX_BY_CUDA_VERSION = {
    "10.0": "+cu100",
    "10.1": "+cu101",
    "10.2": "",
}
torch_version_suffix = _SUFFIX_BY_CUDA_VERSION.get(CUDA_version, "+cu110")

CUDA version: 10.1


In [None]:
# Install torch 1.7.1 / torchvision 0.8.2 with the wheel suffix chosen by the
# CUDA detection cell (`torch_version_suffix`), using the PyTorch wheel index.
# NOTE(review): ftfy and regex are installed again by the next cell.
! pip install torch==1.7.1{torch_version_suffix} torchvision==0.8.2{torch_version_suffix} -f https://download.pytorch.org/whl/torch_stable.html ftfy regex

Looking in links: https://download.pytorch.org/whl/torch_stable.html


In [None]:
# Install ftfy and regex, then download the CLIP BPE vocabulary file into the
# working directory.
! pip install ftfy regex
! wget https://openaipublic.azureedge.net/clip/bpe_simple_vocab_16e6.txt.gz -O bpe_simple_vocab_16e6.txt.gz

--2021-02-28 10:18:22--  https://openaipublic.azureedge.net/clip/bpe_simple_vocab_16e6.txt.gz
Resolving openaipublic.azureedge.net (openaipublic.azureedge.net)... 13.107.246.19, 13.107.213.19, 2620:1ec:bdf::19, ...
Connecting to openaipublic.azureedge.net (openaipublic.azureedge.net)|13.107.246.19|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1356917 (1.3M) [application/octet-stream]
Saving to: ‘bpe_simple_vocab_16e6.txt.gz’


2021-02-28 10:18:22 (26.9 MB/s) - ‘bpe_simple_vocab_16e6.txt.gz’ saved [1356917/1356917]



In [None]:
# NOTE(review): installs whatever is at the head of the fork's default branch;
# no commit or tag is pinned, so re-runs may pick up a different version.
!pip install git+https://github.com/Sri-vatsa/CLIP # using this fork because of visualization capabilities

Collecting git+https://github.com/Sri-vatsa/CLIP
  Cloning https://github.com/Sri-vatsa/CLIP to /tmp/pip-req-build-nusku9om
  Running command git clone -q https://github.com/Sri-vatsa/CLIP /tmp/pip-req-build-nusku9om
Building wheels for collected packages: clip
  Building wheel for clip (setup.py) ... [?25l[?25hdone
  Created wheel for clip: filename=clip-1.0-cp37-none-any.whl size=1368623 sha256=79c6bcfe0f287577a15e22601283596d86f334dca4e014d3c84fe6c76b99e035
  Stored in directory: /tmp/pip-ephem-wheel-cache-khiutq_s/wheels/cc/55/69/0d411dabbd5009fd069d47b47cf7839c54e595dc61725b307b
Successfully built clip


## Install clustering dependencies

In [None]:
!pip -q install umap-learn>=0.3.7

## Install dataset manager dependencies

In [None]:
# Python `wget` package, listed under the dataset manager dependencies.
!pip install wget



# Imports

In [None]:
# ML Libraries
import tensorflow as tf
import tensorflow_hub as hub
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from tensorflow import keras


# Data processing
import PIL
import base64
import imageio
import pandas as pd
import numpy as np
import json

from PIL import Image
import cv2
from sklearn.feature_extraction.image import extract_patches_2d

# Plotting
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as patches

from IPython.core.display import display, HTML
from matplotlib import cm
import matplotlib.image as mpimg

# Models
import clip

# Datasets
import tensorflow_datasets as tfds

# Clustering
# import umap

from sklearn import metrics
from sklearn.cluster import KMeans
#from yellowbrick.cluster import KElbowVisualizer

# Misc
import progressbar
import logging
from abc import ABC, abstractmethod
import time
import urllib.request
import os
from sklearn.metrics import jaccard_score, hamming_loss, accuracy_score, f1_score
from sklearn.preprocessing import MultiLabelBinarizer


# Modules
from CLIPPER.code.ExperimentModules import embedding_models
from CLIPPER.code.ExperimentModules.dataset_manager import DatasetManager
from CLIPPER.code.ExperimentModules.weight_imprinting_classifier import WeightImprintingClassifier
from CLIPPER.code.ExperimentModules import simclr_data_augmentations
from CLIPPER.code.ExperimentModules.utils import (save_npy, load_npy, 
                                                       get_folder_id, 
                                                       create_expt_dir, 
                                                       save_to_drive, 
                                                       load_all_from_drive_folder, 
                                                       download_file_by_name, 
                                                       delete_file_by_name)

# Only show errors from the Google API client's discovery cache logger.
# The logger name must be spelled exactly, otherwise a different (unused)
# logger is configured and the call has no effect.
logging.getLogger('googleapiclient.discovery_cache').setLevel(logging.ERROR)



# Initialization & Constants

## Dataset details

In [None]:
# Side length (pixels) every image is resized to before embedding.
IMG_HEIGHT, IMG_WIDTH = 112, 112

# The Drive folder for this run is named after the experiment plus a date tag.
experiment_id = "UCF101-Embeddings"
folder_name = "{}-28-02-21".format(experiment_id)

# Change parentid to match that of experiments root folder in gdrive
parentid = '1bK72W-Um20EQDEyChNhNJthUNbmoSEjD'

In [None]:
# Initialize the experiment-specific folder in Drive; the returned id is
# passed to every upload/download helper call below.
folderid = create_expt_dir(drive, parentid, folder_name)

title: UCF101-Embeddings-28-02-21, id: 1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi


# Embedding function

In [None]:
def run_data_through_model(
    data,
    embedder,
    filename,
    drive,
    folderid,
    total_num_images,
    max_num_samples=5000,
):
    """Embed ``data`` in chunks, checkpointing the accumulated result to Drive.

    Each iteration first asks ``download_file_by_name`` for ``filename`` from
    the Drive folder. If a file of that name is then present in the working
    directory, its ``'data'`` array is loaded and processing resumes after the
    rows it already holds. The next chunk is preprocessed and embedded, then
    appended to the accumulated embeddings. The old Drive copy is deleted and
    the new one saved through
    ``embedder.save_embeddings_to_drive``.

    Args:
        data: Sliceable sequence of images (``data[a:b]`` and ``len`` are used).
        embedder: Object providing ``load_model``, ``preprocess_data``,
            ``embed_images`` and ``save_embeddings_to_drive``.
        filename: Name of the ``.npz`` checkpoint, locally and on Drive.
        drive: Drive client forwarded to the Drive helper functions.
        folderid: Id of the Drive folder holding the checkpoint.
        total_num_images: Number of rows at which the run is complete.
        max_num_samples: Maximum number of images embedded per chunk.

    Raises:
        ValueError: If ``max_num_samples`` is not positive, or if no images
            are left in ``data`` although ``total_num_images`` has not been
            reached (either case would otherwise loop forever).
    """
    if max_num_samples <= 0:
        raise ValueError("max_num_samples must be a positive integer")

    embedder.load_model()

    embeddings = None
    num_images_done = 0

    while embeddings is None or num_images_done < total_num_images:
        download_file_by_name(drive, folderid, filename)

        if filename in os.listdir():
            # Close the archive right after reading: the same file is
            # rewritten further down when the new checkpoint is saved.
            with np.load(filename) as archive:
                embeddings = archive['data']
            num_images_done = embeddings.shape[0]
            # `>=` so a checkpoint holding more rows than requested is treated
            # as complete rather than triggering an empty embedding pass.
            if num_images_done >= total_num_images:
                print("All images done already.")
                break
            print("{}/{} images done already".format(
                num_images_done, total_num_images)
            )

        # Clamp the chunk to the requested total so the logged range matches
        # the images that are actually embedded.
        chunk_end = min(num_images_done + max_num_samples, total_num_images)
        print("Running for image indices {}-{}.".format(
            num_images_done, chunk_end
            )
        )
        batch = data[num_images_done:chunk_end]
        if len(batch) == 0:
            raise ValueError(
                "No images left to embed at index {} although {} were "
                "requested; check total_num_images against len(data).".format(
                    num_images_done, total_num_images
                )
            )

        processed_batch = embedder.preprocess_data(batch)
        embeddings_batch = embedder.embed_images(
            processed_batch, batch_size=50
            )

        if embeddings is None:
            embeddings = embeddings_batch
        else:
            embeddings = np.concatenate(
                [embeddings, embeddings_batch]
                )

        # Replace the Drive checkpoint with the extended one.
        delete_file_by_name(drive, folderid, filename)
        embedder.save_embeddings_to_drive(
            embeddings,
            filename,
            drive,
            folderid
            )
        num_images_done = embeddings.shape[0]
        print("{}/{} images done".format(num_images_done, total_num_images))


# Train data split

## Load Data

In [None]:
# Load the "train" split of the 'ucf_101' dataset through the project's
# DatasetManager; `dm` is reused later for the validation split.
dm = DatasetManager()
train_data_generator = dm.load_dataset('ucf_101', split="train")

Found 13320 files belonging to 101 classes.
Using 10656 files for training.


In [None]:
# Materialise the train generator into arrays: each image is resized to
# IMG_WIDTH x IMG_HEIGHT and cast to uint8.
# Only the first element of each yielded image/label is used
# (presumably batches of size one — TODO confirm against DatasetManager).
_data = []
_labels = []
for image, label in train_data_generator:
    resized_image = cv2.resize(image[0], (IMG_WIDTH, IMG_HEIGHT)).astype(np.uint8)
    _data.append(resized_image)
    _labels.append(label[0])

train_data = np.stack(_data)
train_labels = np.array(_labels)

# Drop the temporary lists now that the stacked arrays exist.
del _data
del _labels

print('Images shape: ', train_data.shape)
print('Labels length: ', len(train_labels))


Images shape:  (10656, 112, 112, 3)
Labels length:  10656


In [None]:
# Save train labels
train_labels_filename = 'train_labels.npz'

if train_labels_filename not in os.listdir():
    save_npy(train_labels_filename, train_labels)
    save_to_drive(drive, folderid, train_labels_filename)

In [None]:
# Number of train images; passed to run_data_through_model as the completion target.
total_train_images = len(train_data)
total_train_images

10656

## Inception V3

In [None]:
# Inception V3 embeddings for the train split, checkpointed to Drive in chunks.
max_num_samples = 4000 # Colab crashes with too many images
inceptionv3_train_filename = 'inceptionv3_embeddings_train.npz'
inceptionv3_train_embedder = embedding_models.InceptionV3EmbeddingWrapper()

run_data_through_model(
    data=train_data,
    embedder=inceptionv3_train_embedder,
    filename=inceptionv3_train_filename,
    drive=drive,
    folderid=folderid,
    total_num_images=total_train_images,
    max_num_samples=max_num_samples,
)

Downloading inceptionv3_embeddings_train.npz from GDrive
All images done already.


## Resnet 50

In [None]:
# ResNet-50 embeddings for the train split, at most 2000 images per chunk.
max_num_samples = 2000
resnet50_train_filename = 'resnet50_embeddings_train.npz'
resnet50_train_embedder = embedding_models.Resnet50EmbeddingWrapper()

run_data_through_model(
    data=train_data,
    embedder=resnet50_train_embedder,
    filename=resnet50_train_filename,
    drive=drive,
    folderid=folderid,
    total_num_images=total_train_images,
    max_num_samples=max_num_samples,
)

Downloading resnet50_embeddings_train.npz from GDrive
All images done already.


## MoCo Resnet 50

In [None]:
# MoCo ResNet-50 embeddings for the train split, at most 2000 images per chunk.
max_num_samples = 2000
moco_resnet50_train_filename = 'moco_resnet50_embeddings_train.npz'
moco_resnet50_train_embedder = embedding_models.MoCoResnet50EmbeddingWrapper()

run_data_through_model(
    data=train_data,
    embedder=moco_resnet50_train_embedder,
    filename=moco_resnet50_train_filename,
    drive=drive,
    folderid=folderid,
    total_num_images=total_train_images,
    max_num_samples=max_num_samples,
)

Downloading moco_resnet50_embeddings_train.npz from GDrive
All images done already.


## PCL Resnet 50

In [None]:
# PCL ResNet-50 embeddings for the train split, at most 2000 images per chunk.
max_num_samples = 2000
pcl_resnet50_train_filename = 'pcl_resnet50_embeddings_train.npz'
pcl_resnet50_train_embedder = embedding_models.PCLResnet50EmbeddingWrapper()

run_data_through_model(
    data=train_data,
    embedder=pcl_resnet50_train_embedder,
    filename=pcl_resnet50_train_filename,
    drive=drive,
    folderid=folderid,
    total_num_images=total_train_images,
    max_num_samples=max_num_samples,
)

Downloading pcl_resnet50_embeddings_train.npz from GDrive
All images done already.


## SwAV Resnet 50

In [None]:
# SwAV ResNet-50 embeddings for the train split, at most 1000 images per chunk.
max_num_samples = 1000
swav_resnet50_train_filename = 'swav_resnet50_embeddings_train.npz'
swav_resnet50_train_embedder = embedding_models.SwAVResnet50EmbeddingWrapper()

run_data_through_model(
    data=train_data,
    embedder=swav_resnet50_train_embedder,
    filename=swav_resnet50_train_filename,
    drive=drive,
    folderid=folderid,
    total_num_images=total_train_images,
    max_num_samples=max_num_samples,
)

Using cache found in /root/.cache/torch/hub/facebookresearch_swav_master


Downloading swav_resnet50_embeddings_train.npz from GDrive
All images done already.


## SimCLR

In [None]:
# SimCLR embeddings for the train split, at most 1000 images per chunk.
max_num_samples = 1000
simclr_train_filename = 'simclr_embeddings_train.npz'
simclr_train_embedder = embedding_models.SimCLREmbeddingWrapper()

run_data_through_model(
    data=train_data,
    embedder=simclr_train_embedder,
    filename=simclr_train_filename,
    drive=drive,
    folderid=folderid,
    total_num_images=total_train_images,
    max_num_samples=max_num_samples,
)



Downloading simclr_embeddings_train.npz from GDrive
All images done already.


## VGG16

In [None]:
# VGG16 embeddings for the train split, at most 1000 images per chunk.
max_num_samples = 1000
vgg16_train_filename = 'vgg16_embeddings_train.npz'
vgg16_train_embedder = embedding_models.VGG16EmbeddingWrapper()

run_data_through_model(
    data=train_data,
    embedder=vgg16_train_embedder,
    filename=vgg16_train_filename,
    drive=drive,
    folderid=folderid,
    total_num_images=total_train_images,
    max_num_samples=max_num_samples,
)

Downloading vgg16_embeddings_train.npz from GDrive
All images done already.


## CLIP

In [None]:
# CLIP embeddings for the train split, at most 1000 images per chunk.
max_num_samples = 1000
clip_train_filename = 'clip_embeddings_train.npz'
clip_train_embedder = embedding_models.CLIPEmbeddingWrapper()

run_data_through_model(
    data=train_data,
    embedder=clip_train_embedder,
    filename=clip_train_filename,
    drive=drive,
    folderid=folderid,
    total_num_images=total_train_images,
    max_num_samples=max_num_samples,
)

Running for image indices 0-1000.


                                                                                [elapsed time: 0:00:00] |                                  | (ETA:  --:--:--) 

Embedding images...


 [elapsed time: 0:00:05] |********************************  | (ETA:   0:00:00) 

Data saved to clip_embeddings_train.npz
Uploaded clip_embeddings_train.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
1000/10656 images done
Downloading clip_embeddings_train.npz from GDrive
1000/10656 images done already
Running for image indices 1000-2000.


 [elapsed time: 0:00:00] |***                               | (ETA:   0:00:01) 

Embedding images...


 [elapsed time: 0:00:01] |**********************************| (ETA:  00:00:00) 

Deleting clip_embeddings_train.npz from GDrive
Data saved to clip_embeddings_train.npz
Uploaded clip_embeddings_train.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
2000/10656 images done
Downloading clip_embeddings_train.npz from GDrive
2000/10656 images done already
Running for image indices 2000-3000.


 [elapsed time: 0:00:00] |***                               | (ETA:   0:00:01) 

Embedding images...


 [elapsed time: 0:00:01] |**********************************| (ETA:  00:00:00) 

Deleting clip_embeddings_train.npz from GDrive
Data saved to clip_embeddings_train.npz
Uploaded clip_embeddings_train.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
3000/10656 images done
Downloading clip_embeddings_train.npz from GDrive
3000/10656 images done already
Running for image indices 3000-4000.


 [elapsed time: 0:00:00] |***                               | (ETA:   0:00:01) 

Embedding images...


 [elapsed time: 0:00:01] |**********************************| (ETA:  00:00:00) 

Deleting clip_embeddings_train.npz from GDrive
Data saved to clip_embeddings_train.npz
Uploaded clip_embeddings_train.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
4000/10656 images done
Downloading clip_embeddings_train.npz from GDrive
4000/10656 images done already
Running for image indices 4000-5000.


 [elapsed time: 0:00:00] |***                               | (ETA:   0:00:01) 

Embedding images...


 [elapsed time: 0:00:01] |**********************************| (ETA:  00:00:00) 

Deleting clip_embeddings_train.npz from GDrive
Data saved to clip_embeddings_train.npz
Uploaded clip_embeddings_train.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
5000/10656 images done
Downloading clip_embeddings_train.npz from GDrive
5000/10656 images done already
Running for image indices 5000-6000.


 [elapsed time: 0:00:00] |***                               | (ETA:   0:00:01) 

Embedding images...


 [elapsed time: 0:00:01] |**********************************| (ETA:  00:00:00) 

Deleting clip_embeddings_train.npz from GDrive
Data saved to clip_embeddings_train.npz
Uploaded clip_embeddings_train.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
6000/10656 images done
Downloading clip_embeddings_train.npz from GDrive
6000/10656 images done already
Running for image indices 6000-7000.


 [elapsed time: 0:00:00] |***                               | (ETA:   0:00:01) 

Embedding images...


 [elapsed time: 0:00:01] |**********************************| (ETA:  00:00:00) 

Deleting clip_embeddings_train.npz from GDrive
Data saved to clip_embeddings_train.npz
Uploaded clip_embeddings_train.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
7000/10656 images done
Downloading clip_embeddings_train.npz from GDrive
7000/10656 images done already
Running for image indices 7000-8000.


 [elapsed time: 0:00:00] |***                               | (ETA:   0:00:01) 

Embedding images...


 [elapsed time: 0:00:01] |**********************************| (ETA:  00:00:00) 

Deleting clip_embeddings_train.npz from GDrive
Data saved to clip_embeddings_train.npz
Uploaded clip_embeddings_train.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
8000/10656 images done
Downloading clip_embeddings_train.npz from GDrive
8000/10656 images done already
Running for image indices 8000-9000.


 [elapsed time: 0:00:00] |***                               | (ETA:   0:00:01) 

Embedding images...


 [elapsed time: 0:00:01] |**********************************| (ETA:  00:00:00) 

Deleting clip_embeddings_train.npz from GDrive
Data saved to clip_embeddings_train.npz
Uploaded clip_embeddings_train.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
9000/10656 images done
Downloading clip_embeddings_train.npz from GDrive
9000/10656 images done already
Running for image indices 9000-10000.


 [elapsed time: 0:00:00] |***                               | (ETA:   0:00:01) 

Embedding images...


 [elapsed time: 0:00:01] |**********************************| (ETA:  00:00:00) 

Deleting clip_embeddings_train.npz from GDrive
Data saved to clip_embeddings_train.npz
Uploaded clip_embeddings_train.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
10000/10656 images done
Downloading clip_embeddings_train.npz from GDrive
10000/10656 images done already
Running for image indices 10000-11000.


 [elapsed time: 0:00:00] |****                              | (ETA:   0:00:00) 

Embedding images...


 [elapsed time: 0:00:00] |*******************************   | (ETA:   0:00:00) 

Deleting clip_embeddings_train.npz from GDrive
Data saved to clip_embeddings_train.npz
Uploaded clip_embeddings_train.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
10656/10656 images done


# Val data split

## Load Data

In [None]:
# Load the "val" split of 'ucf_101' with the DatasetManager created for the train split.
val_data_generator = dm.load_dataset('ucf_101', split="val")

Found 13320 files belonging to 101 classes.
Using 2664 files for validation.


In [None]:
# Materialise the validation generator into arrays: each image is resized to
# IMG_WIDTH x IMG_HEIGHT and cast to uint8.
# Only the first element of each yielded image/label is used
# (presumably batches of size one — TODO confirm against DatasetManager).
_data = []
_labels = []
for image, label in val_data_generator:
    resized_image = cv2.resize(image[0], (IMG_WIDTH, IMG_HEIGHT)).astype(np.uint8)
    _data.append(resized_image)
    _labels.append(label[0])

val_data = np.stack(_data)
val_labels = np.array(_labels)

# Drop the temporary lists now that the stacked arrays exist.
del _data
del _labels

print('Images shape: ', val_data.shape)
# Same label as the train cell; the previous "Alphabet labels" wording did not
# match this dataset.
print('Labels length: ', len(val_labels))


Images shape:  (2664, 112, 112, 3)
Alphabet labels length:  2664


In [None]:
# Save val labels
val_labels_filename = 'val_labels.npz'

if val_labels_filename not in os.listdir():
    save_npy(val_labels_filename, val_labels)
    save_to_drive(drive, folderid, val_labels_filename)

Data saved to val_labels.npz
Uploaded val_labels.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi


In [None]:
# Number of validation images; passed to run_data_through_model as the completion target.
total_val_images = len(val_data)
total_val_images

2664

## Inception V3

In [None]:
# Inception V3 embeddings for the validation split, checkpointed to Drive in chunks.
max_num_samples = 2000 # Colab crashes with too many images
inceptionv3_val_filename = 'inceptionv3_embeddings_val.npz'
inceptionv3_val_embedder = embedding_models.InceptionV3EmbeddingWrapper()

run_data_through_model(
    data=val_data,
    embedder=inceptionv3_val_embedder,
    filename=inceptionv3_val_filename,
    drive=drive,
    folderid=folderid,
    total_num_images=total_val_images,
    max_num_samples=max_num_samples,
)

Downloading inceptionv3_embeddings_val.npz from GDrive
All images done already.


## Resnet 50

In [None]:
# ResNet-50 embeddings for the validation split, at most 2000 images per chunk.
max_num_samples = 2000
resnet50_val_filename = 'resnet50_embeddings_val.npz'
resnet50_val_embedder = embedding_models.Resnet50EmbeddingWrapper()

run_data_through_model(
    data=val_data,
    embedder=resnet50_val_embedder,
    filename=resnet50_val_filename,
    drive=drive,
    folderid=folderid,
    total_num_images=total_val_images,
    max_num_samples=max_num_samples,
)

                                                                                [elapsed time: 0:00:00] |                                  | (ETA:  --:--:--) 

Running for image indices 0-2000.
Preprocessing data...


 [elapsed time: 0:00:00] |                                  | (ETA:  --:--:--) 

Embedding images...


 [elapsed time: 0:06:18] |**********************************| (ETA:  00:00:00) 

Data saved to resnet50_embeddings_val.npz
Uploaded resnet50_embeddings_val.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
2000/2664 images done
Downloading resnet50_embeddings_val.npz from GDrive


 [elapsed time: 0:00:00] |****                              | (ETA:   0:00:01) 

2000/2664 images done already
Running for image indices 2000-4000.
Preprocessing data...


 [elapsed time: 0:00:00] |                                  | (ETA:  --:--:--) 

Embedding images...


 [elapsed time: 0:02:05] |**********************************| (ETA:  00:00:00) 

Deleting resnet50_embeddings_val.npz from GDrive
Data saved to resnet50_embeddings_val.npz
Uploaded resnet50_embeddings_val.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
2664/2664 images done


## MoCo Resnet 50

In [None]:
# MoCo ResNet-50 embeddings for the validation split, at most 2000 images per chunk.
max_num_samples = 2000
moco_resnet50_val_filename = 'moco_resnet50_embeddings_val.npz'
moco_resnet50_val_embedder = embedding_models.MoCoResnet50EmbeddingWrapper()

run_data_through_model(
    data=val_data,
    embedder=moco_resnet50_val_embedder,
    filename=moco_resnet50_val_filename,
    drive=drive,
    folderid=folderid,
    total_num_images=total_val_images,
    max_num_samples=max_num_samples,
)

 [elapsed time: 0:00:00] |                                  | (ETA:   0:00:04) 

Running for image indices 0-2000.
Preprocessing data...


 [elapsed time: 0:00:00] |                                  | (ETA:  --:--:--) 

Embedding images...


 [elapsed time: 0:06:46] |**********************************| (ETA:  00:00:00) 

Data saved to moco_resnet50_embeddings_val.npz
Uploaded moco_resnet50_embeddings_val.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
2000/2664 images done
Downloading moco_resnet50_embeddings_val.npz from GDrive


 [elapsed time: 0:00:00] |****                              | (ETA:   0:00:01) 

2000/2664 images done already
Running for image indices 2000-4000.
Preprocessing data...


 [elapsed time: 0:00:00] |                                  | (ETA:  --:--:--) 

Embedding images...


 [elapsed time: 0:02:13] |**********************************| (ETA:  00:00:00) 

Deleting moco_resnet50_embeddings_val.npz from GDrive
Data saved to moco_resnet50_embeddings_val.npz
Uploaded moco_resnet50_embeddings_val.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
2664/2664 images done


## PCL Resnet 50

In [None]:
# PCL ResNet-50 embeddings for the validation split, at most 2000 images per chunk.
max_num_samples = 2000
pcl_resnet50_val_filename = 'pcl_resnet50_embeddings_val.npz'
pcl_resnet50_val_embedder = embedding_models.PCLResnet50EmbeddingWrapper()

run_data_through_model(
    data=val_data,
    embedder=pcl_resnet50_val_embedder,
    filename=pcl_resnet50_val_filename,
    drive=drive,
    folderid=folderid,
    total_num_images=total_val_images,
    max_num_samples=max_num_samples,
)

 [elapsed time: 0:00:00] |                                  | (ETA:   0:00:03) 

Running for image indices 0-2000.
Preprocessing data...


 [elapsed time: 0:00:00] |                                  | (ETA:  --:--:--) 

Embedding images...


 [elapsed time: 0:06:11] |**********************************| (ETA:  00:00:00) 

Data saved to pcl_resnet50_embeddings_val.npz
Uploaded pcl_resnet50_embeddings_val.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
2000/2664 images done
Downloading pcl_resnet50_embeddings_val.npz from GDrive


 [elapsed time: 0:00:00] |**                                | (ETA:   0:00:01) 

2000/2664 images done already
Running for image indices 2000-4000.
Preprocessing data...


 [elapsed time: 0:00:00] |                                  | (ETA:  --:--:--) 

Embedding images...


 [elapsed time: 0:02:02] |**********************************| (ETA:  00:00:00) 

Deleting pcl_resnet50_embeddings_val.npz from GDrive
Data saved to pcl_resnet50_embeddings_val.npz
Uploaded pcl_resnet50_embeddings_val.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
2664/2664 images done


## SwAV Resnet 50

In [None]:
# SwAV ResNet-50 embeddings for the validation split, at most 1000 images per chunk.
max_num_samples = 1000
swav_resnet50_val_filename = 'swav_resnet50_embeddings_val.npz'
swav_resnet50_val_embedder = embedding_models.SwAVResnet50EmbeddingWrapper()

run_data_through_model(
    data=val_data,
    embedder=swav_resnet50_val_embedder,
    filename=swav_resnet50_val_filename,
    drive=drive,
    folderid=folderid,
    total_num_images=total_val_images,
    max_num_samples=max_num_samples,
)

Using cache found in /root/.cache/torch/hub/facebookresearch_swav_master
 [elapsed time: 0:00:00] |*                                 | (ETA:   0:00:01) 

Running for image indices 0-1000.
Preprocessing data...


 [elapsed time: 0:00:00] |                                  | (ETA:  --:--:--) 

Embedding images...


 [elapsed time: 0:03:16] |**********************************| (ETA:  00:00:00) 

Data saved to swav_resnet50_embeddings_val.npz
Uploaded swav_resnet50_embeddings_val.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
1000/2664 images done
Downloading swav_resnet50_embeddings_val.npz from GDrive


 [elapsed time: 0:00:00] |*                                 | (ETA:   0:00:01) 

1000/2664 images done already
Running for image indices 1000-2000.
Preprocessing data...


 [elapsed time: 0:00:00] |                                  | (ETA:  --:--:--) 

Embedding images...


 [elapsed time: 0:03:15] |**********************************| (ETA:  00:00:00) 

Deleting swav_resnet50_embeddings_val.npz from GDrive
Data saved to swav_resnet50_embeddings_val.npz
Uploaded swav_resnet50_embeddings_val.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
2000/2664 images done
Downloading swav_resnet50_embeddings_val.npz from GDrive


 [elapsed time: 0:00:00] |****                              | (ETA:   0:00:01) 

2000/2664 images done already
Running for image indices 2000-3000.
Preprocessing data...


 [elapsed time: 0:00:00] |                                  | (ETA:  --:--:--) 

Embedding images...


 [elapsed time: 0:02:06] |**********************************| (ETA:  00:00:00) 

Deleting swav_resnet50_embeddings_val.npz from GDrive
Data saved to swav_resnet50_embeddings_val.npz
Uploaded swav_resnet50_embeddings_val.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
2664/2664 images done


## SimCLR

In [None]:
# SimCLR embeddings for the validation split, at most 1000 images per chunk.
max_num_samples = 1000
simclr_val_filename = 'simclr_embeddings_val.npz'
simclr_val_embedder = embedding_models.SimCLREmbeddingWrapper()

run_data_through_model(
    data=val_data,
    embedder=simclr_val_embedder,
    filename=simclr_val_filename,
    drive=drive,
    folderid=folderid,
    total_num_images=total_val_images,
    max_num_samples=max_num_samples,
)



Running for image indices 0-1000.


                                                                                [elapsed time: 0:00:00] |                                  | (ETA:  --:--:--) 

Embedding images...


 [elapsed time: 0:00:11] |**********************************| (ETA:  00:00:00) 

Data saved to simclr_embeddings_val.npz
Uploaded simclr_embeddings_val.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
1000/2664 images done
Downloading simclr_embeddings_val.npz from GDrive
1000/2664 images done already
Running for image indices 1000-2000.


                                                                                [elapsed time: 0:00:00] |                                  | (ETA:  --:--:--) 

Embedding images...


 [elapsed time: 0:00:03] |**********************************| (ETA:  00:00:00) 

Deleting simclr_embeddings_val.npz from GDrive
Data saved to simclr_embeddings_val.npz
Uploaded simclr_embeddings_val.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
2000/2664 images done
Downloading simclr_embeddings_val.npz from GDrive
2000/2664 images done already
Running for image indices 2000-3000.


 [elapsed time: 0:00:00] |**                                | (ETA:   0:00:02) 

Embedding images...


 [elapsed time: 0:00:02] |**********************************| (ETA:  00:00:00) 

Deleting simclr_embeddings_val.npz from GDrive
Data saved to simclr_embeddings_val.npz
Uploaded simclr_embeddings_val.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
2664/2664 images done


## VGG16

In [None]:
# VGG16 embeddings for the validation split, at most 1000 images per chunk.
max_num_samples = 1000
vgg16_val_filename = 'vgg16_embeddings_val.npz'
vgg16_val_embedder = embedding_models.VGG16EmbeddingWrapper()

run_data_through_model(
    data=val_data,
    embedder=vgg16_val_embedder,
    filename=vgg16_val_filename,
    drive=drive,
    folderid=folderid,
    total_num_images=total_val_images,
    max_num_samples=max_num_samples,
)

                                                                                [elapsed time: 0:00:00] |                                  | (ETA:  --:--:--) 

Running for image indices 0-1000.
Embedding images...


 [elapsed time: 0:00:08] |**********************************| (ETA:  00:00:00) 

Data saved to vgg16_embeddings_val.npz
Uploaded vgg16_embeddings_val.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
1000/2664 images done
Downloading vgg16_embeddings_val.npz from GDrive


                                                                                [elapsed time: 0:00:00] |                                  | (ETA:  --:--:--) 

1000/2664 images done already
Running for image indices 1000-2000.
Embedding images...


 [elapsed time: 0:00:06] |**********************************| (ETA:  00:00:00) 

Deleting vgg16_embeddings_val.npz from GDrive
Data saved to vgg16_embeddings_val.npz
Uploaded vgg16_embeddings_val.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
2000/2664 images done
Downloading vgg16_embeddings_val.npz from GDrive


                                                                                [elapsed time: 0:00:00] |                                  | (ETA:  --:--:--) 

2000/2664 images done already
Running for image indices 2000-3000.
Embedding images...


 [elapsed time: 0:00:05] |**********************************| (ETA:  00:00:00) 

Deleting vgg16_embeddings_val.npz from GDrive
Data saved to vgg16_embeddings_val.npz
Uploaded vgg16_embeddings_val.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
2664/2664 images done


## CLIP

In [None]:
# CLIP embeddings for the validation split, at most 1000 images per chunk.
max_num_samples = 1000
clip_val_filename = 'clip_embeddings_val.npz'
clip_val_embedder = embedding_models.CLIPEmbeddingWrapper()

run_data_through_model(
    data=val_data,
    embedder=clip_val_embedder,
    filename=clip_val_filename,
    drive=drive,
    folderid=folderid,
    total_num_images=total_val_images,
    max_num_samples=max_num_samples,
)

Running for image indices 0-1000.


 [elapsed time: 0:00:00] |*                                 | (ETA:   0:00:03) 

Embedding images...


 [elapsed time: 0:00:05] |**********************************| (ETA:  00:00:00) 

Data saved to clip_embeddings_val.npz
Uploaded clip_embeddings_val.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
1000/2664 images done
Downloading clip_embeddings_val.npz from GDrive
1000/2664 images done already
Running for image indices 1000-2000.


 [elapsed time: 0:00:00] |***                               | (ETA:   0:00:01) 

Embedding images...


 [elapsed time: 0:00:01] |**********************************| (ETA:  00:00:00) 

Deleting clip_embeddings_val.npz from GDrive
Data saved to clip_embeddings_val.npz
Uploaded clip_embeddings_val.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
2000/2664 images done
Downloading clip_embeddings_val.npz from GDrive
2000/2664 images done already
Running for image indices 2000-3000.


 [elapsed time: 0:00:00] |****                              | (ETA:   0:00:00) 

Embedding images...


 [elapsed time: 0:00:00] |*******************************   | (ETA:   0:00:00) 

Deleting clip_embeddings_val.npz from GDrive
Data saved to clip_embeddings_val.npz
Uploaded clip_embeddings_val.npz to https://drive.google.com/drive/u/1/folders/1AR8qp1QPFv0M3jCIQKK8VA2xsNilo_pi
2664/2664 images done
