# Preparation

In [None]:
from google.colab import drive
drive.mount("/content/gdrive")

Mounted at /content/gdrive


In [None]:
import os
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import pickle
from scipy import stats
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU so that
# model.to(device) / tensor.to(device) still work on CPU-only runtimes.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
# Get the name of an index label
import json

# Load the class index file
with open('/content/gdrive/MyDrive/CCM1016_Project/imagenet_class_index.json', 'r') as f:
    class_idx = json.load(f)

# Create a dict mapping category index ("nxxxxxxxx", element [0] of each entry) to its label (element [1])
idx2label = {class_idx[str(k)][0]:class_idx[str(k)][1] for k in range(len(class_idx))}

# Create a dict mapping label back to category index ("nxxxxxxxx").
# NOTE(review): if two classes share the same label text, the later entry overwrites the
# earlier one in this dict — confirm the labels used downstream are unique.
label2idx = {class_idx[str(k)][1]:class_idx[str(k)][0] for k in range(len(class_idx))}

# Create a dict mapping category index ("nxxxxxxxx") to class index (0,1,2,3,...)
# idx2number = {class_idx[str(k)][0]:k for k in range(len(class_idx))}

# Get all category names(labels)
def getAllNames():
    """Print every label stored in ``idx2label`` and return the labels as a list.

    Labels are visited in the iteration order of ``idx2label``.
    """
    collected = []
    for label in idx2label.values():
        print(label)
        collected.append(label)
    return collected

allCateNames = getAllNames()
# print(len(allCateNames))

tench
goldfish
great_white_shark
tiger_shark
hammerhead
electric_ray
stingray
cock
hen
ostrich
brambling
goldfinch
house_finch
junco
indigo_bunting
robin
bulbul
jay
magpie
chickadee
water_ouzel
kite
bald_eagle
vulture
great_grey_owl
European_fire_salamander
common_newt
eft
spotted_salamander
axolotl
bullfrog
tree_frog
tailed_frog
loggerhead
leatherback_turtle
mud_turtle
terrapin
box_turtle
banded_gecko
common_iguana
American_chameleon
whiptail
agama
frilled_lizard
alligator_lizard
Gila_monster
green_lizard
African_chameleon
Komodo_dragon
African_crocodile
American_alligator
triceratops
thunder_snake
ringneck_snake
hognose_snake
green_snake
king_snake
garter_snake
water_snake
vine_snake
night_snake
boa_constrictor
rock_python
Indian_cobra
green_mamba
sea_snake
horned_viper
diamondback
sidewinder
trilobite
harvestman
scorpion
black_and_gold_garden_spider
barn_spider
garden_spider
black_widow
tarantula
wolf_spider
tick
centipede
black_grouse
ptarmigan
ruffed_grouse
prairie_chicken
peacock

# gensim: word embeddings
## get categories for 3 domains

In [None]:
# Get Word Embeddings
from gensim.models import Word2Vec
import gensim.downloader as api

glove_model = api.load('glove-wiki-gigaword-300')



In [None]:
glove_model.index_to_key[0]

'the'

In [None]:
def getOverlap(allCateNames, glove_model):
    """Return the category names that are present in the embedding vocabulary.

    Args:
        allCateNames: iterable of category-name strings.
        glove_model: embedding model exposing ``key_to_index`` (a mapping keyed by
            vocabulary word) and/or ``index_to_key`` (a sequence of vocabulary words).

    Returns:
        List of the names from ``allCateNames`` found in the vocabulary, in their
        original order; repeated names are kept.
    """
    # Testing membership against the ``index_to_key`` list scans the whole vocabulary
    # for every name. Use a hash-based lookup instead; build a set only when the model
    # does not provide the ``key_to_index`` mapping.
    vocab = getattr(glove_model, "key_to_index", None)
    if vocab is None:
        vocab = set(glove_model.index_to_key)
    return [name for name in allCateNames if name in vocab]
overlap = getOverlap(allCateNames, glove_model)
print(len(overlap))

539


In [None]:
# for item in set(overlap):
    # print(item)
print(len(set(overlap)))

537


In [None]:
# label categories with domain

# Animals
# The slice creates a new list, so the removals below do not modify `overlap`.
animals = overlap[:171] # all items before index 171 are within the animal domain

# remove polysemous labels
# list.remove raises ValueError if a word is not present in `animals`.
poly_animals = ["kite", "eft", "tick", "drake", "crane", "redbone", "clumber", "sorrel"]
for poly in poly_animals:
    animals.remove(poly)

# Vehicles
vehicles = ["airliner", "airship", "ambulance", "bobsled", "cab", "canoe", "catamaran", "convertible", 
            "dogsled", "fireboat", "forklift", "go-kart", "gondola", "jeep", "lifeboat", "limousine",
            "oxcart", "pickup", "plane", "schooner", "snowmobile", "speedboat", "streetcar", "submarine",
            "tank", "tractor", "tricycle", "trimaran", "trolleybus", "unicycle", "warplane"]

# Fruits & Vegetables
fruitVege = ["strawberry", "orange", "lemon", "fig", "pineapple", "banana", "jackfruit", "pomegranate",
             "broccoli", "cauliflower", "zucchini", "cucumber", "artichoke", "cardoon", "mushroom",
             "rapeseed", "corn", "acorn", "buckeye", "agaric", "gyromitra", "stinkhorn", "earthstar","bolete"]

print(len(animals))
print(len(vehicles))
print(len(fruitVege))

163
31
24


In [None]:
# random sample 20 categories for each domain
import random
def randomSample(list, seed, n):
    """Draw ``n`` distinct items from ``list`` reproducibly.

    Args:
        list: population sequence to sample from. The parameter name shadows the
            built-in ``list`` inside this function; it is kept so existing keyword
            callers continue to work.
        seed: seed for the random number generator.
        n: number of items to draw; must not exceed ``len(list)``.

    Returns:
        A new list of ``n`` items sampled without replacement.

    Raises:
        ValueError: if ``n`` is negative or larger than the population
            (raised by ``random.Random.sample``).
    """
    # A private generator produces the same draw as ``random.seed(seed)`` followed by
    # ``random.sample`` but leaves the module-level generator state untouched, so
    # calling this helper does not silently reseed unrelated code.
    rng = random.Random(seed)
    return rng.sample(list, n)
animals20 = randomSample(animals, 3108, 20)
vehicles20 = randomSample(vehicles, 3108, 20)
fruitVege20 = randomSample(fruitVege, 3108, 20)

In [None]:
animals20

['mongoose',
 'malamute',
 'hamster',
 'redshank',
 'whippet',
 'hippopotamus',
 'siamang',
 'skunk',
 'zebra',
 'coucal',
 'hartebeest',
 'koala',
 'echidna',
 'marmoset',
 'scorpion',
 'chow',
 'spoonbill',
 'limpkin',
 'indri',
 'platypus']

In [None]:
vehicles20

['gondola',
 'fireboat',
 'lifeboat',
 'speedboat',
 'convertible',
 'dogsled',
 'trimaran',
 'limousine',
 'plane',
 'pickup',
 'submarine',
 'cab',
 'oxcart',
 'canoe',
 'snowmobile',
 'tank',
 'unicycle',
 'jeep',
 'airship',
 'streetcar']

In [None]:
fruitVege20

['artichoke',
 'cauliflower',
 'mushroom',
 'pomegranate',
 'broccoli',
 'rapeseed',
 'acorn',
 'buckeye',
 'pineapple',
 'agaric',
 'zucchini',
 'lemon',
 'gyromitra',
 'bolete',
 'earthstar',
 'orange',
 'banana',
 'stinkhorn',
 'jackfruit',
 'corn']

In [None]:
### get category indices for each domain
def getIndex(domainList):
    """Look up the category index of every label in ``domainList`` via ``label2idx``.

    Returns the indices as a list in the same order as the input labels; a label
    missing from ``label2idx`` raises ``KeyError``.
    """
    return [label2idx[label] for label in domainList]
animals20_cate = getIndex(animals20)
vehicles20_cate = getIndex(vehicles20)
fruitVege20_cate = getIndex(fruitVege20)

In [None]:
animals20_cate

['n02137549',
 'n02110063',
 'n02342885',
 'n02028035',
 'n02091134',
 'n02398521',
 'n02483708',
 'n02445715',
 'n02391049',
 'n01824575',
 'n02422106',
 'n01882714',
 'n01872401',
 'n02490219',
 'n01770393',
 'n02112137',
 'n02006656',
 'n02013706',
 'n02500267',
 'n01873310']

# Store category idx for each domain

In [None]:
animals20_cate = ['n02137549',
 'n02110063',
 'n02342885',
 'n02028035',
 'n02091134',
 'n02398521',
 'n02483708',
 'n02445715',
 'n02391049',
 'n01824575',
 'n02422106',
 'n01882714',
 'n01872401',
 'n02490219',
 'n01770393',
 'n02112137',
 'n02006656',
 'n02013706',
 'n02500267',
 'n01873310']
vehicles20_cate = ['n03447447',
 'n03344393',
 'n03662601',
 'n04273569',
 'n03100240',
 'n03218198',
 'n04483307',
 'n03670208',
 'n03954731',
 'n03930630',
 'n04347754',
 'n02930766',
 'n03868242',
 'n02951358',
 'n04252077',
 'n04389033',
 'n04509417',
 'n03594945',
 'n02692877',
 'n04335435']
fruitVege20_cate = ['n07718747',
 'n07715103',
 'n07734744',
 'n07768694',
 'n07714990',
 'n11879895',
 'n12267677',
 'n12768682',
 'n07753275',
 'n12998815',
 'n07716358',
 'n07749582',
 'n13037406',
 'n13054560',
 'n13044778',
 'n07747607',
 'n07753592',
 'n13040303',
 'n07754684',
 'n12144580']


In [None]:
# vehicles20_cate

In [None]:
# fruitVege20_cate

# Get image embeddings across all layers

## Load Model

In [None]:
# get image embeddings across all layers 

# define file paths and categories
# Root folder on the mounted Drive; later cells read images from one sub-folder per
# category index ("nxxxxxxxx") below it.
folder_path_Cindy = "/content/gdrive/MyDrive/CCM1016_Project/imageSamples"

# Load the VGG-16 model
# model = models.vgg16(pretrained=True)
# weights_vgg16 is kept as a separate name because its meta["categories"] list is used
# later to turn predicted class positions into names.
weights_vgg16 = models.VGG16_Weights.DEFAULT
model_vgg16 = models.vgg16(weights=weights_vgg16)
model_vgg16.to(device)

# Set the model to evaluation mode
# eval() returns the module, so as the last expression of the cell it also displays the architecture.
model_vgg16.eval()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

## Define reLU/maxP layers & image transformation

In [None]:
# Collect the names of every ReLU layer and every max-pooling layer of the network
# (both the `features` and the `classifier` sub-modules are visited) in a single pass.
reLULayers, pooledLayers = [], []
for name, module in model_vgg16.named_modules():
    kind = type(module)
    if kind == nn.ReLU:
        reLULayers.append(name)
    elif kind == nn.MaxPool2d:
        pooledLayers.append(name)
print(reLULayers)
print(len(reLULayers))
print(pooledLayers)
print(len(pooledLayers))

# Preprocessing applied to every input image: resize, centre-crop to 224x224,
# convert to a tensor, then normalise each channel with the mean/std given below.
imagenet_normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                          std=[0.229, 0.224, 0.225])
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    imagenet_normalize,
])

['features.1', 'features.3', 'features.6', 'features.8', 'features.11', 'features.13', 'features.15', 'features.18', 'features.20', 'features.22', 'features.25', 'features.27', 'features.29', 'classifier.1', 'classifier.4']
15
['features.4', 'features.9', 'features.16', 'features.23', 'features.30']
5


## Define hook

In [None]:
# Record the activations of all ReLU and max-pooling layers through forward hooks.
# The hooks write into the module-level names `relu_outputs` / `maxp_outputs`; the names
# are looked up when a hook fires, so later cells may rebind them to fresh dicts.
relu_outputs = {}
maxp_outputs = {}

batch_size = 30
def get_outputs(name):
    """Build a forward hook that stores the batch-averaged output of layer ``name``.

    Args:
        name: module name as reported by ``model_vgg16.named_modules()``.

    Returns:
        A hook with the ``(module, inputs, output)`` signature expected by
        ``register_forward_hook``. When it fires it averages ``output`` over
        dimension 0, flattens the result to a 1-d NumPy array and stores it in
        ``relu_outputs`` or ``maxp_outputs`` depending on which layer list
        contains ``name``.
    """
    def hook(module, inputs, output):
        # Average over the batch dimension, then flatten the feature maps,
        # e.g. [64, 224, 224] -> [64*224*224] 1-d array.
        result = output.detach().mean(dim=0).flatten().cpu().numpy()
        if name in reLULayers:
            relu_outputs[name] = result
        elif name in pooledLayers:
            maxp_outputs[name] = result
    return hook

# Re-running this cell would otherwise attach a second set of hooks on top of the
# first (every activation would then be computed and copied twice). Detach whatever
# a previous run registered before registering again.
for stale_handle in globals().get("handles", []):
    stale_handle.remove()

handles = []

for name, module in model_vgg16.named_modules():
    if name in reLULayers or name in pooledLayers:
        handle = module.register_forward_hook(get_outputs(name))
        handles.append(handle)

In [None]:
# Detach every forward hook registered so far.
# NOTE(review): once the hooks are removed, forward passes no longer fill
# relu_outputs / maxp_outputs. If the notebook is run top to bottom, the cell that
# registers the hooks has to be executed again before the activation-extraction
# cells, otherwise they store empty dictionaries — confirm the intended order.
for handle in handles:
        handle.remove()

## Check for correctly classified images for each category (no hook needed)

In [None]:
# Get Activation for identify correctly labelled images
def getActivation(categories):
    """Find, for each category, which of its images VGG-16 classifies correctly.

    Args:
        categories: iterable of category indices ("nxxxxxxxx"). Each one must be a
            sub-folder of ``folder_path_Cindy`` and a key of ``idx2label``.

    Returns:
        dict mapping each category index to the list of positions (within the
        ``os.listdir`` listing of the category folder) whose top-1 prediction
        matches the category label.

    NOTE(review): the positions are only meaningful relative to the ``os.listdir``
    order seen here. Code that consumes them by listing the folder again relies on
    that order being reproduced, which ``os.listdir`` does not guarantee.
    """
    correctImIdx = {}
    class_names = weights_vgg16.meta["categories"]

    for category in categories:
        sub_folder = os.path.join(folder_path_Cindy, category)
        imgs = os.listdir(sub_folder)

        print(f"Processing {len(imgs)} images in {category}...")

        if not imgs:
            # Nothing to classify; torch.stack would fail on an empty list.
            correctImIdx[category] = []
            print(f"DONE WITH CATEGORY {category}: {idx2label[category]}")
            print("")
            continue

        # Load the whole category as one batch; close each file once it is decoded.
        images = []
        for img_name in imgs:
            with Image.open(os.path.join(sub_folder, img_name)) as img:
                images.append(transform(img.convert('RGB')))
        images = torch.stack(images).to(device)

        # Inference only: skip building the autograd graph to save memory.
        with torch.no_grad():
            output = model_vgg16(images)
        predicted = output.argmax(dim=1).tolist()

        # The labels in idx2label use underscores between words; the torchvision
        # category names are assumed to use spaces (verify against
        # weights_vgg16.meta), so normalise before comparing.
        target = idx2label[category]
        correct = [
            i for i, pred in enumerate(predicted)
            if class_names[pred].replace(" ", "_") == target
        ]

        correctImIdx[category] = correct

        print(f"DONE WITH CATEGORY {category}: {idx2label[category]}")
        print("")
    return correctImIdx

In [None]:
correctImIdx_Ani = getActivation(animals20_cate)
correctImIdx_Veh = getActivation(vehicles20_cate)
correctImIdx_fV = getActivation(fruitVege20_cate)

Processing 30 images in n02137549...
DONE WITH CATEGORY n02137549: mongoose

Processing 30 images in n02110063...
DONE WITH CATEGORY n02110063: malamute

Processing 30 images in n02342885...
DONE WITH CATEGORY n02342885: hamster

Processing 30 images in n02028035...
DONE WITH CATEGORY n02028035: redshank

Processing 30 images in n02091134...
DONE WITH CATEGORY n02091134: whippet

Processing 30 images in n02398521...
DONE WITH CATEGORY n02398521: hippopotamus

Processing 30 images in n02483708...
DONE WITH CATEGORY n02483708: siamang

Processing 30 images in n02445715...
DONE WITH CATEGORY n02445715: skunk

Processing 30 images in n02391049...
DONE WITH CATEGORY n02391049: zebra

Processing 30 images in n01824575...
DONE WITH CATEGORY n01824575: coucal

Processing 30 images in n02422106...
DONE WITH CATEGORY n02422106: hartebeest

Processing 30 images in n01882714...
DONE WITH CATEGORY n01882714: koala

Processing 30 images in n01872401...
DONE WITH CATEGORY n01872401: echidna

Processi

In [None]:
print(correctImIdx_Ani['n02137549'])

[0, 1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 14, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28]


## Get Activations for Animal

In [None]:
# Get activations for the animal domain using only the correctly labelled images
all_activations_relu_Ani = {}
all_activations_maxp_Ani = {}

for category in animals20_cate:
    sub_folder = os.path.join(folder_path_Cindy, category)
    imgs = os.listdir(sub_folder)

    print(f"Processing {len(imgs)} images in {category}...")

    batch_size = len(imgs)
    correctIdx = correctImIdx_Ani[category]
    if len(correctIdx) == 0:
        # With no correctly classified image there is nothing to average over.
        print(f"SKIPPING CATEGORY {category}: no correctly classified images")
        print("")
        continue

    # Decode and transform only the images that were classified correctly
    # (positions refer to the os.listdir order of the category folder).
    images = []
    for i in correctIdx:
        with Image.open(os.path.join(sub_folder, imgs[i])) as img:
            images.append(transform(img.convert('RGB')))
    images = torch.stack(images).to(device)

    # The forward hooks write into the module-level names relu_outputs / maxp_outputs,
    # so rebinding them to fresh dicts gives every category its own containers.
    relu_outputs = {}
    maxp_outputs = {}
    # Inference only: skip building the autograd graph to save memory.
    with torch.no_grad():
        output = model_vgg16(images)

    if not relu_outputs and not maxp_outputs:
        # Happens when the hooks were removed (or never registered) before this cell ran.
        raise RuntimeError(
            "No activations were captured: register the forward hooks before running this cell."
        )

    # store activations in all_activations
    all_activations_relu_Ani[category] = relu_outputs
    all_activations_maxp_Ani[category] = maxp_outputs
    print(f"DONE WITH CATEGORY {category}: {idx2label[category]}")
    print("")

Processing 30 images in n02137549...
mongoose: 99.1%
mongoose: 98.6%
mongoose: 93.3%
mongoose: 75.4%
mongoose: 36.8%
mongoose: 81.1%
mongoose: 54.7%
mongoose: 94.6%
mongoose: 99.7%
mongoose: 99.9%
mongoose: 92.4%
mongoose: 100.0%
mongoose: 98.3%
mongoose: 100.0%
mongoose: 92.6%
mongoose: 91.3%
mongoose: 96.0%
mongoose: 99.9%
mongoose: 99.4%
mongoose: 100.0%
mongoose: 64.9%
mongoose: 95.6%
mongoose: 81.6%
mongoose: 99.9%
DONE WITH CATEGORY n02137549: mongoose

Processing 30 images in n02110063...
malamute: 89.5%
malamute: 47.1%
malamute: 59.3%
malamute: 96.8%
malamute: 72.4%
malamute: 94.7%
malamute: 53.9%
malamute: 76.7%
malamute: 86.5%
malamute: 51.2%
malamute: 86.3%
malamute: 98.5%
malamute: 91.6%
malamute: 82.6%
malamute: 46.2%
malamute: 72.7%
malamute: 90.2%
malamute: 79.4%
malamute: 66.0%
malamute: 50.5%
malamute: 86.2%
malamute: 73.3%
malamute: 73.4%
malamute: 34.6%
malamute: 77.8%
malamute: 58.4%
malamute: 77.9%
DONE WITH CATEGORY n02110063: malamute

Processing 30 images in n02

In [None]:
all_activations_maxp_Ani['n02137549']['features.4']

array([0.9016119 , 0.88740176, 0.7698135 , ..., 2.2063596 , 1.6461315 ,
       2.830195  ], dtype=float32)

## Get Activations for Vehicle

In [None]:
# Get activations for the vehicle domain using only the correctly labelled images
all_activations_relu_Veh = {}
all_activations_maxp_Veh = {}

for category in vehicles20_cate:
    sub_folder = os.path.join(folder_path_Cindy, category)
    imgs = os.listdir(sub_folder)

    print(f"Processing {len(imgs)} images in {category}...")

    batch_size = len(imgs)
    correctIdx = correctImIdx_Veh[category]
    if len(correctIdx) == 0:
        # With no correctly classified image there is nothing to average over.
        print(f"SKIPPING CATEGORY {category}: no correctly classified images")
        print("")
        continue

    # Decode and transform only the images that were classified correctly
    # (positions refer to the os.listdir order of the category folder).
    images = []
    for i in correctIdx:
        with Image.open(os.path.join(sub_folder, imgs[i])) as img:
            images.append(transform(img.convert('RGB')))
    images = torch.stack(images).to(device)

    # The forward hooks write into the module-level names relu_outputs / maxp_outputs,
    # so rebinding them to fresh dicts gives every category its own containers.
    relu_outputs = {}
    maxp_outputs = {}
    # Inference only: skip building the autograd graph to save memory.
    with torch.no_grad():
        output = model_vgg16(images)

    if not relu_outputs and not maxp_outputs:
        # Happens when the hooks were removed (or never registered) before this cell ran.
        raise RuntimeError(
            "No activations were captured: register the forward hooks before running this cell."
        )

    # store activations in all_activations
    all_activations_relu_Veh[category] = relu_outputs
    all_activations_maxp_Veh[category] = maxp_outputs
    print(f"DONE WITH CATEGORY {category}: {idx2label[category]}")
    print("")

Processing 30 images in n03447447...
DONE WITH CATEGORY n03447447: gondola

Processing 30 images in n03344393...
DONE WITH CATEGORY n03344393: fireboat

Processing 30 images in n03662601...
DONE WITH CATEGORY n03662601: lifeboat

Processing 30 images in n04273569...
DONE WITH CATEGORY n04273569: speedboat

Processing 30 images in n03100240...
DONE WITH CATEGORY n03100240: convertible

Processing 30 images in n03218198...
DONE WITH CATEGORY n03218198: dogsled

Processing 30 images in n04483307...
DONE WITH CATEGORY n04483307: trimaran

Processing 30 images in n03670208...
DONE WITH CATEGORY n03670208: limousine

Processing 30 images in n03954731...
DONE WITH CATEGORY n03954731: plane

Processing 30 images in n03930630...
DONE WITH CATEGORY n03930630: pickup

Processing 30 images in n04347754...
DONE WITH CATEGORY n04347754: submarine

Processing 30 images in n02930766...
DONE WITH CATEGORY n02930766: cab

Processing 30 images in n03868242...
DONE WITH CATEGORY n03868242: oxcart

Process

## Get Activations for fruit & vegetables

In [None]:
# Get activations for the fruit & vegetable domain using only the correctly labelled images
all_activations_relu_fV = {}
all_activations_maxp_fV = {}

for category in fruitVege20_cate:
    sub_folder = os.path.join(folder_path_Cindy, category)
    imgs = os.listdir(sub_folder)

    print(f"Processing {len(imgs)} images in {category}...")

    batch_size = len(imgs)
    correctIdx = correctImIdx_fV[category]
    if len(correctIdx) == 0:
        # With no correctly classified image there is nothing to average over.
        print(f"SKIPPING CATEGORY {category}: no correctly classified images")
        print("")
        continue

    # Decode and transform only the images that were classified correctly
    # (positions refer to the os.listdir order of the category folder).
    images = []
    for i in correctIdx:
        with Image.open(os.path.join(sub_folder, imgs[i])) as img:
            images.append(transform(img.convert('RGB')))
    images = torch.stack(images).to(device)

    # The forward hooks write into the module-level names relu_outputs / maxp_outputs,
    # so rebinding them to fresh dicts gives every category its own containers.
    relu_outputs = {}
    maxp_outputs = {}
    # Inference only: skip building the autograd graph to save memory.
    with torch.no_grad():
        output = model_vgg16(images)

    if not relu_outputs and not maxp_outputs:
        # Happens when the hooks were removed (or never registered) before this cell ran.
        raise RuntimeError(
            "No activations were captured: register the forward hooks before running this cell."
        )

    # store activations in all_activations
    all_activations_relu_fV[category] = relu_outputs
    all_activations_maxp_fV[category] = maxp_outputs
    print(f"DONE WITH CATEGORY {category}: {idx2label[category]}")
    print("")

Processing 30 images in n07718747...
DONE WITH CATEGORY n07718747: artichoke

Processing 30 images in n07715103...
DONE WITH CATEGORY n07715103: cauliflower

Processing 30 images in n07734744...
DONE WITH CATEGORY n07734744: mushroom

Processing 30 images in n07768694...
DONE WITH CATEGORY n07768694: pomegranate

Processing 30 images in n07714990...
DONE WITH CATEGORY n07714990: broccoli

Processing 30 images in n11879895...
DONE WITH CATEGORY n11879895: rapeseed

Processing 30 images in n12267677...
DONE WITH CATEGORY n12267677: acorn

Processing 30 images in n12768682...
DONE WITH CATEGORY n12768682: buckeye

Processing 30 images in n07753275...
DONE WITH CATEGORY n07753275: pineapple

Processing 30 images in n12998815...
DONE WITH CATEGORY n12998815: agaric

Processing 30 images in n07716358...
DONE WITH CATEGORY n07716358: zucchini

Processing 30 images in n07749582...
DONE WITH CATEGORY n07749582: lemon

Processing 30 images in n13037406...
DONE WITH CATEGORY n13037406: gyromitra


In [None]:
all_activations_maxp_fV

{'n07718747': {'features.4': array([1.7323625, 1.0153024, 1.2925501, ..., 1.2983474, 1.2710421,
         3.3819418], dtype=float32),
  'features.9': array([ 2.0893612,  2.0727189,  1.2941399, ..., 15.515104 , 16.174261 ,
         16.102457 ], dtype=float32),
  'features.16': array([2.4222813, 1.7433017, 3.2646813, ..., 1.3502731, 4.317965 ,
         5.72149  ], dtype=float32),
  'features.23': array([0.79174745, 1.398755  , 1.2609072 , ..., 1.663609  , 0.598706  ,
         0.23935817], dtype=float32),
  'features.30': array([0.26474065, 0.22063491, 0.10483082, ..., 3.505839  , 2.3923557 ,
         1.3788087 ], dtype=float32)},
 'n07715103': {'features.4': array([1.0481695, 0.6787197, 0.6494938, ..., 1.7293062, 1.4873662,
         3.4572613], dtype=float32),
  'features.9': array([ 1.1092106,  1.5974904,  2.0876713, ..., 16.826818 , 16.359434 ,
         16.13209  ], dtype=float32),
  'features.16': array([2.2763407, 3.14437  , 2.6352062, ..., 1.4575343, 2.049074 ,
         5.066456 ], d

# Store Activations

In [None]:
# import pickle 
# path = "/content/gdrive/MyDrive/CCM1016_Project/"
# with open(path + 'Animal_relu.pkl', 'wb') as f:
#     pickle.dump(all_activations_relu_Ani, f)
# with open(path + 'Animal_maxp.pkl', 'wb') as f:
#     pickle.dump(all_activations_maxp_Ani, f)
# with open(path + 'Vehicle_relu.pkl', 'wb') as f:
#     pickle.dump(all_activations_relu_Veh, f)
# with open(path + 'Vehicle_maxp.pkl', 'wb') as f:
#     pickle.dump(all_activations_maxp_Veh, f)
# with open(path + 'FV_relu.pkl', 'wb') as f:
#     pickle.dump(all_activations_relu_fV, f)
# with open(path + 'FV_maxp.pkl', 'wb') as f:
#     pickle.dump(all_activations_maxp_fV, f)

In [None]:
import pickle 
path = "/content/gdrive/MyDrive/CCM1016_Project/"
# Bundle the six activation dictionaries (ReLU and max-pool, one pair per domain)
# under descriptive keys and write them to a single pickle file.
outputs = {
    "Animal_relu": all_activations_relu_Ani,
    "Animal_maxp": all_activations_maxp_Ani,
    "Vehicle_relu": all_activations_relu_Veh,
    "Vehicle_maxp": all_activations_maxp_Veh,
    "FV_relu": all_activations_relu_fV,
    "FV_maxp": all_activations_maxp_fV,
}
with open(path + 'output.pkl', 'wb') as f:
    pickle.dump(outputs, f)

# Obtain normalized image embeddings based on `domain` and `layer`

## import stored activations

In [None]:
import pickle 
path = "/content/gdrive/MyDrive/CCM1016_Project/"
# Read back the bundle of activation dictionaries.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open(path + 'output.pkl','rb') as f:
    outputs = pickle.load(f)

# Unpack one ReLU / max-pool pair per domain.
Animal_relu, Animal_maxp = outputs["Animal_relu"], outputs["Animal_maxp"]
Vehicle_relu, Vehicle_maxp = outputs["Vehicle_relu"], outputs["Vehicle_maxp"]
FV_relu, FV_maxp = outputs["FV_relu"], outputs["FV_maxp"]

In [None]:
# Animal_maxp['n02137549']['features.4']

array([0.9016119 , 0.88740176, 0.7698135 , ..., 2.2063596 , 1.6461315 ,
       2.830195  ], dtype=float32)

## Normalize the feature sets 

In [None]:
'''
sample 
b = np.array([[ 0.3148,  0.0478,  0.6243,  0.4608],
              [ 0.7149,  0.0775,  0.6072,  0.9656],
              [ 0.6341,  0.1403,  0.9759,  0.4064],
              [ 0.5918,  0.6948,  0.904 ,  0.3721],
              [ 0.0921,  0.2481,  0.1188,  0.1366]])
stats.zscore(b, axis=1, ddof=1)
'''

In [None]:
# get normalized image embeddings based on the domain and layer specified
# input
# domain: Animal_relu, layer: classifier.4
# output: dictionary: (key = category name (e.g. rabbit), value = normalized image embedding)
def getNormalizedEmbed(domain, layer):
    """Z-score the stored activation of one layer for every category of a domain.

    Args:
        domain: dict keyed by category index whose values are dicts keyed by layer name.
        layer: name of the layer whose activation is normalised, e.g. 'classifier.4'.

    Returns:
        dict mapping the category label (looked up in ``idx2label``) to the result of
        ``stats.zscore`` applied to that category's activation for ``layer``.
    """
    return {
        idx2label[category]: stats.zscore(per_layer[layer])
        for category, per_layer in domain.items()
    }

## ReLU & maxPooling layer names:
## ReLU
### ['features.1', 'features.3', 'features.6', 'features.8', 'features.11', 'features.13', 'features.15', 'features.18', 'features.20', 'features.22', 'features.25', 'features.27', 'features.29', 'classifier.1', 'classifier.4']

## maxPooling
### ['features.4', 'features.9', 'features.16', 'features.23', 'features.30']

## layer:classifier.4 for all domains

In [None]:
ani_fc2 = getNormalizedEmbed(Animal_relu, 'classifier.4')
veh_fc2 = getNormalizedEmbed(Vehicle_relu, 'classifier.4')
fv_fc2 = getNormalizedEmbed(FV_relu, 'classifier.4')

In [None]:
for name in ani_fc2:
    print(ani_fc2[name].shape)

In [None]:
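############ scratch / unused code below: not part of the pipeline; several cells reference names that are not defined above and fail on a fresh kernel ##########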

In [None]:
#### run permutation tests ###
# random alignment vs. correct alignment
# reference code:
# https://osf.io/95gyc

In [None]:
import math
import itertools
import copy
from scipy.stats import spearmanr
def symmetric_matrix_indexing(m, perm_idx):
    """Apply the same index to the rows and then to the columns of a matrix.

    With a permutation this swaps rows and columns symmetrically; with a subset
    of indices it subsamples the matrix. The input matrix is left untouched.
    """
    duplicate = copy.copy(m)
    row_selected = duplicate[perm_idx, :]
    return row_selected[:, perm_idx]

def alignment_score_multi(sim_mat_list, alignment_combos):
    """Return the equally weighted mean alignment score over pairs of matrices.

    Each element of ``alignment_combos`` holds two positions into ``sim_mat_list``;
    the pair is scored with ``alignment_score`` and every pair contributes with
    weight ``1 / len(alignment_combos)``.
    """
    weight = 1 / len(alignment_combos)
    return sum(
        weight * alignment_score(sim_mat_list[pair[0]], sim_mat_list[pair[1]])
        for pair in alignment_combos
    )
def alignment_score(a, b, method='spearman'):
    """Return the alignment score between two similarity matrices.

    Assumes that matrix ``a`` is the smaller matrix and crops matrix ``b`` to
    be the same shape. Only the entries strictly above the diagonal are compared.

    Args:
        a: square similarity matrix.
        b: square similarity matrix with at least as many rows/columns as ``a``.
        method: scoring method; only ``'spearman'`` is implemented.

    Returns:
        The Spearman correlation coefficient between the upper-triangular
        entries of ``a`` and of the cropped ``b``.

    Raises:
        ValueError: if ``method`` is not ``'spearman'``.
    """
    # Reject unsupported methods up front, before doing any indexing work.
    if method != 'spearman':
        raise ValueError(
            "The requested method '{0}' is not implemented.".format(method)
        )

    n_row = a.shape[0]
    b_cropped = b[0:n_row, 0:n_row]
    # k=1 excludes the diagonal (self-similarities).
    idx_upper = np.triu_indices(n_row, 1)

    # Use a distinct local name so the function itself is not shadowed.
    rho, _ = spearmanr(a[idx_upper], b_cropped[idx_upper])
    return rho

def permutation_analysis_random(sim_mat_list, max_perm=10000, n_known=0, seed=None):
    """Perform random permutation analysis.

    In order to evaluate our ability to correctly align matrices, we
    first compute the correlation of the correct ordering and then
    compute the correlations for the sampled permutations. What we
    would like to see is that the correlation for the correct ordering
    is higher than any of the permuted alignments.

    Args:
        sim_mat_list: list of at least two square similarity matrices. The first
            matrix is never permuted; every other matrix gets its own set of
            sampled permutations.
        max_perm: upper bound on the number of sampled permutations.
        n_known: number of trailing items whose position is kept fixed in every
            permutation.
        seed: optional seed. When given, permutations are drawn from
            ``np.random.default_rng(seed)`` so the analysis is repeatable; when
            ``None`` the global NumPy generator is used.

    Returns:
        Tuple ``(rho_array, perm_list_all)``. ``rho_array`` has ``n_perm + 1``
        entries: index 0 holds the score of the correct ordering and the rest hold
        the scores of the permuted orderings. ``perm_list_all`` holds one integer
        array of shape ``(n_perm + 1, n_item)`` per permuted matrix; row 0 is the
        identity ordering.

    Raises:
        ValueError: if fewer than two matrices are given or ``n_known`` is not
            between 0 and the number of items.
    """
    n_sim_mat = len(sim_mat_list)
    if n_sim_mat < 2:
        raise ValueError(
            "sim_mat_list must contain at least two similarity matrices; got {0}.".format(n_sim_mat)
        )
    n_item = sim_mat_list[1].shape[0]
    if not 0 <= n_known <= n_item:
        raise ValueError(
            "n_known must be between 0 and the number of items ({0}); got {1}.".format(n_item, n_known)
        )
    n_free = n_item - n_known
    # The factorial is an arbitrarily large Python int; compare it with the built-in
    # min instead of routing it through NumPy's fixed-width integer types.
    n_perm = min(max_perm, math.factorial(n_free))
    alignment_combos = list(itertools.combinations(np.arange(n_sim_mat, dtype=int), 2))

    permute = np.random.permutation if seed is None else np.random.default_rng(seed).permutation

    # Sample from possible permutations (independently, so repeats can occur).
    perm_list_all = []
    fixed_idx = np.arange(n_free, n_item)
    for _ in range(n_sim_mat - 1):
        perm_list = np.zeros([n_perm + 1, n_item], dtype=int)
        perm_list[0, :] = np.arange(n_item)
        for i_perm in range(n_perm):
            perm_list[i_perm + 1, 0:n_free] = permute(n_free)
            perm_list[i_perm + 1, n_free:] = fixed_idx
        perm_list_all.append(perm_list)

    # We store the correlation with correct ordering at the first idx, and the
    # correlations for the swapped ordering in the remaining indices.
    rho_array = np.zeros([n_perm + 1])
    # Correct alignment.
    rho_array[0] = alignment_score_multi(sim_mat_list, alignment_combos)
    # Permuted alignments.
    for perm_idx in range(n_perm):
        # Start with the unpermuted first matrix, then add each permuted matrix.
        sim_mat_perm_list = [sim_mat_list[0]]
        for sim_mat_idx in range(n_sim_mat - 1):
            sim_mat_perm_list.append(
                symmetric_matrix_indexing(
                    sim_mat_list[sim_mat_idx + 1],
                    perm_list_all[sim_mat_idx][perm_idx + 1, :]
                )
            )
        # Compute score
        rho_array[perm_idx + 1] = alignment_score_multi(
            sim_mat_perm_list, alignment_combos
        )
    return rho_array, perm_list_all



In [None]:
sim_mat_list = []
max_perm = 10000
n_known = 0
rho_array, perm_list = permutation_analysis_random(
            sim_mat_list, max_perm=max_perm, n_known=n_known)
n_perm = len(rho_array) - 1
perm_percentile = np.sum(np.less(rho_array[1:], rho_array[0])) / n_perm

In [None]:
#### calculate similarity matrix

# using cosine similarity 
from sklearn.metrics.pairwise import cosine_similarity
sim_mat = cosine_similarity(embedMat)

In [None]:
allAct_relu_Ani, allAct_maxp_Ani = getActivation(animals20_cate, correctImIdx_Ani)

Processing 30 images in n02137549...
mongoose: 99.1%
mongoose: 98.6%
mongoose: 93.3%
mongoose: 75.4%
mongoose: 36.8%
mongoose: 81.1%
mongoose: 54.7%
mongoose: 94.6%
mongoose: 99.7%
mongoose: 99.9%
mongoose: 92.4%
mongoose: 100.0%
mongoose: 98.3%
mongoose: 100.0%
mongoose: 92.6%
mongoose: 91.3%
mongoose: 96.0%
mongoose: 99.9%
mongoose: 99.4%
mongoose: 100.0%
mongoose: 64.9%
mongoose: 95.6%
mongoose: 81.6%
mongoose: 99.9%
DONE WITH CATEGORY n02137549: mongoose

Processing 30 images in n02110063...
malamute: 89.5%
malamute: 47.1%
malamute: 59.3%
malamute: 96.8%
malamute: 72.4%
malamute: 94.7%
malamute: 53.9%
malamute: 76.7%
malamute: 86.5%
malamute: 51.2%
malamute: 86.3%
malamute: 98.5%
malamute: 91.6%
malamute: 82.6%
malamute: 46.2%
malamute: 72.7%
malamute: 90.2%
malamute: 79.4%
malamute: 66.0%
malamute: 50.5%
malamute: 86.2%
malamute: 73.3%
malamute: 73.4%
malamute: 34.6%
malamute: 77.8%
malamute: 58.4%
malamute: 77.9%
DONE WITH CATEGORY n02110063: malamute

Processing 30 images in n02

In [None]:
allAct_relu_Ani, allAct_maxp_Ani = all_activations_relu, all_activations_maxp

In [None]:
allAct_relu_Veh, allAct_maxp_Veh = all_activations_relu, all_activations_maxp

In [None]:
allAct_relu_fV, allAct_maxp_fV = all_activations_relu, all_activations_maxp

In [None]:
allAct_relu_Ani['n02137549']

{}

In [None]:
allAct_relu_Veh

{'n03447447': {},
 'n03344393': {},
 'n03662601': {},
 'n04273569': {},
 'n03100240': {},
 'n03218198': {},
 'n04483307': {},
 'n03670208': {},
 'n03954731': {},
 'n03930630': {},
 'n04347754': {},
 'n02930766': {},
 'n03868242': {},
 'n02951358': {},
 'n04252077': {},
 'n04389033': {},
 'n04509417': {},
 'n03594945': {},
 'n02692877': {},
 'n04335435': {}}

In [None]:
allAct_relu_Veh, allAct_maxp_Veh = getActivation(vehicles20_cate)

Processing 30 images in n03447447...
DONE WITH CATEGORY n03447447: gondola

Processing 30 images in n03344393...
DONE WITH CATEGORY n03344393: fireboat

Processing 30 images in n03662601...
DONE WITH CATEGORY n03662601: lifeboat

Processing 30 images in n04273569...
DONE WITH CATEGORY n04273569: speedboat

Processing 30 images in n03100240...
DONE WITH CATEGORY n03100240: convertible

Processing 30 images in n03218198...
DONE WITH CATEGORY n03218198: dogsled

Processing 30 images in n04483307...
DONE WITH CATEGORY n04483307: trimaran

Processing 30 images in n03670208...
DONE WITH CATEGORY n03670208: limousine

Processing 30 images in n03954731...
DONE WITH CATEGORY n03954731: plane

Processing 30 images in n03930630...
DONE WITH CATEGORY n03930630: pickup

Processing 30 images in n04347754...
DONE WITH CATEGORY n04347754: submarine

Processing 30 images in n02930766...
DONE WITH CATEGORY n02930766: cab

Processing 30 images in n03868242...
DONE WITH CATEGORY n03868242: oxcart

Process

In [None]:
allAct_relu_fV, allAct_maxp_fV = getActivation(fruitVege20_cate)

Processing 30 images in n07718747...
DONE WITH CATEGORY n07718747: artichoke

Processing 30 images in n07715103...
DONE WITH CATEGORY n07715103: cauliflower

Processing 30 images in n07734744...
DONE WITH CATEGORY n07734744: mushroom

Processing 30 images in n07768694...
DONE WITH CATEGORY n07768694: pomegranate

Processing 30 images in n07714990...
DONE WITH CATEGORY n07714990: broccoli

Processing 30 images in n11879895...
DONE WITH CATEGORY n11879895: rapeseed

Processing 30 images in n12267677...
DONE WITH CATEGORY n12267677: acorn

Processing 30 images in n12768682...
DONE WITH CATEGORY n12768682: buckeye

Processing 30 images in n07753275...
DONE WITH CATEGORY n07753275: pineapple

Processing 30 images in n12998815...
DONE WITH CATEGORY n12998815: agaric

Processing 30 images in n07716358...
DONE WITH CATEGORY n07716358: zucchini

Processing 30 images in n07749582...
DONE WITH CATEGORY n07749582: lemon

Processing 30 images in n13037406...
DONE WITH CATEGORY n13037406: gyromitra


In [None]:
with open('my_dict.pkl', 'wb') as f:
    pickle.dump(my_dict, f)

In [None]:

import pickle 
dicts = [allAct_relu_Ani, allAct_maxp_Ani, allAct_relu_Veh, allAct_maxp_Veh, 
         allAct_relu_fV, allAct_maxp_fV]
for dictItem in dicts:
    print(dictItem)
    with open(f'{dictItem}.pkl', 'wb') as f:
        pickle.dump(dictItem, f)

{'n02137549': {}, 'n02110063': {}, 'n02342885': {}, 'n02028035': {}, 'n02091134': {}, 'n02398521': {}, 'n02483708': {}, 'n02445715': {}, 'n02391049': {}, 'n01824575': {}, 'n02422106': {}, 'n01882714': {}, 'n01872401': {}, 'n02490219': {}, 'n01770393': {}, 'n02112137': {}, 'n02006656': {}, 'n02013706': {}, 'n02500267': {}, 'n01873310': {}}


OSError: ignored

In [None]:
all_activations_relu['n02137549']['features.1'].shape

(3211264,)

In [None]:
print(allAct_relu_Ani.keys())
print(f"relu_outputs['features.3'].shape: {allAct_relu_Ani['n02137549']['features.3'].shape}")
print(f"relu_outputs['classifier.4'].shape: {allAct_relu_Ani['n02137549']['classifier.4'].shape}")
print(f"maxp_outputs['features.9'].shape: {allAct_maxp_Ani['n02137549']['features.9'].shape}")

dict_keys(['n02137549', 'n02110063', 'n02342885', 'n02028035', 'n02091134', 'n02398521', 'n02483708', 'n02445715', 'n02391049', 'n01824575', 'n02422106', 'n01882714', 'n01872401', 'n02490219', 'n01770393', 'n02112137', 'n02006656', 'n02013706', 'n02500267', 'n01873310'])


KeyError: ignored

In [None]:
allAct_relu_Ani

{'n02137549': {},
 'n02110063': {},
 'n02342885': {},
 'n02028035': {},
 'n02091134': {},
 'n02398521': {},
 'n02483708': {},
 'n02445715': {},
 'n02391049': {},
 'n01824575': {},
 'n02422106': {},
 'n01882714': {},
 'n01872401': {},
 'n02490219': {},
 'n01770393': {},
 'n02112137': {},
 'n02006656': {},
 'n02013706': {},
 'n02500267': {},
 'n01873310': {}}

In [None]:
##### test code

In [None]:
a = torch.tensor(np.array([[1.,2.,3.],[4.,5.,6.],[7.,8.,9.]]))

In [None]:
a.flatten()

tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.], dtype=torch.float64)

In [None]:
a.detach().mean(dim=0).flatten()

tensor([4., 5., 6.], dtype=torch.float64)

In [None]:
relu_outputs.keys()

dict_keys(['features.1', 'features.3', 'features.6', 'features.8', 'features.11', 'features.13', 'features.15', 'features.18', 'features.20', 'features.22', 'features.25', 'features.27', 'features.29', 'classifier.1', 'classifier.4'])