In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [1]:
import numpy as np
import tensorflow as tf
import tensorflow.keras as K 
from tensorflow.keras.models import Model
from tensorflow.keras import applications
from tensorflow.keras.layers import GlobalAveragePooling2D

In [2]:
(x_train, y_train), (x_test, y_test) = K.datasets.cifar10.load_data()

In [4]:
VGG_16 = applications.VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Creating dictionary that maps layer names to the layers
layer_dict = dict([(layer.name, layer) for layer in VGG_16.layers])

OutPut = layer_dict['block5_pool'].output 
OutPut = GlobalAveragePooling2D()(OutPut)

VGG_Base = Model(inputs=VGG_16.input,outputs=OutPut)

# Make sure that the pre-trained bottom layers are not trainable
for layer in VGG_Base.layers:
    layer.trainable = False

VGG_Base.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)    

In [5]:
from tensorflow.keras.layers import UpSampling2D

VGG_Feature_Matrix = np.zeros((x_train.shape[0],512))
count = 0

for idx,img in enumerate(x_train):
  count+=1
  if(count%1000 == 0):
    print(count," Images Done")
  img = tf.expand_dims(img, axis=0)
  up_sampled_image = UpSampling2D(size=(7, 7))(img)
  
  feature = VGG_Base.predict(up_sampled_image)
  feature = np.squeeze(feature)

  VGG_Feature_Matrix[idx] = feature

1000  Images Done
2000  Images Done
3000  Images Done
4000  Images Done
5000  Images Done
6000  Images Done
7000  Images Done
8000  Images Done
9000  Images Done
10000  Images Done
11000  Images Done
12000  Images Done
13000  Images Done
14000  Images Done
15000  Images Done
16000  Images Done
17000  Images Done
18000  Images Done
19000  Images Done
20000  Images Done
21000  Images Done
22000  Images Done
23000  Images Done
24000  Images Done
25000  Images Done
26000  Images Done
27000  Images Done
28000  Images Done
29000  Images Done
30000  Images Done
31000  Images Done
32000  Images Done
33000  Images Done
34000  Images Done
35000  Images Done
36000  Images Done
37000  Images Done
38000  Images Done
39000  Images Done
40000  Images Done
41000  Images Done
42000  Images Done
43000  Images Done
44000  Images Done
45000  Images Done
46000  Images Done
47000  Images Done
48000  Images Done
49000  Images Done
50000  Images Done


In [6]:
# np.save("/content/drive/MyDrive/Models/CIFAR_10_VGG_Feature_Matrix.npy",VGG_Feature_Matrix)

In [3]:
VGG_Feature_Matrix = np.load("/content/drive/MyDrive/Models/CIFAR_10_VGG_Feature_Matrix.npy")

In [4]:
# Larger Subsets
First_25k_Images = VGG_Feature_Matrix[0:25000,:]
# Second_25_Images = VGG_Feature_Matrix[25000:,:]

# Smaller Subsets for Experimentation
# First_10k_Images = VGG_Feature_Matrix[0:10000,:]

In [5]:
import torch
import torch.nn.functional as F

def cosDistance(features):
    # features: N*M matrix. N features, each features is M-dimension.
    features = F.normalize(features, dim=1) # each feature's l2-norm should be 1 
    similarity_matrix = torch.matmul(features, features.T)
    distance_matrix = 1.0 - similarity_matrix
    return distance_matrix

In [6]:
cos_sim_mat = cosDistance(torch.from_numpy(First_25k_Images))
# 0.0825
threshold = 0.5
adj_matrix = torch.lt(cos_sim_mat, threshold).int()

In [7]:
unique, counts = np.unique(y_train[0:25000], return_counts=True)
class_to_counts = dict(zip(unique, counts))

print(class_to_counts)

{0: 2491, 1: 2518, 2: 2515, 3: 2522, 4: 2490, 5: 2411, 6: 2537, 7: 2530, 8: 2507, 9: 2479}


In [8]:
idx_to_class = {}

for idx,cls in enumerate(y_train[0:10000].tolist()):
  idx_to_class[idx] = cls[0]

In [9]:
class_to_idx = {}

for idx,cls in enumerate(y_train[0:10000].tolist()):
  if(cls[0] not in class_to_idx.keys()):
    class_to_idx[cls[0]] = [idx]
  else:
    class_to_idx[cls[0]].append(idx)

In [10]:
result_dict = {}
for cls in class_to_idx.keys():
  class_adj_sum = 0
  for idx in class_to_idx[cls]:
    row_vals_sum = torch.sum(adj_matrix[idx])
    class_adj_sum += row_vals_sum
  generalization_factor = ((class_to_counts[cls]*class_to_counts[cls]) - class_adj_sum)/(class_to_counts[cls]*class_to_counts[cls])
  result_dict[cls] = generalization_factor

In [11]:
print("Threshold:",threshold,"\n")
result_dict

Threshold: 0.5 



{0: tensor(-2.6240),
 1: tensor(-2.7125),
 2: tensor(-2.8265),
 3: tensor(-2.8620),
 4: tensor(-2.8435),
 5: tensor(-2.9220),
 6: tensor(-2.7394),
 7: tensor(-2.8426),
 8: tensor(-2.8606),
 9: tensor(-2.9009)}