# Dataset

100 randomly selected images each of cats and dogs (200 images in total), taken from the Kaggle **Dogs vs. Cats** dataset.
Link: https://www.kaggle.com/c/dogs-vs-cats/data

In [10]:
# Mount Google Drive so that the dataset folder becomes reachable under
# /content/drive (only works inside a Google Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

# Make the dataset folder the working directory; the image-listing cell further
# down calls os.listdir() with no argument and therefore reads this directory.
%cd /content/drive/MyDrive/Data/Cat_Vs_Dog

/content/drive/MyDrive/Data/Cat_Vs_Dog


In [15]:
from tensorflow.keras.models import Model
from tensorflow.keras import applications
from tensorflow.keras.layers import Flatten

# Convolutional base of VGG16 with ImageNet weights; the dense classifier head
# is dropped (include_top=False) because only the image features are needed.
VGG_16 = applications.VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)

# Lookup table: layer name -> layer object.
layer_dict = {layer.name: layer for layer in VGG_16.layers}

# Take the output of the last pooling block and flatten it so that every image
# is described by a single 1-D feature vector.
OutPut = Flatten()(layer_dict['block5_pool'].output)

# Feature extractor: image in, flattened block5_pool activations out.
custom_model = Model(inputs=VGG_16.input, outputs=OutPut)

# The network is used purely for inference, so freeze every layer.
for layer in custom_model.layers:
    layer.trainable = False

custom_model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)    

In [19]:
import os
import random
import numpy as np
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing.image import load_img,img_to_array

pwd = '/content/drive/MyDrive/Data/Cat_Vs_Dog'

# Only files with these extensions are treated as images. Anything else living
# in the folder (notebooks, checkpoint folders, ...) is skipped instead of
# being handed to load_img, which would fail on it.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')

# List the dataset folder explicitly instead of "whatever the current working
# directory is", so this cell no longer depends on an earlier %cd.
# os.listdir returns entries in arbitrary order, so sort first: together with
# the fixed seed below this makes the row order of the feature matrix
# reproducible across runs.
list_imgs = sorted(
    name for name in os.listdir(pwd)
    if name.lower().endswith(IMAGE_EXTENSIONS)
    and os.path.isfile(os.path.join(pwd, name))
)
if not list_imgs:
    raise FileNotFoundError(f'No image files found in {pwd}')

# Private, seeded RNG: deterministic shuffle without touching the global
# `random` state used elsewhere.
random.Random(0).shuffle(list_imgs)

# One row per image. Both dimensions are derived (number of images found, and
# the width of the model's flattened output) rather than hard-coded, so a
# folder with more or fewer files can neither overflow the matrix nor leave
# all-zero rows behind.
VGG_Feature_Matrix = np.zeros((len(list_imgs), custom_model.output_shape[-1]))

for i, img_p in enumerate(list_imgs):
  img_path = os.path.join(pwd, img_p)

  # Load at the resolution the network was built for and add a batch axis.
  image = load_img(img_path, target_size=(224, 224))
  img_data = img_to_array(image)
  img_data = np.expand_dims(img_data, axis=0)
  # Apply the VGG16-specific input preprocessing shipped with Keras.
  img_data = preprocess_input(img_data)

  # predict returns shape (1, n_features); squeeze drops the batch axis.
  feature = custom_model.predict(img_data)
  feature = np.squeeze(feature)

  VGG_Feature_Matrix[i] = feature

In [20]:
import torch
import torch.nn.functional as F

def cosDistance(features):
    """Return the pairwise cosine-distance matrix of the rows of ``features``.

    Args:
        features: 2-D tensor of shape (N, M) holding N feature vectors of
            dimension M, one per row.

    Returns:
        An (N, N) tensor whose entry (i, j) is ``1 - cos(features[i], features[j])``.
    """
    # Scale every row to unit L2 norm so a plain dot product is the cosine.
    unit_rows = F.normalize(features, dim=1)
    # Gram matrix of the unit rows = cosine similarity; distance = 1 - similarity.
    return 1.0 - unit_rows @ unit_rows.T

In [26]:
# Pairwise cosine *distances* between all image feature vectors.
# NOTE(review): despite its name, cos_sim_mat holds distances (1 - similarity),
# because that is what cosDistance returns. The comparison below therefore
# marks pairs that are MORE than `threshold` apart, i.e. dissimilar images, and
# the diagonal is 0. If the intent was to link similar images, the comparison
# should be `<` (or be applied to the similarity) - confirm before relying on it.
feature_tensor = torch.from_numpy(VGG_Feature_Matrix)
cos_sim_mat = cosDistance(feature_tensor)

threshold = 0.5

# Binary (int32) adjacency matrix: 1 where the entry exceeds the threshold.
adj_matrix = (cos_sim_mat > threshold).to(torch.int32)

In [27]:
# Show the thresholded adjacency matrix (torch abbreviates large tensors).
adj_matrix

tensor([[0, 1, 1,  ..., 1, 1, 1],
        [1, 0, 1,  ..., 1, 1, 1],
        [1, 1, 0,  ..., 1, 1, 1],
        ...,
        [1, 1, 1,  ..., 0, 1, 1],
        [1, 1, 1,  ..., 1, 0, 1],
        [1, 1, 1,  ..., 1, 1, 0]], dtype=torch.int32)