<a href="https://colab.research.google.com/github/Oreolorun/Visual-Similarity/blob/main/Visual_Similarity.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#  importing libraries 
import os
import numpy as np
import torch
from torchvision import transforms
from torchvision import models
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F
from tqdm import tqdm
from tqdm.notebook import tqdm as tqdm_notebook
import matplotlib.pyplot as plt
import cv2
import seaborn as sns
import pandas as pd
import pickle
import shutil

In [None]:
#  mounting Google Drive at /content/gdrive (only works inside Colab)
#  NOTE(review): the relative 'gdrive/My Drive/...' paths used further down
#  presumably rely on the working directory being /content -- confirm
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
#  convolutional network for 100px images (batchnorm after every layer)
class CarRecognition_bn100(nn.Module):
  """
  Seven 3x3 convolutions (no padding) with batchnorm, three 2x2 max-pools
  and three fully connected layers.

  The forward pass stops after the first fully connected layer and returns
  its 514 activations per image. fc2, fc3 and batchnorm_fc2 are created but
  never called in forward -- presumably they are kept so that a state dict
  saved from the full classifier still loads; confirm before removing them.
  """

  def __init__(self):
    super().__init__()
    #  convolutional layers
    self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
    self.conv2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3)
    self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
    self.conv4 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3)
    self.conv5 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)
    self.conv6 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3)
    self.conv7 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3)
    #  fully connected layers (8192 = 128 channels x 8 x 8 after pool7)
    self.fc1 = nn.Linear(in_features=8192, out_features=514)
    self.fc2 = nn.Linear(in_features=514, out_features=128)
    self.fc3 = nn.Linear(in_features=128, out_features=4)
    #  pooling layers, named after the convolution they follow
    self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
    self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)
    self.pool7 = nn.MaxPool2d(kernel_size=2, stride=2)
    #  batchnorm layers
    self.batchnorm_conv1 = nn.BatchNorm2d(num_features=32)
    self.batchnorm_conv2 = nn.BatchNorm2d(num_features=32)
    self.batchnorm_conv3 = nn.BatchNorm2d(num_features=64)
    self.batchnorm_conv4 = nn.BatchNorm2d(num_features=64)
    self.batchnorm_conv5 = nn.BatchNorm2d(num_features=128)
    self.batchnorm_conv6 = nn.BatchNorm2d(num_features=128)
    self.batchnorm_conv7 = nn.BatchNorm2d(num_features=128)
    self.batchnorm_fc1 = nn.BatchNorm1d(num_features=514)
    self.batchnorm_fc2 = nn.BatchNorm1d(num_features=128)

  def forward(self, x):
    """
    Reshapes x to (-1, 3, 100, 100), casts it to float and returns the
    relu'd, batch-normalised fc1 activations with shape (batch, 514).
    """
    x = x.view(-1, 3, 100, 100).float()

    #  (convolution, batchnorm, optional pooling) applied in order;
    #  spatial size: 100 -> 98 -> 96 -> 48 -> 46 -> 44 -> 22 -> 20 -> 18 -> 16 -> 8
    stages = (
      (self.conv1, self.batchnorm_conv1, None),
      (self.conv2, self.batchnorm_conv2, self.pool2),
      (self.conv3, self.batchnorm_conv3, None),
      (self.conv4, self.batchnorm_conv4, self.pool4),
      (self.conv5, self.batchnorm_conv5, None),
      (self.conv6, self.batchnorm_conv6, None),
      (self.conv7, self.batchnorm_conv7, self.pool7),
    )
    for conv, norm, pool in stages:
      x = F.relu(norm(conv(x)))
      if pool is not None:
        x = pool(x)

    x = torch.flatten(x, 1)
    x = self.fc1(x)
    x = self.batchnorm_fc1(x)
    return F.relu(x)

In [None]:
#  loading model state
#  `device` has to exist before it is passed to map_location below; the
#  weights are read onto it and then copied into the model, which is not
#  moved anywhere in this cell
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CarRecognition_bn100()
model.load_state_dict(torch.load('gdrive/My Drive/Neural Networks/Model_States/CarType100_model_state_1e-2_9ep.pt', map_location=device))

In [None]:
def load_filenames(n_per_class=2000, size=(100, 100)):
  """
  This function loads a random sample of images from each class directory

  Parameters
  ----------
  n_per_class: maximum number of files drawn from each directory; a
    directory holding fewer files contributes all of them (default 2000)
  size: (width, height) passed to cv2.resize (default (100, 100))

  Returns
  -------
  a list of [image, filename] pairs; image is None for every file that
  could not be read or resized, so callers are expected to filter those out
  """
  class_dirs = {
          'sedan': 'gdrive/My Drive/Datasets/Car_Images/sedans',
          'coupe': 'gdrive/My Drive/Datasets/Car_Images/coupes',
          'suv': 'gdrive/My Drive/Datasets/Car_Images/suvs',
          'truck': 'gdrive/My Drive/Datasets/Car_Images/trucks'
          }

  all_files = []
  selected_files = []
  loaded_images = []

  print('deriving filenames')
  for folder in tqdm(class_dirs.values()):
    #  keeping the folder next to its listing so the path never has to be
    #  guessed from the filename later on
    all_files.append((folder, os.listdir(folder)))

  print('selecting random files')
  for folder, file_list in tqdm(all_files):
    np.random.shuffle(file_list)
    selected_files.extend((folder, f) for f in file_list[:n_per_class])

  print('loading images')
  for folder, f in tqdm(selected_files):
    path = os.path.join(folder, f)

    #  loading image (cv2.imread gives None for files it cannot decode)
    image = cv2.imread(path)
    if image is not None:
      try:
        image = cv2.resize(image, size)
      except cv2.error:
        #  marking the file as failed instead of keeping an unresized array
        image = None

    #  saving to list
    loaded_images.append([image, f])

  return loaded_images

In [None]:
#  loading the sampled images and dropping the ones that failed to load
files = [entry for entry in load_filenames() if entry[0] is not None]

In [None]:
#  caching the [image, filename] pairs on drive
with open('gdrive/My Drive/Datasets/similarity_images.pkl', 'wb') as handle:
  pickle.dump(files, handle)

In [None]:
#  restoring the cached [image, filename] pairs
#  (pickle executes code on load -- only open files you created yourself)
with open('gdrive/My Drive/Datasets/similarity_images.pkl', 'rb') as handle:
  files = pickle.load(handle)

In [None]:
#  keeping only the filename of every [image, filename] pair
filenames = [name for _, name in files]

In [None]:
def derive_filepaths(file_list):
  """
  This function derives the filepath of the selected images

  The class folder is picked by looking for a class name ('sedan', 'coupe',
  'suv', 'truck', checked in that order) inside each filename.

  Parameters
  ----------
  file_list: iterable of image filenames

  Returns
  -------
  a list of filepaths in the same order as file_list

  Raises
  ------
  ValueError: if a filename contains none of the class names
  """
  class_dirs = {
          'sedan': 'gdrive/My Drive/Datasets/Car_Images/sedans',
          'coupe': 'gdrive/My Drive/Datasets/Car_Images/coupes',
          'suv': 'gdrive/My Drive/Datasets/Car_Images/suvs',
          'truck': 'gdrive/My Drive/Datasets/Car_Images/trucks'
          }

  paths = []

  for f in tqdm(file_list):
    #  deriving filepath: the first class name found in the filename wins
    folder = next((d for label, d in class_dirs.items() if label in f), None)
    if folder is None:
      #  failing loudly; such a file used to inherit the path of the
      #  previous file, which silently produced a wrong entry
      raise ValueError(f'cannot derive a class directory for {f!r}')

    paths.append(os.path.join(folder, f))
  return paths

In [None]:
#  deriving filepaths (class directory joined with each selected filename,
#  in the same order as `filenames`)
filepaths = derive_filepaths(filenames)

In [None]:
#  copying images
destination = 'gdrive/My Drive/Datasets/Car_Images/similarity_images'

#  the folder must exist first: otherwise shutil.copy treats `destination`
#  as a file name and every image overwrites that one file
os.makedirs(destination, exist_ok=True)

for path in tqdm(filepaths):
  shutil.copy(path, destination)

#  number of files now present in the destination folder
len(os.listdir(destination))

In [None]:
#  scaling pixel values by 1/255 and converting every array to a tensor,
#  filenames are carried along unchanged
files = [[transforms.ToTensor()(img / 255), name] for img, name in files]

In [None]:
#  running every image through the model (eval mode, gradients disabled)
#  and replacing each tensor with the model output
model.eval()
with torch.no_grad():
  files = [[model(tensor), name] for tensor, name in tqdm(files)]

In [None]:
#  caching the [features, filename] pairs on drive
with open('gdrive/My Drive/Datasets/similarity_features.pkl', 'wb') as handle:
  pickle.dump(files, handle)

In [None]:
#  restoring the cached [features, filename] pairs
#  (pickle executes code on load -- only open files you created yourself)
with open('gdrive/My Drive/Datasets/similarity_features.pkl', 'rb') as handle:
  image_features = pickle.load(handle)

In [None]:
#  deriving similarity scores
#  scoring against `image_features` (the pairs reloaded from the features
#  pickle) rather than `files`, which only holds features when the
#  extraction cell ran in the same session
query_index = 4879  #  position of the query image within image_features
query_features = image_features[query_index][0]
similarity = [[F.cosine_similarity(query_features, feat).item(), name] for feat, name in tqdm(image_features)]

In [None]:
#  splitting the [score, filename] pairs into two parallel lists
scores = [score for score, _ in similarity]
f_names = [name for _, name in similarity]

#  series of scores indexed by filename, highest score first
sr = pd.Series(data=scores, index=f_names).sort_values(ascending=False)
sr.head(10)