# Libraries

In [1]:
import torch
import torchvision
import torch.nn as nn
from torchinfo import summary
from torchvision.io import read_image
from torchvision.transforms import transforms
import numpy as np
import cv2
import matplotlib.pyplot as plt
import CBComp as cb

# Default Device

In [2]:
# Choose the compute device once: CUDA when PyTorch can see a GPU, otherwise the CPU.
default_device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Report the choice: the GPU's name when running on CUDA, the literal 'cpu' otherwise.
if default_device == 'cuda':
    print(torch.cuda.get_device_name())
else:
    print('cpu')

NVIDIA GeForce GTX 1660 Ti


# Model - Feature Extractor

In [3]:
class FeatureExtractor(nn.Module):
    """VGG16 cut off before its final classification layer, used to embed images.

    The network is the VGG16 convolutional stack (``VGG.features``) followed by
    the first five modules of the VGG16 classifier (``VGG.classifier[:5]``).
    VGG16's own adaptive average pooling layer is not part of this module, so
    the flattened convolutional output must already match the input size of the
    first linear layer.

    Args:
        weights: torchvision weights passed to ``torchvision.models.vgg16``.
            Defaults to the ImageNet ``IMAGENET1K_V1`` weights.
    """

    def __init__(self,  weights=torchvision.models.VGG16_Weights.IMAGENET1K_V1):
        super().__init__()
        VGG = torchvision.models.vgg16(weights=weights)
        self.feature = VGG.features
        self.classifier = VGG.classifier[:5]

    def forward(self, x):
        """Run the convolutional stack, flatten per sample, then apply the truncated classifier."""
        x = self.feature(x)
        x = torch.flatten(x, 1)  # keep the batch dimension, flatten everything else
        x = self.classifier(x)
        return x

    def extract(self, x):
        """Return L2-normalized feature vectors for a batch as a NumPy array.

        The forward pass runs in evaluation mode with gradients disabled, so
        stochastic layers such as Dropout do not perturb the features and the
        same input always yields the same output. The module's previous
        train/eval mode is restored afterwards.

        Args:
            x: Input batch tensor accepted by ``forward``.

        Returns:
            A 2-D ``numpy.ndarray`` with one row per sample; every non-zero row
            has unit Euclidean norm. An all-zero row is returned unchanged
            instead of becoming NaN.
        """
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                feats = self.forward(x)
        finally:
            # Leave the module in whatever mode the caller had set.
            self.train(was_training)

        feats = feats.cpu().detach().numpy()
        norms = np.linalg.norm(feats, axis=1, keepdims=True)
        norms[norms == 0] = 1  # avoid 0/0 for an all-zero feature vector
        return feats / norms

In [4]:
# The extractor is only used for inference, so switch it to evaluation mode:
# a freshly built nn.Module starts in training mode, where Dropout layers
# randomly zero activations and make the features differ between calls.
feature_extractor = FeatureExtractor().to(default_device).eval()

In [5]:
# Print a layer-by-layer summary (output shapes and parameter counts) of the
# extractor for a single 3x224x224 input.
summary(feature_extractor, input_size=(1, 3, 224, 224))

Layer (type:depth-idx)                   Output Shape              Param #
FeatureExtractor                         [1, 4096]                 --
├─Sequential: 1-1                        [1, 512, 7, 7]            --
│    └─Conv2d: 2-1                       [1, 64, 224, 224]         1,792
│    └─ReLU: 2-2                         [1, 64, 224, 224]         --
│    └─Conv2d: 2-3                       [1, 64, 224, 224]         36,928
│    └─ReLU: 2-4                         [1, 64, 224, 224]         --
│    └─MaxPool2d: 2-5                    [1, 64, 112, 112]         --
│    └─Conv2d: 2-6                       [1, 128, 112, 112]        73,856
│    └─ReLU: 2-7                         [1, 128, 112, 112]        --
│    └─Conv2d: 2-8                       [1, 128, 112, 112]        147,584
│    └─ReLU: 2-9                         [1, 128, 112, 112]        --
│    └─MaxPool2d: 2-10                   [1, 128, 56, 56]          --
│    └─Conv2d: 2-11                      [1, 256, 56, 56]          29

# Data Processing

In [6]:
# Preprocessing applied to every image before it is fed to the extractor:
#   1. resize to the 224x224 resolution the network is summarized with,
#   2. convert uint8 pixels to float in [0, 1],
#   3. normalize each channel with the ImageNet mean / std. The pretrained
#      IMAGENET1K_V1 VGG16 weights were trained on inputs normalized this way;
#      a generic 0.5 / 0.5 normalization feeds them a shifted input distribution.
transform = transforms.Compose([transforms.Resize((224,224), antialias=True),
                                transforms.ConvertImageDtype(torch.float),
                                transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                                     std=(0.229, 0.224, 0.225))])

In [22]:
# Decode every file as 3-channel RGB. Without an explicit mode, read_image keeps
# the channel count stored in the file, so a grayscale or RGBA image would not
# fit the 3-channel model input.
# NOTE(review): these are absolute paths on one machine; consider a configurable data directory.
lion1 = transform(read_image("C:/Users/Alireza/Desktop/1.jpg", mode=torchvision.io.ImageReadMode.RGB))
lion2 = transform(read_image("C:/Users/Alireza/Desktop/2.jpg", mode=torchvision.io.ImageReadMode.RGB))
fox = transform(read_image("C:/Users/Alireza/Desktop/3.jpeg", mode=torchvision.io.ImageReadMode.RGB))

# Query batch: the single query image with a leading batch dimension added.
q_batch = lion1.unsqueeze(0)
# Database batch: the candidate images stacked along a new batch dimension.
d_batch = torch.stack((lion2,fox))
with torch.no_grad():
    f_q = feature_extractor.extract(q_batch.to(default_device))
    f_d = feature_extractor.extract(d_batch.to(default_device))

# Distance

In [7]:
def distance(f_q, f_d):
    """Find the database feature closest to the query feature.

    Args:
        f_q: Query feature array; it is subtracted from each database feature
            with NumPy broadcasting.
        f_d: Iterable of database feature vectors (for example a 2-D array
            with one feature vector per row).

    Returns:
        A tuple ``(indx, dist)`` where ``indx`` is the position in ``f_d`` of
        the nearest feature and ``dist`` is the Euclidean norm of its
        difference from ``f_q``.

    Raises:
        ValueError: If ``f_d`` contains no feature vectors.
    """
    dist = [np.linalg.norm(f_q - feature) for feature in f_d]
    if not dist:
        raise ValueError("f_d must contain at least one feature vector")
    # Locate the minimum once, after all distances are known.
    indx = np.argmin(np.array(dist))
    return indx, dist[indx]

In [None]:
# Show which database feature is nearest to the query feature, and how far away it is.
distance(f_q, f_d)

# Video 

In [4]:
# Open the sample footage. cv2.VideoCapture does not raise when the file is
# missing or cannot be decoded, so check explicitly and fail here rather than
# later with a confusing empty frame.
# NOTE(review): this is an absolute path on one machine; consider a configurable data directory.
cap = cv2.VideoCapture('C:/Users/Alireza/Desktop/Vivint_Outdoor_Camera_Pro_Sample_Footage_Driveway_Dayti_yWFv9eA72LA.mp4')
if not cap.isOpened():
    raise IOError("Could not open the video file")

In [90]:
# Grab the next frame. `ret` is False (and `frame` is None) when the stream has
# ended or decoding failed, so stop here instead of passing None downstream.
ret ,frame = cap.read()
if not ret:
    raise RuntimeError("Could not read a frame from the video")

In [92]:
# Show the current frame in a window titled "Select Area" and let the user
# pick a region of interest interactively (crosshair shown, selection not
# anchored at the center).
bbox = cv2.selectROI("Select Area", frame, fromCenter=False, showCrosshair=True)
# Close the selection window once the ROI has been chosen.
cv2.destroyAllWindows()

In [93]:
# Display the selected ROI — presumably (x, y, width, height) in pixels; confirm against OpenCV's selectROI docs.
bbox

(515, 121, 117, 199)

In [94]:
# Display the dimensions of the frame that was read from the video.
frame.shape

(720, 1280, 3)

In [2]:
# Random 244 x 244 x 3 tensor used to try out axis reordering.
a = torch.rand(244, 244, 3)

In [4]:
# Display the shape of the test tensor before reordering its axes.
a.shape

torch.Size([244, 244, 3])

In [6]:
# Move the last axis to the front while keeping the other two in order
# (H, W, C) -> (C, H, W). permute(2, 1, 0) would also swap the two spatial
# axes, i.e. transpose the image; the square test tensor hides that mistake
# because both orderings have the same shape.
# NOTE(review): assumes the goal is an image-style HWC -> CHW conversion — confirm.
a.permute(2, 0, 1).shape

torch.Size([3, 244, 244])

# Rest: AlexNet exploration (scratch)

In [1]:
import torchvision
import torch
from torchinfo import summary

In [6]:
# Build AlexNet with the IMAGENET1K_V1 weights; the weights file is downloaded
# to the local torch hub cache when it is not already there.
Alex = torchvision.models.alexnet(weights=torchvision.models.AlexNet_Weights.IMAGENET1K_V1)

Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to C:\Users\Alireza/.cache\torch\hub\checkpoints\alexnet-owt-7be5be79.pth
100.0%


In [11]:
# Inspect the first six classifier modules, i.e. everything up to and
# including the ReLU that follows the second Linear layer.
Alex.classifier[:6]

Sequential(
  (0): Dropout(p=0.5, inplace=False)
  (1): Linear(in_features=9216, out_features=4096, bias=True)
  (2): ReLU(inplace=True)
  (3): Dropout(p=0.5, inplace=False)
  (4): Linear(in_features=4096, out_features=4096, bias=True)
  (5): ReLU(inplace=True)
)

In [9]:
# Summarize AlexNet's layers and parameter counts (no input size is given,
# so the summary lists parameters only, without output shapes).
summary(Alex)

Layer (type:depth-idx)                   Param #
AlexNet                                  --
├─Sequential: 1-1                        --
│    └─Conv2d: 2-1                       23,296
│    └─ReLU: 2-2                         --
│    └─MaxPool2d: 2-3                    --
│    └─Conv2d: 2-4                       307,392
│    └─ReLU: 2-5                         --
│    └─MaxPool2d: 2-6                    --
│    └─Conv2d: 2-7                       663,936
│    └─ReLU: 2-8                         --
│    └─Conv2d: 2-9                       884,992
│    └─ReLU: 2-10                        --
│    └─Conv2d: 2-11                      590,080
│    └─ReLU: 2-12                        --
│    └─MaxPool2d: 2-13                   --
├─AdaptiveAvgPool2d: 1-2                 --
├─Sequential: 1-3                        --
│    └─Dropout: 2-14                     --
│    └─Linear: 2-15                      37,752,832
│    └─ReLU: 2-16                        --
│    └─Dropout: 2-17                   