In [1]:
import cv2
from matplotlib import pyplot as plt
import torch
from torch import nn
from tqdm import tqdm


from models.yolo import Model
from utils.torch_utils import intersect_dicts

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import torch
import torch.nn as nn
import yaml
from torch import nn

from models.yolo import Model
from utils.torch_utils import intersect_dicts

yolo_features = {}
def get_features(name):
    def hook(model, input, output):
        yolo_features[name] = output.detach()
    return hook

class TrackingHead(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.conv1_3 = nn.Conv2d(in_channels=2048, out_channels=512, kernel_size=3)
        self.bn1 = nn.BatchNorm2d(512)
        self.conv2_3 = nn.Conv2d(in_channels=512, out_channels=64, kernel_size=3)
        self.bn2 = nn.BatchNorm2d(64)
        self.sigmoid = nn.Sigmoid()
        self.relu = nn.ReLU()
        self.flatten = nn.Flatten()
        self.maxpool = nn.MaxPool2d(2)
        self.linear1 = nn.Linear(64*4*8, 640*640*1)
        yolo_features = {}

    def forward(self, x):
        x = self.bn1(self.relu(self.conv1_3(x)))
        x = self.bn2(self.relu(self.conv2_3(x)))
        x = self.maxpool(x)
        x = self.flatten(x)
        x = self.sigmoid(self.linear1(x))
        return x

class YoloTracker(nn.Module):
    def __init__(self,
                cutoff_layer=50, 
                weights = 'yolov7.pt',
                device = 'cuda',
                cfg = './cfg/training/yolov7.yaml',
                hyp = './data/hyp.scratch.p5.yaml',
                nc = 80,
                window = 2,
                ) -> None:
        super().__init__()
        with open(hyp) as f:
            hyp = yaml.load(f, Loader=yaml.SafeLoader)  # load hyps
        
        tracking_classes = [0]
        pretrained = weights.endswith('.pt')
        if pretrained:
            ckpt = torch.load(weights, map_location=device)  # load checkpoint
            model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
            exclude = ['anchor'] if (cfg or hyp.get('anchors')) else []  # exclude keys
            state_dict = ckpt['model'].float().state_dict()  # to FP32
            state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude)  # intersect
            model.load_state_dict(state_dict, strict=False)  # load
        
        self.detection_branch = model
        self.yolo_features = {}
        self.detection_branch.model[cutoff_layer].register_forward_hook(get_features('features'))
        # self.detection_branch = self.detection_branch['model']
        self.track = TrackingHead().to(device)
        
    def forward(self, current_frame):
        track = True
        if 'features' in self.yolo_features.keys():
            last_feats = self.yolo_features['features']
            track = False
            trk_p = None
        yolo_p = self.detection_branch(current_frame)
        if track:
            trk_p = self.track(torch.cat(last_feats, self.yolo_features['features'], dim=1))
        return yolo_p, trk_p

In [2]:
model = YoloTracker()

TypeError: get_features() missing 1 required positional argument: 'name'

In [2]:
# ckpt = torch.load('./yolov7.pt', map_location='cuda')  # load checkpoint
# model = Model(ckpt['model'].yaml, ch=3, nc=80, anchors=3).to('cuda')  # create
# exclude = ['anchor']  # exclude keys
# state_dict = ckpt['model'].float().state_dict()  # to FP32
# state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude)  # intersect
# model.load_state_dict(state_dict, strict=False)  # load

In [12]:
import yaml
from models.yolo import Model
from utils.torch_utils import intersect_dicts

cutoff_layer=50
weights = 'yolov7.pt'
device = 'cuda'
cfg = './cfg/training/yolov7.yaml'
hyp = './data/hyp.scratch.p5.yaml'
nc = 80
window = 2
with open(hyp) as f:
    hyp = yaml.load(f, Loader=yaml.SafeLoader)  # load hyps

tracking_classes = [0]
pretrained = weights.endswith('.pt')
if pretrained:
    ckpt = torch.load(weights, map_location=device)  # load checkpoint
    model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
    exclude = ['anchor'] if (cfg or hyp.get('anchors')) else []  # exclude keys
    state_dict = ckpt['model'].float().state_dict()  # to FP32
    state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude)  # intersect
    model.load_state_dict(state_dict, strict=False)  # load

In [14]:
ckpt.keys()

dict_keys(['model', 'optimizer', 'training_results', 'epoch'])

In [15]:
ckpt = torch.load('yolov7.pt', map_location='cuda')  # load
model = ckpt['model'].float().fuse().eval()  # FP32 model
dummy = torch.rand([1, 3, 640, 640]).to('cuda')

Fusing layers... 
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block


In [16]:
with torch.no_grad():
    pred = model(dummy, augment=False)

In [17]:
len(pred)

2

In [12]:
print(pred[0].shape)
for i in pred[1]:
    print(i.shape)

torch.Size([1, 15120, 85])
torch.Size([1, 3, 48, 80, 85])
torch.Size([1, 3, 24, 40, 85])
torch.Size([1, 3, 12, 20, 85])


In [None]:
from utils.general import non_max_suppression
pred = non_max_suppression(pred, 0.25, 0.45, classes=opt.classes, agnostic=False)


In [5]:
features = {}
def get_features(name):
    def hook(model, input, output):
        features[name] = output.detach()
    return hook

In [6]:
model.model[50].register_forward_hook(get_features('backbone'))

<torch.utils.hooks.RemovableHandle at 0x2872c7cf250>

In [8]:
PREDS = []
FEATS = []
dummy = torch.rand([1, 3, 384, 640]).to('cuda')
# forward pass [with feature extraction]
preds = model(dummy)

# add feats and preds to lists
# PREDS.append(preds.detach().cpu().numpy())
FEATS.append(features['backbone'].cpu().numpy())


In [9]:
features['backbone'].shape

torch.Size([1, 1024, 12, 20])

In [14]:
freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))]  # parameter names to freeze (full or partial)
for k, v in model.named_parameters():
    v.requires_grad = True  # train all layers
    if any(x in k for x in freeze):
        print('freezing %s' % k)
        v.requires_grad = False

3

In [9]:
import torch
from models.linear_transformer import linear_base
lin_trans = linear_base()
# state_dict = torch.load('linear-base-checkpoint.pth')
# lin_trans.load_state_dict(state_dict=state_dict['model'])

<All keys matched successfully>

In [11]:
x = torch.randn(2, 3, 640, 640)
out = lin_trans(x)
print('-----')
print(f'num params: {sum(p.numel() for p in lin_trans.parameters())}')
print(out.shape)
loss = out.sum()
loss.backward()
print('Single iteration completed successfully')

RuntimeError: Given normalized_shape=[197], expected input with shape [*, 197], but got input of size[2, 768, 1601]

In [114]:
from PIL import Image
import os
import numpy as np

path = r'D:\datasets\MOT17\MOT17\test\MOT17-01-DPM\img1'
window = 10
file_list = os.listdir(path)
# file_list.reverse()
alpha_range = np.flip(np.arange(105, 256, step=(255-100)//window)[1:])
image = Image.open(os.path.join(path, file_list[0]))
image.putalpha(alpha_range[0])
for img, alpha in zip(reversed(os.listdir(path)[1:]), alpha_range[1:]):
    image_1 = Image.open(os.path.join(path, img))
    image_1.putalpha(alpha)
    image.paste(image_1, (0, 0), image_1)

In [117]:
img = [cv2.cvtColor(cv2.imread(os.path.join(path, file)), cv2.COLOR_BGR2RGB) for file in file_list[:10]]

In [129]:
alpha_range = np.arange(0.5, 1.01, step=0.5/window)[1:]
img_pile = [(i*alpha).astype(int) for i, alpha in  zip(img, alpha_range)]
# img1 = (img[0]*0.5).astype(int)
# img2 = img[-1]

In [136]:
(sum(img_pile)//len(img_pile)).shape

(1080, 1920, 3)

In [137]:
cv2.imwrite('ten-frame-avg.jpg', sum(img_pile)//len(img_pile))

True

In [121]:
img_avg = (img2+img1)//2
cv2.imwrite('test.jpg', img_avg)
cv2.imwrite('frame0.jpg', img[0])
cv2.imwrite('frame_last.jpg', img[-1])

True

In [None]:
import glob

from PIL import Image


def make_gif(frames):
    frames = [Image.fromarray(image) for image in frames]
    frame_one = frames[0]
    frame_one.save("dif.gif", format="GIF", append_images=frames,
               save_all=True, duration=100, loop=0)
    

if __name__ == "__main__":
    make_gif()

In [15]:
# With square kernels and equal stride
m = nn.ConvTranspose2d(1024, 256, 3, stride=3)

# non-square kernels and unequal stride and with padding
# m = nn.ConvTranspose2d(1024, 256, (3, 5), stride=(2, 1), padding=(4, 2))
input1 = torch.randn(1, 1024, 20, 20)
input2 = torch.randn(1, 1024, 20, 20)

x = torch.cat([input1, input2], dim=1)
print(input.size())
m = nn.ConvTranspose2d(2048, 512, 4, stride=4)
x = m(x)
print(x.size())
m = nn.ConvTranspose2d(512, 64, 4, stride=4)
x = m(x)
print(x.size())
m = nn.ConvTranspose2d(64, 16, 2, stride=2)
x = m(x)
print(x.size())
m = nn.Conv2d(16, 1, 1, stride=1)
x = m(x)
print(x.size())
# m = nn.ConvTranspose2d(512, 256, (3, 2), stride=(3, 2))
# output = m(output)
# m = nn.ConvTranspose2d(256, 128, (2, 2), stride=(3, 2))
# output = m(output)
# m = nn.ConvTranspose2d(128, 64, (2, 2), stride=(2,3))
# output = m(output)
# m = nn.ConvTranspose2d(64, 32, (2, 2), stride=(2, 2))
# output = m(output)
# m = nn.ConvTranspose2d(32, 16, (2, 1), stride=(2, 2))
# output = m(output)
# m = nn.Conv2d(2048, 512, 3)
# output = m(input)
# m = nn.Conv2d(512, 64, 3)
# output = m(output)
# # output.size()
# m = nn.MaxPool2d(2)
# # f = nn.Flatten()
# output = m(output)
# exact output size can be also specified as an argument
# input = torch.randn(1, 16, 12, 12)
# downsample = nn.Conv2d(16, 16, 3, stride=2, padding=1)
# upsample = nn.ConvTranspose2d(16, 16, 3, stride=2, padding=1)
# h = downsample(input)
# h.size()
# output = upsample(h, output_size=input.size())
# output.size()

torch.Size([1, 2048, 20, 20])
torch.Size([1, 512, 80, 80])
torch.Size([1, 64, 320, 320])
torch.Size([1, 16, 640, 640])
torch.Size([1, 1, 640, 640])


In [16]:
x

tensor([[[[0.09806, 0.18374, 0.17956,  ..., 0.13409, 0.10269, 0.08879],
          [0.20947, 0.15328, 0.16049,  ..., 0.09094, 0.11564, 0.19846],
          [0.13016, 0.13465, 0.15796,  ..., 0.10224, 0.16818, 0.15593],
          ...,
          [0.13561, 0.10059, 0.13223,  ..., 0.14580, 0.17216, 0.10702],
          [0.11683, 0.17047, 0.10594,  ..., 0.09445, 0.06275, 0.11017],
          [0.15938, 0.06229, 0.11667,  ..., 0.06029, 0.06223, 0.11421]]]], grad_fn=<ConvolutionBackward0>)

In [1]:
from torch import nn
class TrackingHead(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.conv1_3 = nn.Conv2d(in_channels=2048, out_channels=512, kernel_size=3)
        self.bn1 = nn.BatchNorm2d(512)
        self.conv2_3 = nn.Conv2d(in_channels=512, out_channels=64, kernel_size=3)
        self.bn2 = nn.BatchNorm2d(64)
        self.sigmoid = nn.Sigmoid()
        self.relu = nn.ReLU()
        self.flatten = nn.Flatten()
        self.maxpool = nn.MaxPool2d(2)
        self.linear1 = nn.Linear(64*4*8, 640*640*1)
    
    def forward(self, x):
        x = self.bn1(self.relu(self.conv1_3(x)))
        x = self.bn2(self.relu(self.conv2_3(x)))
        x = self.maxpool(x)
        x = self.flatten(x)
        x = self.sigmoid(self.linear1(x))
        return x

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
modeler = TrackingHead().to('cuda')

In [4]:
import torch
input1 = torch.randn(1, 1024, 12, 20)
input2 = torch.randn(1, 1024, 12, 20)

input = torch.cat([input1, input2], dim=1).to('cuda')
out = modeler(input)

In [10]:
out.view(1, 640, 640)

tensor([[[0.4946, 0.5104, 0.5643,  ..., 0.7803, 0.5898, 0.8119],
         [0.7215, 0.7817, 0.4841,  ..., 0.4375, 0.1060, 0.8303],
         [0.6543, 0.4936, 0.5628,  ..., 0.6289, 0.2710, 0.5993],
         ...,
         [0.8595, 0.5496, 0.6923,  ..., 0.1678, 0.5367, 0.2149],
         [0.5750, 0.6820, 0.2098,  ..., 0.2899, 0.2678, 0.8842],
         [0.5821, 0.1282, 0.2823,  ..., 0.6395, 0.2768, 0.4981]]],
       grad_fn=<ViewBackward0>)

In [1]:
import torch
import torch.nn as nn

from models.tracker import YoloTracker

model = YoloTracker(device='cuda')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
dummy = torch.rand([1, 3, 640, 640]).to('cuda')
dummy2 = torch.rand([1, 3, 640, 640]).to('cuda')
y_p1, t_p1 = model(dummy)
y_p2, t_p2 = model(dummy2)

In [4]:
t_p2.shape

torch.Size([1, 1, 640, 640])

In [15]:
y_p[0]

pxy = ps[:, :2].sigmoid() * 2. - 0.5
pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
pbox = torch.cat((pxy, pwh), 1)  # predicted box

torch.Size([1, 3, 80, 80, 85])

In [10]:
from utils.general import non_max_suppression
pred = non_max_suppression(y_p, 0.25, 0.45, classes=[0], agnostic=False)

AttributeError: 'list' object has no attribute 'shape'