In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import torch
# Use the first CUDA GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
device

In [None]:
import torchvision
# Shell escape: install the `gdown` command-line tool with conda (-y answers the confirmation prompt).
# The download cell that follows invokes `gdown`.
!conda install -y gdown

In [None]:
# Shell escape: fetch the file with this id using gdown.
# NOTE(review): presumably this is the `model.zip` archive that the next cell unzips - confirm.
!gdown --id 1XVEYb0TN2SbBYOqf8SzazfYZlpH9CxyE

In [None]:
# Shell escape: extract model.zip into the current directory.
# Later cells read ./model/ft_ResNet50/net_last.pth, presumably created by this extraction.
!unzip model.zip

In [None]:
# Shell escape: list the ft_ResNet50 directory to check which checkpoint files are present.
!ls model/ft_ResNet50/

In [None]:
import torch
# Load the saved ResNet50 checkpoint object from disk into `reid`.
# NOTE(review): `reid` is not referenced again in the visible notebook; the same
# file is loaded a second time where `net` is constructed.
reid = torch.load('./model/ft_ResNet50/net_last.pth')

Model definitions (source file: `./model/ft_net_dense/model.py`)

In [None]:
import torch
import torch.nn as nn
from torch.nn import init
from torchvision import models
from torch.autograd import Variable

######################################################################
##
## DIFFERENT MODELS THAT WE WANTED TO TRY
## 
######################################################################

def weights_init_kaiming(m):
    """Initialise one module in place, chosen by its class name.

    Written to be passed to ``nn.Module.apply``. Matching is by substring of
    the class name:

    * name contains ``Conv``: Kaiming-normal weights (``fan_in``); the bias
      is left untouched.
    * name contains ``Linear``: Kaiming-normal weights (``fan_out``) and a
      zeroed bias.
    * name contains ``BatchNorm1d``: weights drawn from N(1.0, 0.02) and a
      zeroed bias.

    Any other module is left unchanged.
    """
    layer_kind = type(m).__name__
    if 'Conv' in layer_kind:
        # For old pytorch, you may use kaiming_normal.
        init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
        return
    if 'Linear' in layer_kind:
        init.kaiming_normal_(m.weight.data, a=0, mode='fan_out')
        init.constant_(m.bias.data, 0.0)
        return
    if 'BatchNorm1d' in layer_kind:
        init.normal_(m.weight.data, 1.0, 0.02)
        init.constant_(m.bias.data, 0.0)

def weights_init_classifier(m):
    """Initialise a classifier layer in place (for use with ``nn.Module.apply``).

    Modules whose class name contains ``Linear`` get weights drawn from a
    normal distribution with std 0.001 and a zeroed bias; every other module
    is left unchanged.
    """
    if 'Linear' not in type(m).__name__:
        return
    init.normal_(m.weight.data, std=0.001)
    init.constant_(m.bias.data, 0.0)

# Defines the new fc layer and classification layer
# |--Linear--|--bn--|--relu--|--Linear--|
class ClassBlock(nn.Module):
    """Bottleneck block followed by a linear classifier.

    Layout: ``[Linear] -> [BatchNorm1d] -> [LeakyReLU(0.1)] -> [Dropout] -> Linear``
    where every bracketed layer is optional.

    Args:
        input_dim: size of the incoming feature vector.
        class_num: number of outputs of the final linear layer.
        droprate: dropout probability; dropout is only added when it is > 0.
        relu: add a ``LeakyReLU(0.1)`` to the bottleneck.
        bnorm: add a ``BatchNorm1d`` to the bottleneck.
        num_bottleneck: width of the bottleneck; replaced by ``input_dim``
            when ``linear`` is False.
        linear: add the leading ``Linear(input_dim, num_bottleneck)``.
        return_f: when True, ``forward`` returns ``(logits, bottleneck_features)``
            instead of just the logits.
    """

    def __init__(self, input_dim, class_num, droprate, relu=False, bnorm=True, num_bottleneck=512, linear=True, return_f = False):
        super(ClassBlock, self).__init__()
        self.return_f = return_f

        # Without the leading Linear the bottleneck keeps the input width.
        if not linear:
            num_bottleneck = input_dim

        bottleneck_layers = [nn.Linear(input_dim, num_bottleneck)] if linear else []
        if bnorm:
            bottleneck_layers.append(nn.BatchNorm1d(num_bottleneck))
        if relu:
            bottleneck_layers.append(nn.LeakyReLU(0.1))
        if droprate>0:
            bottleneck_layers.append(nn.Dropout(p=droprate))
        bottleneck = nn.Sequential(*bottleneck_layers)
        bottleneck.apply(weights_init_kaiming)

        head = nn.Sequential(nn.Linear(num_bottleneck, class_num))
        head.apply(weights_init_classifier)

        # Attribute names are kept as-is: they are the state_dict key prefixes.
        self.add_block = bottleneck
        self.classifier = head

    def forward(self, x):
        """Return the logits, or ``(logits, bottleneck_features)`` if ``return_f`` is set."""
        f = self.add_block(x)
        logits = self.classifier(f)
        if self.return_f:
            return logits, f
        return logits

# Define the ResNet50-based Model
class ft_net(nn.Module):
    """ResNet50 backbone followed by a ``ClassBlock`` classification head.

    Args:
        class_num: number of outputs of the classification head.
        droprate: dropout probability handed to the ``ClassBlock``.
        stride: when 1, the stride of the first block of ``layer4`` (its
            ``conv2`` and its downsample convolution) is set to ``(1, 1)``;
            any other value leaves the backbone strides unchanged.
    """

    def __init__(self, class_num, droprate=0.5, stride=2):
        super(ft_net, self).__init__()
        model_ft = models.resnet50(pretrained=True)
        if stride == 1:
            # Patch the local backbone: ``self.model`` is only assigned further
            # down, so reading it here raised AttributeError.
            model_ft.layer4[0].downsample[0].stride = (1,1)
            model_ft.layer4[0].conv2.stride = (1,1)
        # avg pooling to global pooling
        model_ft.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.model = model_ft
        self.classifier = ClassBlock(2048, class_num, droprate)

    def forward(self, x):
        """Run the backbone up to global pooling, flatten, and classify."""
        x = self.model.conv1(x)
        x = self.model.bn1(x)
        x = self.model.relu(x)
        x = self.model.maxpool(x)
        x = self.model.layer1(x)
        x = self.model.layer2(x)
        x = self.model.layer3(x)
        x = self.model.layer4(x)
        x = self.model.avgpool(x)
        # Pooled map is (batch, channels, 1, 1); keep only the first two dims.
        x = x.view(x.size(0), x.size(1))
        x = self.classifier(x)
        return x

# Define the DenseNet121-based Model
class ft_net_dense(nn.Module):
    """DenseNet121 feature extractor followed by a ``ClassBlock`` head.

    Args:
        class_num: number of outputs of the classification head.
        droprate: dropout probability handed to the ``ClassBlock``.
    """

    def __init__(self, class_num, droprate=0.5):
        super(ft_net_dense, self).__init__()
        backbone = models.densenet121(pretrained=True)
        # Append a global average pool to the end of the feature stack.
        backbone.features.avgpool = nn.AdaptiveAvgPool2d((1,1))
        backbone.fc = nn.Sequential()
        self.model = backbone
        # For DenseNet, the feature dim is 1024
        self.classifier = ClassBlock(1024, class_num, droprate)

    def forward(self, x):
        """Extract pooled features, flatten to (batch, channels), and classify."""
        pooled = self.model.features(x)
        flat = pooled.view(pooled.size(0), pooled.size(1))
        return self.classifier(flat)
    
# Define the ResNet50-based Model (Middle-Concat)
# In the spirit of "The Devil is in the Middle: Exploiting Mid-level Representations for Cross-Domain Instance Matching." Yu, Qian, et al. arXiv:1711.08106 (2017).
class ft_net_middle(nn.Module):
    """ResNet50 variant that concatenates pooled layer3 and layer4 features.

    The classification head receives 1024 + 2048 features.

    Args:
        class_num: number of outputs of the classification head.
        droprate: dropout probability handed to the ``ClassBlock``.
    """

    def __init__(self, class_num, droprate=0.5):
        super(ft_net_middle, self).__init__()
        backbone = models.resnet50(pretrained=True)
        # avg pooling to global pooling
        backbone.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.model = backbone
        self.classifier = ClassBlock(2048+1024, class_num, droprate)

    def forward(self, x):
        """Classify from the concatenation of mid-level and top-level pooled features."""
        backbone = self.model
        stem_and_early_layers = (
            backbone.conv1, backbone.bn1, backbone.relu, backbone.maxpool,
            backbone.layer1, backbone.layer2, backbone.layer3,
        )
        for stage in stem_and_early_layers:
            x = stage(x)
        # mid  n*1024*1*1 (pooled layer3 output)
        mid = backbone.avgpool(x)
        # high n*2048*1*1 (pooled layer4 output)
        high = backbone.avgpool(backbone.layer4(x))
        fused = torch.cat((mid, high), 1)
        fused = fused.view(fused.size(0), fused.size(1))
        return self.classifier(fused)

# Part Model proposed in Yifan Sun etal. (2018)
class PCB(nn.Module):
    """Part-based model: ResNet50 whose final feature map is pooled into
    ``self.part`` horizontal stripes, each with its own ``ClassBlock`` head.

    Args:
        class_num: number of outputs of every per-part classifier.
    """

    def __init__(self, class_num ):
        super(PCB, self).__init__()

        self.part = 6 # We cut the pool5 to 6 parts
        model_ft = models.resnet50(pretrained=True)
        self.model = model_ft
        self.avgpool = nn.AdaptiveAvgPool2d((self.part,1))
        self.dropout = nn.Dropout(p=0.5)
        # remove the final downsample
        self.model.layer4[0].downsample[0].stride = (1,1)
        self.model.layer4[0].conv2.stride = (1,1)
        # define 6 classifiers, registered as classifier0 .. classifier5
        for i in range(self.part):
            name = 'classifier'+str(i)
            setattr(self, name, ClassBlock(2048, class_num, droprate=0.5, relu=False, bnorm=True, num_bottleneck=256))

    def forward(self, x):
        """Return a list with one prediction tensor per part, in part order."""
        x = self.model.conv1(x)
        x = self.model.bn1(x)
        x = self.model.relu(x)
        x = self.model.maxpool(x)

        x = self.model.layer1(x)
        x = self.model.layer2(x)
        x = self.model.layer3(x)
        x = self.model.layer4(x)
        x = self.avgpool(x)
        x = self.dropout(x)

        batch, channels = x.size(0), x.size(1)
        predictions = []
        for i in range(self.part):
            # x[:, :, i] is (batch, channels, 1). Reshape explicitly instead of
            # torch.squeeze, which would also drop the batch dimension when the
            # batch holds a single sample.
            stripe = x[:, :, i].view(batch, channels)
            head = getattr(self, 'classifier'+str(i))
            predictions.append(head(stripe))
        return predictions

class PCB_test(nn.Module):
    """Feature-extraction wrapper around the backbone of an existing model.

    Reuses ``model.model`` as the backbone, sets the stride of the first
    ``layer4`` block to ``(1, 1)``, and pools the final feature map into
    ``self.part`` horizontal stripes.

    Args:
        model: object exposing the backbone as its ``model`` attribute.
    """

    def __init__(self,model):
        super().__init__()
        self.part = 6
        self.model = model.model
        self.avgpool = nn.AdaptiveAvgPool2d((self.part,1))
        # remove the final downsample
        first_block = self.model.layer4[0]
        first_block.downsample[0].stride = (1,1)
        first_block.conv2.stride = (1,1)

    def forward(self, x):
        """Return part features shaped (batch, channels, part)."""
        backbone = self.model
        pipeline = (
            backbone.conv1, backbone.bn1, backbone.relu, backbone.maxpool,
            backbone.layer1, backbone.layer2, backbone.layer3, backbone.layer4,
            self.avgpool,
        )
        for stage in pipeline:
            x = stage(x)
        # Drop the trailing width dimension of the pooled map.
        return x.view(x.size(0),x.size(1),x.size(2))
'''
# debug model structure
# Run this code with:
python model.py
'''

In [None]:
from torchvision import transforms
# Preprocessing pipeline applied to PIL images before they are fed to the network:
# resize to 256, centre-crop to 224, then convert to a tensor.
preprocess = transforms.Compose(
    [
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        # Mean/std normalisation is currently disabled:
        # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

In [None]:
## SETTLED WITH RESNET 50 with 751 output classes
net = ft_net(751)
# map_location='cpu': the notebook falls back to the CPU when CUDA is missing,
# and torch.load otherwise tries to restore tensors onto the device they were
# saved from. The network is moved to the chosen device separately.
pretrained_weights = torch.load('./model/ft_ResNet50/net_last.pth', map_location='cpu')
net.load_state_dict(pretrained_weights)

In [None]:
import pickle
### IMPORTING THE BOUNDING BOXES FROM OUR YOLO DATASET
# Later cells index the result as bboxes[scene][frame_id] and unpack each entry
# as (class_id, x, y, w, h).
# NOTE(review): pickle.load can execute arbitrary code; only load this file if
# its origin is trusted.
with open('../input/bboxes/yolo_bboxes.pickle', 'rb') as bbox_file:
    bboxes = pickle.load(bbox_file)

In [None]:
import os  # required below; the only other `import os` in the notebook is commented out
import time

# READING IN A DICT ALL THE dirnames and the output files as joined paths
directories = ['campus','lab']
output_dir = './reduced_frames3'

# dir_files[scene] -> list of frame image paths, ordered by numeric file name.
dir_files = {}

for directory in directories:
    parent_path = '../input/multi-camera-person-tracking-reduced-frames/reduced_frames/' + directory + "/"
    # Pick the (last listed) sub-directory of the scene folder; the placeholder
    # is kept when there is none, in which case the walk below yields nothing.
    needed = 'asdasd'
    for item in os.listdir(parent_path):
        if os.path.isdir(os.path.join(parent_path, item)):
            needed = item

    for dirname, _, filenames in os.walk(parent_path + needed + "/"):
        # File names are expected to be "<integer>.<ext>"; sort by that integer
        # so frames come out in numeric rather than lexicographic order.
        stems = sorted((name.split(".")[0] for name in filenames), key=int)
        dir_files[directory] = [os.path.join(dirname, stem + ".jpg") for stem in stems]
        
# out = yolo(args['image'])

In [None]:
# Show which scene names received a frame list in dir_files.
dir_files.keys()

In [None]:
from PIL import Image
import matplotlib.pyplot as plt
import cv2

## CROPPING THE IMAGES

# cropped_imgs[scene][frame_id] -> list of PIL crops, one per kept bounding box
# (an empty list when the frame has none).
cropped_imgs = {}
for key in dir_files:
    imgs = {}
    print(key)
    for image_path in dir_files[key]:
        # Close the file handle as soon as the pixels are copied into the array.
        with Image.open(image_path) as frame:
            img = np.array(frame.convert('RGB'))
        # Frame id is the file name without its extension.
        img_num = image_path.split('/')[-1].split('.')[0]
        imgs[img_num] = []
        for bbox in bboxes[key][img_num]:
            class_id,x,y,w,h = bbox
            # Only class 0 is kept (presumably the "person" class - confirm
            # against the detector's label map).
            if class_id != 0:
                continue
            # Clamp the box to the image bounds before slicing.
            person_img = img[max(0,y):min(y + h,img.shape[0]),max(0,x):min(x + w,img.shape[1])]
            # No plt.imshow here: drawing every crop onto the same axes only
            # slowed the loop down and accumulated artists.
            imgs[img_num].append(Image.fromarray(np.uint8(person_img)).convert('RGB'))
    cropped_imgs[key] = imgs
                

In [None]:
# STORING THE CROPPED IMAGES
!rm -rf cropped_ppl
!mkdir cropped_ppl
# import pickle
for key in cropped_imgs:
    for scene in cropped_imgs[key]:
        if(cropped_imgs[key][scene]):
#             print("./cropped_ppl/" + key + "_" + scene)
            cnt = 0
            for people in cropped_imgs[key][scene]:
                people.save("./cropped_ppl/" + key + "_" + scene + "_" + str(cnt) + ".jpg")
                cnt += 1
#         print(cropped_imgs[key][scene])


In [None]:
# Display the full scene -> frame id -> list-of-crops mapping (the output can be very long).
cropped_imgs

In [None]:
# CREATING A DATASET OUT OF THE CROPPED IMAGE INTO DIFFERENT SCENES

# dataset[scene] -> flat list of PIL crops, in frame order then box order.
dataset = {}
for key in cropped_imgs:
    dataset[key] = [
        people
        for scene in cropped_imgs[key]
        for people in cropped_imgs[key][scene]
    ]

# Concatenate every scene in dict order, however many scenes there are (the
# previous version indexed exactly two), then preprocess each crop to a tensor.
total_dataset = []
for scene_crops in dataset.values():
    total_dataset.extend(scene_crops)

total_dataset = [preprocess(crop) for crop in total_dataset]

In [None]:
# Display the per-scene lists of PIL crops.
dataset

In [None]:
# Stack the list of per-crop tensors into a single tensor along a new leading dimension.
# NOTE(review): this rebinds `total_dataset` from a list to a tensor, so the cell
# is not meant to be run twice without rebuilding the list first.
total_dataset = torch.stack(total_dataset)

In [None]:
# Sanity check: the leading dimension is the number of crops.
total_dataset.shape

In [None]:
from torch.utils.data import DataLoader
# Batch size used for feature extraction.
bs = 32
# Batches are slices of the stacked tensor; `shuffle` is left at its default,
# so batch order follows the order of `total_dataset`.
data = DataLoader(total_dataset, batch_size=bs)

In [None]:
# One-shot iterator over the DataLoader: each batch is yielded once, after which
# next() raises StopIteration until the iterator is created again.
iterator = iter(data)

In [None]:
import gc

In [None]:
# Shell escapes: start from an empty extracted_features/ directory so that no
# feature files from a previous run remain.
!rm -rf extracted_features
!mkdir extracted_features

In [None]:
# Shell escape: list the feature directory.
!ls extracted_features

In [None]:
# Number of samples and number of batches to process.
dlen = len(total_dataset)
# Floor division: only full batches of size `bs` are counted, so a trailing
# partial batch (dlen % bs samples) is not included in `niters`.
niters = dlen // bs
print(dlen, niters)

In [None]:
# Display the DataLoader iterator object (debug check only).
iterator

In [None]:
## EXTRACTING RESNET FEATURES IN OUR DATASET
features = []
net = net.to(device)
# Inference mode: without eval() dropout stays active and BatchNorm uses batch
# statistics, so gallery features would be noisy and depend on batch makeup.
# NOTE(review): the similarity threshold may need re-tuning, since earlier
# gallery features were produced in train mode.
net.eval()
# Iterate the DataLoader itself rather than a pre-built one-shot iterator: the
# cell can then be re-run, and the final partial batch is processed too.
# `niters` is updated so the cell that reloads the files reads every batch.
niters = len(data)
with torch.no_grad():  # no autograd graph is needed for feature extraction
    for i, batch in enumerate(data):
        out = net(batch.to(device))
        print("iter ", i, " ", out.shape)
        torch.save(out, 'extracted_features/out' + str(i) + '.pth')

In [None]:
## COMBINED FEATURE VECTOR

# Reload the per-batch outputs in batch order and stack them once; re-stacking
# the growing tensor on every iteration copied all earlier rows each time.
path = './extracted_features/out'
feature_chunks = [torch.load(path + str(i) + '.pth') for i in range(niters)]
# Keep the previous result (an empty tensor) when there is nothing to load.
features = torch.vstack(feature_chunks) if feature_chunks else torch.Tensor([])
print(features.shape)

In [None]:
## FINDING AN INVERSE RELATION BETWEEN AN IMAGE ID IN THE SCENE TO THE FRAME IN SCENE

# img_set: every crop of every scene, in scene / frame / box order.
# total_imgid_to_frame[scene][k]: frame id that the k-th crop of that scene came
# from (k restarts at 0 for every scene).
img_set = []
total_imgid_to_frame = {}

for scene in cropped_imgs:
    imgid_to_frame = {}
    for frame, crops in cropped_imgs[scene].items():
        for people in crops:
            # The next crop index is the number of crops recorded so far.
            imgid_to_frame[len(imgid_to_frame)] = frame
            img_set.append(people)
    total_imgid_to_frame[scene] = imgid_to_frame

In [None]:
## A dict which stores how many extra non-needed frames needs to be added to this key to get the image in feature matrix
# dataset_to_totalDataset[scene] is the row offset of that scene's first crop
# inside the concatenated dataset / feature matrix.
dataset_to_totalDataset = {}
start = 0
for key in dataset:
    dataset_to_totalDataset[key] = start
    # Accumulate: overwriting `start` with the current scene's length was only
    # correct for the first two scenes.
    start += len(dataset[key])

In [None]:
# Display the per-scene row offsets.
dataset_to_totalDataset

In [None]:
# Display the mapping scene -> (crop index within the scene -> frame id).
total_imgid_to_frame

In [None]:
# Cosine-similarity module; in the visible notebook it is only referenced from
# commented-out code.
cos = torch.nn.CosineSimilarity(dim=1, eps=1e-08)
# person_same_threshold = 0.15
# Minimum score for a crop to count as the same person. The matching cell
# compares it with a raw dot product of feature vectors; the commented-out 0.15
# is presumably the value for the cosine variant - confirm.
person_same_threshold = 350

In [None]:
# net.train()

In [None]:
## SETTING UP THE QUERY IMAGE
# `start` marks the beginning of the timed query; the matching cell prints the
# elapsed time relative to it.
start = time.time()
query_image = Image.open('../input/query-imgs/query_img_pres_2.png').convert('RGB')
# The query is stacked twice, giving a batch of two identical rows.
query_dataset = torch.stack([preprocess(query_image),preprocess(query_image)])
query_dataset = query_dataset.to(device)
net.eval()
# Inference only: skip building the autograd graph.
with torch.no_grad():
    out_features = net(query_dataset)

# Both rows are the same image; keep the first as the query descriptor.
query_feature = out_features[0]

out_features


In [None]:
# features[dataset_to_totalDataset[scene] + 287]
# Display crop number 287 of the current scene.
# NOTE(review): `scene` is not assigned in this cell; on a top-to-bottom run it
# holds whatever value an earlier loop variable named `scene` was left with.
dataset[scene][287]

In [None]:
## EXTRACTING THE CLOSEST PERSON PICTURES FROM THE SIMILARITY AND THRESHOLD FUNCTION
import time

query_img_index = 287
scene = 'lab'
query = query_feature

# Indices (within the scene) of every crop whose feature vector has a dot
# product with the query above the threshold. The scene offset converts a
# scene-local index into a row of the combined feature matrix.
same_guy = [
    i
    for i in range(len(dataset[scene]))
    if torch.dot(features[dataset_to_totalDataset[scene] + i], query).item() > person_same_threshold
]
end = time.time()
print ("Total Time from Kaggle sent:",end - start)

In [None]:
# Display the scene-local indices of the crops that passed the threshold.
same_guy

In [None]:
## DISPLAYING QUERY IMAGE
# The image is re-opened from disk and rendered as the cell output.
Image.open('../input/query-imgs/query_img_pres_2.png').convert('RGB')
# Disabled alternative: show a crop from the dataset and save it to disk.
# plt.imshow(np.array(dataset[scene][query_img_index]))
# plt.savefig('query_img.png')

For the query `img_set[287]`, the retrieval accuracy is 96.5%.

In [None]:
import matplotlib.pyplot as plt
## DISPLAYING THE CLOSEST 160 frames in our SCENE
# Grid of w columns by h rows (160 cells).
w = 8
h = 20

# Fetch the index-th matching crop of the scene, resized for display.
loadimg = lambda index: dataset[scene][same_guy[index]].resize((250, 250))

_, axes_list = plt.subplots(h, w, figsize=(20, 40)) # define a grid of (w, h)

# Fill the grid row by row starting at match 0. Incrementing the counter before
# use skipped the first match and asked for index 160; cells beyond the number
# of matches now stay blank instead of raising IndexError.
for cnt, ax in enumerate(axes_list.flat):
    ax.axis('off')
    if cnt < len(same_guy):
        ax.imshow(loadimg(cnt)) # load and show
plt.savefig('pink t shirt, query_index = 287, dot.png')
#         ax.set_title(img)

In [None]:
# Print the source frame id of every matching crop on one line, each entry
# terminated by ">>|<<".
for match_index in same_guy:
    frame_id = total_imgid_to_frame[scene][match_index]
    print("Frame ID:", frame_id, end = ">>|<<")
    

# dataset['lab']