In [None]:
import glob
import pandas as pd
import os
import numpy as np
import torch
from skimage import io, transform
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torch.nn as nn
from torch.nn import Sequential
from torch.nn import functional as F
from torch import nn, optim
from sklearn.utils import resample
import cv2
import random
from sklearn.model_selection import train_test_split
import json
from pathlib import Path
from tqdm.notebook import tqdm
import zipfile
import torchvision

from google.colab import drive
drive.mount('/content/drive/')

# Unpack the training archive from the mounted Drive into /content/.
# The context manager closes the archive handle as soon as extraction is
# done; previously the handle stayed open for the lifetime of the kernel.
with zipfile.ZipFile('/content/drive/MyDrive/train.zip', 'r') as files:
    files.extractall('/content/')

In [None]:
# Directory the image file names in the markup are resolved against.
path = '/content/source/'

# After the transpose every row describes one image; reset_index() moves the
# row labels (the image file names, see the table below) into a regular column.
df = pd.read_json('/content/drive/MyDrive/train.json').T.reset_index()
df.columns = ['image', 'x_label', 'y_label']
df

Unnamed: 0,image,x_label,y_label
0,video-18-frame-725.jpg,161.0,154.0
1,video-18-frame-1411.jpg,155.0,158.0
2,video-18-frame-958.jpg,157.0,152.0
3,video-18-frame-802.jpg,163.0,152.0
4,video-18-frame-1084.jpg,153.0,152.0
...,...,...,...
798,video-18-frame-67.jpg,161.0,146.0
799,video-18-frame-1300.jpg,155.0,158.0
800,video-18-frame-1292.jpg,155.0,158.0
801,video-18-frame-241.jpg,157.0,148.0


In [None]:
class VPDataset(Dataset):
    """Images paired with a single (x, y) target point.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per image. Column ``image`` holds the file name; every
        column after the first is read as a target coordinate.
    root_dir : str
        Directory that contains the image files.
    transform : callable, optional
        Applied to the ``(image, coords)`` tuple before it is returned.
    test : bool
        When true, ``__getitem__`` returns ``(sample, img_name)`` instead of
        just ``sample``.
    """

    def __init__(self, df, root_dir, transform=None, test=False):
        self.df = df
        self.root_dir = root_dir
        self.transform = transform
        self.test = test

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_name = self.df.iloc[idx]['image']
        # Resolve the file against this dataset's own root_dir. The previous
        # code read the notebook-global `path`, so root_dir was silently
        # ignored and the dataset changed whenever a later cell rebound `path`.
        image = io.imread(os.path.join(self.root_dir, img_name)).astype('double')
        # Bring pixel values into [0, 1].
        image /= 255
        # Wrapped in a list so the target has shape (1, n_coords).
        coords = np.array([self.df.iloc[idx, 1:]]).astype('double')
        sample = image, coords
        if self.transform:
            sample = self.transform(sample)
        if self.test:
            return sample, img_name
        return sample

In [None]:
class Rescale(object):
    """Resize the image of a sample and scale its target point accordingly.

    Parameters
    ----------
    output_size : int or tuple
        A tuple is used as the exact ``(height, width)`` of the result.
        An int is the length of the shorter edge; the longer edge is scaled
        so that the aspect ratio is kept.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def __call__(self, sample):
        image, coords = sample
        h, w = image.shape[:2]
        if isinstance(self.output_size, int):
            # Only the longer edge gets the aspect-ratio factor. The previous
            # code applied a factor to both edges, which distorted non-square
            # images and left neither edge equal to output_size.
            if h > w:
                new_h, new_w = self.output_size * h / w, self.output_size
            else:
                new_h, new_w = self.output_size, self.output_size * w / h
        else:
            new_h, new_w = self.output_size
        new_h, new_w = int(new_h), int(new_w)
        img = transform.resize(image, (new_h, new_w))
        # coords are ordered (x, y): x follows the width, y follows the height.
        coords = coords * [new_w / w, new_h / h]
        return img, coords

class RandomHorizontalFlip(object):
    """Mirror a sample left-to-right with probability ``p``.

    The image is reversed along its width axis and the x coordinate of the
    target is replaced by ``width - x``; y is left unchanged. ``coords`` may
    have shape ``(2,)`` or ``(..., 2)`` with x stored in the last axis at
    index 0.
    """

    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, sample):
        img, coords = sample
        if random.random() < self.p:
            img = img[:, ::-1, :]
            # Work on a copy so the caller's array is not modified.
            coords = coords.copy()
            # Index the x component on the last axis. `coords[0]` selected the
            # whole (x, y) row for a (1, 2) array and therefore flipped y too.
            coords[..., 0] = img.shape[1] - coords[..., 0]
        return img, coords

def rotate_im(image, angle):
    """Rotate ``image`` by ``angle`` degrees about its centre.

    The output canvas is resized to ``h*|sin| + w*|cos|`` by
    ``h*|cos| + w*|sin|`` (truncated to ints) and the affine translation is
    adjusted so the rotation centre lands in the middle of that canvas.
    """
    height, width = image.shape[:2]
    centre_x, centre_y = width // 2, height // 2

    affine = cv2.getRotationMatrix2D((centre_x, centre_y), angle, 1.0)
    abs_cos = np.abs(affine[0, 0])
    abs_sin = np.abs(affine[0, 1])

    # Size of the canvas for the rotated image.
    out_w = int((height * abs_sin) + (width * abs_cos))
    out_h = int((height * abs_cos) + (width * abs_sin))

    # Shift the result so the old centre maps to the new canvas centre.
    affine[0, 2] += (out_w / 2) - centre_x
    affine[1, 2] += (out_h / 2) - centre_y

    return cv2.warpAffine(image, affine, (out_w, out_h))

def rotate_point(image, point, angle):
    """Return where ``point`` of ``image`` ends up in ``rotate_im(image, angle)``.

    Parameters
    ----------
    image : ndarray
        The unrotated image; only its shape is used (plus a call to
        ``rotate_im`` to obtain the rotated canvas size).
    point : sequence
        ``(x, y)`` position in the unrotated image.
    angle : number
        Rotation angle in degrees, same convention as ``rotate_im``.

    Returns
    -------
    ndarray of shape (2,)
        ``(x, y)`` position in the rotated image.
    """
    img_rot = rotate_im(image, angle)
    (h, w) = image.shape[:2]
    (cX, cY) = (w // 2, h // 2)
    (nh, nw) = img_rot.shape[:2]
    # Express the point relative to the rotation centre used by rotate_im.
    point_ = np.array([point[0] - cX, point[1] - cY])
    # rotate_im moves the rotation centre to (nW / 2, nH / 2) using true
    # division. Floor division here left the point half a pixel off whenever
    # the rotated canvas had an odd width or height.
    shift = np.array([nw / 2, nh / 2])
    cos = np.cos(angle * np.pi / 180)
    sin = np.sin(angle * np.pi / 180)
    # Row-vector form of the rotation applied by the affine matrix.
    rot_matrix = np.array([[cos, -sin], [sin, cos]])
    point_rot = np.dot(point_, rot_matrix) + shift
    return point_rot
    
class RandomRotation(object):
    """Rotate a sample by a random whole number of degrees in [-45, 45].

    The returned target is whatever ``rotate_point`` produces for the first
    row of the incoming ``coords``.
    """

    def __call__(self, sample):
        picture, target = sample
        degrees = random.randint(-45, 45)
        # Map the target first, while `picture` is still the unrotated image.
        moved_target = rotate_point(picture, target[0], degrees)
        return rotate_im(picture, degrees), moved_target

class ToTensor(object):
    """Turn an ``(image, coords)`` pair of ndarrays into float32 tensors."""

    def __call__(self, sample):
        image, coords = sample
        # Reorder the axes so the last one comes first, then take a fresh
        # copy of the reordered array before handing it to torch.
        channels_first = image.transpose(2, 0, 1)
        image_tensor = torch.from_numpy(channels_first.copy()).float()
        coords_tensor = torch.from_numpy(coords).float()
        return image_tensor, coords_tensor

In [None]:
# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
train, test = train_test_split(df, test_size=0.2, random_state=42)

# Training samples: random rotation, resize to 300x300, random flip, tensors.
train_transform = transforms.Compose([
    RandomRotation(),
    Rescale((300, 300)),
    RandomHorizontalFlip(),
    ToTensor(),
])
# Held-out samples: resize and tensor conversion only.
test_transform = transforms.Compose([
    Rescale((300, 300)),
    ToTensor(),
])

train_set = VPDataset(df=train, root_dir=path, transform=train_transform)
test_set = VPDataset(df=test, root_dir=path, transform=test_transform)

# Only the training loader shuffles.
train_loader = DataLoader(train_set, batch_size=64, shuffle=True, num_workers=0)
test_loader = DataLoader(test_set, batch_size=64, shuffle=False, num_workers=0)

In [None]:
class Mobilenetv2(nn.Module):
    """MobileNetV2 backbone (loaded via torch.hub with ``pretrained=True``)
    whose classifier is replaced by a 128 -> 64 -> 2 fully connected head."""

    def __init__(self):
        super().__init__()
        backbone = torch.hub.load('pytorch/vision', 'mobilenet_v2', pretrained=True)
        # Input width of the stock classifier's linear layer.
        feature_dim = backbone.classifier[1].in_features
        backbone.classifier = nn.Sequential(
            nn.Linear(in_features=feature_dim, out_features=128),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(64, 2),
        )
        self.model = backbone

    def forward(self, x):
        """Run the backbone features, pool each channel to one value, apply the head."""
        n_samples, _, _, _ = x.shape
        feature_map = self.model.features(x)
        pooled = F.adaptive_avg_pool2d(feature_map, 1)
        flat = pooled.reshape(n_samples, -1)
        return self.model.classifier(flat)

# Build the network; Mobilenetv2.__init__ loads the backbone through torch.hub.
net = Mobilenetv2()

Using cache found in /root/.cache/torch/hub/pytorch_vision_master


In [None]:
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Move the network to that device and make sure its parameters are float32.
model = net.to(device)
model = model.float()

# Mean squared error between predicted and target coordinates, trained with Adam.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# File the checkpoint is meant to be written to.
PATH = "/content/drive/MyDrive/model_vgg.pt"

In [None]:
# From this cell on, `path` and `df` refer to the dataset under /content/new_ds/.
path = '/content/new_ds/'

df = (
    pd.read_json('/content/new_ds/markup.json')
    .T
    .reset_index()
)
df.columns = ['image', 'x_label', 'y_label']

class VPDataset(Dataset):
    """Images paired with a single (x, y) target point.

    Unlike the earlier definition in this notebook, pixel values are not
    divided by 255 here; images are returned on their original scale.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per image. Column ``image`` holds the file name; every
        column after the first is read as a target coordinate.
    root_dir : str
        Directory that contains the image files.
    transform : callable, optional
        Applied to the ``(image, coords)`` tuple before it is returned.
    test : bool
        When true, ``__getitem__`` returns ``(sample, img_name)`` instead of
        just ``sample``.
    """

    def __init__(self, df, root_dir, transform=None, test=False):
        self.df = df
        self.root_dir = root_dir
        self.transform = transform
        self.test = test

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_name = self.df.iloc[idx]['image']
        # Resolve the file against this dataset's own root_dir. The previous
        # code read the notebook-global `path`, so a dataset built with a
        # different root_dir still loaded its images from `path`.
        image = io.imread(os.path.join(self.root_dir, img_name)).astype('double')
        # Wrapped in a list so the target has shape (1, n_coords).
        coords = np.array([self.df.iloc[idx, 1:]]).astype('double')
        sample = image, coords
        if self.transform:
            sample = self.transform(sample)
        if self.test:
            return sample, img_name
        return sample

class ToTensor(object):
    """Convert the ndarrays of a sample to float32 torch tensors."""

    def __call__(self, sample):
        image, coords = sample

        def as_float_tensor(array):
            return torch.from_numpy(array).float()

        # The image axes are reordered (last axis first) and copied before
        # conversion; coords are converted as they are.
        reordered = image.transpose((2, 0, 1)).copy()
        return as_float_tensor(reordered), as_float_tensor(coords)

train, test = train_test_split(df, test_size=0.2, random_state=42)

# The rotation, rescale and flip steps used earlier are switched off here:
# both pipelines only convert the sample to tensors.
train_set = VPDataset(train, path, transforms.Compose([ToTensor()]))
test_set = VPDataset(test, path, transforms.Compose([ToTensor()]))

# Settings shared by both loaders; only the training loader shuffles.
loader_options = dict(batch_size=64, num_workers=0)
train_loader = DataLoader(train_set, shuffle=True, **loader_options)
test_loader = DataLoader(test_set, shuffle=False, **loader_options)

class Mobilenetv2(nn.Module):
    """torchvision MobileNetV2 built with ``pretrained=False`` whose
    classifier is swapped for a 128 -> 64 -> 2 fully connected head."""

    def __init__(self):
        super(Mobilenetv2, self).__init__()
        self.model = torchvision.models.mobilenet_v2(pretrained=False)
        # Input width of the stock classifier's linear layer.
        n_features = self.model.classifier[1].in_features
        head_layers = [
            nn.Linear(in_features=n_features, out_features=128),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(64, 2),
        ]
        self.model.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Backbone features, per-channel average pooling, then the head."""
        batch, _, _, _ = x.shape
        pooled = F.adaptive_avg_pool2d(self.model.features(x), 1)
        return self.model.classifier(pooled.reshape(batch, -1))

net = Mobilenetv2()

# Run on the first CUDA device when available, else on the CPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
model = net.to(device).float()

# Mean squared error on the predicted coordinates, optimised with Adam.
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

# Checkpoint file written by the training loop.
PATH = "/content/model_mob_3.pt"

n_epochs = 1000
# np.inf instead of np.Inf: the capitalised alias was removed in NumPy 2.0.
val_loss_min = np.inf
total_step = len(train_loader)
for epoch in range(n_epochs):
    # ---- training pass ----
    train_loss = 0
    model.train()
    for images, labels in train_loader:
        images = images.to(device)
        coords = labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        # Give the targets the prediction's (batch, 2) shape explicitly.
        # squeeze() also removed the batch axis when a batch held a single
        # sample, leaving the loss to rely on broadcasting.
        loss = criterion(outputs, coords.reshape(outputs.shape))
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    print ("Epoch {}, Train Loss: {:.4f}".format(epoch, train_loss/len(train_loader)))

    # ---- evaluation pass on the held-out loader (no gradients) ----
    model.eval()
    with torch.no_grad():
        val_loss = 0
        for images, labels in test_loader:
            images = images.to(device)
            coords = labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, coords.reshape(outputs.shape))
            val_loss += loss.item()
    print ("Epoch {}, Test Loss: {:.4f}".format(epoch, val_loss/len(test_loader)))

    # Keep the weights of the epoch with the lowest summed held-out loss.
    # The state is taken from `model`, the object that is actually trained.
    if val_loss < val_loss_min:
        torch.save(model.state_dict(), PATH)
        val_loss_min = val_loss
        print('Saving model...')

Epoch 0, Train Loss: 24458.2061
Epoch 0, Test Loss: 24857.3529
Saving model...
Epoch 1, Train Loss: 19130.5162
Epoch 1, Test Loss: 22416.3815
Saving model...
Epoch 2, Train Loss: 9283.9165
Epoch 2, Test Loss: 18339.9473
Saving model...
Epoch 3, Train Loss: 1808.7818
Epoch 3, Test Loss: 9674.1354
Saving model...
Epoch 4, Train Loss: 1493.0702
Epoch 4, Test Loss: 8278.9956
Saving model...
Epoch 5, Train Loss: 1297.0086
Epoch 5, Test Loss: 1752.3406
Saving model...
Epoch 6, Train Loss: 1134.3629
Epoch 6, Test Loss: 2090.7677
Epoch 7, Train Loss: 1256.9792
Epoch 7, Test Loss: 521.7051
Saving model...
Epoch 8, Train Loss: 1178.1088
Epoch 8, Test Loss: 717.9218
Epoch 9, Train Loss: 1235.5069
Epoch 9, Test Loss: 519.5559
Saving model...
Epoch 10, Train Loss: 1236.0736
Epoch 10, Test Loss: 720.0445
Epoch 11, Train Loss: 1187.7287
Epoch 11, Test Loss: 1259.8217
Epoch 12, Train Loss: 1133.4341
Epoch 12, Test Loss: 514.8679
Saving model...
Epoch 13, Train Loss: 1092.0301
Epoch 13, Test Loss: 479.

In [None]:
# Markup of the frames to predict on, reshaped the same way as the training markup.
df_test = pd.read_json('/content/test/markup.json').T.reset_index()
df_test.columns = ['image', 'x_label', 'y_label']

# NOTE: `test_set` and `test_loader` are rebound here and no longer refer to
# the held-out split used during training. test=True makes every item carry
# its file name.
test_set = VPDataset(df=df_test,
                     root_dir='/content/test_2/',
                     transform=transforms.Compose([ToTensor()]),
                     test=True)

test_loader = DataLoader(test_set, batch_size=1, shuffle=False, num_workers=0)

# map_location lets a checkpoint saved on a GPU be restored on whatever
# device this session runs on (including CPU-only runtimes).
model.load_state_dict(torch.load('/content/model_mob_3.pt', map_location=device))
model.eval()
results = {}
dict_save = {}
with torch.no_grad():
    for (image, label), img_name in tqdm(test_loader):
        image = image.to(device)
        # Batch size is 1: store the single prediction under the single file name.
        results[img_name[0]] = model(image).tolist()[0]

HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))




In [None]:
# Write the {file name: [x, y]} predictions to disk as JSON.
with open('/content/predicted.json', 'w') as out_file:
    out_file.write(json.dumps(results))

In [None]:
def read_from_json(path):
    """Return the object decoded from the JSON file at ``path``."""
    with open(path, 'r') as handle:
        return json.loads(handle.read())
# Read the saved predictions back, together with a markup file used as reference.
# NOTE(review): the predictions were produced from /content/test/markup.json,
# while the reference loaded here is /content/new_ds/markup.json — confirm that
# both files describe the same images.
answers = read_from_json('/content/predicted.json')
gt = read_from_json('/content/new_ds/markup.json')
answers

{'video-18-frame-1012.jpg': [184.2267303466797, 165.25729370117188],
 'video-18-frame-1077.jpg': [174.3859100341797, 118.67231750488281],
 'video-18-frame-1079.jpg': [122.66914367675781, 181.65625],
 'video-18-frame-1187.jpg': [174.4295654296875, 115.2681884765625],
 'video-18-frame-1205.jpg': [170.3206329345703, 186.00027465820312],
 'video-18-frame-1219.jpg': [200.77931213378906, 152.51803588867188],
 'video-18-frame-1231.jpg': [143.15139770507812, 186.08456420898438],
 'video-18-frame-1239.jpg': [178.7899169921875, 152.90907287597656],
 'video-18-frame-1246.jpg': [189.56640625, 154.51785278320312],
 'video-18-frame-1286.jpg': [141.0116424560547, 161.17637634277344],
 'video-18-frame-1296.jpg': [166.2876739501953, 123.81752014160156],
 'video-18-frame-1391.jpg': [127.1483154296875, 172.91336059570312],
 'video-18-frame-1405.jpg': [132.2098846435547, 172.44940185546875],
 'video-18-frame-1416.jpg': [173.70704650878906, 151.62229919433594],
 'video-18-frame-1449.jpg': [201.971389770507