In [1]:
#from dataloader import GolfDB, Normalize, ToTensor
from model import EventDetector
from util import *
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
import os
import torchvision
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import cv2
import tqdm


# training configuration
split = 1
iterations = 30
it_save = 150  # save model every 100 iterations
n_cpu = 6
seq_length = 32
bs = 16  # batch size
k = 10  # frozen layers

model = EventDetector(pretrain=False,
                      width_mult=1.,
                      lstm_layers=1,
                      lstm_hidden=256,
                      bidirectional=True,
                      dropout=False)
try:
    save_dict = torch.load('models/swingnet_1800.pth.tar')
except:
    print("Model weights not found. Download model weights and place in 'models' folder. See README for instructions")

freeze_layers(k, model)
model.train()
model.cuda();


In [2]:
path = 'train_dataset'

files = set(os.listdir(os.path.join(path, 'positions_of_the_golf_swing_train')))
p_syst = pd.read_excel(os.path.join(path, 'positions_markup_train.xlsx'))
p_syst = p_syst.loc[p_syst['P1'] != '-', :]
p_syst = p_syst.loc[p_syst['Название видео'].isin(files)]
names = list(p_syst.columns)
names[0] = 'filename'
p_syst.columns = names
p_syst['events'] = p_syst.iloc[:,1:].values.tolist()
p_syst['path'] = p_syst['filename'].apply(lambda x: os.path.join(path, 'positions_of_the_golf_swing_train', x))
p_syst['events']= p_syst['events'].apply(lambda x: [max(x[0]-1, 0)] + x + [x[-1]])
p_syst = p_syst.reset_index(drop = True)

In [3]:
RESNET_trasforms = torchvision.transforms.Compose([
    torchvision.transforms.Resize(
        size=(160, 160)
    ),
    torchvision.transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

class GolfDB(Dataset):
    def __init__(self, data_file, seq_length, transform=None, train=True):
        self.df = data_file
        self.seq_length = seq_length
        self.transform = transform
        self.train = train

    def __len__(self):
        return self.df.shape[0] - 1

    def __getitem__(self, idx):
        a = self.df.loc[idx, :]  # annotation info
        images, labels = [], []
        cap = cv2.VideoCapture(a['path'])
        start_frame = np.random.randint(a['events'][0], a['events'][-1] + 1)
        events = a['events']
        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
        pos = start_frame
        while len(images) < self.seq_length:

            if pos == -1:
                images, labels = [], []
                cap = cv2.VideoCapture(a['path'])
                start_frame = a['events'][0]
                events = a['events']
                cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
                pos = start_frame
            ret, img = cap.read()
            if ret:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                images.append(img)
                if pos in events[1:-1]:
                    labels.append(np.where(np.array(events[1:-1]) == pos)[0][0])
                else:
                    labels.append(8)
                pos += 1
            else:
                cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
                pos = -1
        cap.release()
        images = torch.Tensor(np.asarray(images)) / 255
        images = images.permute(0, 3, 1, 2)
        if self.transform:
            images = self.transform(images)
        sample = {'images': images, 'labels':np.asarray(labels)}
        return sample

In [4]:
train_dataset = GolfDB(p_syst, seq_length, transform=RESNET_trasforms )
data_loader = DataLoader(train_dataset,
                         batch_size=bs,
                         shuffle=True,
                         num_workers=1,
                         drop_last=True)

In [5]:


# the 8 golf swing events are classes 0 through 7, no-event is class 8
# the ratio of events to no-events is approximately 1:35 so weight classes accordingly:
weights = torch.FloatTensor([1/8, 1/8, 1/8, 1/8, 1/8, 1/8, 1/8, 1/8, 1/35]).cuda()
criterion = torch.nn.CrossEntropyLoss(weight=weights)
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.0001)

losses = AverageMeter()

In [6]:
if not os.path.exists('models'):
    os.mkdir('models')

for i in tqdm.tqdm_notebook(range(iterations)):
    loss_vals = []
    for sample in (data_loader):
        images, labels = sample['images'].cuda(), sample['labels'].cuda()
        logits = model(images)
        labels = labels.view(bs*seq_length)
        loss = criterion(logits, labels)
        optimizer.zero_grad()
        loss.backward()
        losses.update(loss.item(), images.size(0))
        optimizer.step()
        loss_vals.append(losses.avg)
        #print('Iteration: {}\tLoss: {loss.val:.4f} ({loss.avg:.4f})'.format(i, loss=losses))
        i += 1
        if i % it_save == 0:
            torch.save({'optimizer_state_dict': optimizer.state_dict(),
                        'model_state_dict': model.state_dict()}, '/home/data/d.kashin/temp/train_dataset/models/swingnetv2_{}.pth.tar'.format(i))
        if i == iterations:
            break
    print(np.mean(loss_vals))

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


  0%|          | 0/30 [00:00<?, ?it/s]

2.0555479998389883
1.7397555571936425
1.564717691323974
1.4824351498604693
1.4584429556484027
1.423753201986058
1.398185340985201
1.3805957906600814
1.365717230074809
1.3496634049705296
1.3388256221235864
1.3325194006844143
1.3243459293318374
1.3172410115932311
1.3109214421798168
1.3043809798386619
1.300060903404083
1.2952399007224618
1.2933328802749138
1.2876440260229405
1.2802545436926647
1.2749334131081052
1.2683306403845618
1.2636879274696147
1.2557612643344702
1.2491177863033482
1.2407576975685681
1.2333725296712086
1.2285333487632601
1.2251272656415637


In [7]:
losses.avg

1.2251272656415637

In [9]:
 torch.save({'optimizer_state_dict': optimizer.state_dict(),
                        'model_state_dict': model.state_dict()}, '/home/data/d.kashin/temp/train_dataset/models/swingnetv2_30.pth.tar'.format(i))

In [10]:
model

EventDetector(
  (cnn): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)
        (3): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (4): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)
        (3