First we import all the libraries we need.

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
from IPython.display import Video
import cv2
import math
import torch
import glob
from tqdm.auto import tqdm
from multiprocessing import Pool, cpu_count
import time
import argparse
import logging
from sys import exit
import shutil

from timm.models import create_model, apply_test_time_pool
from timm.data import ImageDataset, create_loader, resolve_data_config
from timm.utils import AverageMeter, setup_default_logging

  from .autonotebook import tqdm as notebook_tqdm


We do some setting up here. First we define `debug`. If `debug` is set then that means that we do not actually want to train the model we're just checking if everything is working. That means small number of epochs and small amount of videos to train on.

We will try different error tolrances for each type of event and tune it as a hyper parameter. We basically instead of giving the model a second where an event happens we give it a range of [event_timestamp - err_tol, event_timestamp + err_tol] and train it on that. 

Since the dataset is very small, we dine the train/validation split manually. Once everything is ready we will try different combinations manually and keep the one that gives the best result.

In [2]:
debug = False
if debug:
    epochs = 3
else:
    epochs = 20

err_tol = {
    'challenge': [ 0.30, 0.40, 0.50, 0.60, 0.70 ],
    'play': [ 0.15, 0.20, 0.25, 0.30, 0.35 ],
    'throwin': [ 0.15, 0.20, 0.25, 0.30, 0.35 ]
}
video_id_split = {
    'val':[
         '3c993bd2_0',
         '3c993bd2_1',
    ],
    'train':[
         '1606b0e6_0',
         '1606b0e6_1',
         '35bd9041_0',
         '35bd9041_1',
         '407c5a9e_1',
         '4ffd5986_0',
         '9a97dae4_1',
         'cfbe2e94_0',
         'cfbe2e94_1',
         'ecf251d4_0',
    ]
}
event_names = ['challenge', 'throwin', 'play']

We then load the data and remove every value of the form [id, timestamp, event_type, attr] (except for types `start` and `end`) and add two values in its place [id, timestamp - err_tol, start_event_type, attr] and [id, timestamp + err_tol, end_event_type, attr].

In [3]:
df = pd.read_csv("../dfl-bundesliga-data-shootout/train.csv")
additional_events = []
for arr in df.sort_values(['video_id','time','event','event_attributes']).values:
    if arr[2] in err_tol:
        tol = err_tol[arr[2]][0]/2
        additional_events.append([arr[0], arr[1]-tol, 'start_'+arr[2], arr[3]])
        additional_events.append([arr[0], arr[1]+tol, 'end_'+arr[2], arr[3]])
df = pd.concat([df, pd.DataFrame(additional_events, columns=df.columns)])
df = df[~df['event'].isin(event_names)]
df = df.sort_values(['video_id', 'time'])
df

Unnamed: 0,video_id,time,event,event_attributes
0,1606b0e6_0,200.265822,start,
0,1606b0e6_0,201.000000,start_challenge,['ball_action_forced']
1,1606b0e6_0,201.300000,end_challenge,['ball_action_forced']
2,1606b0e6_0,202.765822,end,
3,1606b0e6_0,210.124111,start,
...,...,...,...,...
11214,ecf251d4_0,3058.072895,end,
11215,ecf251d4_0,3068.280519,start,
8762,ecf251d4_0,3069.472000,start_throwin,['pass']
8763,ecf251d4_0,3069.622000,end_throwin,['pass']


In the next cell we go over every video and split it into photos. We assign 4 different kinds of photos. If a certain frame falls between the start and end of a certain event then the photo of that frame is assigned to that type. If a certain frame doesn't fall in any event then we assign it to type `background` which means no event is happening in this frame.

In [4]:
def extract_training_images(args):
        video_id, split = args
        video_path = f"../dfl-bundesliga-data-shootout/train/{video_id}.mp4"
        cap = cv2.VideoCapture(video_path)

        fps = cap.get(cv2.CAP_PROP_FPS)
        time_interval = 1/fps

        df_video = df[df.video_id == video_id]
        if debug:
            df_video = df_video.head(10)
        print(split, video_id, df_video.shape)

        arr = df_video[['time','event']].values
        for idx in range(len(arr)-1):
            crr_frame = int(math.ceil(arr[idx,0] * fps))
            nxt_frame = int(math.ceil(arr[idx+1,0] * fps))
            crr_event = arr[idx,1]

            crr_event = crr_event
            if crr_event == 'start':
                crr_status = 'background'
            elif crr_event == 'end':
                # should use as background?
                continue
            else:
                start_or_end, crr_status = crr_event.split('_', 1)
                if start_or_end == 'end':
                    crr_status = 'background'

            result_dir = f"../work/split_images/{split}/{crr_status}"
            if not os.path.exists(result_dir):
                os.makedirs(result_dir, exist_ok=True)

            this_frame = crr_frame
            while this_frame < nxt_frame:
                frame_num = this_frame

                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
                ret, frame = cap.read()
                out_file = f'{result_dir}/{video_id}_{frame_num:06}.jpg'
                cv2.imwrite(out_file, frame)

                if crr_status == 'background':
                    this_frame += 10
                else:
                    this_frame += 1

#shutil.rmtree('../work/split_images')
#for split in video_id_split:
#    video_ids = video_id_split[split]
#    for video_id in video_ids:            
#        extract_training_images([video_id, split])
#print('done')

Training was causing memory problems, we used the following line to elevate the problem.

In [5]:
torch.cuda.empty_cache()

We use the pretrianed tf_efficientnet_b5_ap model from the timm library.

In [6]:
shutil.rmtree('./output')

In [7]:
%run /git_workspace/PML_project/PMLDL-project/image_models/train.py /git_workspace/PML_project/PMLDL-project/work/split_images/ \
    -b 4 \
    --amp \
    --epochs 20 \
    --pretrained \
    --num-classes 4 \
    --model tf_efficientnet_b5_ap \
    --experiment dfl-benchmark-training-fix-extract-images


Training with a single process on 1 GPUs.
Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b5_ap-9e82fae8.pth)
Model tf_efficientnet_b5_ap created, param count:28348980
Data processing configuration for current model + dataset:
	input_size: (3, 456, 456)
	interpolation: bicubic
	mean: (0.5, 0.5, 0.5)
	std: (0.5, 0.5, 0.5)
	crop_pct: 0.934
Using native Torch AMP. Training in mixed precision.
Scheduled epochs: 30
Test: [   0/2145]  Time: 8.083 (8.083)  Loss:  0.3076 (0.3076)  Acc@1: 100.0000 (100.0000)  Acc@5: 100.0000 (100.0000)
Test: [  50/2145]  Time: 0.061 (0.219)  Loss:  0.4253 (0.5850)  Acc@1: 100.0000 (84.3137)  Acc@5: 100.0000 (100.0000)
Test: [ 100/2145]  Time: 0.062 (0.142)  Loss:  0.7939 (0.6154)  Acc@1: 75.0000 (83.1683)  Acc@5: 100.0000 (100.0000)
Test: [ 150/2145]  Time: 0.061 (0.115)  Loss:  0.5273 (0.5852)  Acc@1: 100.0000 (86.9205)  Acc@5: 100.0000 (100.0000)
Test: [ 200/2145]  Time: 0.0

We take the training checkpoints and average the weights from the last few ones using the script provided by timm to save it as a model so we don't have to train it every time.

In [10]:
os.remove('/git_workspace/PML_project/PMLDL-project/model/tf_efficientnet_b5_ap-456-fix.pt')

In [11]:
%run /git_workspace/PML_project/PMLDL-project/image_models/avg_checkpoints.py --input /git_workspace/PML_project/PMLDL-project/notebooks/output/train/dfl-benchmark-training-fix-extract-images \
    --output /git_workspace/PML_project/PMLDL-project/model/tf_efficientnet_b5_ap-456-fix.pt

=> Extracting metric from checkpoint '/git_workspace/PML_project/PMLDL-project/notebooks/output/train/dfl-benchmark-training-fix-extract-images\checkpoint-0.pth.tar'
=> Extracting metric from checkpoint '/git_workspace/PML_project/PMLDL-project/notebooks/output/train/dfl-benchmark-training-fix-extract-images\checkpoint-1.pth.tar'
=> Extracting metric from checkpoint '/git_workspace/PML_project/PMLDL-project/notebooks/output/train/dfl-benchmark-training-fix-extract-images\last.pth.tar'
=> Extracting metric from checkpoint '/git_workspace/PML_project/PMLDL-project/notebooks/output/train/dfl-benchmark-training-fix-extract-images\model_best.pth.tar'


Loaded state_dict from checkpoint '/git_workspace/PML_project/PMLDL-project/notebooks/output/train/dfl-benchmark-training-fix-extract-images\checkpoint-0.pth.tar'


Selected checkpoints:
53.73412559711057 /git_workspace/PML_project/PMLDL-project/notebooks/output/train/dfl-benchmark-training-fix-extract-images\checkpoint-0.pth.tar
57.6954444832809 /git_workspace/PML_project/PMLDL-project/notebooks/output/train/dfl-benchmark-training-fix-extract-images\checkpoint-1.pth.tar
57.6954444832809 /git_workspace/PML_project/PMLDL-project/notebooks/output/train/dfl-benchmark-training-fix-extract-images\last.pth.tar
57.6954444832809 /git_workspace/PML_project/PMLDL-project/notebooks/output/train/dfl-benchmark-training-fix-extract-images\model_best.pth.tar


Loaded state_dict from checkpoint '/git_workspace/PML_project/PMLDL-project/notebooks/output/train/dfl-benchmark-training-fix-extract-images\checkpoint-1.pth.tar'
Loaded state_dict from checkpoint '/git_workspace/PML_project/PMLDL-project/notebooks/output/train/dfl-benchmark-training-fix-extract-images\last.pth.tar'
Loaded state_dict from checkpoint '/git_workspace/PML_project/PMLDL-project/notebooks/output/train/dfl-benchmark-training-fix-extract-images\model_best.pth.tar'


=> Saved state_dict to '/git_workspace/PML_project/PMLDL-project/model/tf_efficientnet_b5_ap-456-fix.pt, SHA256: 26224193caa196fd45979e16a519af99220119cfa11cfe3b5e9df76e2fd93eda'
