In [None]:
# install required libraries here

!pip install av kaggle fastai fastkaggle pandas numpy

In [16]:
from fastkaggle import *
from google.colab import files

from fastai.data.all import *
from fastai.vision.all import *
from fastai.vision.widgets import *

In [None]:
# Upload kaggle.json (the Kaggle API credentials file) from the local machine.
# The return value holds the uploaded file contents; binding it to a name keeps
# it from being echoed as the cell result, which would store the API key in the
# saved notebook output.
uploaded = files.upload()

In [None]:
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json

In [20]:
# Kaggle competition identifier passed to setup_comp below
comp = 'nfl-player-contact-detection'

# setup_comp is presumably provided by the `fastkaggle` wildcard import (verify);
# the recorded output of this cell shows the competition zip being downloaded to /content.
# `path` is the handle to the competition data used by the rest of the notebook.
path = setup_comp(comp, install='fastai "timm>=0.6.2.dev0"')

# show what the competition data directory contains
path.ls()

Downloading nfl-player-contact-detection.zip to /content


100%|██████████| 3.84G/3.84G [00:34<00:00, 119MB/s]





## Data Exploration

The most important thing for us to do now that we have the data is to make sure we understand what it is we are given!

We do this by looking at the data description on the Kaggle site: https://www.kaggle.com/competitions/nfl-player-contact-detection

and printing out some of the data below.

In [None]:
from IPython.display import display, Video

# define the path to the training video files
train_data = os.path.join(path, 'train')

# get_files is a fast.ai function
video_paths = get_files(train_data, extensions='.mp4')

# Preview the first three videos inline. Slicing (rather than indexing 0, 1, 2)
# avoids an IndexError when fewer than three videos are found.
for video_path in video_paths[:3]:
    display(Video(str(video_path), embed=True, width=255, height=255))

There are three video angles of each play: Sideline, Endzone and All29.

There are also csv files:

* `train_labels.csv` - These are the labelled contacts for the videos in the /train folder for every player combination. What this means is that for every single frame of video we have an indicator saying whether player 1 has contacted EACH other player, whether player 2 has contacted EACH other player, and so on. This is why this file is over 400MB: it is basically a registry of (player_x * num_players) * num_frames rows, each with a contact_occurred flag.

* `train_baseline_helmets.csv` - These are baseline helmet detection and assignment boxes for the training and test set. These are useful when predicting contacts. It provides the bounding boxes for all detected helmets. Not all helmets are detected in every frame.

* `train_player_tracking.csv` -  This is 10 Hz tracking data for each player on the field during the provided plays. What this means is that for every 1/10th of a second, we have the location, acceleration and direction of each player. This is useful for numerous reasons, including figuring out exactly how close players are to each other.

* `train_video_metadata.csv` - contains timestamps associated with each Sideline and Endzone view for syncing with the player tracking data.

## Feature Engineering

Reading the description of the columns in the train_labels.csv, the contact_id column is an amalgamation of several potentially useful data points. 

Let's do some feature engineering to parse them out into their own columns.

In [None]:
def expand_contact_id(df):
    """
    Split the ``contact_id`` column out into separate columns.

    ``contact_id`` is assumed to have the form
    ``<game_key>_<play_id>_<step>_<player_1>_<player_2>``. The value is split
    once, from the right, into at most four pieces, so ``game_play`` is
    everything before the last three fields. This avoids relying on a fixed
    character width for the ``<game_key>_<play_id>`` prefix.

    Parameters
    ----------
    df : DataFrame with a string ``contact_id`` column. It is modified in
        place.

    Returns
    -------
    The same ``df`` object with four columns added or overwritten:
    ``game_play`` (str), ``step`` (int), ``nfl_player_id_1`` (str) and
    ``nfl_player_id_2`` (str).

    Raises
    ------
    ValueError if the step field of any row cannot be converted to an int.
    """
    # Split once and reuse the pieces instead of re-splitting for every column.
    parts = df["contact_id"].str.rsplit("_", n=3)
    df["game_play"] = parts.str[0]
    df["step"] = parts.str[-3].astype("int")
    # Player ids stay strings: the second id is not guaranteed to be numeric.
    df["nfl_player_id_1"] = parts.str[-2]
    df["nfl_player_id_2"] = parts.str[-1]
    return df

In [None]:
# Import pandas explicitly so this cell does not depend on a wildcard import
# happening to expose it.
import pandas as pd

# Load the competition CSVs; `path` behaves like a pathlib.Path (see `path.ls()`
# above), so files are addressed with the `/` operator.
labels = expand_contact_id(pd.read_csv(path/"train_labels.csv"))
print("Number of labelled contacts : ",len(labels))
train_tracking = pd.read_csv(path/"train_player_tracking.csv")
print("Number of tracking records : ",len(train_tracking))
train_helmets = pd.read_csv(path/"train_baseline_helmets.csv")
print("Number of helmet detections : ",len(train_helmets))
train_video_metadata = pd.read_csv(path/"train_video_metadata.csv")

Next, flatten player contact data (train_labels.csv) and player tracking data (train_player_tracking.csv) into one dataframe.

In [None]:
import av

def extract_frames(video_path):
  '''Lazily decode a video file, yielding each video frame as a PIL image.

  video_path: path to the video file (converted with `str` before opening).

  This is a generator: frames are decoded one at a time as the caller
  iterates. The container is closed when iteration finishes or when the
  generator is closed or garbage-collected.
  '''
  # The context manager releases the underlying file handle, which the
  # previous open-without-close version leaked.
  with av.open(str(video_path)) as container:
    # Ask for the first *video* stream explicitly; a bare index 0 selects
    # whichever stream comes first in the file, which need not be video.
    for frame in container.decode(video=0):
      yield frame.to_image()

In [None]:
# define the path to the test video files
test_data = os.path.join(path, 'test')

# get_files is a fast.ai function
video_paths = get_files(test_data, extensions='.mp4')

# verify that a subset of the paths look correct
video_paths[0:4]

In [None]:
# generate several frames from the first video to ensure correctness

frames = list(extract_frames(video_paths[0]))

# verify images were created successfully

show_images(frames[0:5])

We now want to export all videos as frames and place them in a folder of the same name.

In [None]:
# root output folder for the extracted frames: 'nfl-frames', next to the competition data directory
path_frames = path.parent/'nfl-frames'

In [None]:
def mp42frames(video_path, path_frames=path_frames, force=False):
    """Extract frames from mp4 file to jpgs.

    Frames are written to
    ``path_frames/<name of the video's parent folder>/<video file name without suffix>/``
    as ``0.jpg``, ``1.jpg``, ... in the order `extract_frames` yields them.

    video_path: path object for the video (must support `.parent`,
        `.relative_to` and `.with_suffix`).
    path_frames: root output folder. The default is the value the module-level
        `path_frames` had when this function was defined.
    force: when true, extract even if the destination folder already exists.
    """
    # keep only the parent folder name and the file name, then drop the file extension
    dest_path = path_frames/video_path.relative_to(video_path.parent.parent).with_suffix('')
    # An existing destination folder is treated as "already extracted"; its
    # contents are not checked, so a partially written folder is also skipped.
    if not dest_path.exists() or force:
        dest_path.mkdir(parents=True, exist_ok=True)
        for i, frame in enumerate(extract_frames(video_path)):
            frame.save(dest_path/f'{i}.jpg')