- TODO: try bigger crops?

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
cd ../src/

## Imports

In [None]:
import os
import re
import cv2
import time
import torch
import imageio
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from collections import Counter
from tqdm.notebook import tqdm
from skimage.transform import resize

In [None]:
from params import *
from utils.torch import seed_everything

## Data

### Load

In [None]:
# Load the training table from `df_train.csv` located under DATA_PATH
# (DATA_PATH is concatenated as a string, so it must end with a path separator).
df_train = pd.read_csv(DATA_PATH + 'df_train.csv')

In [None]:
# Shift every value of the `impact` column down by one, in place.
# Presumably this maps 1/2-coded labels to 0/1 — TODO confirm against the raw data.
# NOTE: not idempotent — re-running this cell subtracts 1 again.
df_train['impact'] -= 1

In [None]:
# Remove, in place, every row that has a missing value in any column.
df_train.dropna(inplace=True)

In [None]:
# Per-frame flag: the maximum of `extended_impact` over all rows sharing an
# `image_name`, broadcast back onto every row of that frame.
frame_impacts = (
    df_train.groupby('image_name')["extended_impact"]
    .max()
    .rename("frame_has_impact")
    .reset_index()
)
df_train = df_train.merge(frame_impacts, on="image_name")
# Optional filter, currently disabled: keep only frames that contain an impact.
# df_train = df_train[df_train['frame_has_impact'] == 1]

In [None]:
# Attach the fold assignment stored in `folds.csv` (under OUT_DIR) to every row,
# joining on the `video` column. The default inner join drops rows whose video
# is absent from the folds file.
folds = pd.read_csv(OUT_DIR + "folds.csv")
df_train = df_train.merge(folds, on="video")

## Dataset

In [None]:
from data.dataset import NFLDatasetCls
from data.transforms import get_transfos_cls

In [None]:
import cv2
import torch
import numpy as np
from torch.utils.data import Dataset

class NFLDatasetCls3D(Dataset):
    """
    Dataset returning short clips of crops for one (video, label) pair.

    The input rows are grouped by ("video", "label") and every group becomes
    one item. Each access draws a random temporal window of `n_frames` crops,
    spaced `stride` rows apart, from the group.

    NOTE(review): the window is built from row positions inside each group, so
    the rows of `df` are assumed to already be in chronological order — confirm.

    Attributes:
        transforms: Stored, but not applied to the returned frames yet.
        target_name (str): Column holding the per-row target.
        root (str): Directory prepended to every crop file name.
        stride (int): Step, in rows of a group, between two sampled frames.
        n_frames (int): Number of frames per clip.
        groups (pandas DataFrame): One row per kept (video, label) group, with
            the remaining columns aggregated into lists.
        players (numpy array): "<video>_<label>" string for every input row.
    """

    def __init__(self, df, transforms=None, target_name="impact", root="", stride=1, n_frames=9):
        """
        Args:
            df (pandas DataFrame): Needs "video", "label", "crop_name" and the target column.
            transforms (optional): Kept on the instance; currently unused. Defaults to None.
            target_name (str, optional): Name of the target column. Defaults to "impact".
            root (str, optional): Directory containing the crops. Defaults to "".
            stride (int, optional): Row step between sampled frames. Defaults to 1.
            n_frames (int, optional): Frames per clip. Defaults to 9.
        """
        super().__init__()
        self.transforms = transforms
        self.target_name = target_name
        self.root = root
        self.stride = stride
        self.n_frames = n_frames

        groups = df.groupby(["video", "label"]).agg(list).reset_index()
        # Groups with fewer than n_frames * stride rows are discarded.
        long_enough = groups["crop_name"].apply(len) >= n_frames * stride
        self.groups = groups[long_enough].reset_index(drop=True)

        self.players = (df['video'] + "_" + df['label']).values

    def __len__(self):
        return len(self.groups)

    def _sample_indices(self, n_available):
        """
        Draws the row positions of one random clip inside a group.

        For an even `n_frames` the window has one more frame after the middle
        frame than before it. The bounds of the random draw follow the actual
        first/last offsets, so a group of exactly n_frames * stride rows is
        valid whatever the parity of `n_frames`.

        Args:
            n_available (int): Number of rows in the group.

        Returns:
            numpy array of int: Increasing positions, `stride` apart.
        """
        first_offset = -((self.n_frames - 1) // 2)
        last_offset = self.n_frames // 2
        offsets = np.arange(first_offset, last_offset + 1) * self.stride

        mid_frame = np.random.randint(-offsets[0], n_available - offsets[-1])
        return mid_frame + offsets

    @staticmethod
    def _window_target(targets, indices):
        """
        Returns the maximum target over the whole temporal span of a clip.

        The span goes from the first to the last sampled row, both included,
        and also covers the rows skipped by the stride.

        Args:
            targets (numpy array): Targets of every row of the group.
            indices (numpy array): Sampled row positions, increasing.

        Returns:
            Maximum value of `targets` between indices[0] and indices[-1].
        """
        # "+ 1" keeps the last sampled frame inside the slice.
        return np.max(targets[indices[0]:indices[-1] + 1])

    def __getitem__(self, idx):
        """
        Loads a randomly positioned clip from group `idx`.

        Args:
            idx (int): Group index.

        Returns:
            list: `n_frames` images as returned by cv2.imread
                (BGR arrays, or None for a file that could not be read).
            Target of the clip, see `_window_target`.
        """
        image_names = np.array(self.groups["crop_name"][idx])
        targets = np.array(self.groups[self.target_name][idx])

        indices = self._sample_indices(len(targets))

        images = [cv2.imread(f"{self.root}/{img}") for img in image_names[indices]]
        target = self._window_target(targets, indices)

        # NOTE: self.transforms is not applied to the frames yet.

        return images, target


In [None]:
# Clip dataset used for the visual checks below:
# 5 frames per clip, one frame every 3 rows, labelled with `extended_impact`.
dataset = NFLDatasetCls3D(
    df=df_train.copy(),
    root=CROP_PATH,
    target_name='extended_impact',
    stride=3,
    n_frames=5,
    transforms=get_transfos_cls(train=True, visualize=True),
)

In [None]:
# Fetch one item to check that the dataset can be indexed.
i = 0

images, y = dataset[i]

# Disabled single-image preview. It refers to an `image` variable that this
# cell does not define, so it cannot be re-enabled as is.
# plt.figure(figsize=(4, 4))
# plt.imshow(image.numpy().transpose(1, 2, 0).copy())
# plt.title(f'Sample #{i}, y={y}')
# plt.show()

In [None]:
# Plot the frames of randomly drawn clips whose target is truthy.
# seed_everything presumably seeds NumPy, which makes the draw below
# reproducible — TODO confirm in utils.torch.
seed_everything(0)

n_cols = 3  # frames per row of the preview grid

for sample_idx in np.random.choice(len(dataset), 100):
    images, y = dataset[sample_idx]

    if not y:
        continue

    # As many rows as needed to fit every frame (ceil division), 5 inches each.
    n_rows = -(-len(images) // n_cols)
    plt.figure(figsize=(15, 5 * n_rows))

    for frame_idx, image in enumerate(images):
        plt.subplot(n_rows, n_cols, frame_idx + 1)
        # cv2.imread returns BGR channels while matplotlib expects RGB.
        plt.imshow(image[:, :, ::-1])
        # Separate names for the sample and the frame keep the title accurate.
        plt.title(f'Sample #{sample_idx}, frame {frame_idx}, y={y}')
    plt.show()

## Model

In [None]:
from model_zoo.models_cls import get_model_cls

In [None]:
# Build an 'efficientnet-b0' classifier for the forward-pass check below.
model = get_model_cls('efficientnet-b0')

In [None]:
# Rebind `dataset` to the single-crop dataset, with training transforms and
# without the visualisation mode, to feed the model check below.
dataset = NFLDatasetCls(
    df_train.copy(),
    root=CROP_PATH,
    transforms=get_transfos_cls(train=True, visualize=False),
)

In [None]:
# First sample of the dataset and its target.
image, y = dataset[0]

In [None]:
# Forward pass on a batch holding the single sample (batch axis added in front).
pred = model(image.unsqueeze(0))

In [None]:
# Display the model output for the single-sample batch.
pred

## Training

In [None]:
from training.main_cls import k_fold_cls

In [None]:
from utils.logger import prepare_log_folder, save_config, create_logger

In [None]:
class Config:
    """
    Parameters used for training.

    The class itself (not an instance) is passed to `save_config` and
    `k_fold_cls`, so every setting is a class attribute.
    """
    # General
    target_name = "extended_impact"  # presumably the df_train column used as label — confirm in k_fold_cls
    seed = 42
    verbose = 1
    img_path = CROP_PATH  # same constant that is passed as `root` to the datasets in this notebook
    device = "cuda" if torch.cuda.is_available() else "cpu"  # falls back to CPU when CUDA is unavailable
    save_weights = True

    # k-fold
    k = 5
    random_state = 0
    selected_folds = [0, 1, 2, 3, 4]  # presumably the folds to train; here all k of them — confirm

    # Model
    name = "efficientnet-b4"  # other names noted by the author: "resnet18", "resnet34"
    num_classes = 1

    # Training
    batch_size = 64
    # 5 samples per 64 batch elements, scaled with batch_size and truncated to an int (5 here).
    samples_per_player = int(5 / 64 * batch_size)
    optimizer = "Adam"

    acc_steps = 1  # presumably gradient accumulation steps — TODO confirm
    epochs = 20
    swa_first_epoch = 15  # presumably the epoch at which weight averaging starts — TODO confirm

    lr = 1e-3  # 5e-4 / 1e-3
    warmup_prop = 0.05
    val_bs = batch_size * 2  # validation batches are twice the training batch size (128 here)

    first_epoch_eval = 0


In [None]:
# When DEBUG is True, the training cell below skips the creation of the log
# folder, config dump and logger, and `log_folder` stays None.
DEBUG = False
log_folder = None

In [None]:
# Outside of debug mode: create a log folder under LOG_PATH_CLS, dump the
# config to `config.json` inside it and set up a logger writing to `logs.txt`.
if not DEBUG:
    log_folder = prepare_log_folder(LOG_PATH_CLS)
    print(f'Logging results to {log_folder}')
    # The path is built by string concatenation, so log_folder must end with a separator.
    config_df = save_config(Config, log_folder + 'config.json')
    create_logger(directory=log_folder, name="logs.txt")

# Run the k-fold training. `log_folder` is None in debug mode.
# `pred_oof` presumably holds the out-of-fold predictions — confirm in training.main_cls.
pred_oof = k_fold_cls(
    Config,
    df_train,
    log_folder=log_folder
)