In [None]:
import os
import gc
import cv2
import time
import random

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp

# Utils
from tqdm.auto import tqdm

# For Image Models
import timm

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2
from pytorch_toolbelt import losses as L
import warnings
warnings.filterwarnings("ignore")

## Restrict this process to the CUDA device with index 0.
## NOTE(review): this comment used to say "gpu:1" while the value below is '0' — confirm which device is intended.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

def seed_everything(seed=123):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and switches cuDNN to deterministic mode with
    benchmarking turned off.

    Args:
        seed: Integer seed applied to all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # The same seed goes to each library's global generator.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed,
                    torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seed_fn(seed)

    # Favour reproducible cuDNN kernels over auto-tuned ones.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


seed_everything()

In [None]:
class Customize_Model(nn.Module):
    """Pretrained ``csn_r101`` video backbone with a 12-output projection head.

    The backbone is fetched through ``torch.hub`` from
    ``facebookresearch/pytorchvideo`` and the ``proj`` layer of its last block
    is swapped for a fresh ``nn.Linear(2048, 12)``.
    """

    def __init__(self):
        super().__init__()
        backbone = torch.hub.load('facebookresearch/pytorchvideo', 'csn_r101', pretrained=True)
        # Replace the final projection so the network emits 12 values per clip.
        backbone.blocks[-1].proj = nn.Linear(in_features=2048, out_features=12, bias=True)
        self.model = backbone

    def forward(self, image):
        """Run the backbone on ``image`` and return its raw outputs."""
        return self.model(image)
    
# print(torch.hub.list('facebookresearch/pytorchvideo'))
# model= Customize_Model()
# x= torch.rand(1,3,16,512,512)
# x= model(x)
# x.shape

In [None]:
def get_train_transform(img_size):
    """Build the training augmentation pipeline.

    The pipeline resizes to ``img_size`` x ``img_size``, randomly jitters
    brightness/contrast, randomly flips horizontally, applies a random
    shift/scale/rotate and finally converts the image with ``ToTensorV2``.
    Keypoints are transformed alongside the image in ``'xy'`` format.

    Args:
        img_size: Side length passed to ``A.Resize`` for both dimensions.

    Returns:
        An ``A.Compose`` object.
    """
    keypoint_cfg = A.KeypointParams(format='xy')
    steps = [
        A.Resize(img_size, img_size),
        A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
        A.HorizontalFlip(p=0.5),
        A.ShiftScaleRotate(
            shift_limit=0.1,
            scale_limit=0.15,
            rotate_limit=10,
            interpolation=cv2.INTER_LINEAR,
            border_mode=0,
            p=0.7,
        ),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps, keypoint_params=keypoint_cfg)


def get_test_transform(img_size):
    """Build the evaluation pipeline: resize, then convert with ``ToTensorV2``.

    Keypoints are carried through the resize in ``'xy'`` format.

    Args:
        img_size: Side length passed to ``A.Resize`` for both dimensions.

    Returns:
        An ``A.Compose`` object.
    """
    keypoint_cfg = A.KeypointParams(format='xy')
    steps = [A.Resize(img_size, img_size), ToTensorV2(p=1.0)]
    return A.Compose(steps, keypoint_params=keypoint_cfg)

def get_resize_transform(height, width):
    """Return a pipeline containing a single always-applied ``A.Resize``.

    NOTE(review): the arguments are handed to ``A.Resize`` positionally in
    ``(width, height)`` order, i.e. swapped relative to this function's
    parameter names. ``read_video`` calls this as
    ``get_resize_transform(imgs.shape[1], CFG["depth"])`` and depends on that
    ordering, so it is preserved here — confirm before "fixing" it.

    Args:
        height: Forwarded as the second positional argument of ``A.Resize``.
        width: Forwarded as the first positional argument of ``A.Resize``.

    Returns:
        An ``A.Compose`` object without keypoint handling.
    """
    resize_step = A.Resize(width, height, p=1)
    return A.Compose([resize_step])

In [None]:
def read_video(path):
    """Decode a video into a grayscale frame stack resampled along the frame axis.

    Every frame is read with OpenCV, converted from BGR to grayscale and
    stacked. The stack is then passed through ``get_resize_transform`` so the
    first axis is resized to ``CFG["depth"]`` while the second axis keeps its
    original size.

    Args:
        path: Location of the video file, forwarded to ``cv2.VideoCapture``.

    Returns:
        ``np.ndarray`` laid out as (img_len, H, W).

    Raises:
        ValueError: If no frame could be decoded from ``path``.
    """
    frames = []
    cap = cv2.VideoCapture(path)
    try:
        while cap.isOpened():
            ret, img = cap.read()
            if not ret:
                break
            frames.append(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))
    finally:
        # Always free the decoder handle, even if a frame conversion fails.
        cap.release()

    if not frames:
        # Fail with a clear message instead of an IndexError on imgs.shape[1].
        raise ValueError(f"no frames could be read from video: {path!r}")

    ## resize z-axis
    imgs = np.array(frames)
    imgs = get_resize_transform(imgs.shape[1], CFG["depth"])(image=imgs)['image']

    return np.array(imgs)  ## (img_len, H, W)


class Customize_Dataset(Dataset):
    """Dataset yielding a video clip, its ball-type one-hot vector and landing point.

    Each row of ``df`` must provide ``BallType``, ``LandingX``, ``LandingY``
    and ``image_path``. Rows are addressed by position, so ``df`` does not
    need a 0..n-1 index.

    Args:
        df: DataFrame with one sample per row.
        transforms: Optional callable invoked as
            ``transforms(image=..., keypoints=[[x, y]])`` and returning a
            mapping with ``"image"`` and ``"keypoints"``.
        is_train: Stored on the instance; not read inside this class.
    """

    # Length of the one-hot ball-type vector (index 0 is the "empty" class).
    NUM_CLASSES = 10
    # Upper bound on augmentation retries when the landing keypoint is dropped.
    MAX_AUG_ATTEMPTS = 100

    def __init__(self, df, transforms=None, is_train=True):
        self.df = df
        self.transforms = transforms
        self.is_train = is_train

    def __getitem__(self, index):
        """Load, augment and package the sample at position ``index``.

        Returns:
            dict with
              * ``'image'``: float32 tensor, values divided by 255, with a
                leading axis of size 3 holding three copies of the gray clip,
              * ``'balltype'``: long tensor, one-hot of length 10,
              * ``'landing'``: float32 tensor ``[x, y]``,
              * ``'label'``: float32 tensor, one-hot followed by ``[x, y]``.

        Raises:
            RuntimeError: If the transform drops the landing keypoint on every
                one of ``MAX_AUG_ATTEMPTS`` attempts.
        """
        # Positional lookup: the DataLoader hands out positions, not index labels.
        data = self.df.iloc[index]

        balltype = np.zeros(self.NUM_CLASSES, dtype=np.int64)
        balltype[int(data['BallType'])] = 1  ## class 0 is empty

        # `np.int` was removed from NumPy; the builtin `int` is the same dtype.
        landing = np.asarray([data['LandingX'], data['LandingY']], dtype=int).tolist()

        # read_video returns (img_len, H, W); move the frame axis last so the
        # transform sees the frames as image channels.
        img = read_video(data['image_path'])
        img = img.transpose(1, 2, 0)

        landing_xy = landing
        if self.transforms:
            # Retry until the keypoint survives, but never loop forever: a
            # deterministic transform that drops it would otherwise hang.
            for _ in range(self.MAX_AUG_ATTEMPTS):
                trans = self.transforms(image=img, keypoints=[landing])
                aug_landing = trans["keypoints"]
                if len(aug_landing) != 0:
                    img = trans["image"]
                    landing_xy = aug_landing[0]
                    break
            else:
                raise RuntimeError(
                    f"landing keypoint {landing} was removed by the transform in all "
                    f"{self.MAX_AUG_ATTEMPTS} attempts (sample position {index})"
                )

        if not torch.is_tensor(img):
            # No tensor conversion happened in the pipeline: go from
            # frames-last back to frames-first ourselves.
            img = torch.from_numpy(np.ascontiguousarray(img.transpose(2, 0, 1)))

        ## convert to 3 channel
        img = img.unsqueeze(dim=0)
        img = img.expand(3, img.shape[1], img.shape[2], img.shape[3])

        landing_xy = np.asarray(landing_xy, dtype=np.float32)
        label = np.append(balltype, landing_xy, axis=0)
        return {
            # Division materialises a fresh tensor, so the expanded view is not shared.
            'image': (img / 255).to(torch.float32),
            'balltype': torch.tensor(balltype, dtype=torch.long),
            'landing': torch.tensor(landing_xy, dtype=torch.float32),
            'label': torch.tensor(label, dtype=torch.float32),
        }

    def __len__(self):
        """Number of rows in the underlying DataFrame."""
        return len(self.df)

In [None]:
class Customize_loss(nn.Module):
    """Classification loss over every output column except the last two.

    Only the cross-entropy term (label smoothing 0.05) contributes to the
    result. An ``nn.MSELoss`` is created as ``self.mse`` but is not applied
    in ``forward``.
    """

    def __init__(self):
        super().__init__()
        self.CE = nn.CrossEntropyLoss(weight=None, label_smoothing=0.05)
        self.mse = nn.MSELoss()

    def forward(self, y_pred, y_true):
        """Return the cross-entropy between the class parts of both tensors.

        Args:
            y_pred: Model outputs; the trailing two entries are ignored.
            y_true: Targets laid out like ``y_pred``; the trailing two entries
                are ignored and the rest is used as the class target.
        """
        class_logits = y_pred[..., :-2]
        class_targets = y_true[..., :-2]
        return self.CE(class_logits, class_targets)

In [None]:
def train_epoch(dataloader, model, criterion, optimizer):
    """Train ``model`` for one pass over ``dataloader`` with mixed precision.

    Gradients are accumulated over ``CFG['gradient_accumulation']`` batches
    before each optimizer step. Only the forward pass and the loss run under
    autocast; the backward pass and the optimizer step run outside of it.

    Args:
        dataloader: Iterable of batches with ``'image'`` and ``'label'`` tensors.
        model: Network to optimise; switched to train mode here.
        criterion: Callable ``criterion(preds, labels)`` returning a scalar loss.
        optimizer: Optimizer holding the model parameters.

    Returns:
        Mean of the per-batch (unscaled, undivided) loss values.
    """
    accum_steps = CFG['gradient_accumulation']
    scaler = amp.GradScaler()
    model.train()
    # Start from clean gradients so nothing leaks in from earlier work.
    optimizer.zero_grad()

    ep_loss = []
    pending_grads = False
    for i, data in enumerate(tqdm(dataloader)):

        imgs = data['image'].to('cuda')
        labels = data['label'].to('cuda')

        with amp.autocast():
            preds = model(imgs)
            loss = criterion(preds, labels)
        ep_loss.append(loss.item())

        # Backward runs outside autocast, as recommended for AMP.
        scaler.scale(loss / accum_steps).backward()
        pending_grads = True

        if (i + 1) % accum_steps == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            pending_grads = False

    if pending_grads:
        # The batch count was not a multiple of accum_steps: apply the
        # leftover gradients instead of carrying them into the next epoch.
        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad()

    return np.mean(ep_loss)

In [None]:
from metrics import *
from sklearn.metrics import recall_score, roc_auc_score

def AUC_score(all_pred, all_label):
    """Multiclass ROC AUC computed with the one-vs-one scheme.

    Args:
        all_pred: Per-class scores, passed to ``roc_auc_score`` as the scores.
        all_label: Ground-truth labels, passed as the targets.

    Returns:
        The value returned by ``roc_auc_score(..., multi_class='ovo')``.
    """
    return roc_auc_score(all_label, all_pred, multi_class='ovo')

def valid_epoch(dataloader, model, criterion):
    """Evaluate ``model`` on ``dataloader`` and print accuracy, recall and AUC.

    For the metrics, the first column and the last two columns of both the
    labels and the predictions are dropped. The remaining prediction columns
    are passed through a softmax and the remaining label columns are reduced
    with ``argmax``.

    Args:
        dataloader: Iterable of batches with ``'image'`` and ``'label'`` tensors.
        model: Network to evaluate; switched to eval mode here.
        criterion: Callable ``criterion(preds, labels)`` returning a scalar loss.

    Returns:
        Tuple ``(mean loss over batches, AUC)``.
    """
    model.eval()

    batch_losses = []
    prob_rows = []
    target_rows = []
    for batch in tqdm(dataloader):
        inputs = batch['image'].to('cuda')
        targets = batch['label'].to('cuda')

        with torch.no_grad():
            outputs = model(inputs)
            batch_losses.append(criterion(outputs, targets).item())

        target_rows.extend(targets[..., 1:-2].cpu().numpy())
        prob_rows.extend(outputs[..., 1:-2].cpu().softmax(dim=-1).numpy())

    ## calculate metrics
    all_label = np.array(target_rows).argmax(-1)
    all_pred = np.array(prob_rows)

    acc = Accuracy(all_pred, all_label)
    print(f'accuracy: {acc}')
    recall = Mean_Recall(all_pred, all_label)
    print(f'mean_recall: {recall}')
    auc = AUC_score(all_pred, all_label)
    print(f'AUC: {auc}')

    # AUC is the score used for model selection.
    return np.mean(batch_losses), auc

# CFG

In [None]:
# Global settings read by the data, model and training cells.
CFG= {
    'fold': 3,  # rows with this `fold` value form the validation split
    'epoch': 50,  # number of training epochs
    
    'img_size': 512,  # side length given to the train/test resize transforms
    'depth': 32,  # size the frame axis is resized to in read_video
    
    'batch_size': 1,  # batch size of the training DataLoader
    'gradient_accumulation': 1,  # batches accumulated per optimizer step
    
    'lr': 1e-4,  # AdamW learning rate
    'weight_decay': 1e-4,  # AdamW weight decay
    
    'load_model': False,  # False, or a checkpoint path such as './test_model/hitframe/cv1_best.pth'
    'save_model': './train_model'  # directory the checkpoints are written to
}
CFG

# Prepare Dataset

In [None]:
# Load the annotation table and plot how often each BallType value occurs.
df= pd.read_csv('data/train_balltype_land.csv')
df['BallType'].hist()

In [None]:
# Keep only rows whose HitFrame is non-zero, then split by fold.
df = df[df['HitFrame'] != 0]
train_dataset = df[df['fold'] != CFG['fold']].reset_index(drop=True)

# Balance the training split: draw exactly N rows for each BallType 1..9,
# sampling with replacement only when a class has fewer than N rows.
N = 300
balanced_parts = []
for l in range(1, 10):
    temp_df = train_dataset[train_dataset['BallType'] == l]
    if temp_df.empty:
        # Sampling from an empty frame would raise; report and move on.
        print(f'warning: no training rows for BallType {l}, class skipped')
        continue
    balanced_parts.append(
        temp_df.sample(n=N, replace=len(temp_df) < N, random_state=1)
    )
if not balanced_parts:
    raise ValueError('no training rows found for any BallType in 1..9')
new_df = pd.concat(balanced_parts, axis=0)
train_dataset = new_df.reset_index(drop=True)

valid_dataset = df[df['fold'] == CFG['fold']].reset_index(drop=True)
print(f'train dataset: {len(train_dataset)}')
print(f'valid dataset: {len(valid_dataset)}')

train_dataset = Customize_Dataset(train_dataset.iloc[:], get_train_transform(CFG['img_size']), is_train=True)
valid_dataset = Customize_Dataset(valid_dataset.iloc[:], get_test_transform(CFG['img_size']), is_train=False)

train_loader = DataLoader(train_dataset, batch_size=CFG['batch_size'], shuffle=True, num_workers=0)
valid_loader = DataLoader(valid_dataset, batch_size=1, shuffle=False, num_workers=0)
df.head()

# Train

In [None]:
## create model
if CFG['load_model']:
    print(f"load_model: {CFG['load_model']}")
    # torch.load unpickles a whole model object: only point this at trusted files.
    # NOTE(review): newer PyTorch releases may need weights_only=False here — confirm
    # against the installed version.
    model = torch.load(CFG['load_model'], map_location='cuda')
else:
    model = Customize_Model()
model.to('cuda')

## hyperparameter
criterion = Customize_loss()
optimizer = optim.AdamW(model.parameters(), lr=CFG['lr'], weight_decay=CFG['weight_decay'])

# Make sure the checkpoint directory exists before the first epoch finishes;
# otherwise torch.save fails only after a full epoch of training.
os.makedirs(CFG['save_model'], exist_ok=True)

## start training
best_score = 0
for ep in range(1, CFG['epoch'] + 1):
    print(f'\nep: {ep}')

    train_loss = train_epoch(train_loader, model, criterion, optimizer)
    # valid_epoch returns (mean loss, AUC); the AUC is what is tracked as `valid_acc`.
    valid_loss, valid_acc = valid_epoch(valid_loader, model, criterion)
    print(f'train loss: {round(train_loss, 5)}')
    print(f'valid loss: {round(valid_loss, 5)}, valid_acc: {round(valid_acc, 5)}')

    if valid_acc >= best_score:
        best_score = valid_acc
        torch.save(model, f"{CFG['save_model']}/cv{CFG['fold']}_best.pth")
        print(f'model save at score: {round(best_score, 5)}')

    ## save model every epoch
    torch.save(model, f"{CFG['save_model']}/cv{CFG['fold']}_ep{ep}.pth")