In [None]:
import os
import gc
import cv2
import time
import random
import glob
from PIL import Image
import  matplotlib.pyplot as plt

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp
from pytorch_toolbelt import losses as L

# Utils
from tqdm.auto import tqdm

# For Image Models
import timm

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Expose only the GPU with index 0 to this process.
# NOTE(review): the previous comment said "gpu:1" while the value set is '0' —
# confirm which card is actually intended.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

def seed_everything(seed=123):
    """Seed every random number generator this notebook touches.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and switches cuDNN to deterministic mode with
    autotuning disabled.

    Args:
        seed: Integer seed shared by all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


seed_everything()

In [None]:
class Customize_Model(nn.Module):
    """Thin ``nn.Module`` wrapper that forwards its input to ``self.model``.

    NOTE(review): ``__init__`` ignores both arguments and never assigns
    ``self.model``, so a freshly constructed instance raises ``AttributeError``
    in ``forward``. Presumably the class is only declared so that fully pickled
    checkpoints can be restored by ``torch.load`` with their ``model``
    sub-module already attached — confirm against the training code.
    """

    def __init__(self, model_name, cls):
        # Arguments are accepted but unused here.
        super().__init__()

    def forward(self, image):
        """Return the wrapped network's output for ``image``."""
        return self.model(image)

In [None]:
def get_test_transform(img_size):
    """Build the deterministic preprocessing pipeline used at inference time.

    The pipeline resizes the input to ``img_size`` x ``img_size`` and then
    applies ``ToTensorV2``.

    Args:
        img_size: Target height and width in pixels.

    Returns:
        An ``A.Compose`` object wrapping the two steps.
    """
    pipeline_steps = [
        A.Resize(img_size, img_size),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(pipeline_steps)


def read_video(path,
               video_load_frac=1.0):
    """Decode a video file into a stack of grayscale frames.

    Args:
        path: Path of the video file handed to ``cv2.VideoCapture``.
        video_load_frac: Scale factor applied to both width and height of
            every frame before the colour conversion.

    Returns:
        ``np.ndarray`` built from the list of decoded frames; for a readable
        video this is ``(img_len, H, W)``. If no frame could be decoded the
        array is empty.
    """
    frames = []
    cap = cv2.VideoCapture(path)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.resize(frame, None, fx=video_load_frac, fy=video_load_frac)
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
    finally:
        # Always free the decoder/file handle; without this every video read
        # leaves an open capture behind, even when a frame fails to process.
        cap.release()

    return np.array(frames)  ## (img_len, H, W)

def read_label(path):
    """Load a label CSV and return its ``HitFrame`` column as an array.

    Args:
        path: Location of a CSV file containing a ``HitFrame`` column.

    Returns:
        The values of the ``HitFrame`` column.
    """
    label_table = pd.read_csv(path)
    hit_column = label_table['HitFrame']
    return hit_column.values

class Customize_Dataset(Dataset):
    """Dataset yielding one whole video per item as a 3-channel float clip.

    Args:
        df: DataFrame with an ``image_path`` column holding one video path per
            row.
        transforms: Optional callable invoked as ``transforms(image=array)``
            with the frames arranged as ``(H, W, frames)``. It must return a
            dict whose ``"image"`` entry is a tensor with the frame axis first
            (``(frames, H, W)``). When omitted, the decoded frames are used
            unchanged.
        is_train: Stored on the instance; not used by this class.
    """

    def __init__(self, df, transforms=None, is_train=True):
        self.df = df
        self.transforms = transforms
        self.is_train= is_train

    def __getitem__(self, index):
        """Return ``{'image': float32 tensor (3, frames, H, W) scaled to [0, 1]}``.

        Raises:
            ValueError: If no frame could be decoded from the video.
            TypeError: If ``transforms`` does not return a tensor.
        """
        # Samplers hand out positions 0..len-1, so look the row up by position
        # instead of by index label.
        row = self.df.iloc[index]
        video_path = row['image_path']
        frames = read_video(video_path)  # (frames, H, W)
        if frames.ndim != 3 or frames.shape[0] == 0:
            raise ValueError(f"no frames could be decoded from {video_path!r}")

        if self.transforms:
            # The transform sees the frames as channels of one (H, W, frames) image.
            clip = self.transforms(image=frames.transpose(1, 2, 0))["image"]
            if not torch.is_tensor(clip):
                raise TypeError(
                    "transforms must return a tensor under 'image', got "
                    f"{type(clip).__name__}"
                )
        else:
            clip = torch.from_numpy(frames)

        # Scale once on the single-channel clip, then replicate it into three
        # identical channels in one call instead of concatenating frame by
        # frame (which re-copied the growing tensor on every iteration).
        clip = clip.to(torch.float32) / 255
        clip = clip.unsqueeze(0).repeat(3, 1, 1, 1)  ## (channel, frames, H, W)

        return {
            'image': clip,
        }

    def __len__(self):
        return len(self.df)

# CFG

In [None]:
# Inference configuration shared by the cells below.
CFG = {
    'img_size': 640,      # frames are resized to img_size x img_size
    'frame_length': 32,   # number of frames fed to the model per window

    'thr': 0.5,           # probability threshold used when building the submission
    'TTA': 1,             # number of flip variants averaged per window; 1 disables TTA
    'model': [
        './test_model/hitframe/csn_s640_d32/cv0_best.pth',
        './test_model/hitframe/csn_s640_d32/cv1_best.pth',
        './test_model/hitframe/csn_s640_d32/cv4_best.pth',
    ],
}

# Consecutive windows are shifted by half a window.
CFG['sample_rate'] = CFG['frame_length'] // 2

# Swap every checkpoint path for the object stored in it.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source.
CFG['model'] = [
    torch.load(checkpoint_path, map_location='cuda:0')
    for checkpoint_path in CFG['model']
]
print(f"length of model: {len(CFG['model'])}")

# Prepare Dataset

In [None]:
# Collect every mp4 below the two evaluation folders (searched recursively).
paths_1 = glob.glob('Data/羽球AICUP_001/part1/val/**/*mp4', recursive=True)
paths_2 = glob.glob('Data/羽球AICUP_001/part2/test/**/*mp4', recursive=True)
paths = [*paths_1, *paths_2]

# One row per video; the inference loop later adds its predictions to this frame.
valid_df = pd.DataFrame({'image_path': paths})
print(f'valid dataset: {len(valid_df)}')

valid_dataset = Customize_Dataset(
    valid_df.reset_index(drop=True),
    get_test_transform(CFG['img_size']),
)
# batch_size=1: each item is a whole video.
valid_loader = DataLoader(valid_dataset, batch_size=1, shuffle=False, num_workers=0)
valid_df.head()

In [None]:
def _module_device(module):
    """Return the device of ``module``'s first parameter (default CUDA device if it has none)."""
    first_param = next(module.parameters(), None)
    return first_param.device if first_param is not None else torch.device('cuda')


def inference(model, img):
    """Average the sigmoid predictions of several models for one clip window.

    For each model the logits are averaged over the selected test-time
    augmentation (TTA) views, passed through a sigmoid, and the resulting
    probabilities are averaged over all models.

    Args:
        model: Non-empty sequence of ``nn.Module`` objects.
        img: Tensor holding one window; a leading batch axis is added here.
            The flips act on its last two axes.

    Returns:
        The averaged probabilities as a (nested) Python list.

    Raises:
        ValueError: If ``model`` is empty or ``CFG['TTA']`` selects no view.
    """
    if len(model) == 0:
        raise ValueError("inference() needs at least one model")

    # Run on whatever device the models live on instead of a hard-coded .cuda().
    primary_device = _module_device(model[0])
    clip = torch.unsqueeze(img, 0).to(primary_device)

    # Same order as before: identity, flip last axis, flip second-to-last axis,
    # flip both. Only the first CFG['TTA'] views are materialised.
    tta_views = (
        lambda x: x,
        lambda x: x.flip(-1),
        lambda x: x.flip(-2),
        lambda x: x.flip(-1).flip(-2),
    )
    selected_views = tta_views[:CFG['TTA']]
    if not selected_views:
        raise ValueError("CFG['TTA'] must select at least one view")

    # The TTA batch does not depend on the model, so build it once.
    batch = torch.cat([view(clip) for view in selected_views], dim=0)

    prob_sum = None
    with torch.no_grad():
        for m in model:
            m.eval()
            logits = m(batch.to(_module_device(m))).mean(dim=0)
            prob = logits.sigmoid().to(primary_device)
            prob_sum = prob if prob_sum is None else prob_sum + prob

    return (prob_sum / len(model)).tolist()

In [None]:
def _sliding_window_probs(clip, frame_length, stride, predict_fn):
    """Predict per-frame probabilities for a whole clip with overlapping windows.

    Windows of ``frame_length`` frames start every ``stride`` frames along
    axis 1 of ``clip``; only complete windows are evaluated. Frames covered by
    several windows receive the mean of their predictions and frames covered by
    no window stay at 0.

    Args:
        clip: Array/tensor whose axis 1 is the frame axis.
        frame_length: Number of frames per window.
        stride: Offset between the starts of consecutive windows.
        predict_fn: Callable mapping one window to ``frame_length`` probabilities.

    Returns:
        List of ``clip.shape[1]`` floats.
    """
    n_frames = clip.shape[1]
    prob_sum = np.zeros(n_frames, dtype=np.float64)
    coverage = np.zeros(n_frames, dtype=np.float64)

    for start in range(0, n_frames - frame_length + 1, stride):
        stop = start + frame_length
        prob_sum[start:stop] += np.asarray(predict_fn(clip[:, start:stop]), dtype=np.float64)
        coverage[start:stop] += 1

    # Divide by the number of windows that actually saw each frame. The previous
    # code halved a fixed slice, which raised on videos producing zero or one
    # window because the accumulator was still a plain Python list.
    covered = coverage > 0
    prob_sum[covered] /= coverage[covered]
    return prob_sum.tolist()


valid_df['pred_prob'] = None
count = 0
for data in tqdm(valid_loader):
    for imgs in data['image']:
        total_pred = _sliding_window_probs(
            imgs,
            CFG['frame_length'],
            CFG['sample_rate'],
            lambda window: inference(CFG['model'], window),
        )
        # One list of per-frame probabilities per video, stored row by row.
        valid_df.at[count, 'pred_prob'] = total_pred
        count += 1
valid_df.head()

# Make_Submission

In [None]:
def array2index(ary):
    """Return one representative index for every run of consecutive 1s.

    For a run occupying positions ``s, s+1, ..., s+n-1`` the reported index is
    ``s + n // 2`` (the middle element; the upper middle for even-length runs).

    Args:
        ary: Sequence of values; entries equal to 1 belong to a run, anything
            else ends it.

    Returns:
        List of ints, one per run, in order of appearance.
    """
    hit_frames = []
    run_start = None  # position where the current run of 1s began, if any

    for position, value in enumerate(ary):
        if value == 1:
            if run_start is None:
                run_start = position
        elif run_start is not None:
            # The run covers [run_start, position); keep its middle element.
            hit_frames.append(run_start + (position - run_start) // 2)
            run_start = None

    # A run that reaches the end of the sequence is closed here.
    if run_start is not None:
        hit_frames.append(run_start + (len(ary) - run_start) // 2)

    return hit_frames

thr = CFG['thr']
# Submission columns: those of the reference CSV minus its bookkeeping columns,
# with 'VideoName' placed first.
cols = pd.read_csv('Data/hitframe.csv').drop(['img_name','group','fold'],axis=1).columns
submit = pd.DataFrame(columns=('VideoName',*cols))
count = 0
for i in range(len(valid_df)):
    # Binarise the per-frame probabilities, then collapse every run of
    # positives into a single hit frame.
    pred = np.array(valid_df.loc[i,'pred_prob'])
    pred = np.where(pred>=thr, 1, 0)
    hitframe = array2index(pred)

    # Keep only the file name. Paths may use '\\' or '/' as separator depending
    # on the platform; splitting on '\\' alone left the whole path in
    # 'VideoName' whenever the path contained no backslash.
    name = os.path.basename(valid_df.loc[i,'image_path'].replace('\\', '/'))

    for shot_seq, frame in enumerate(hitframe, start=1):
        submit.loc[count,'VideoName'] = name
        submit.loc[count,'ShotSeq'] = shot_seq
        submit.loc[count,'HitFrame'] = frame
        count += 1

In [None]:
# Columns that were never predicted are filled with -1, 'Hitter' and 'Winner'
# are set to the constant 'Z', and the table is written to disk.
submit = submit.fillna(-1).assign(Hitter='Z', Winner='Z')
submit.to_csv('submission.csv', index=False)
submit