In [1]:
import os
import gc
import cv2
import copy
import random

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp
# from pytorch_toolbelt import losses as L

# Utils
from tqdm import tqdm
from IPython.display import display

# For Image Models
import timm

# # Albumentations for augmentations
# import albumentations as A
# from albumentations.pytorch import ToTensorV2

# Calculate accuracy
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import balanced_accuracy_score
# ## using gpu:1
# os.environ['CUDA_VISIBLE_DEVICES'] = '1'

import warnings
warnings.filterwarnings("ignore")
from torch.utils.tensorboard import SummaryWriter

def seed_everything(seed=123):
    """Seed every random number generator this notebook relies on.

    Seeds ``random``, NumPy and PyTorch (CPU, the current CUDA device and all
    CUDA devices), exports the seed as ``PYTHONHASHSEED`` and sets the cuDNN
    flags ``deterministic=True`` / ``benchmark=False``.

    Args:
        seed: integer seed shared by all generators (default 123).
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    for flag, value in (('deterministic', True), ('benchmark', False)):
        setattr(torch.backends.cudnn, flag, value)


# Seed once with the default value as soon as the cell runs.
seed_everything()

In [2]:
# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

In [3]:
class Customize_Model(nn.Module):
    """timm backbone whose ``classifier`` is replaced by a new linear head.

    The backbone is created with ``pretrained=True``; its ``classifier``
    attribute is swapped for ``nn.Identity`` and a fresh ``nn.Linear`` with
    ``cls`` outputs is attached as ``fc``. ``forward`` returns the raw output
    of ``fc``; the ``sigmoid`` module is instantiated but not applied here.

    Args:
        model_name: name passed to ``timm.create_model``; the created model
            must expose a ``classifier`` attribute with ``in_features``.
        cls: number of outputs of the linear head.
    """

    def __init__(self, model_name, cls):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=True)

        # Read the head width before the head is replaced by an identity.
        feature_dim = backbone.classifier.in_features
        backbone.classifier = nn.Identity()
        self.model = backbone

        # Output order noted by the author:
        # is_rounded, is_backhand, ball_height, is_serve, locationX, locationY
        self.fc = nn.Linear(feature_dim, cls)
        self.sigmoid = nn.Sigmoid()

    def forward(self, image):
        """Run the backbone and return the outputs of the linear head."""
        features = self.model(image)
        return self.fc(features)

In [4]:
def get_train_transform(img_size):
    """Build the albumentations pipeline used for training images.

    The pipeline applies colour jitter, brightness/contrast jitter, horizontal
    flip, blur and Gaussian noise (each with its own probability) and finishes
    with ``ToTensorV2``. A GridDistortion/ElasticTransform ``OneOf`` stage was
    tried by the author and is currently disabled.

    Args:
        img_size: accepted for a uniform builder signature; not used here.
    """
    colour_jitter = A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=10, val_shift_limit=10, p=0.5)
    brightness_contrast = A.RandomBrightnessContrast(brightness_limit=0.15, contrast_limit=0.15, p=0.5)
    flip = A.HorizontalFlip(p=0.5)
    blur = A.Blur(blur_limit=3, p=0.3)
    noise = A.GaussNoise(p=0.3)
    to_tensor = ToTensorV2(p=1.0)

    steps = [colour_jitter, brightness_contrast, flip, blur, noise, to_tensor]
    return A.Compose(steps)

def get_2s_train_transform(img_size):
    """Build the lighter training pipeline: horizontal flip, then ``ToTensorV2``.

    Args:
        img_size: accepted for a uniform builder signature; not used here
            (an ``A.Resize(img_size, img_size)`` stage is currently disabled).
    """
    flip = A.HorizontalFlip(p=0.5)
    to_tensor = ToTensorV2(p=1.0)
    return A.Compose([flip, to_tensor])


def get_test_transform(img_size):
    """Build the evaluation pipeline, which only applies ``ToTensorV2``.

    Args:
        img_size: accepted for a uniform builder signature; not used here.
    """
    to_tensor = ToTensorV2(p=1.0)
    return A.Compose([to_tensor])

In [5]:
class Serve_Dataset(Dataset):
    """Player crops around a hit frame, labelled hitter (1) vs. other player (0).

    Every row of ``df`` produces two samples: index ``i`` in ``[0, len(df))``
    loads the crop of the row's hitter with ``label_BCE = 1``; index
    ``i + len(df)`` loads the crop of the other player ('A' <-> 'B') for the
    same frame with ``label_BCE = 0``.

    Crop file names are parsed as
    ``<group>_<player>_hitframe_<frame>_x<X>_y<Y><4-char extension>``: the 5th
    and 6th ``_``-separated tokens supply the crop origin that is subtracted
    from the hitter location taken from ``df``.

    Args:
        df: DataFrame with columns ``group``, ``Hitter``, ``HitFrame``,
            ``ShotSeq``, ``HitterLocationX`` and ``HitterLocationY``.
        img_list: file names of the available crops.
        train_img_size: side length of the square image handed to the model.
        transforms: optional albumentations-style callable invoked as
            ``transforms(image=img)["image"]``.
        img_dir: directory containing the files named in ``img_list``.
    """

    def __init__(self, df, img_list, train_img_size, transforms=None,
                 img_dir='Train_data/mix_sorted_player_bigwidth/all'):
        self.df = df
        self.img_list = img_list
        self.img_dir = img_dir
        self.group = [x.replace('.', '') for x in df['group'].tolist()]
        self.hitter = df['Hitter'].values
        self.hitframe = df['HitFrame'].values
        # For label
        self.is_serve = [1 if i == 1 else 0 for i in df['ShotSeq'].tolist()]
        self.hitterx = df['HitterLocationX'].tolist()
        self.hittery = df['HitterLocationY'].tolist()
        self.train_img_size = train_img_size
        self.transforms = transforms

        # Exact-key index built once: '<group>_<player>_hitframe_<frame>' ->
        # first file in list order. This replaces a full scan of img_list per
        # sample and stops frame 12 from matching a 'hitframe_123' file.
        self._img_index = {}
        for name in img_list:
            tokens = name.split('_')
            if len(tokens) >= 6:
                self._img_index.setdefault('_'.join(tokens[:4]), name)

    def _find_image(self, index, player):
        """Return the crop file name for row ``index`` and ``player``.

        An exact match on the first four name tokens is preferred; if there is
        none, the first name containing the key as a substring is used (the
        original lookup rule). Raises ``IndexError`` when nothing matches.
        """
        key = f'{self.group[index]}_{player}_hitframe_{self.hitframe[index]}'
        name = self._img_index.get(key)
        if name is None:
            name = next((x for x in self.img_list if key in x), None)
        if name is None:
            raise IndexError(f'no image in img_list matches {key!r}')
        return name

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            dict with
            ``image``: float32 tensor of the padded crop divided by 255,
            ``hitter_location``: long tensor (x, y) in the resized crop,
            ``label_BCE``: long tensor ``[1]`` for the hitter crop, ``[0]`` otherwise,
            ``label_MSE``: float32 tensor, location / ``train_img_size`` clamped to [0, 1].

        Raises:
            IndexError: no file in ``img_list`` matches the requested crop.
            FileNotFoundError: the matched file cannot be read by OpenCV.
        """
        # The first len(df) indices address hitter crops, the rest the other player.
        index, hitter_index = index % len(self.df), index // len(self.df)
        if hitter_index == 0:
            player = self.hitter[index]
            is_serve = 1
        else:
            player = 'B' if self.hitter[index] == 'A' else 'A'
            is_serve = 0

        # For Image
        name = self._find_image(index, player)
        path = f'{self.img_dir}/{name}'
        img = cv2.imread(path)
        if img is None:
            # cv2.imread returns None instead of raising when a file is missing/unreadable.
            raise FileNotFoundError(f'could not read image {path!r}')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # For label: crop origin is encoded in the file name as x<X> / y<Y>.<ext>
        tokens = name.split('_')
        x0 = int(tokens[4][1:])
        y0 = int(tokens[5][1:-4])
        location_xy = (self.hitterx[index] - x0, self.hittery[index] - y0)
        img, location_xy = self.resized_padding(img, self.train_img_size, location_xy)
        label_BCE = [is_serve]
        # Normalise by the image side and clamp into [0, 1].
        label_MSE = [min(max(v / self.train_img_size, 0), 1) for v in location_xy]

        if self.transforms:
            img = self.transforms(image=img)["image"]
        if torch.is_tensor(img):
            # e.g. ToTensorV2 output: scale without copy-constructing a tensor from a tensor.
            image = (img / 255).to(torch.float32)
        else:
            image = torch.tensor(img / 255, dtype=torch.float32)
        return {
            'image': image,
            'hitter_location': torch.tensor(location_xy, dtype=torch.long),
            'label_BCE': torch.tensor(label_BCE, dtype=torch.long),
            'label_MSE': torch.tensor(label_MSE, dtype=torch.float32)
        }

    def __len__(self):
        """Two samples (hitter crop and other-player crop) per row of ``df``."""
        return 2 * len(self.df)

    def resized_padding(self, img, train_img_size, locationXY):
        """Resize so the longer side equals ``train_img_size`` and zero-pad to a square.

        Padding is added on the bottom and right only, so the scaled location
        keeps the same origin. Returns ``(padded_img, (x, y))`` with the
        location scaled by the same factor and truncated to ints.
        """
        # Resize
        h, w = img.shape[:2]
        if h >= w:
            scale = train_img_size / h
            # max(1, ...) keeps cv2.resize valid for extremely thin crops
            dim = (max(1, int(scale * w)), train_img_size)
        else:
            scale = train_img_size / w
            dim = (train_img_size, max(1, int(scale * h)))
        resized_locationXY = (int(locationXY[0] * scale), int(locationXY[1] * scale))
        resized_img = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
        # Padding
        left, top = 0, 0
        bottom = train_img_size - resized_img.shape[0]
        right = train_img_size - resized_img.shape[1]
        train_img = cv2.copyMakeBorder(resized_img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(0, 0, 0))

        return train_img, resized_locationXY

In [6]:
def train_epoch(dataloader, model, criterion, optimizer):
    """Train ``model`` for one pass over ``dataloader`` using CUDA mixed precision.

    The loss is ``criterion['bce']`` between output column 3 and column 0 of
    ``label_BCE``. Gradients are accumulated over
    ``CFG['gradient_accumulation']`` batches (notebook-level config) before
    each optimizer step.

    Args:
        dataloader: yields dicts with ``'image'`` and ``'label_BCE'`` tensors.
        model: network to train; inputs are moved to ``'cuda'``.
        criterion: dict holding the loss under the key ``'bce'``.
        optimizer: optimizer bound to ``model``'s parameters.

    Returns:
        Mean of the per-batch (undivided) BCE losses.
    """
    scaler = amp.GradScaler()
    model.train()
    accumulation = CFG['gradient_accumulation']

    # Start from clean gradients so nothing left over from an earlier call leaks in.
    optimizer.zero_grad()
    pending = False  # True while accumulated gradients have not been applied yet

    ep_loss_bce = []
    for i, data in enumerate(tqdm(dataloader)):
        imgs = data['image'].to('cuda')
        labels_bce = data['label_BCE'].to('cuda')

        # Only the forward pass and the loss belong under autocast.
        with amp.autocast():
            output = model(imgs)
            loss_bce = criterion['bce'](output[:, 3], labels_bce[:, 0].float())
        ep_loss_bce.append(loss_bce.item())

        # Backward and optimizer step run outside the autocast context.
        total_loss = loss_bce / accumulation
        scaler.scale(total_loss).backward()
        pending = True

        if (i + 1) % accumulation == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            pending = False

    # Apply the final partial accumulation window instead of silently dropping it.
    if pending:
        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad()

    return np.mean(ep_loss_bce)

def valid_epoch(valid_loader, model, criterion):
    """Evaluate ``model`` on ``valid_loader`` without tracking gradients.

    The loss is ``criterion['bce']`` between output column 3 and column 0 of
    ``label_BCE``; predictions are ``sigmoid(output[:, 3]) >= 0.5``.

    Args:
        valid_loader: yields dicts with ``'image'`` and ``'label_BCE'`` tensors.
        model: network to evaluate; inputs are moved to ``'cuda'``.
        criterion: dict holding the loss under the key ``'bce'``.

    Returns:
        ``(mean BCE loss, balanced accuracy, accuracy)`` over the whole loader.
    """
    model.eval()

    ep_valid_loss_bce = []
    preds, labels = [], []
    # no_grad: validation must not build an autograd graph for every batch.
    with torch.no_grad():
        for data in tqdm(valid_loader):
            imgs = data['image'].to('cuda')
            labels_bce = data['label_BCE'].to('cuda')
            output = model(imgs)
            loss_bce = criterion['bce'](output[:, 3], labels_bce[:, 0].float())
            ep_valid_loss_bce.append(loss_bce.item())
            # Only column 3 is scored; the author's note lists the columns as
            # [round, backhand, ball_height, serve].
            preds.extend((output[:, 3].sigmoid() >= 0.5).long().tolist())
            labels.extend(labels_bce[:, 0].tolist())

    bacc_serve = balanced_accuracy_score(labels, preds)
    acc_serve = accuracy_score(labels, preds)
    return np.mean(ep_valid_loss_bce), bacc_serve, acc_serve

def calculate_acc(output, labels):
    """Return the balanced accuracy, ``(recall + specificity) / 2``, of binary predictions.

    Args:
        output: tensor of scores, e.g. ``[0.6, 0.7, 0.3, 0.9, ...]`` (len = batch size);
            a score ``>= 0.5`` counts as class 1, anything else as class 0.
        labels: tensor of 0/1 targets, e.g. ``[1, 0, 0, 1, ...]`` (len = batch size).

    Returns:
        Mean of the recall for class 1 and the recall for class 0.
    """
    # Threshold into a new tensor: the caller's ``output`` is left unmodified and
    # every score maps to exactly 0 or 1 (no reliance on int truncation).
    preds = (output >= 0.5).int().tolist()
    targets = labels.int().tolist()
    recall = recall_score(targets, preds)
    spe = recall_score(targets, preds, pos_label=0)

    return (recall + spe) / 2

# Configuration (CFG)

In [7]:
# Notebook-wide configuration; the finetune overrides are applied right below.
CFG = dict(
    fold=0,
    epoch=40,
    model_name='tf_efficientnet_b0_ns',
    finetune=True,

    img_size=224,
    batch_size=10,
    gradient_accumulation=1,

    lr=3e-4,
    weight_decay=0,

    num_classes=4,
    load_model='',
    save_model='./weight/train',
)

# Finetuning starts from a saved checkpoint with fewer epochs and a smaller learning rate.
if CFG['finetune']:
    print('finetune model')
    CFG.update(
        load_model=f"weight/cv{CFG['fold']}_effb0_classification_aug_all_best_bigwidth.pth",
        epoch=10,
        lr=3e-5,
    )

finetune model


# Prepare Dataset (serve)

In [8]:
import matplotlib.pyplot as plt
img_list = os.listdir('Train_data/mix_sorted_player_bigwidth/all')
df= pd.read_csv('Train_data/hitframe.csv')

# torch.save does not create missing directories, so make sure the target exists.
os.makedirs(CFG['save_model'], exist_ok=True)

# 5-fold cross-validation: every fold starts from its own checkpoint, trains on the
# other folds and keeps the model with the best validation balanced accuracy.
for fold in range(5):
    print(f'fold : {fold}')
    CFG['load_model'] = f"weight/cv{fold}_effb0_classification_aug_all_best_bigwidth_serve.pth"
    CFG['fold'] = fold
    run_name = f"cv{CFG['fold']}_effb0_classification_aug_all_best_bigwidth_serve"
    best_model_path = f"{CFG['save_model']}/{run_name}.pth"

    # Only rows with ShotSeq == 1 are used; the dataset itself adds the negative
    # (other player) sample for every row.
    train_df= df[(df['fold']!=CFG['fold'])&(df['ShotSeq']== 1)]
    valid_df= df[(df['fold']==CFG['fold'])&(df['ShotSeq']== 1)]
    print(f'train dataset: {len(train_df)}')
    print(f'valid dataset: {len(valid_df)}')
    print(f'all dataset: {len(img_list)}')

    # Both the finetune and the from-scratch branch used the same transform,
    # so the training dataset is built once.
    train_dataset = Serve_Dataset(train_df, img_list, CFG['img_size'], get_train_transform(CFG['img_size']))
    valid_dataset = Serve_Dataset(valid_df, img_list, CFG['img_size'], get_test_transform(CFG['img_size']))
    train_loader = DataLoader(train_dataset, batch_size= CFG['batch_size'], shuffle=True, num_workers=0)
    valid_loader= DataLoader(valid_dataset, batch_size=CFG['batch_size'], shuffle=False, num_workers=0)

    ## create model
    if CFG['load_model']:
        print(f"load_model: {CFG['load_model']}")
        # NOTE(review): this unpickles a whole model object; only load checkpoints
        # from a trusted source.
        model= torch.load(CFG['load_model'], map_location= 'cuda')
    else:
        model= Customize_Model(CFG['model_name'], CFG['num_classes'])
    model.to('cuda')

    ## hyperparameter
    criterion = {'bce' : nn.BCEWithLogitsLoss()}
    optimizer = optim.AdamW(model.parameters(), lr= CFG['lr'], weight_decay= CFG['weight_decay'])
    ## start training
    best_score= 0

    # One writer per fold (not one per epoch), closed even if training fails.
    writer = SummaryWriter(f"tensorboard_result/train_{run_name}")
    try:
        # NOTE: the range is inclusive of CFG['epoch'], i.e. CFG['epoch'] + 1 epochs run.
        for ep in range(0, CFG['epoch']+1):
            print(f'ep: {ep}')
            ## adjust lr
            if ep == 50:
                model= torch.load(best_model_path)
                model.to('cuda')
                # The optimizer has to be rebuilt: the old one still points at the
                # parameters of the model object that was just replaced.
                optimizer = optim.AdamW(model.parameters(), lr= 1e-4, weight_decay= CFG['weight_decay'])
                print('Decrease learning rate to 1e-4!')

            train_loss_bce = train_epoch(train_loader, model, criterion, optimizer)
            valid_loss_bce, valid_bacc, valid_acc = valid_epoch(valid_loader, model, criterion)
            print(f'train loss bce: {train_loss_bce}')
            print(f'valid loss bce: {valid_loss_bce}')
            # valid_acc is the accuracy of the serve output, so it is labelled as such.
            print(f'valid serve bacc : {valid_bacc}, valid serve acc:{valid_acc}')
            all_bacc = valid_bacc
            print(f'all bacc: {all_bacc}')

            #writer
            writer.add_scalar("BCE Loss/train",train_loss_bce, ep)
            writer.add_scalar("BCE Loss/val",valid_loss_bce, ep)
            writer.add_scalar("All avg recall/val",all_bacc, ep)

            # '>=' keeps the most recent model among equally good epochs.
            if all_bacc >= best_score:
                best_score= all_bacc
                torch.save(model, best_model_path)
                print(f'model save at score: {best_score}')

            ## save model every epoch
            torch.save(model, f"{CFG['save_model']}/cv{CFG['fold']}_ep{ep}.pth")
    finally:
        writer.close()

fold : 0
train dataset: 640
valid dataset: 160
all dataset: 15690
load_model: Model/train/cv0_effb0_classification_aug_all_best_bigwidth_serve.pth


100%|██████████████████████████████████████████████████████████████████████████████████| 32/32 [00:04<00:00,  7.14it/s]


valid loss bce: 0.47033338434630423
valid serve bacc : 0.909375, valid round acc:0.909375
all bacc: 0.909375
fold : 1
train dataset: 640
valid dataset: 160
all dataset: 15690
load_model: Model/train/cv1_effb0_classification_aug_all_best_bigwidth_serve.pth


100%|██████████████████████████████████████████████████████████████████████████████████| 32/32 [00:02<00:00, 12.15it/s]


valid loss bce: 0.5175568693653076
valid serve bacc : 0.8875, valid round acc:0.8875
all bacc: 0.8875
fold : 2
train dataset: 640
valid dataset: 160
all dataset: 15690
load_model: Model/train/cv2_effb0_classification_aug_all_best_bigwidth_serve.pth


100%|██████████████████████████████████████████████████████████████████████████████████| 32/32 [00:02<00:00, 12.11it/s]


valid loss bce: 0.3256505203771667
valid serve bacc : 0.925, valid round acc:0.925
all bacc: 0.925
fold : 3
train dataset: 640
valid dataset: 160
all dataset: 15690
load_model: Model/train/cv3_effb0_classification_aug_all_best_bigwidth_serve.pth


100%|██████████████████████████████████████████████████████████████████████████████████| 32/32 [00:02<00:00, 11.43it/s]


valid loss bce: 0.297054655609827
valid serve bacc : 0.90625, valid round acc:0.90625
all bacc: 0.90625
fold : 4
train dataset: 640
valid dataset: 160
all dataset: 15690
load_model: Model/train/cv4_effb0_classification_aug_all_best_bigwidth_serve.pth


100%|██████████████████████████████████████████████████████████████████████████████████| 32/32 [00:03<00:00,  8.92it/s]

valid loss bce: 0.6763166031887522
valid serve bacc : 0.903125, valid round acc:0.903125
all bacc: 0.903125





# Test serve acc