In [1]:
"""Обучить модель на отдельных кадрах и провести сравнение"""

# Начало блокнота (до создания модели) скопировано из блокнота 2

'Обучить модель на отдельных кадрах и провести сравнение'

In [2]:
# Импорт либ
import os
import random
import time
import warnings

import torch
import timm
import pandas as pd
import numpy as np
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
import torch.nn.functional as F
import albumentations as A

from box import Box
from tqdm import tqdm
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from torchvision.io import read_video
from albumentations.pytorch.transforms import ToTensorV2

warnings.simplefilter("ignore", UserWarning)

In [3]:
# Load the video/label table and preview its first rows.
df = pd.read_csv(filepath_or_buffer="../data.csv")
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,name_video,label
0,0,videos/video_0000.mp4,tap dancing
1,1,videos/video_0001.mp4,tap dancing
2,2,videos/video_0002.mp4,tap dancing
3,3,videos/video_0003.mp4,tap dancing
4,4,videos/video_0004.mp4,tap dancing


In [4]:
# Encode the string labels as integer class ids; ids are assigned in order of
# first appearance of each label in the table.
unique_labels = df.label.unique()
label_dict = dict(zip(unique_labels, range(len(unique_labels))))
df['target'] = df['label'].map(label_dict)

df.head()

Unnamed: 0.1,Unnamed: 0,name_video,label,target
0,0,videos/video_0000.mp4,tap dancing,0
1,1,videos/video_0001.mp4,tap dancing,0
2,2,videos/video_0002.mp4,tap dancing,0
3,3,videos/video_0003.mp4,tap dancing,0
4,4,videos/video_0004.mp4,tap dancing,0


In [5]:
# Training configuration: every tunable value used by the cells below.
config = Box()

config.num_workers = 1
config.batch_size = 24
config.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
config.seed = 1771
config.model_name = 'tf_efficientnet_b0_ns'
# Despite its name, this holds the number of distinct target classes.
config.num_features = df.target.nunique()
config.optimizer_lr = 0.001
config.epochs = 20
config.test_size = 0.2

# Apply the seed to every RNG the notebook relies on (Python, NumPy, torch) so
# that frame sampling, augmentations and batch shuffling are repeatable after
# Restart & Run All. Previously the seed only reached train_test_split.
random.seed(config.seed)
np.random.seed(config.seed)
torch.manual_seed(config.seed)

Так как основная идея задания 2 заключается в обучении модели на отдельных кадрах, мы будем выбирать случайный кадр из видео и обрабатывать его как изображение, пытаясь классифицировать действие, которое на нём происходит. То есть здесь мы будем классифицировать картинки — кадры из видео. 
Для обучения модели возьмём известную и быструю сеть tf_efficientnet_b0_ns — это улучшенная версия сети effnet_b0, обученная на большем количестве данных с использованием более мощной версии модели.

In [6]:
# Изменим наш датасет 

class DanceRndImgSet(Dataset):
    """Dataset that yields one randomly chosen frame per video as an image.

    Every row of ``df`` must provide ``name_video`` (a path joined onto
    ``self.video_path``) and ``target`` (the integer class id).

    Args:
        df: table with one row per video.
        is_train: when True the training augmentation pipeline (rotation,
            coarse dropout, blur, snow, rain) is applied; otherwise frames are
            only resized and normalized.

    Note:
        The frame is drawn at random for both training and validation, so the
        same video can yield a different frame on every access.
    """

    # Side length (pixels) of the square image produced for the network.
    IMG_SIZE = 224

    def __init__(self, df, is_train = False):
        self.df = df
        self.video_path = ".."
        self.is_train = is_train

        size = self.IMG_SIZE
        # Mean/std used for normalization in both pipelines.
        normalize = A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225],)

        if is_train:
            self.aug = A.Compose([
                A.Resize(height=size, width=size, always_apply=True),
                A.Rotate([-30,30], p=1),
                A.CoarseDropout(max_height=int(size * 0.17), max_width=int(size * 0.17),
                                min_holes=4, max_holes=9, p=0.7),
                A.GaussianBlur(blur_limit=(3, 7), p=0.05),
                A.RandomSnow(p=0.05),
                A.RandomRain(p=0.05),
                normalize,
                ToTensorV2(),
            ])
        else:
            self.aug = A.Compose([
                A.Resize(height=size, width=size, always_apply=True),
                normalize,
                ToTensorV2(),
            ])

    def __len__(self):
        """Return the number of videos (rows) in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(frame_tensor, label)`` for the video at position ``idx``.

        The frame is picked uniformly at random from the decoded video. If the
        video decodes to zero frames, a random-noise image is used instead so
        that batching does not fail.
        """
        # Positional lookup: correct whether or not the caller reset the
        # frame's index (label-based .loc is only right for a 0..n-1 index).
        row = self.df.iloc[idx]
        target = row['target']

        video_path = os.path.join(self.video_path, row['name_video'])

        # Only the frame tensor is used; audio and metadata are discarded.
        video, _, _ = read_video(video_path, pts_unit="sec")

        if len(video) > 0:
            total_frames = video.shape[0]
            frame_index = torch.randint(0, total_frames, (1,)).item()
            frame = video[frame_index].numpy()
        else:
            # Unreadable/empty video: substitute uint8 noise of the target size.
            frame = torch.randint(0, 256, (self.IMG_SIZE, self.IMG_SIZE, 3),
                                  dtype=torch.uint8).numpy()

        frame_with_aug = self.aug(image=frame)['image']
        label = torch.tensor(target).long()
        return frame_with_aug, label

In [7]:
# Stratified train/validation split, followed by the datasets and data loaders
# built on top of each part.
train_df, val_df = train_test_split(df,
                                    test_size=config.test_size,
                                    random_state=config.seed,
                                    stratify=df['target']
                                   )

# drop=True discards the pre-split row labels instead of carrying them into the
# dataset frames as an extra "index" column.
dataset_train = DanceRndImgSet(train_df.reset_index(drop=True),
                               is_train=True)
dataset_test = DanceRndImgSet(val_df.reset_index(drop=True))

# config.num_workers is not passed here, so both loaders use DataLoader's
# default worker setting.
train_loader = DataLoader(dataset_train,
                          batch_size=config.batch_size,
                          shuffle=True,
                         )
valid_loader = DataLoader(dataset_test,
                          batch_size=config.batch_size,
                          shuffle=False,
                         )

In [8]:
# Pretrained timm backbone whose classifier is replaced by a fresh linear head
# producing one logit per target class.
model_name = config.model_name
model = timm.create_model(model_name, pretrained=True)
model.classifier = nn.Sequential(
    # The head width is the class count (config.num_features); it was
    # previously tied to config.epochs, which is unrelated to the label set.
    nn.Linear(model.classifier.in_features, config.num_features)
)
model.to(config.device)

config.device

device(type='cuda')

In [9]:
# Objective, optimizer and learning-rate schedule used by the training loop.
loss_f = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(
    params=model.parameters(),
    lr=config.optimizer_lr,
)
# Multiply the learning rate by 0.8 after every 3 scheduler steps.
scheduler = lr_scheduler.StepLR(
    optimizer=optimizer,
    step_size=3,
    gamma=0.8,
)

In [10]:
# Train the model. To guard against crashes, the model is saved after every epoch.
for epoch_i in range(1, config.epochs + 1):
    start = time.time()

    print(f'---------------------epoch:{epoch_i}/{config.epochs}---------------------')

    # Per-epoch accumulators are plain Python numbers: adding the loss tensor
    # itself would keep autograd history and GPU memory alive for the whole epoch.
    avg_train_loss = 0.0
    avg_val_loss = 0.0
    summa = 0  # number of correctly classified validation samples

    ############## Train #############
    model.train()
    train_pbar = tqdm(train_loader, desc="Training")
    for X, y in train_pbar:
        X_batch = X.to(config.device)
        y_batch = y.to(config.device)

        optimizer.zero_grad()
        # Call the module rather than .forward() so registered hooks are honoured.
        res = model(X_batch)

        loss = loss_f(res, y_batch)
        batch_loss = loss.item()

        if torch.cuda.is_available():
            train_pbar.set_postfix(gpu_load=f"{torch.cuda.memory_allocated() / 1024 ** 3:.2f}GB",
                                   loss=f"{batch_loss:.4f}")
        else:
            train_pbar.set_postfix(loss=f"{batch_loss:.4f}")

        loss.backward()
        optimizer.step()
        # Weight by batch size so a smaller final batch does not skew the
        # per-sample average computed after the loop.
        avg_train_loss += batch_loss * len(y_batch)

    ########## VALIDATION ###############
    model.eval()
    valid_pbar = tqdm(valid_loader, desc="Testing")
    with torch.no_grad():
        for X, y in valid_pbar:
            X_batch = X.to(config.device)
            y_batch = y.to(config.device)

            res = model(X_batch)

            batch_loss = loss_f(res, y_batch).item()
            avg_val_loss += batch_loss * len(y_batch)
            valid_pbar.set_postfix(loss=f"{batch_loss:.4f}")

            # Softmax is monotonic, so the argmax of the raw logits is already
            # the predicted class.
            preds = res.argmax(dim=1)
            summa += (preds == y_batch).sum().item()

    torch.cuda.empty_cache()

    # Convert the size-weighted sums into per-sample averages.
    avg_train_loss = avg_train_loss / len(dataset_train)
    avg_val_loss = avg_val_loss / len(dataset_test)

    # Validation accuracy.
    acc = summa / len(dataset_test)

    print(f'epoch: {epoch_i}, lr_rate {optimizer.param_groups[0]["lr"]}')

    print("loss_train: %0.4f| loss_valid: %0.4f|" % (avg_train_loss, avg_val_loss))
    print(f"metric {acc:.<5g}")

    elapsed_time = time.time() - start
    hours = int(elapsed_time // 3600)
    minutes = int((elapsed_time % 3600) // 60)
    seconds = int(elapsed_time % 60)
    print(f"Elapsed time: {hours:02d}:{minutes:02d}:{seconds:02d}")

    # Advance the LR schedule once per epoch, then checkpoint. torch.save is
    # given the whole module object here, not just its state_dict.
    scheduler.step()
    torch.save(model, f"model_ep_{epoch_i}.pt")

---------------------epoch:1/20---------------------


Training: 100%|██████████| 81/81 [02:51<00:00,  2.11s/it, gpu_load=2.00GB, loss=2.0545]
Testing: 100%|██████████| 21/21 [00:43<00:00,  2.08s/it, loss=2.4204]


epoch: 1, lr_rate 0.001
loss_train: 2.5715| loss_valid: 2.4394|
metric 0.238683
Elapsed time: 00:03:34
---------------------epoch:2/20---------------------


Training: 100%|██████████| 81/81 [03:02<00:00,  2.25s/it, gpu_load=2.00GB, loss=2.2720]
Testing: 100%|██████████| 21/21 [00:42<00:00,  2.03s/it, loss=2.5425]


epoch: 2, lr_rate 0.001
loss_train: 2.3152| loss_valid: 2.3477|
metric 0.238683
Elapsed time: 00:03:44
---------------------epoch:3/20---------------------


Training: 100%|██████████| 81/81 [02:48<00:00,  2.08s/it, gpu_load=2.00GB, loss=1.6756]
Testing: 100%|██████████| 21/21 [00:40<00:00,  1.95s/it, loss=2.1356]


epoch: 3, lr_rate 0.001
loss_train: 2.1620| loss_valid: 2.2660|
metric 0.263374
Elapsed time: 00:03:29
---------------------epoch:4/20---------------------


Training: 100%|██████████| 81/81 [02:51<00:00,  2.11s/it, gpu_load=2.00GB, loss=2.4227]
Testing: 100%|██████████| 21/21 [00:41<00:00,  1.96s/it, loss=2.9795]


epoch: 4, lr_rate 0.0008
loss_train: 1.9605| loss_valid: 2.1919|
metric 0.31893
Elapsed time: 00:03:32
---------------------epoch:5/20---------------------


Training: 100%|██████████| 81/81 [02:51<00:00,  2.11s/it, gpu_load=2.00GB, loss=1.5113]
Testing: 100%|██████████| 21/21 [00:41<00:00,  1.95s/it, loss=2.3763]


epoch: 5, lr_rate 0.0008
loss_train: 1.8707| loss_valid: 2.2770|
metric 0.32716
Elapsed time: 00:03:32
---------------------epoch:6/20---------------------


Training: 100%|██████████| 81/81 [02:49<00:00,  2.10s/it, gpu_load=2.00GB, loss=1.9566]
Testing: 100%|██████████| 21/21 [00:41<00:00,  1.97s/it, loss=2.1417]


epoch: 6, lr_rate 0.0008
loss_train: 1.8133| loss_valid: 2.4097|
metric 0.294239
Elapsed time: 00:03:31
---------------------epoch:7/20---------------------


Training: 100%|██████████| 81/81 [02:49<00:00,  2.10s/it, gpu_load=2.00GB, loss=1.8554]
Testing: 100%|██████████| 21/21 [00:41<00:00,  1.96s/it, loss=2.0411]


epoch: 7, lr_rate 0.00064
loss_train: 1.6744| loss_valid: 2.2558|
metric 0.320988
Elapsed time: 00:03:30
---------------------epoch:8/20---------------------


Training: 100%|██████████| 81/81 [02:52<00:00,  2.13s/it, gpu_load=2.00GB, loss=1.1476]
Testing: 100%|██████████| 21/21 [00:42<00:00,  2.00s/it, loss=1.6063]


epoch: 8, lr_rate 0.00064
loss_train: 1.5373| loss_valid: 2.4207|
metric 0.337449
Elapsed time: 00:03:34
---------------------epoch:9/20---------------------


Training: 100%|██████████| 81/81 [03:00<00:00,  2.23s/it, gpu_load=2.00GB, loss=1.6136]
Testing: 100%|██████████| 21/21 [00:44<00:00,  2.11s/it, loss=1.0565]


epoch: 9, lr_rate 0.00064
loss_train: 1.4162| loss_valid: 2.4112|
metric 0.316872
Elapsed time: 00:03:45
---------------------epoch:10/20---------------------


Training: 100%|██████████| 81/81 [02:49<00:00,  2.10s/it, gpu_load=2.00GB, loss=1.1814]
Testing: 100%|██████████| 21/21 [00:41<00:00,  1.95s/it, loss=1.4449]


epoch: 10, lr_rate 0.0005120000000000001
loss_train: 1.2557| loss_valid: 2.4806|
metric 0.345679
Elapsed time: 00:03:30
---------------------epoch:11/20---------------------


Training: 100%|██████████| 81/81 [02:55<00:00,  2.17s/it, gpu_load=2.00GB, loss=1.1574]
Testing: 100%|██████████| 21/21 [00:42<00:00,  2.04s/it, loss=1.7151]


epoch: 11, lr_rate 0.0005120000000000001
loss_train: 1.1568| loss_valid: 2.5710|
metric 0.31893
Elapsed time: 00:03:38
---------------------epoch:12/20---------------------


Training: 100%|██████████| 81/81 [02:51<00:00,  2.12s/it, gpu_load=2.00GB, loss=0.9832]
Testing: 100%|██████████| 21/21 [00:42<00:00,  2.02s/it, loss=1.9996]


epoch: 12, lr_rate 0.0005120000000000001
loss_train: 1.0686| loss_valid: 2.6311|
metric 0.316872
Elapsed time: 00:03:33
---------------------epoch:13/20---------------------


Training: 100%|██████████| 81/81 [02:55<00:00,  2.17s/it, gpu_load=2.00GB, loss=1.3837]
Testing: 100%|██████████| 21/21 [00:39<00:00,  1.89s/it, loss=1.5285]


epoch: 13, lr_rate 0.0004096000000000001
loss_train: 0.9860| loss_valid: 2.5562|
metric 0.341564
Elapsed time: 00:03:35
---------------------epoch:14/20---------------------


Training: 100%|██████████| 81/81 [02:56<00:00,  2.18s/it, gpu_load=2.00GB, loss=1.1017]
Testing: 100%|██████████| 21/21 [00:42<00:00,  2.02s/it, loss=1.9806]


epoch: 14, lr_rate 0.0004096000000000001
loss_train: 0.8527| loss_valid: 2.7877|
metric 0.316872
Elapsed time: 00:03:39
---------------------epoch:15/20---------------------


Training: 100%|██████████| 81/81 [02:52<00:00,  2.13s/it, gpu_load=2.00GB, loss=0.7469]
Testing: 100%|██████████| 21/21 [00:41<00:00,  1.99s/it, loss=1.8817]


epoch: 15, lr_rate 0.0004096000000000001
loss_train: 0.7964| loss_valid: 2.6725|
metric 0.320988
Elapsed time: 00:03:33
---------------------epoch:16/20---------------------


Training: 100%|██████████| 81/81 [02:46<00:00,  2.06s/it, gpu_load=2.00GB, loss=0.8067]
Testing: 100%|██████████| 21/21 [00:39<00:00,  1.89s/it, loss=1.5677]


epoch: 16, lr_rate 0.0003276800000000001
loss_train: 0.7470| loss_valid: 2.8103|
metric 0.345679
Elapsed time: 00:03:26
---------------------epoch:17/20---------------------


Training: 100%|██████████| 81/81 [02:58<00:00,  2.20s/it, gpu_load=2.00GB, loss=0.3578]
Testing: 100%|██████████| 21/21 [00:43<00:00,  2.09s/it, loss=2.5000]


epoch: 17, lr_rate 0.0003276800000000001
loss_train: 0.6188| loss_valid: 2.8459|
metric 0.335391
Elapsed time: 00:03:42
---------------------epoch:18/20---------------------


Training: 100%|██████████| 81/81 [02:49<00:00,  2.10s/it, gpu_load=2.00GB, loss=0.5374]
Testing: 100%|██████████| 21/21 [00:40<00:00,  1.92s/it, loss=2.0449]


epoch: 18, lr_rate 0.0003276800000000001
loss_train: 0.6015| loss_valid: 2.9375|
metric 0.320988
Elapsed time: 00:03:30
---------------------epoch:19/20---------------------


Training: 100%|██████████| 81/81 [02:49<00:00,  2.09s/it, gpu_load=2.00GB, loss=0.2645]
Testing: 100%|██████████| 21/21 [00:42<00:00,  2.02s/it, loss=2.7694]


epoch: 19, lr_rate 0.0002621440000000001
loss_train: 0.5543| loss_valid: 2.9600|
metric 0.341564
Elapsed time: 00:03:32
---------------------epoch:20/20---------------------


Training: 100%|██████████| 81/81 [02:52<00:00,  2.13s/it, gpu_load=2.00GB, loss=0.6243]
Testing: 100%|██████████| 21/21 [00:48<00:00,  2.33s/it, loss=1.8829]

epoch: 20, lr_rate 0.0002621440000000001
loss_train: 0.5373| loss_valid: 2.9396|
metric 0.335391
Elapsed time: 00:03:41





In [11]:
# Как мы видим, после 10-й эпохи модель начала переобучаться: ошибка на обучении продолжает падать, а ошибка на валидации растёт, при этом точность на валидации больше не улучшается.