In [1]:
# Импорт либ
import os
import random
import time
import warnings



import torch
import pandas as pd
import numpy as np
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
import torch.nn.functional as F

from box import Box
from tqdm import tqdm
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from torchvision.io import read_video
warnings.simplefilter("ignore", UserWarning)

In [2]:
# Load the refreshed metadata table and preview its first rows
df = pd.read_csv(filepath_or_buffer="../data.csv")
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,name_video,label
0,0,videos/video_0000.mp4,tap dancing
1,1,videos/video_0001.mp4,tap dancing
2,2,videos/video_0002.mp4,tap dancing
3,3,videos/video_0003.mp4,tap dancing
4,4,videos/video_0004.mp4,tap dancing


In [3]:
# Encode the string labels as integer class ids (ids follow the order of first appearance)
unique_labels = df['label'].unique()
label_dict = dict(zip(unique_labels, range(len(unique_labels))))
df['target'] = df['label'].map(label_dict)

df.head()

Unnamed: 0.1,Unnamed: 0,name_video,label,target
0,0,videos/video_0000.mp4,tap dancing,0
1,1,videos/video_0001.mp4,tap dancing,0
2,2,videos/video_0002.mp4,tap dancing,0
3,3,videos/video_0003.mp4,tap dancing,0
4,4,videos/video_0004.mp4,tap dancing,0


In [4]:
# Build the configuration used for training the model
config = Box()

config.num_workers = 1
config.batch_size = 24
config.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
config.seed = 1771
config.model_name = 'mc3_18'
config.num_features = df.target.nunique()  # number of distinct target values (one per class)
config.optimizer_lr = 0.0001
config.epochs = 15
config.test_size = 0.2

# Seed every random number generator the notebook relies on, so that a
# Restart & Run All gives the same shuffling order and the same initial
# weights for any newly created layers.
random.seed(config.seed)
np.random.seed(config.seed)
torch.manual_seed(config.seed)

In [5]:
from vidaug import augmentors as va

In [6]:
# Dataset that turns the video files listed in the dataframe into fixed-size clips

class DanceDanceDataset(Dataset):
    """Serve fixed-size video clips together with their integer class labels.

    Every item is a pair ``(clip, label)``:

    * ``clip`` is a float tensor of shape ``(3, NUM_FRAMES, 112, 112)`` whose
      values are the resized frames divided by 255;
    * ``label`` is a 0-dim ``torch.long`` tensor taken from the ``target`` column.

    Args:
        df: dataframe with a ``name_video`` column (file path, joined onto
            ``..``) and a ``target`` column (integer class id). Rows are
            addressed by position, so the index does not have to be reset.
        is_train: when true, ``aug`` (if one was given) is applied to the
            sampled frames before resizing.
        aug: optional callable that receives the sampled frames as a
            ``(T, H, W, C)`` numpy array and returns the augmented frames in
            the same layout, e.g. a ``vidaug`` pipeline such as
            ``va.Sequential([va.RandomRotate(degrees=(-40, 40)),
            va.Sometimes(0.5, va.HorizontalFlip())])``.
            With ``aug=None`` no augmentation is performed.
    """

    NUM_FRAMES = 16          # frames per clip handed to the model
    FRAME_STRIDE = 3         # temporal stride used when the video is long enough
    FRAME_SIZE = (112, 112)  # spatial size every frame is resized to

    def __init__(self, df, is_train = False, aug = None):
        self.df = df
        self.video_path = ".."
        self.is_train = is_train
        self.aug = aug

    def __len__(self):
        """Return the number of videos listed in the dataframe."""
        return len(self.df)

    @staticmethod
    def _sample_frames(video, num_frames, stride):
        """Pick exactly ``num_frames`` frames along the first axis of ``video``.

        Videos with at least ``num_frames * stride`` frames are sub-sampled with
        the given stride; shorter ones contribute their first ``num_frames``
        frames. If even that is not enough, the last frame is repeated so that
        every clip has the same length and batches can be collated.
        ``video`` must contain at least one frame.
        """
        if len(video) >= num_frames * stride:
            return video[:num_frames * stride:stride]

        clip = video[:num_frames]
        missing = num_frames - len(clip)
        if missing > 0:
            padding = clip[-1:].expand(missing, *clip.shape[1:])
            clip = torch.cat([clip, padding], dim=0)
        return clip

    def __getitem__(self, idx):
        """Decode the ``idx``-th video and return ``(clip, label)``."""
        row = self.df.iloc[idx]
        label = torch.tensor(int(row['target']), dtype=torch.long)

        video_path = os.path.join(self.video_path, row['name_video'])
        video, _audio, _info = read_video(video_path, pts_unit="sec")

        if len(video) == 0:
            # Nothing could be decoded: return an all-zero clip rather than
            # uninitialised memory, which may contain NaN/inf values.
            return torch.zeros(3, self.NUM_FRAMES, *self.FRAME_SIZE), label

        video = self._sample_frames(video, self.NUM_FRAMES, self.FRAME_STRIDE)

        if self.is_train and self.aug is not None:
            augmented = self.aug(video.numpy())
            # Copy into one contiguous float32 array: augmentors may hand back
            # a list of frames or views with negative strides.
            video = torch.from_numpy(np.ascontiguousarray(augmented, dtype=np.float32))

        # (T, H, W, C) -> (T, C, H, W): Resize works on the last two dimensions,
        # so the whole clip is resized in a single call.
        resize_transform = transforms.Resize(self.FRAME_SIZE)
        video_resized = resize_transform(video.permute(0, 3, 1, 2))

        # (T, C, H, W) -> (C, T, H, W), then a simple scaling by 1/255
        tensor_3d = video_resized.permute(1, 0, 2, 3) / 255
        return tensor_3d, label

In [7]:
# Sanity check: decode the first video and look at the shape of the resulting clip
dataset_train = DanceDanceDataset(df.reset_index())
sample_clip, sample_label = dataset_train[0]
sample_clip.shape

torch.Size([3, 16, 112, 112])

In [8]:
# Split the data into train and validation parts (stratified by class),
# then build the corresponding datasets and data loaders
train_df, val_df = train_test_split(
    df,
    stratify=df['target'],
    test_size=config.test_size,
    random_state=config.seed,
)

dataset_train = DanceDanceDataset(train_df.reset_index())
dataset_test = DanceDanceDataset(val_df.reset_index())

# num_workers is deliberately not passed: both loaders decode videos in the main process
train_loader = DataLoader(dataset_train, shuffle=True, batch_size=config.batch_size)
valid_loader = DataLoader(dataset_test, batch_size=config.batch_size)

In [9]:
# Reference: https://pytorch.org/vision/main/models/generated/torchvision.models.video.mc3_18.html#mc3-18

# Load the pretrained mc3_18 network and swap its final layer for one
# sized to config.num_features outputs
model = models.video.mc3_18(pretrained=True)
fc_in_features = model.fc.in_features
model.fc = nn.Linear(fc_in_features, config.num_features)
model = model.to(config.device)
config.device

device(type='cuda')

In [10]:
# Cross-entropy loss, AdamW over all model parameters, and a step schedule
# that multiplies the learning rate by 0.8 after every 3 scheduler steps
loss_f = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(params=model.parameters(), lr=config.optimizer_lr)
scheduler = lr_scheduler.StepLR(optimizer=optimizer, gamma=0.8, step_size=3)

In [11]:
# Train the model. To guard against crashes, the model is saved after every epoch,
# so an interrupted run loses at most the epoch that was in progress.
for epoch_i in range(1, config.epochs + 1):
    start = time.time()

    print(f'---------------------epoch:{epoch_i}/{config.epochs}---------------------')

    # Running sums are plain Python numbers: accumulating the loss tensor itself
    # would keep a growing chain of autograd nodes (and GPU memory) alive for the
    # whole epoch.
    avg_train_loss = 0.0
    avg_val_loss = 0.0
    summa = 0  # number of correctly classified validation samples

    ############## Train #############
    model.train()
    train_pbar = tqdm(train_loader, desc="Training")
    for X, y in train_pbar:
        X_batch = X.to(config.device)
        y_batch = y.to(config.device)

        optimizer.zero_grad()
        res = model(X_batch)
        loss = loss_f(res, y_batch)
        batch_loss = loss.item()

        if torch.cuda.is_available():
            train_pbar.set_postfix(gpu_load=f"{torch.cuda.memory_allocated() / 1024 ** 3:.2f}GB",
                                   loss=f"{batch_loss:.4f}")
        else:
            train_pbar.set_postfix(loss=f"{batch_loss:.4f}")

        loss.backward()
        optimizer.step()

        # The criterion returns the batch mean; weight it by the batch size so a
        # smaller final batch does not skew the epoch average.
        avg_train_loss += batch_loss * len(y_batch)

    ########## VALIDATION ###############
    model.eval()
    valid_pbar = tqdm(valid_loader, desc="Testing")
    with torch.no_grad():
        for X, y in valid_pbar:
            X_batch = X.to(config.device)
            y_batch = y.to(config.device)

            res = model(X_batch)
            batch_loss = loss_f(res, y_batch).item()

            avg_val_loss += batch_loss * len(y_batch)
            valid_pbar.set_postfix(loss=f"{batch_loss:.4f}")

            # The arg-max of the logits equals the arg-max of their softmax,
            # so the predicted class is read straight from the logits.
            predicted = res.argmax(dim=1)
            summa += (predicted == y_batch).sum().item()

    torch.cuda.empty_cache()

    avg_train_loss = avg_train_loss / len(dataset_train)
    avg_val_loss = avg_val_loss / len(dataset_test)

    acc = summa / len(dataset_test)

    print(f'epoch: {epoch_i}, lr_rate {optimizer.param_groups[0]["lr"]}')

    print("loss_train: %0.4f| loss_valid: %0.4f|" % (avg_train_loss, avg_val_loss))
    print(f"metric {acc:.<5g}")

    elapsed_time = time.time() - start
    hours = int(elapsed_time // 3600)
    minutes = int((elapsed_time % 3600) // 60)
    seconds = int(elapsed_time % 60)
    print(f"Elapsed time: {hours:02d}:{minutes:02d}:{seconds:02d}")

    # The learning-rate schedule advances once per epoch.
    scheduler.step()
    # Per-epoch checkpoint; this stores the whole module object, not just its state_dict.
    torch.save(model, f"model_ep_{epoch_i}.pt")

---------------------epoch:1/15---------------------


Training: 100%|██████████| 81/81 [16:27<00:00, 12.19s/it, gpu_load=5.11GB, loss=1.9107]
Testing: 100%|██████████| 21/21 [02:27<00:00,  7.01s/it, loss=1.5207]


epoch: 1, lr_rate 0.0001
loss_train: 2.0723| loss_valid: 1.6556|
metric 0.514403
Elapsed time: 00:18:54
---------------------epoch:2/15---------------------


Training: 100%|██████████| 81/81 [12:16<00:00,  9.10s/it, gpu_load=5.11GB, loss=0.9362]
Testing: 100%|██████████| 21/21 [00:55<00:00,  2.64s/it, loss=1.2006]


epoch: 2, lr_rate 0.0001
loss_train: 1.1971| loss_valid: 1.4653|
metric 0.54321
Elapsed time: 00:13:12
---------------------epoch:3/15---------------------


Training: 100%|██████████| 81/81 [16:31<00:00, 12.24s/it, gpu_load=5.11GB, loss=0.7329]
Testing: 100%|██████████| 21/21 [01:04<00:00,  3.08s/it, loss=1.1129]


epoch: 3, lr_rate 0.0001
loss_train: 0.7036| loss_valid: 1.4358|
metric 0.55144
Elapsed time: 00:17:35
---------------------epoch:4/15---------------------


Training: 100%|██████████| 81/81 [03:38<00:00,  2.70s/it, gpu_load=5.11GB, loss=0.4108]
Testing: 100%|██████████| 21/21 [00:52<00:00,  2.49s/it, loss=0.9564]


epoch: 4, lr_rate 8e-05
loss_train: 0.3577| loss_valid: 1.4219|
metric 0.541152
Elapsed time: 00:04:30
---------------------epoch:5/15---------------------


Training: 100%|██████████| 81/81 [04:02<00:00,  2.99s/it, gpu_load=5.11GB, loss=0.1264]
Testing: 100%|██████████| 21/21 [00:51<00:00,  2.47s/it, loss=0.9055]


epoch: 5, lr_rate 8e-05
loss_train: 0.1898| loss_valid: 1.4491|
metric 0.534979
Elapsed time: 00:04:54
---------------------epoch:6/15---------------------


Training: 100%|██████████| 81/81 [03:35<00:00,  2.66s/it, gpu_load=5.11GB, loss=0.0775]
Testing:  33%|███▎      | 7/21 [00:32<01:04,  4.63s/it, loss=1.4996]


KeyboardInterrupt: 

In [12]:
# The validation metrics started to degrade while the training loss kept falling: the network has overfitted, so training was stopped.