In [1]:
!gdown 1P6t3MAyTlsdN6ZQ9dzWtkHimYVY44J26

Downloading...
From: https://drive.google.com/uc?id=1P6t3MAyTlsdN6ZQ9dzWtkHimYVY44J26
To: /content/videos.zip
100% 293M/293M [00:03<00:00, 80.0MB/s]


In [2]:
!gdown 1ajNw5P-mcRtezZ8b37ad-4UMFW5uirnS
!gdown 1Wcliu4PZgChLtoG3_eMYaM03AYnrdA7u

Downloading...
From: https://drive.google.com/uc?id=1ajNw5P-mcRtezZ8b37ad-4UMFW5uirnS
To: /content/train.csv
100% 57.7k/57.7k [00:00<00:00, 80.9MB/s]
Downloading...
From: https://drive.google.com/uc?id=1Wcliu4PZgChLtoG3_eMYaM03AYnrdA7u
To: /content/val.csv
100% 6.44k/6.44k [00:00<00:00, 17.8MB/s]


In [3]:
import os
import cv2
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

import albumentations as A

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from sklearn.metrics import f1_score

In [4]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

In [5]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
device

'cuda:0'

In [6]:
train_df = pd.read_csv('train.csv')
val_df = pd.read_csv('val.csv')

In [7]:
train_df.head(10)

Unnamed: 0,video_id,label
0,01781.mp4,mosh pit dancing
1,01005.mp4,dancing gangnam style
2,00305.mp4,breakdancing
3,01385.mp4,dancing macarena
4,00493.mp4,country line dancing
5,02033.mp4,salsa dancing
6,01817.mp4,robot dancing
7,01143.mp4,dancing gangnam style
8,00233.mp4,breakdancing
9,02999.mp4,tap dancing


In [8]:
classes = train_df.label.unique().tolist()
classes

['mosh pit dancing',
 'dancing gangnam style',
 'breakdancing',
 'dancing macarena',
 'country line dancing',
 'salsa dancing',
 'robot dancing',
 'tap dancing',
 'dancing ballet',
 'square dancing',
 'belly dancing',
 'jumpstyle dancing',
 'dancing charleston',
 'swing dancing',
 'tango dancing']

In [9]:
classes_dict = {x: str(i) for i, x in enumerate(classes)}
classes_dict

{'mosh pit dancing': '0',
 'dancing gangnam style': '1',
 'breakdancing': '2',
 'dancing macarena': '3',
 'country line dancing': '4',
 'salsa dancing': '5',
 'robot dancing': '6',
 'tap dancing': '7',
 'dancing ballet': '8',
 'square dancing': '9',
 'belly dancing': '10',
 'jumpstyle dancing': '11',
 'dancing charleston': '12',
 'swing dancing': '13',
 'tango dancing': '14'}

In [10]:
train_df['map_label'] = train_df['label'].map(classes_dict)
val_df['map_label'] = val_df['label'].map(classes_dict)

In [19]:
!mkdir processed

In [None]:
!unzip videos.zip

In [13]:
paths = sorted(os.listdir('videos'))
len(paths)

2379

In [14]:
from PIL import Image

In [15]:
from torchvision import transforms

left, top, right and bottom borders respectively.

In [16]:
RESCALE_SIZE = 112

def prepare_sample(pic: Image) -> np.array:
    h, w, _ = pic.size()
    # padding
    max_wh = np.max([w, h])
    hp = int((max_wh - w) / 2)
    vp = int((max_wh - h) / 2)
    padding = (hp, vp, hp, vp)
    # padding = (hp, vp)
    image = transforms.functional.pad(pic, padding, 0, 'constant')
    print(image.shape)
    image = transforms.functional.resize(image, (RESCALE_SIZE, RESCALE_SIZE))
    image = image.permute((1,2,0))

    return np.array(image)

In [None]:
root = 'videos'
out_path = 'processed'
count_frame = 16

for vid in paths:
    check_path = os.path.join(out_path, vid.split('.')[0])
    if not os.path.exists(check_path):
        os.mkdir(check_path)
    cap = cv2.VideoCapture(os.path.join(root, vid))
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    amount = 5
    i, j = 0, 0
    flag = True
    while flag:
        ret, frame = cap.read()
        if ret:
            if j % amount == 0:
                frame = frame[:, :, ::-1]
                frame = torch.from_numpy(frame.copy())
                print(frame.size())
                frame = prepare_sample(frame.permute((2,0,1)))
                print(frame.shape)
                save_path = os.path.join(out_path, vid.split('.')[0], f'{i}' + '.jpg')
                plt.imsave(save_path, frame)
                last_frame = frame
                i += 1

            if i == count_frame:
                flag = False
            j += 1
        else:
            if i < count_frame:
                for _ in range(count_frame - i):
                    save_path = os.path.join(out_path, vid.split('.')[0], f'{i}' + '.jpg')
                    plt.imsave(save_path, last_frame)
                    i += 1
                flag = False


In [21]:
len(os.listdir('processed'))

2379

In [22]:
train_ids = [(row.video_id.split('.')[0], int(row.map_label)) for row in train_df.itertuples()]
val_ids = [(row.video_id.split('.')[0], int(row.map_label)) for row in val_df.itertuples()]

In [23]:
import albumentations as A

In [24]:
import albumentations.pytorch as Ap

In [77]:
transform_train = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.5,
                               contrast_limit=0.5, p=0.5),
    A.augmentations.transforms.Normalize(mean=(0.5, 0.5, 0.5),
                                         std=(0.5, 0.5, 0.5)),
    Ap.transforms.ToTensorV2()
], additional_targets={
    f'image{i}': 'image'
    for i in range(1, 16)
})

In [78]:
transform_val= A.Compose([
    A.augmentations.transforms.Normalize(mean=(0.5, 0.5, 0.5),
                                         std=(0.5, 0.5, 0.5)),
    Ap.transforms.ToTensorV2()
], additional_targets={
    f'image{i}': 'image'
    for i in range(1, 16)
})

In [80]:
class CustomDataset(Dataset):

    def __init__(self, images_ids, images_path_root, transform=None):

        self.images_ids = images_ids
        self.images_path_root = images_path_root
        self.transform = transform

    def __len__(self):

        return len(self.images_ids)

    def load_sample(self, paths):
        images = []
        for path in paths:
            img = plt.imread(path)
            images.append(img)

        return np.array(images, dtype=np.float32)

    def apply_augmentations(self, array):
        targets={'image': array[0]}
        for i in range(1, 16):
            targets[f'image{i}'] = array[i]
        transformed = self.transform(**targets)
        transformed = torch.cat(
            [transformed['image'].unsqueeze(1)] +
            [transformed[f'image{i}'].unsqueeze(1)
            for i in range(1, 16)], axis=1
        )

        return transformed

    def __getitem__(self, index):
        dir, label = self.images_ids[index]
        # loading, transforming and stacking together
        paths = [os.path.join(self.images_path_root,
                              dir, f'{str(x)}.jpg') for x in range(16)]

        tensor = self.load_sample(paths) / 255
        tensor = self.apply_augmentations(tensor)


        return tensor, label

In [82]:
train_dataset = CustomDataset(
    images_ids=train_ids,
    images_path_root='processed',
    transform=transform_train
)

val_dataset = CustomDataset(
    images_ids=val_ids,
    images_path_root='processed',
    transform=transform_val
)

In [83]:
train_loader = DataLoader(train_dataset, batch_size=15, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=15, shuffle=False, num_workers=2)

In [84]:
x,y = next(iter(train_loader))

In [85]:
x.shape

torch.Size([15, 3, 16, 112, 112])

In [86]:
y.shape

torch.Size([15])

In [87]:
class MobileNet3D_V2(nn.Module):

    def __init__(self, num_classes):
        super(MobileNet3D_V2, self).__init__()

        self.conv1 = nn.Conv3d(in_channels=3, out_channels=32, kernel_size=(3, 3, 3), stride=(1, 2, 2), padding=1)

        self.mobileblock1_1 = nn.Sequential(
            nn.Conv3d(in_channels=32, out_channels=192, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(192),
            nn.ReLU6(),
            nn.Conv3d(in_channels=192, out_channels=192, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=1, groups=192),
            nn.BatchNorm3d(192),
            nn.ReLU6(),
            nn.Conv3d(in_channels=192, out_channels=32, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(32)
        )

        self.mobileblock1_2 = nn.Sequential(
            nn.Conv3d(in_channels=32, out_channels=192, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(192),
            nn.ReLU6(),
            nn.Conv3d(in_channels=192, out_channels=192, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=1, groups=192),
            nn.BatchNorm3d(192),
            nn.ReLU6(),
            nn.Conv3d(in_channels=192, out_channels=16, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(16)
        )

        self.mobileblock2_1 = nn.Sequential(
            nn.Conv3d(in_channels=16, out_channels=96, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(96),
            nn.ReLU6(),
            nn.Conv3d(in_channels=96, out_channels=96, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=1, groups=96),
            nn.BatchNorm3d(96),
            nn.ReLU6(),
            nn.Conv3d(in_channels=96, out_channels=16, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(16)
        )

        self.mobileblock2_2 = nn.Sequential(
            nn.Conv3d(in_channels=16, out_channels=96, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(96),
            nn.ReLU6(),
            nn.Conv3d(in_channels=96, out_channels=96, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=1, groups=96),
            nn.BatchNorm3d(96),
            nn.ReLU6(),
            nn.Conv3d(in_channels=96, out_channels=24, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(24)
        )

        self.mobileblock2_1_r = nn.Sequential(
            nn.Conv3d(in_channels=24, out_channels=144, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(144),
            nn.ReLU6(),
            nn.Conv3d(in_channels=144, out_channels=144, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=1, groups=144),
            nn.BatchNorm3d(144),
            nn.ReLU6(),
            nn.Conv3d(in_channels=144, out_channels=24, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(24)
        )

        self.mobileblock2_2_r = nn.Sequential(
            nn.Conv3d(in_channels=24, out_channels=144, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(144),
            nn.ReLU6(),
            nn.Conv3d(in_channels=144, out_channels=144, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=1, groups=144),
            nn.BatchNorm3d(144),
            nn.ReLU6(),
            nn.Conv3d(in_channels=144, out_channels=24, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(24)
        )

        self.mobileblock3_1 = nn.Sequential(
            nn.Conv3d(in_channels=24, out_channels=144, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(144),
            nn.ReLU6(),
            nn.Conv3d(in_channels=144, out_channels=144, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=1, groups=144),
            nn.BatchNorm3d(144),
            nn.ReLU6(),
            nn.Conv3d(in_channels=144, out_channels=24, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(24)
        )

        self.mobileblock3_2 = nn.Sequential(
            nn.Conv3d(in_channels=24, out_channels=144, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(144),
            nn.ReLU6(),
            nn.Conv3d(in_channels=144, out_channels=144, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=1, groups=144),
            nn.BatchNorm3d(144),
            nn.ReLU6(),
            nn.Conv3d(in_channels=144, out_channels=32, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(32)
        )

        self.mobileblock3_1_r = nn.Sequential(
            nn.Conv3d(in_channels=32, out_channels=192, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(192),
            nn.ReLU6(),
            nn.Conv3d(in_channels=192, out_channels=192, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=1, groups=192),
            nn.BatchNorm3d(192),
            nn.ReLU6(),
            nn.Conv3d(in_channels=192, out_channels=32, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(32)
        )

        self.mobileblock3_2_r = nn.Sequential(
            nn.Conv3d(in_channels=32, out_channels=192, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(192),
            nn.ReLU6(),
            nn.Conv3d(in_channels=192, out_channels=192, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=1, groups=192),
            nn.BatchNorm3d(192),
            nn.ReLU6(),
            nn.Conv3d(in_channels=192, out_channels=32, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(32)
        )

        self.mobileblock4_1 = nn.Sequential(
            nn.Conv3d(in_channels=32, out_channels=192, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(192),
            nn.ReLU6(),
            nn.Conv3d(in_channels=192, out_channels=192, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=1, groups=192),
            nn.BatchNorm3d(192),
            nn.ReLU6(),
            nn.Conv3d(in_channels=192, out_channels=32, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(32)
        )

        self.mobileblock4_2 = nn.Sequential(
            nn.Conv3d(in_channels=32, out_channels=192, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(192),
            nn.ReLU6(),
            nn.Conv3d(in_channels=192, out_channels=192, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=1, groups=192),
            nn.BatchNorm3d(192),
            nn.ReLU6(),
            nn.Conv3d(in_channels=192, out_channels=64, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(64)
        )

        self.mobileblock4_1_r = nn.Sequential(
            nn.Conv3d(in_channels=64, out_channels=384, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(384),
            nn.ReLU6(),
            nn.Conv3d(in_channels=384, out_channels=384, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=1, groups=384),
            nn.BatchNorm3d(384),
            nn.ReLU6(),
            nn.Conv3d(in_channels=384, out_channels=64, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(64)
        )

        self.mobileblock4_2_r = nn.Sequential(
            nn.Conv3d(in_channels=64, out_channels=384, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(384),
            nn.ReLU6(),
            nn.Conv3d(in_channels=384, out_channels=384, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=1, groups=384),
            nn.BatchNorm3d(384),
            nn.ReLU6(),
            nn.Conv3d(in_channels=384, out_channels=64, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(64)
        )

        self.mobileblock5_1 = nn.Sequential(
            nn.Conv3d(in_channels=64, out_channels=384, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(384),
            nn.ReLU6(),
            nn.Conv3d(in_channels=384, out_channels=384, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=1, groups=384),
            nn.BatchNorm3d(384),
            nn.ReLU6(),
            nn.Conv3d(in_channels=384, out_channels=64, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(64)
        )

        self.mobileblock5_2 = nn.Sequential(
            nn.Conv3d(in_channels=64, out_channels=384, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(384),
            nn.ReLU6(),
            nn.Conv3d(in_channels=384, out_channels=384, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=1, groups=384),
            nn.BatchNorm3d(384),
            nn.ReLU6(),
            nn.Conv3d(in_channels=384, out_channels=96, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(96)
        )

        self.mobileblock5_1_r = nn.Sequential(
            nn.Conv3d(in_channels=96, out_channels=576, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(576),
            nn.ReLU6(),
            nn.Conv3d(in_channels=576, out_channels=576, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=1, groups=576),
            nn.BatchNorm3d(576),
            nn.ReLU6(),
            nn.Conv3d(in_channels=576, out_channels=96, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(96)
        )

        self.mobileblock5_2_r = nn.Sequential(
            nn.Conv3d(in_channels=96, out_channels=576, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(576),
            nn.ReLU6(),
            nn.Conv3d(in_channels=576, out_channels=576, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=1, groups=576),
            nn.BatchNorm3d(576),
            nn.ReLU6(),
            nn.Conv3d(in_channels=576, out_channels=96, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(96)
        )

        self.mobileblock6_1 = nn.Sequential(
            nn.Conv3d(in_channels=96, out_channels=576, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(576),
            nn.ReLU6(),
            nn.Conv3d(in_channels=576, out_channels=576, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=1, groups=576),
            nn.BatchNorm3d(576),
            nn.ReLU6(),
            nn.Conv3d(in_channels=576, out_channels=96, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(96)
        )

        self.mobileblock6_2 = nn.Sequential(
            nn.Conv3d(in_channels=96, out_channels=576, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(576),
            nn.ReLU6(),
            nn.Conv3d(in_channels=576, out_channels=576, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=1, groups=576),
            nn.BatchNorm3d(576),
            nn.ReLU6(),
            nn.Conv3d(in_channels=576, out_channels=160, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(160)
        )

        self.mobileblock6_1_r = nn.Sequential(
            nn.Conv3d(in_channels=160, out_channels=960, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(960),
            nn.ReLU6(),
            nn.Conv3d(in_channels=960, out_channels=960, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=1, groups=960),
            nn.BatchNorm3d(960),
            nn.ReLU6(),
            nn.Conv3d(in_channels=960, out_channels=160, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(160)
        )

        self.mobileblock6_2_r = nn.Sequential(
            nn.Conv3d(in_channels=160, out_channels=960, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(960),
            nn.ReLU6(),
            nn.Conv3d(in_channels=960, out_channels=960, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=1, groups=960),
            nn.BatchNorm3d(960),
            nn.ReLU6(),
            nn.Conv3d(in_channels=960, out_channels=160, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(160)
        )

        self.mobileblock7_1 = nn.Sequential(
            nn.Conv3d(in_channels=160, out_channels=960, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(960),
            nn.ReLU6(),
            nn.Conv3d(in_channels=960, out_channels=960, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=1, groups=960),
            nn.BatchNorm3d(960),
            nn.ReLU6(),
            nn.Conv3d(in_channels=960, out_channels=160, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(160)
        )

        self.mobileblock7_2 = nn.Sequential(
            nn.Conv3d(in_channels=160, out_channels=960, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(960),
            nn.ReLU6(),
            nn.Conv3d(in_channels=960, out_channels=960, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=1, groups=960),
            nn.BatchNorm3d(960),
            nn.ReLU6(),
            nn.Conv3d(in_channels=960, out_channels=320, kernel_size=(1, 1, 1), stride=(1, 1, 1)),
            nn.BatchNorm3d(320)
        )

        self.conv8 = nn.Conv3d(in_channels=320, out_channels=1280, kernel_size=(1, 1, 1), stride=(1, 1, 1))
        self.avgpool = nn.AvgPool3d(kernel_size=(1, 4, 4), stride=(1, 1, 1))
        self.linear = nn.Linear(1280, num_classes)
        self.flatten = nn.Flatten()
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.conv1(x))

        x = self.mobileblock1_1(x) + x
        x = self.mobileblock1_2(x)

        x = self.mobileblock2_1(x) + x
        x = self.mobileblock2_2(x)
        x = self.mobileblock2_1_r(x) + x
        x = self.mobileblock2_2_r(x)

        x = self.mobileblock3_1(x) + x
        x = self.mobileblock3_2(x)
        x = self.mobileblock3_1_r(x) + x
        x = self.mobileblock3_2_r(x)
        x = self.mobileblock3_1_r(x) + x
        x = self.mobileblock3_2_r(x)

        x = self.mobileblock4_1(x) + x
        x = self.mobileblock4_2(x)
        x = self.mobileblock4_1_r(x) + x
        x = self.mobileblock4_2_r(x)
        x = self.mobileblock4_1_r(x) + x
        x = self.mobileblock4_2_r(x)
        x = self.mobileblock4_1_r(x) + x
        x = self.mobileblock4_2_r(x)

        x = self.mobileblock5_1(x) + x
        x = self.mobileblock5_2(x)
        x = self.mobileblock5_1_r(x) + x
        x = self.mobileblock5_2_r(x)
        x = self.mobileblock5_1_r(x) + x
        x = self.mobileblock5_2_r(x)

        x = self.mobileblock6_1(x) + x
        x = self.mobileblock6_2(x)
        x = self.mobileblock6_1_r(x) + x
        x = self.mobileblock6_2_r(x)
        x = self.mobileblock6_1_r(x) + x
        x = self.mobileblock6_2_r(x)

        x = self.mobileblock7_1(x) + x
        x = self.mobileblock7_2(x)

        x = self.relu(self.conv8(x))
        x = self.avgpool(x)
        x = self.flatten(x)
        output = self.linear(x)
        return output

In [88]:
!pip install torchmetrics clearml

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [89]:
from clearml import Task
from torchmetrics.classification import MulticlassAccuracy
from torchmetrics.classification import MulticlassPrecision
from torchmetrics.classification import MulticlassRecall
from torchmetrics.classification import MulticlassF1Score


In [90]:
%env CLEARML_WEB_HOST=https://app.clear.ml
%env CLEARML_API_HOST=https://api.clear.ml
%env CLEARML_FILES_HOST=https://files.clear.ml
%env CLEARML_API_ACCESS_KEY=XW4OVGJGN4GTGHJ31PKP
%env CLEARML_API_SECRET_KEY=pLyGTZPdLNL4841KrZHaQL50CT0odPu3NwIiGJM7FSdkwOHDy6

env: CLEARML_WEB_HOST=https://app.clear.ml
env: CLEARML_API_HOST=https://api.clear.ml
env: CLEARML_FILES_HOST=https://files.clear.ml
env: CLEARML_API_ACCESS_KEY=XW4OVGJGN4GTGHJ31PKP
env: CLEARML_API_SECRET_KEY=pLyGTZPdLNL4841KrZHaQL50CT0odPu3NwIiGJM7FSdkwOHDy6


In [97]:
num_epochs = 10
lr = 1e-3

#инициализация эксперимента
task = Task.init(
    project_name='Action-classification',
    task_name='test_training_2'
)

logger = task.get_logger()

# инициализация модели
model = MobileNet3D_V2(num_classes=15)
model = model.to(device)

# оптимизатор
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)

# гиперпараметры для логирования
parameters = {'optimizer': 'AdamW',
              'lr': lr,
              'num_epochs': num_epochs
              }

task.connect(parameters)

{'optimizer': 'AdamW', 'lr': 0.001, 'num_epochs': 10}

In [92]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [98]:
def train(model,
          num_epochs,
          train_loader,
          val_loader,
          optimizer,
          scheduler=None,
          device=device,
          loss_fn=nn.CrossEntropyLoss()
          ):


    train_loss = []
    val_loss = []

    accuracy_preds = []
    precision_preds = []
    recall_preds = []
    f1_preds = []
    targets = []

    for epoch in tqdm(range(num_epochs)):

        model.train()
        for features, labels in train_loader:
            features = features.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(features.float())
            loss_train = loss_fn(outputs, labels)
            loss_train.backward()
            optimizer.step()

            train_loss.append(loss_train.detach().item())

            with torch.no_grad():
                labels = labels.cpu()
                outputs = outputs.argmax(dim=1).cpu()
                accuracy_preds.extend(outputs)
                precision_preds.extend(outputs)
                recall_preds.extend(outputs)
                f1_preds.extend(outputs)
                targets.extend(labels)

        logger.report_scalar(title='Train/Validation Loss', series='Train loss', iteration=epoch, value=np.mean(train_loss))
        logger.report_scalar(title='Metrics train', series='Accuracy', iteration=epoch, value=accuracy_score(accuracy_preds, targets))
        logger.report_scalar(title='Metrics train', series='Precision', iteration=epoch, value=precision_score(precision_preds, targets, average='macro'))
        logger.report_scalar(title='Metrics train', series='Recall', iteration=epoch, value=recall_score(recall_preds, targets, average='macro'))
        logger.report_scalar(title='Metrics train', series='F1_score', iteration=epoch, value=f1_score(f1_preds, targets, average='macro'))

        train_loss.clear()
        accuracy_preds.clear()
        precision_preds.clear()
        recall_preds.clear()
        f1_preds.clear()
        targets.clear()

        model.eval()
        with torch.no_grad():
            for features, labels in val_loader:
                features = features.to(device)
                labels = labels.to(device)
                outputs = model(features.float())
                loss_val = loss_fn(outputs, labels)

                val_loss.append(loss_val.item())

                labels = labels.cpu()
                outputs = outputs.argmax(dim=1).cpu()

                accuracy_preds.extend(outputs)
                precision_preds.extend(outputs)
                recall_preds.extend(outputs)
                f1_preds.extend(outputs)
                targets.extend(labels)

        logger.report_scalar(title='Train/Validation Loss', series='Validation loss', iteration=epoch, value=np.mean(val_loss))
        logger.report_scalar(title='Metrics val', series='Accuracy', iteration=epoch, value=accuracy_score(accuracy_preds, targets))
        logger.report_scalar(title='Metrics val', series='Precision', iteration=epoch, value=precision_score(precision_preds, targets, average='macro'))
        logger.report_scalar(title='Metrics val', series='Recall', iteration=epoch, value=recall_score(recall_preds, targets, average='macro'))
        logger.report_scalar(title='Metrics val', series='F1_score', iteration=epoch, value=f1_score(f1_preds, targets, average='macro'))

        val_loss.clear()
        accuracy_preds.clear()
        precision_preds.clear()
        recall_preds.clear()
        f1_preds.clear()
        targets.clear()

        if scheduler:
           scheduler.step()

        torch.save(model.state_dict(), f'MobileNet3D_V2_{epoch+1}.pt')

    return model

In [103]:
train(model,
      num_epochs,
      train_loader,
      val_loader,
      optimizer
      )

  0%|          | 0/10 [00:00<?, ?it/s]

MobileNet3D_V2(
  (conv1): Conv3d(3, 32, kernel_size=(3, 3, 3), stride=(1, 2, 2), padding=(1, 1, 1))
  (mobileblock1_1): Sequential(
    (0): Conv3d(32, 192, kernel_size=(1, 1, 1), stride=(1, 1, 1))
    (1): BatchNorm3d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU6()
    (3): Conv3d(192, 192, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), groups=192)
    (4): BatchNorm3d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU6()
    (6): Conv3d(192, 32, kernel_size=(1, 1, 1), stride=(1, 1, 1))
    (7): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (mobileblock1_2): Sequential(
    (0): Conv3d(32, 192, kernel_size=(1, 1, 1), stride=(1, 1, 1))
    (1): BatchNorm3d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU6()
    (3): Conv3d(192, 192, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), groups=192)
    (4): BatchNorm3d(192, e

In [102]:
torch.save(model.state_dict(), '/content/weights/1.pth')

In [104]:
task.close()