**Import**


In [None]:
!pip install av
!pip install tqdm

In [2]:

import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as transforms
import torchvision.models as models
from torchvision.models.video import r2plus1d_18
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from PIL import Image
import time
from torchvision.io import read_video
import av
import pandas as pd
import numpy as np
import cv2


In [5]:
df = pd.read_csv('/content/drive/MyDrive/data.csv')
video_dir = '/content/drive/MyDrive/videos/..'


In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
# Создание экземпляра LabelEncoder
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(df['label'])
df['target'] = encoded_labels
print(df.head())

   Unnamed: 0             name_video        label  target
0           0  videos/video_0000.mp4  tap dancing      14
1           1  videos/video_0001.mp4  tap dancing      14
2           2  videos/video_0002.mp4  tap dancing      14
3           3  videos/video_0003.mp4  tap dancing      14
4           4  videos/video_0004.mp4  tap dancing      14


In [None]:

class VideoDataset(Dataset):
    def __init__(self, video_dir, df):
        self.video_dir = video_dir
        self.df = df

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        video_path = os.path.join(self.video_dir, self.df['name_video'].iloc[idx])
        video, audio, info = read_video(video_path)
        video = self.preprocess_video(video)

        target = self.df['target'].iloc[idx]
        target = torch.tensor(target, dtype=torch.long)  # Преобразование в Tensor

        return video, target

    def preprocess_video(self, video):

        video = video.float()


        resize_transform = transforms.Resize((112, 112))
        video_resized = torch.stack([resize_transform(frame.permute(2, 0, 1)).permute(1, 2, 0) for frame in video])


        num_frames = video_resized.shape[0]
        desired_frames = 24
        if num_frames < desired_frames:

            last_frame = video_resized[-1]
            frames_to_add = desired_frames - num_frames
            video_resized = torch.cat([video_resized, last_frame.unsqueeze(0).expand(frames_to_add, -1, -1, -1)], dim=0)
        elif num_frames > desired_frames:

            video_resized = video_resized[:desired_frames]


        video_normalized = video_resized.permute(3, 0, 1, 2) / 255.0


        video_tensor = torch.cat([frame.unsqueeze(0) for frame in video_normalized], dim=0)

        return video_tensor





video_dataset = VideoDataset(video_dir="/content/drive/MyDrive/videos/..", df=df)


video, target = video_dataset[0]
print("Video tensor shape:", video.shape)




In [10]:

# Определение параметров
batch_size = 12
validation_split = 0.2  # Доля данных, выделяемых под валидацию

# Создание DataLoader для исходного датасета
dataloader = DataLoader(video_dataset, batch_size=batch_size, shuffle=True)

# Вычисление размера валидационного набора данных
val_size = int(len(video_dataset) * validation_split)
train_size = len(video_dataset) - val_size

# Разделение датасета на тренировочный и валидационный наборы данных
train_dataset, val_dataset = random_split(video_dataset, [train_size, val_size])

# Создание DataLoader для тренировочного и валидационного наборов данных
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

# Проверка размеров полученных датасетов и батчей
print("Train dataset size:", len(train_dataset))
print("Validation dataset size:", len(val_dataset))

Train dataset size: 1874
Validation dataset size: 468


In [None]:
model = r2plus1d_18(pretrained=True)

# Определение устройства (например, GPU, если доступно)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Перенос модели на выбранное устройство
model = model.to(device)

# Проверка устройства, на котором работает модель
print("Device:", device)


In [11]:


criterion = nn.CrossEntropyLoss()
optimizer_lr = 0.0001
optimizer = optim.AdamW(model.parameters(), lr=optimizer_lr)
num_epochs = 10

In [12]:
num_epochs = 10

for epoch in range(num_epochs):
    print("---------------------epoch:{}/{}---------------------".format(epoch+1, num_epochs))

    # Train on the training data
    model.train()
    train_loss = 0.0
    train_correct = 0

    start_time = time.time()

    for i, (inputs, labels) in enumerate(tqdm(train_dataloader)):
        inputs = inputs.to(device)
        labels = labels.to(device)
        model = model.to(device)
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs.to(device))

        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        train_correct += (predicted == labels).sum().item()

    train_loss /= len(train_dataloader.dataset)
    train_accuracy = 100.0 * train_correct / len(train_dataloader.dataset)

    # Assign encoded labels to 'target' column in the DataFrame
    df['target'] = encoded_labels

    # Validate on the test data
    model.eval()
    test_loss = 0.0
    test_correct = 0

    with torch.no_grad():
        for inputs, labels in val_dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            test_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            test_correct += (predicted == labels).sum().item()

    test_loss /= len(val_dataloader.dataset)
    test_accuracy = 100.0 * test_correct / len(val_dataloader.dataset)

    end_time = time.time()
    elapsed_time = end_time - start_time

    # Print the results after each epoch
    print(f"\nepoch: {epoch+1}, lr_rate {optimizer.param_groups[0]['lr']:.4f}")
    print(f"loss_train: {train_loss:.4f} | loss_valid: {test_loss:.4f}")
    print(f"metric {test_accuracy}")
    print(f"Elapsed time: {time.strftime('%H:%M:%S', time.gmtime(elapsed_time))}")
    print("-----------------------------------------------")

    scheduler.step()


---------------------epoch:1/10---------------------


100%|██████████| 157/157 [21:40<00:00,  8.29s/it]



epoch: 1, lr_rate 0.0001
loss_train: 0.2745 | loss_valid: 0.1823
metric 43.58974358974359
Elapsed time: 00:25:42
-----------------------------------------------


NameError: ignored

In [13]:
num_epochs = 10

for epoch in range(num_epochs):
    print("---------------------epoch:{}/{}---------------------".format(epoch+1, num_epochs))

    # Train on the training data
    model.train()
    train_loss = 0.0
    train_correct = 0

    start_time = time.time()

    for i, (inputs, labels) in enumerate(tqdm(train_dataloader)):
        inputs = inputs.to(device)
        labels = labels.to(device)
        model = model.to(device)
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs.to(device))

        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        train_correct += (predicted == labels).sum().item()

    train_loss /= len(train_dataloader.dataset)
    train_accuracy = 100.0 * train_correct / len(train_dataloader.dataset)

    # Assign encoded labels to 'target' column in the DataFrame
    df['target'] = encoded_labels

    # Validate on the test data
    model.eval()
    test_loss = 0.0
    test_correct = 0

    with torch.no_grad():
        for inputs, labels in val_dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            test_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            test_correct += (predicted == labels).sum().item()

    test_loss /= len(val_dataloader.dataset)
    test_accuracy = 100.0 * test_correct / len(val_dataloader.dataset)

    end_time = time.time()
    elapsed_time = end_time - start_time

    # Print the results after each epoch
    print(f"\nepoch: {epoch+1}, lr_rate {optimizer.param_groups[0]['lr']:.4f}")
    print(f"loss_train: {train_loss:.4f} | loss_valid: {test_loss:.4f}")
    print(f"metric {test_accuracy}")
    print(f"Elapsed time: {time.strftime('%H:%M:%S', time.gmtime(elapsed_time))}")
    print("-----------------------------------------------")




---------------------epoch:1/10---------------------


100%|██████████| 157/157 [10:47<00:00,  4.13s/it]



epoch: 1, lr_rate 0.0001
loss_train: 0.0583 | loss_valid: 0.1713
metric 49.35897435897436
Elapsed time: 00:12:20
-----------------------------------------------
---------------------epoch:2/10---------------------


100%|██████████| 157/157 [10:41<00:00,  4.09s/it]



epoch: 2, lr_rate 0.0001
loss_train: 0.0229 | loss_valid: 0.1903
metric 50.427350427350426
Elapsed time: 00:12:14
-----------------------------------------------
---------------------epoch:3/10---------------------


100%|██████████| 157/157 [10:40<00:00,  4.08s/it]



epoch: 3, lr_rate 0.0001
loss_train: 0.0114 | loss_valid: 0.1976
metric 51.282051282051285
Elapsed time: 00:12:13
-----------------------------------------------
---------------------epoch:4/10---------------------


100%|██████████| 157/157 [10:43<00:00,  4.10s/it]



epoch: 4, lr_rate 0.0001
loss_train: 0.0116 | loss_valid: 0.2109
metric 50.427350427350426
Elapsed time: 00:12:16
-----------------------------------------------
---------------------epoch:5/10---------------------


100%|██████████| 157/157 [10:51<00:00,  4.15s/it]



epoch: 5, lr_rate 0.0001
loss_train: 0.0175 | loss_valid: 0.2169
metric 47.863247863247864
Elapsed time: 00:12:28
-----------------------------------------------
---------------------epoch:6/10---------------------


  1%|          | 1/157 [00:08<21:18,  8.19s/it]


KeyboardInterrupt: ignored