In [1]:
import pandas as pd
import numpy as np
import cv2
import torch
import os

In [2]:
# Check if CUDA is available
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    device = torch.device("cpu")
    print("Using CPU")

Using GPU: NVIDIA GeForce GTX 1650 Ti


In [3]:
# Example of transferring a tensor to GPU
tensor_on_gpu = torch.Tensor([1.0, 2.0]).to(device)

In [4]:
annot = pd.read_csv("data/slovo/annotations.csv", sep='\t')
annot.head(5)

Unnamed: 0,attachment_id,text,user_id,height,width,length,train
0,44e8d2a0-7e01-450b-90b0-beb7400d2c1e,Ё,185bd3a81d9d618518d10abebf0d17a8,1920,1080,76.0,True
1,df5b08f0-41d1-4572-889c-8b893e71069b,А,185bd3a81d9d618518d10abebf0d17a8,1920,1080,40.0,True
2,17f53df4-c467-4aff-9f48-20687b63d49a,Р,185bd3a81d9d618518d10abebf0d17a8,1920,1080,57.0,True
3,e3add916-c708-4339-ad98-7e2740be29e9,Е,185bd3a81d9d618518d10abebf0d17a8,1920,1080,64.0,True
4,bd7272ed-1850-48f1-a2a8-c8fed523dc37,Ч,185bd3a81d9d618518d10abebf0d17a8,1920,1080,84.0,True


In [5]:
labels = annot['text'].sample(10)
labels.values

array(['много', 'стоять', 'адаптивное поведение', 'переваривать',
       'расслабление', 'аккуратный', 'обучать', 'расписание', 'отчаянный',
       'наружу'], dtype=object)

In [6]:
train150 = annot.query("text in @labels and train")
train150

Unnamed: 0,attachment_id,text,user_id,height,width,length,train
1870,2590430a-cddf-460c-83dc-5a9b75f5a836,аккуратный,db573f94204e56e0cf3fc2ea000e5bdc,1280,720,39.0,True
1886,d988c0b8-8418-47e3-8f07-89a109ff2023,адаптивное поведение,db573f94204e56e0cf3fc2ea000e5bdc,1280,720,54.0,True
1989,507d6f3c-f2b9-4411-8b0d-e6c4ef168725,аккуратный,0211b488644476dd0fec656ccb9b74fc,1920,1080,23.0,True
2013,f7693961-c80f-4e38-afc9-5c32ea34c479,аккуратный,db573f94204e56e0cf3fc2ea000e5bdc,1280,720,45.0,True
2027,e44625da-e950-41aa-91b2-d3303576d7eb,аккуратный,185bd3a81d9d618518d10abebf0d17a8,1920,1080,25.0,True
...,...,...,...,...,...,...,...
14608,a6c18197-dbf4-4d2f-a941-99800cc3ca57,расписание,b07a773bcb10b4f14f33d2b0e8ec58ba,720,1280,75.0,True
14657,275c7816-5d3b-4396-a084-e1b24a466d6b,расписание,46dd04a1caa75ed3082b573cb5a3ad26,1920,822,55.0,True
14712,c9837056-17a2-4e53-9883-feca2ca26203,расписание,db573f94204e56e0cf3fc2ea000e5bdc,1280,720,59.0,True
14800,0ceb887d-ee4b-4826-8074-135c1f6ff399,расписание,95af8e702c909eee7145c6dc1a3d756b,1280,720,57.0,True


In [7]:
val50 = annot.query("text in @labels and not train")
val50.shape

(50, 7)

In [9]:
def crop_frame(frame):
    """
    Crops the frame to a square shape
    :param frame: frame to crop
    :return: cropped frame
    """
    height, width = frame.shape[:2]
    th_dim = frame.shape[2]
    max_dim = max(height, width)
    dif = abs(height-width)

    first_side = dif // 2
    second_side = dif - first_side
    
    
    if width == max_dim:
        f_array = np.zeros(shape=(first_side, max_dim, th_dim))
        s_array = np.zeros(shape=(second_side, max_dim, th_dim))
        frame = np.concatenate((f_array, np.array(frame), s_array), axis=0)
    else:
        f_array = np.zeros(shape=(max_dim, first_side, th_dim))
        s_array = np.zeros(shape=(max_dim, second_side, th_dim))
        frame = np.concatenate((f_array, np.array(frame), s_array), axis=1)

    return frame

In [10]:
def load_video(path, img_size):
    """
    Loads the video from the path and returns a list of frames
    :param path: path to the video
    :param img_size: size of the image
    :param i: index of the video
    :return: list of frames
    """
    cap = cv2.VideoCapture(path)
    frames = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        else:
            frame = crop_frame(frame)
            frame = cv2.resize(frame, (img_size, img_size))
            frame = frame[:, :, [2, 1, 0]]
            frame_tensor = torch.Tensor(frame).permute(2, 0, 1).to(device)
            frames.append(frame_tensor)
    return frames

In [12]:
from pathlib import Path

tensor_dir = "data/tensors"
Path(tensor_dir).mkdir(parents=True, exist_ok=True)

In [13]:
def process_and_save_tensors(annot_subset, subset_name, subdir_name = "train"):
    """
    Processes and saves tensors to disk
    :param annot_subset: DataFrame with annotations
    :param subset_name: name of the subset
    :return: None
    """
    i = 0
    for ind, row in annot_subset.iterrows():
        path = row['attachment_id']
        full_path = "data/slovo/" + str(subdir_name) + "/" + str(path) + ".mp4"

        # Load and process video
        frames = load_video(full_path, 100)
    
        # Save tensor to disk and store path in DataFrame
        tensor_dir = "data/tensors"
        tensor_path = os.path.join(tensor_dir, f"{subset_name}_{path}.pt")
        torch.save(frames, tensor_path)
        annot_subset.loc[ind, 'attachment_id'] = str(tensor_path)
        
        i += 1
        if i <= 150 and i % 10 == 0:
            print(f"We are done on the image number {i}")
    
    # Save DataFrame to CSV
    annot_subset.to_csv(f"data/processed_annotations_{subset_name}.csv", index=False)


In [14]:
# Process and save tensors for training and validation subsets
process_and_save_tensors(train150, "train")

We are done on the image number 10
We are done on the image number 20
We are done on the image number 30
We are done on the image number 40
We are done on the image number 50
We are done on the image number 60
We are done on the image number 70
We are done on the image number 80
We are done on the image number 90
We are done on the image number 100
We are done on the image number 110
We are done on the image number 120
We are done on the image number 130
We are done on the image number 140
We are done on the image number 150


In [15]:
proc_annot = pd.read_csv("data/processed_annotations_train.csv")
proc_annot.head()

Unnamed: 0,attachment_id,text,user_id,height,width,length,train
0,data/tensors\train_2590430a-cddf-460c-83dc-5a9...,аккуратный,db573f94204e56e0cf3fc2ea000e5bdc,1280,720,39.0,True
1,data/tensors\train_d988c0b8-8418-47e3-8f07-89a...,адаптивное поведение,db573f94204e56e0cf3fc2ea000e5bdc,1280,720,54.0,True
2,data/tensors\train_507d6f3c-f2b9-4411-8b0d-e6c...,аккуратный,0211b488644476dd0fec656ccb9b74fc,1920,1080,23.0,True
3,data/tensors\train_f7693961-c80f-4e38-afc9-5c3...,аккуратный,db573f94204e56e0cf3fc2ea000e5bdc,1280,720,45.0,True
4,data/tensors\train_e44625da-e950-41aa-91b2-d33...,аккуратный,185bd3a81d9d618518d10abebf0d17a8,1920,1080,25.0,True


In [16]:
process_and_save_tensors(val50, "valid", "test")

We are done on the image number 10
We are done on the image number 20
We are done on the image number 30
We are done on the image number 40
We are done on the image number 50


In [17]:
# Load training annotations
annot_train150 = pd.read_csv("data/processed_annotations_train.csv")

# Load the first tensor
first_train_tensor_path = annot_train150['attachment_id'].iloc[0]
first_train_tensor = torch.load(first_train_tensor_path)

In [18]:
len(first_train_tensor)

39

In [19]:
if torch.isnan(first_train_tensor[0]).any():
    print("The first frame contains NaN values!")
elif torch.equal(first_train_tensor[0], torch.zeros_like(first_train_tensor[0])):
    print("The first frame is all zeros!")
else:
    print("The first frame is not NaN or zeros.")


The first frame is not NaN or zeros.


In [20]:
# Load validation annotations
annot_val50 = pd.read_csv("data/processed_annotations_valid.csv")

# Load the first tensor
first_val_tensor_path = annot_train150['attachment_id'].iloc[0]
first_val_tensor = torch.load(first_val_tensor_path)

In [21]:
len(first_val_tensor)

39

In [22]:
if torch.isnan(first_val_tensor[0]).any():
    print("The first frame contains NaN values!")
elif torch.equal(first_val_tensor[0], torch.zeros_like(first_val_tensor[0])):
    print("The first frame is all zeros!")
else:
    print("The first frame is not NaN or zeros.")


The first frame is not NaN or zeros.


In [23]:
# torch.cuda.empty_cache()