In [1]:
import pandas as pd
import numpy as np
import cv2
import torch
import os

In [2]:
# Select the compute device: the GPU when CUDA is usable, the CPU otherwise
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    print("Using CPU")

Using GPU: NVIDIA GeForce GTX 1650 Ti


In [3]:
# Demo: build a small tensor, then move it onto the selected device
# (despite the variable name, it stays on the CPU when no GPU was found)
tensor_on_gpu = torch.Tensor([1.0, 2.0])
tensor_on_gpu = tensor_on_gpu.to(device)

In [4]:
# Read the tab-separated annotation table and preview its first five rows
annot = pd.read_csv("data/annotations.csv", sep="\t")
annot.head()

Unnamed: 0,attachment_id,text,user_id,height,width,length,train,begin,end
0,44e8d2a0-7e01-450b-90b0-beb7400d2c1e,Ё,185bd3a81d9d618518d10abebf0d17a8,1920,1080,156.0,True,36,112
1,df5b08f0-41d1-4572-889c-8b893e71069b,А,185bd3a81d9d618518d10abebf0d17a8,1920,1080,150.0,True,36,76
2,17f53df4-c467-4aff-9f48-20687b63d49a,Р,185bd3a81d9d618518d10abebf0d17a8,1920,1080,133.0,True,40,97
3,e3add916-c708-4339-ad98-7e2740be29e9,Е,185bd3a81d9d618518d10abebf0d17a8,1920,1080,144.0,True,43,107
4,bd7272ed-1850-48f1-a2a8-c8fed523dc37,Ч,185bd3a81d9d618518d10abebf0d17a8,1920,1080,96.0,True,20,70


In [5]:
# A fixed random_state makes both draws reproducible: without it every re-run
# samples different clips, so previously saved tensors no longer match the subset.

# Use a subset of 100 items for training (rows flagged train == True)
train_annot = annot[annot['train']].sample(100, random_state=42).reset_index(drop=True)

# Use a subset of 100 items for validation (rows flagged train == False)
valid_annot = annot[~annot['train']].sample(100, random_state=42).reset_index(drop=True)

In [6]:
# Summarize column dtypes and non-null counts of the sampled training subset
train_annot.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   attachment_id  100 non-null    object 
 1   text           100 non-null    object 
 2   user_id        100 non-null    object 
 3   height         100 non-null    int64  
 4   width          100 non-null    int64  
 5   length         100 non-null    float64
 6   train          100 non-null    bool   
 7   begin          100 non-null    int64  
 8   end            100 non-null    int64  
dtypes: bool(1), float64(1), int64(4), object(3)
memory usage: 6.5+ KB


In [7]:
# Summarize column dtypes and non-null counts of the sampled validation subset
valid_annot.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   attachment_id  100 non-null    object 
 1   text           100 non-null    object 
 2   user_id        100 non-null    object 
 3   height         100 non-null    int64  
 4   width          100 non-null    int64  
 5   length         100 non-null    float64
 6   train          100 non-null    bool   
 7   begin          100 non-null    int64  
 8   end            100 non-null    int64  
dtypes: bool(1), float64(1), int64(4), object(3)
memory usage: 6.5+ KB


In [8]:
def crop_frame(frame):
    """
    Makes the frame square by zero-padding its shorter side
    (despite the name, no pixels are removed). The padding is split
    as evenly as possible; when the difference is odd, the extra
    row/column goes to the bottom/right side.
    :param frame: 3-D array indexed as (height, width, channels)
    :return: square array of side max(height, width); the zero blocks are
             created with numpy's default float dtype, so the result is
             promoted accordingly
    """
    rows, cols, channels = frame.shape[0], frame.shape[1], frame.shape[2]
    side = max(rows, cols)
    gap = abs(rows - cols)

    before = gap // 2
    after = gap - before

    if cols == side:
        # Wide (or already square) frame: add zero rows above and below
        axis = 0
        pad_shapes = ((before, side, channels), (after, side, channels))
    else:
        # Tall frame: add zero columns on the left and right
        axis = 1
        pad_shapes = ((side, before, channels), (side, after, channels))

    leading, trailing = (np.zeros(shape=s) for s in pad_shapes)
    return np.concatenate((leading, np.array(frame), trailing), axis=axis)

In [9]:
def load_video(path, img_size, i):
    """
    Loads the video from the path and returns a list of frame tensors.
    Every decoded frame is squared with crop_frame, resized to
    img_size x img_size, has its channel order reversed, is rearranged
    to channels-first layout and moved to the global `device`.
    Pixel values are not rescaled.
    :param path: path to the video
    :param img_size: side length (in pixels) of the resized square frame
    :param i: index of the video, used only for progress reporting
    :return: list of frame tensors; empty when the video cannot be opened
             or contains no readable frames
    """
    cap = cv2.VideoCapture(path)
    frames = []
    try:
        if not cap.isOpened():
            # Keep the best-effort contract (return an empty list) but make the
            # failure visible instead of silently producing an empty tensor file.
            print(f"Warning: could not open video {path}")

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = crop_frame(frame)
            frame = cv2.resize(frame, (img_size, img_size))
            # Reverse the channel order (OpenCV decodes frames as BGR, so this yields RGB)
            frame = frame[:, :, [2, 1, 0]]
            # (height, width, channels) -> (channels, height, width)
            frame_tensor = torch.Tensor(frame).permute(2, 0, 1).to(device)
            frames.append(frame_tensor)
    finally:
        # Always free the decoder/file handle, even if processing a frame raised
        cap.release()

    # Progress: every 10th video below 100, afterwards every 100th
    if (i < 100 and i % 10 == 0) or i % 100 == 0:
        print(f"We are done on the image number {i}")
    return frames

In [10]:
# for i, path in enumerate(annot['attachment_id']):
#     full_path = "data/slovo/train/" + str(path) + ".mp4"
#     annot.at[i, 'attachment_id'] = load_video(full_path, 100, i)

In [11]:
# Output folder for the serialized frame tensors (created if it does not exist yet)
tensor_dir = "data/tensors/"
os.makedirs(name=tensor_dir, exist_ok=True)

In [12]:
def process_and_save_tensors(annot_subset, subset_name, img_size=100, video_dir="data/slovo/train/"):
    """
    Processes and saves tensors to disk.
    For every row, the video "<video_dir><attachment_id>.mp4" is decoded with
    load_video, the resulting list of frame tensors is saved under the global
    `tensor_dir` as "<subset_name>_<attachment_id>.pt", and the row's
    'attachment_id' is overwritten IN PLACE with that tensor path. Finally the
    updated DataFrame is written to "data/processed_annotations_<subset_name>.csv".
    Because the column is overwritten, calling this twice on the same DataFrame
    would treat the tensor paths as video ids; re-create the subset before re-running.
    :param annot_subset: DataFrame with annotations (modified in place)
    :param subset_name: name of the subset, used in the output file names
    :param img_size: side length of the square frames passed to load_video
    :param video_dir: directory containing the source .mp4 files
    :return: None
    """
    # Snapshot (row label, video id) pairs first: the column is rewritten inside the
    # loop, and using the real row labels keeps `.at` correct for any index,
    # not just a freshly reset RangeIndex.
    entries = list(zip(annot_subset.index, annot_subset['attachment_id']))

    for i, (row_label, video_id) in enumerate(entries):
        full_path = os.path.join(video_dir, f"{video_id}.mp4")

        # Load and process video (i is only the progress counter)
        frames = load_video(full_path, img_size, i)

        # Save tensor to disk and store path in DataFrame
        tensor_path = os.path.join(tensor_dir, f"{subset_name}_{video_id}.pt")
        torch.save(frames, tensor_path)
        annot_subset.at[row_label, 'attachment_id'] = tensor_path

    # Save DataFrame to CSV
    annot_subset.to_csv(f"data/processed_annotations_{subset_name}.csv", index=False)


In [13]:
# Decode the training subset's videos, save their frame tensors and write the processed CSV
# (the validation subset is handled by a separate call)
process_and_save_tensors(train_annot, "train")

We are done on the image number 0
We are done on the image number 10
We are done on the image number 20
We are done on the image number 30
We are done on the image number 40
We are done on the image number 50
We are done on the image number 60
We are done on the image number 70
We are done on the image number 80
We are done on the image number 90


In [14]:
# Same pipeline for the validation subset.
# NOTE(review): process_and_save_tensors builds every video path under "data/slovo/train/";
# confirm the validation videos are stored there — presumably a file that cannot be
# opened just yields an empty frame list, which would be saved without any error.
process_and_save_tensors(valid_annot, "valid")

We are done on the image number 0
We are done on the image number 10
We are done on the image number 20
We are done on the image number 30
We are done on the image number 40
We are done on the image number 50
We are done on the image number 60
We are done on the image number 70
We are done on the image number 80
We are done on the image number 90


In [17]:
# Reload the processed training annotations written by process_and_save_tensors
# (note: this rebinds `annot`, replacing the raw annotation table loaded earlier)
annot = pd.read_csv("data/processed_annotations_train.csv")

# Load the first saved list of frame tensors. The tensors were saved from whatever
# device was active at that time; map_location remaps them onto the currently
# selected device, so loading also works on a machine without a GPU.
first_tensor_path = annot['attachment_id'].iloc[0]
first_tensor = torch.load(first_tensor_path, map_location=device)

In [18]:
# Sanity check on the first frame: NaNs take priority, then an all-zero frame,
# otherwise the frame is reported as fine
if not torch.isnan(first_tensor[0]).any():
    if (first_tensor[0] == 0).all():
        print("The first frame is all zeros!")
    else:
        print("The first frame is not NaN or zeros.")
else:
    print("The first frame contains NaN values!")


The first frame is not NaN or zeros.


In [None]:
# torch.cuda.empty_cache()