In [1]:
import pandas as pd
import numpy as np

# Location of the training metadata CSV inside the Kaggle input directory.
train_csv_path = '/kaggle/input/beginners-hypothesis-25/BH25/Training_Data/train.csv'

# Read the training metadata into a DataFrame.
train_df = pd.read_csv(train_csv_path)

# Print the frame so its columns and row count can be checked at a glance.
print(train_df)


      video_id element    motion power  speed          video_summary
0            1    Erde    linear   rot    9.6     (3.8147, 31.94809)
1            2   Feuer       shm  grin    9.6   (26.70288, -4.29153)
2            3   Feuer    random  geld    9.6      (3.8147, 8.58307)
3            4    Erde    zigzag  lila    6.9  (-24.79553, -0.95367)
4            5    Erde    linear  lila    9.6    (7.62939, 22.88818)
...        ...     ...       ...   ...    ...                    ...
9995      9996   Feuer    zigzag   rut    9.6  (-1.90735, -11.44409)
9996      9997  Wasser       shm  gelb    9.6        (0.0, 13.14282)
9997      9998   Feuer  circular  lela    6.9   (17.64297, 10.49042)
9998      9999   Boden    zigzag  gele    9.6  (-21.45767, 11.44409)
9999     10000    Luft    zigzag  gele    6.9   (-18.11981, -6.4373)

[10000 rows x 6 columns]


In [2]:
import torch 
import cv2
import torchvision.models as models
from torchvision import transforms

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
# (The original run used a Kaggle P100 GPU.)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Pretrained ResNet-50 used as a fixed feature extractor: the model is moved to
# `device`, its last child module is dropped, and the remaining children are
# chained into a Sequential.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` -- confirm the installed version before switching.
resnet50 = torch.nn.Sequential(
    *list(models.resnet50(pretrained=True).to(device).children())[:-1]
)
resnet50.eval()  # evaluation mode

# Per-channel statistics used to normalise the frames.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Frame preprocessing: array -> PIL image -> 3-channel grayscale -> 224x224
# -> tensor -> normalised tensor.
transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Grayscale(num_output_channels=3),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]
)



def extract_video_features(video_path):
    """Return one feature vector for a video by averaging per-frame features.

    Each frame decoded by OpenCV is blurred with a 3x3 Gaussian kernel, passed
    through the module-level ``transform`` and the module-level ``resnet50``
    network, and the per-frame outputs are averaged over the frame axis. Taking
    the mean collapses a list of vectors (one per frame) into a single vector.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.

    Returns:
        numpy.ndarray: Element-wise mean of the per-frame feature arrays.

    Raises:
        ValueError: If the video cannot be opened, or if no frame produced a
            feature vector. Previously this case fell through to
            ``np.mean([])`` and silently returned NaN.
    """
    cap = cv2.VideoCapture(video_path)
    frame_features = []

    try:
        if not cap.isOpened():
            raise ValueError(f"Could not open video: {video_path}")

        while True:
            ret, frame = cap.read()
            if not ret:
                # No more frames (or the decoder gave up): stop reading.
                break
            try:
                # NOTE(review): OpenCV returns frames in BGR order, while the
                # module-level `transform` starts with ToPILImage, which reads
                # the array as RGB, so the grayscale weights are applied to
                # swapped channels. Left unchanged so features stay comparable
                # with earlier runs -- TODO confirm whether to add
                # cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) here.
                frame = cv2.GaussianBlur(frame, (3, 3), 0)
                frame = transform(frame).unsqueeze(0).to(device)
                with torch.no_grad():
                    features = resnet50(frame).squeeze().cpu().numpy()
                frame_features.append(features)
            except Exception as exc:
                # Best effort: a single bad frame is reported and skipped so
                # the remaining frames of the video can still contribute.
                print(f"Error processing frame in {video_path}: {exc}")
    finally:
        # Always free the capture handle, even when an error is raised above.
        cap.release()

    if not frame_features:
        raise ValueError(f"No frame features extracted from {video_path}")

    return np.mean(frame_features, axis=0)

# this is the function to read the video frames and extract features

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 194MB/s]


In [None]:
import numpy as np
import pandas as pd



video_base_path = "path to train video folder"  # Path to the folder containing video files

data = []                 # one feature vector per successfully processed video
processed_video_ids = []  # video_id of each entry in `data`, in the same order
failed_video_ids = []     # video_ids that were skipped because extraction failed

# Run `extract_video_features` on every video listed in train_df. Only the
# video_id column is needed, so iterate over that column instead of whole rows.
for idx, video_id in train_df['video_id'].items():
    video_path = f"{video_base_path}/{video_id}.mp4"
    if (idx+1)%100==0:
        print(f"precessing {video_path}")
    try:
        # Extract features for the video
        features = np.asarray(extract_video_features(video_path))
        # A video that yields no frames can come back as a NaN scalar rather
        # than an exception; treat that as a failure so it never reaches `data`.
        if features.ndim == 0 or np.isnan(features).any():
            raise ValueError("no usable frame features were extracted")
    except Exception as e:
        print(f"Error processing {video_path}: {e}")
        failed_video_ids.append(video_id)
        continue
    data.append(features)
    processed_video_ids.append(video_id)

# Stack the per-video vectors into one array. Row i of `video_features` belongs
# to `processed_video_ids[i]`; when any video was skipped the array is shorter
# than train_df, so use that list to select the matching train_df rows.
video_features = np.array(data)
print("done")
if failed_video_ids:
    print(f"Skipped {len(failed_video_ids)} videos: {failed_video_ids}")


precessing /kaggle/input/beginners-hypothesis-25/BH25/Testing_Data/100.mp4
precessing /kaggle/input/beginners-hypothesis-25/BH25/Testing_Data/200.mp4


In [None]:
# Report the dimensions of the extracted feature array.
print("Features Shape (X):", np.shape(video_features))

# Write the feature array to disk in NumPy's .npy format.
np.save(file="path to save folder.npy", arr=video_features)