Task 1 code

In [13]:
import torch
import torchvision.models.video as models
import torchvision.transforms as transforms
import cv2
import numpy as np
import torch.nn as nn
import csv
from collections import defaultdict
from scipy.spatial.distance import cdist
import matplotlib.pyplot as plt
import os
import time


def task1(file_path,model_name):
    '''
    Plays a video on screen, then extracts a 512-value descriptor for it from
    one layer of a pretrained R3D-18 network.

    Parameters
    ----------
    file_path : path of the video file (opened with cv2.VideoCapture).
    model_name : "layer3", "layer4" or "avgpool" - the R3D-18 sub-module whose
        output is captured with a forward hook.

    Returns
    -------
    A 1-D torch.Tensor with 512 values, or None when the video cannot be
    opened or yields no frames (an error message is printed in that case).

    Raises
    ------
    ValueError if model_name is not one of the three supported names.
    '''
    # Fail fast: an unsupported name used to surface only after the whole
    # forward pass, as an UnboundLocalError on the hook handle.
    hooked_layers = ("layer3", "layer4", "avgpool")
    if model_name not in hooked_layers:
        raise ValueError(f"model_name must be one of {hooked_layers}, got {model_name!r}")

    #visualise the video (press 'q' to stop the playback early)
    cap = cv2.VideoCapture(file_path)
    try:
        if not cap.isOpened():
            print("error... cant open video")
            return None
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            cv2.imshow('Frame',frame)
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break
    finally:
        # Always free the capture and the preview window, even on error.
        cap.release()
        cv2.destroyAllWindows()

    def getFrames(file_path):
        '''
        Reads the video frame by frame and returns a list of RGB frames
        '''
        video = cv2.VideoCapture(file_path)
        frames = []
        try:
            while True:
                ret, frame = video.read()
                if not ret:
                    break
                # OpenCV decodes to BGR; convert so the PIL-based transform sees RGB.
                frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        finally:
            video.release()
        return frames

    def processFrames(frames,t):
        '''
        Applies the transformation (t) to every frame and stacks the results
        '''
        return torch.stack([t(frame) for frame in frames])

    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((112,112)),
        transforms.ToTensor()
    ])

    frames = getFrames(file_path)
    if not frames:
        # torch.stack would otherwise fail on an empty list with an unrelated message.
        print("error... no frames could be read from video")
        return None

    clip = processFrames(frames,transform)
    clip = clip.unsqueeze(0) # Add batch dimension, shape becomes [N,D,C,H,W]
    clip = torch.movedim(clip,1,2) # convert to [N,C,D,H,W], the input layout used for r3d

    # The hook stores its output in a local container instead of a module-level
    # global, so one call can never pick up features left behind by another.
    captured = {}

    def hook(module,input,output):
        captured["features"] = output

    model = models.r3d_18(pretrained=True)
    model.eval()
    handle = getattr(model, model_name).register_forward_hook(hook)
    try:
        # Inference only: no autograd graph is needed for feature extraction.
        with torch.no_grad():
            model(clip)
    finally:
        handle.remove()
    features = captured["features"]

    # Dispatch on the requested layer rather than on the tensor shape: a layer4
    # output whose temporal depth is 1 used to fall into the avgpool branch and
    # was returned without being averaged.
    with torch.no_grad():
        if model_name == "layer3":
            # Average the spatial dimensions, flatten channels x depth, then
            # project to 512 values with a linear layer.
            pooled = torch.mean(features,dim=(3,4))
            in_tensor = torch.flatten(pooled)
            in_features = in_tensor.shape[0]
            myLayer = nn.Linear(in_features=in_features,out_features=512)
            # nn.Linear draws new random weights every time it is built, which
            # made layer3 features differ between runs. Re-draw them from a
            # fixed-seed generator, using the same uniform bound as the default
            # initialisation (1/sqrt(in_features)), so the projection is
            # repeatable for inputs of the same flattened length.
            generator = torch.Generator().manual_seed(0)
            bound = in_features ** -0.5
            myLayer.weight.uniform_(-bound, bound, generator=generator)
            myLayer.bias.uniform_(-bound, bound, generator=generator)
            final_tensor = myLayer(in_tensor)
        elif model_name == "layer4":
            # Average over depth, height and width, then drop the batch dimension.
            final_tensor = torch.mean(features,dim=(2,3,4)).squeeze(0)
        else: #case of avgpool
            # The hooked output is collapsed to a single dimension as-is.
            final_tensor = torch.flatten(features)

    return final_tensor


# path = 'C:\\Users\\aryan\\Desktop\\ASU\\CSE 515\\Target_videos\\cartwheel\\cartwheel\\Acrobacias_de_un_fenomeno_cartwheel_f_cm_np1_ba_bad_8.avi'
# model = "avgpool"

# task1(path,model)



Task 2 code

In [14]:
def Task2(file_path,model_name):
    '''
    Builds a 480-value bag-of-features histogram for one video from its STIP file.

    file_path  : path of the video; its parent folder name and file name are
                 used to locate "<stips_folder>/<folder>/<file name>.csv".
    model_name : "hog" uses columns 7..78 of every STIP row, "hof" uses
                 columns 79 onwards. Any other value returns None.

    Every STIP row is assigned to the nearest (euclidean) centroid of its
    (sigma2, tau2) pair, taken from columns 4 and 5 of the row. The 12
    per-pair histograms (40 bins each) are concatenated in pair order.
    '''
    sigma2_values = [4, 8, 16, 32, 64, 128]
    tau2_values = [2, 4]
    # Same ordering as the nested sigma2/tau2 loops: (4, 2), (4, 4), (8, 2), ...
    expected_pairs = [(sigma2, tau2) for sigma2 in sigma2_values for tau2 in tau2_values]

    def load_centroids(path_for):
        '''Loads one centroid matrix per (sigma2, tau2) pair from the CSV named by path_for.'''
        return {
            (sigma2, tau2): np.loadtxt(path_for(sigma2, tau2), delimiter=',')
            for (sigma2, tau2) in expected_pairs
        }

    def read_stip_rows(file_name):
        '''Parses the STIP CSV into a numeric numpy array, one row per line.'''
        with open(file_name, 'r') as csvfile:
            parsed = [[float(value) for value in line]
                      for line in csv.reader(csvfile, delimiter=',')]
        return np.array(parsed)

    # Locate the STIP file that belongs to this video.
    stips_folder = "../Assets/hmdb51_org_stips_filtered"
    action_subfolder = os.path.basename(os.path.dirname(file_path))
    video_name = os.path.basename(file_path)
    video_stip_path = os.path.join(stips_folder, action_subfolder, f"{video_name}.csv")

    def bag_of_features(centroids_by_pair, descriptor_columns):
        '''Assigns every STIP row to its closest centroid and histograms the assignments.'''
        assignments = {pair: [] for pair in expected_pairs}
        for stip_row in read_stip_rows(video_stip_path):
            pair = (stip_row[4], stip_row[5])
            distances = cdist([stip_row[descriptor_columns]], centroids_by_pair[pair], 'euclidean')
            assignments[pair].append(np.argmin(distances))

        histograms = [
            np.histogram(cluster_ids, bins=np.arange(41), density=False)[0]
            for cluster_ids in assignments.values()
        ]
        if len(histograms) != 12:
            raise ValueError(f"Expected 12 histograms, but found {len(histograms)}.")
        return np.hstack(histograms)

    if model_name == "hog":
        hog_centroids = load_centroids(
            lambda sigma2, tau2: f'../HoG/pair_{sigma2}_{tau2}_HoG.csv')
        return bag_of_features(hog_centroids, slice(7, 79))
    if model_name =="hof":
        hof_centroids = load_centroids(
            lambda sigma2, tau2: f'../HoF/pair_{sigma2}_{tau2}_HoF.csv')
        return bag_of_features(hof_centroids, slice(79, None))
    return None
        

Task 3 code

In [15]:

def task3(path, r, n, frames_folder='Frames', feature_folder='Features'):
    '''
    Saves three key frames of a video and builds a colour-histogram feature
    vector from them.

    Parameters
    ----------
    path : path of the video file.
    r : each key frame is split into an r x r grid of cells.
    n : number of histogram bins per colour channel.
    frames_folder : folder where the key frames are written as JPEG files.
    feature_folder : folder where the feature vector is written as a .npy file.

    Returns
    -------
    (video_feature_vector, histos)
        video_feature_vector : 1-D numpy array; for every key frame that could
            be read back, the red, green and blue n-bin histograms of every
            cell (row by row) are concatenated.
        histos : list with the path of the histogram figure saved per key frame.

    Side effects: writes the key frames, one histogram figure per key frame and
    the .npy feature file, and prints progress messages.
    '''
    histogram_folder = "Outputs/Task3/Histograms_Framewise"
    # Pixel values are binned over a fixed range so bin edges are identical for
    # every cell, frame and video. Without it np.histogram derives the edges
    # from each cell's own min/max and the resulting counts are not comparable.
    value_range = (0, 256)

    os.makedirs(frames_folder, exist_ok=True) # Folder for saving frames
    os.makedirs(feature_folder, exist_ok=True)  # Folder for saving features
    # The figure folder was previously assumed to exist already.
    os.makedirs(histogram_folder, exist_ok=True)

    base_name = os.path.basename(path).split('.')[0] # Video name up to its first dot

    cam = cv2.VideoCapture(path)
    try:
        # Frame count as reported by the container
        total_frames = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))

        frameno = 0
        while True:
            ret, frame = cam.read()
            if not ret:
                break # No more frames in the video
            frameno += 1  # 1-based position of the frame just read
            if frameno == 1:
                frame_name = os.path.join(frames_folder, f'{base_name}_frame_1.jpg')
                print("First frame: "+frame_name)
                cv2.imwrite(frame_name,frame)
            elif frameno == int(total_frames/2):
                frame_name = os.path.join(frames_folder, f'{base_name}_frame_2.jpg')
                print("Middle frame: "+frame_name)
                cv2.imwrite(frame_name,frame)
            elif frameno == total_frames-1:
                # NOTE(review): this is position total_frames-1, i.e. the frame
                # before the reported last one - confirm that this is intended.
                frame_name = os.path.join(frames_folder, f'{base_name}_frame_3.jpg')
                print("Last frame: "+frame_name)
                cv2.imwrite(frame_name,frame)
                break # Nothing after this frame is needed
    finally:
        cam.release()

    histos = []
    video_feature_vector = [] # Concatenated histograms of all key frames

    for frame_index in range(1, 4):  # first, middle and last key frame
        image_name = os.path.join(frames_folder, f'{base_name}_frame_{frame_index}.jpg')
        img = cv2.imread(image_name)
        if img is None:
            print(f"Error reading {image_name}. File may not exist or is corrupted.")
            continue  # Skip to the next frame

        im_h, im_w = img.shape[:2]
        cell_h, cell_w = im_h // r, im_w // r

        # squeeze=False keeps `axis` two-dimensional even when r == 1
        figure, axis = plt.subplots(r, r, squeeze=False)
        try:
            figure.subplots_adjust(top=1,bottom=0.5,right=1,left=0.5)

            for i in range(r): # top to bottom
                for j in range(r):  # left to right
                    tile = img[cell_h*i:cell_h*(i+1), cell_w*j:cell_w*(j+1)]
                    rgb_tile = cv2.cvtColor(tile, cv2.COLOR_BGR2RGB) # cells are read as BGR

                    # One histogram per channel, appended in red, green, blue order
                    for channel, colour in enumerate(('red', 'green', 'blue')):
                        values = rgb_tile[:,:,channel].ravel()
                        counts = np.histogram(values, bins=n, range=value_range)[0]
                        video_feature_vector.extend(counts)
                        axis[i,j].hist(values,bins=n,range=value_range,
                                       edgecolor='black',color=colour,alpha=0.5)
                    axis[i,j].tick_params(axis='both',labelsize=4)

            # The timestamp keeps figures of repeated runs from overwriting each other
            timestamp = int(time.time())
            hist_name = f'{base_name}_histogram_frame_{frame_index}_{timestamp}.png'
            hist_path = os.path.join(histogram_folder, hist_name)
            figure.savefig(hist_path)
        finally:
            plt.close(figure)  # Release the figure even if plotting or saving failed

        histos.append(hist_path)

    video_feature_vector = np.array(video_feature_vector)
    feature_file = os.path.join(feature_folder,f'{base_name}_features.npy')
    np.save(feature_file,video_feature_vector)

    print(f"Saved feature vector for video '{base_name}' to {feature_file}")

    return video_feature_vector, histos


Performing Task 4

In [16]:
import os

# Path to the main folder containing action subfolders
main_folder = '../Assets/hmdb51_org/target_videos'

# List to store the paths of all video files
video_paths = []

# Walk through the folder structure. os.walk reports entries in whatever order
# the filesystem returns them, so sub-folders and files are sorted to make the
# order of video_paths (and therefore any slice of it) reproducible.
for root, dirs, files in os.walk(main_folder):
    dirs.sort()  # in-place sort controls the order in which os.walk descends
    for file in sorted(files):
        full_path = os.path.join(root, file)
        video_paths.append(full_path)

# Show the first path as a sanity check; report clearly when nothing was found
# instead of failing with an IndexError.
if video_paths:
    print(video_paths[0])
else:
    print(f"No files found under {main_folder}")

../Assets/hmdb51_org/target_videos\cartwheel\(Rad)Schlag_die_Bank!_cartwheel_f_cm_np1_le_med_0.avi


In [17]:
# Task2(video_paths[0],"hof")

For each target video, perform all 3 tasks and store their respective outputs

In [18]:
import warnings

# Silence every warning from this point on.
warnings.filterwarnings("ignore")

# Task 1 keeps one sub-folder per hooked layer, Task 2 one per descriptor type.
output_dir1 = "Outputs/Task1"
task1_models = ["layer3","layer4","avgpool"] # There are 3 visual models in task 1

output_dir2 = "Outputs/Task2"
task2_models = ["hog","hof"]

# Create each output root and its sub-folders if they do not exist yet.
for out_root, subfolders in ((output_dir1, task1_models), (output_dir2, task2_models)):
    os.makedirs(out_root, exist_ok=True)
    for model in subfolders:
        os.makedirs(os.path.join(out_root, model), exist_ok=True)
    
# Saving Features for Task 2
def save_vector_to_file(vector, output_path):
    '''
    Writes a numpy array to output_path as one comma-separated line of integers.

    The values are cast to int and laid out as a single row before being
    written with the '%d' format.
    '''
    single_row = vector.reshape(1, -1).astype(int)
    np.savetxt(output_path, single_row, fmt='%d', delimiter=',')


# Task 3 writes histogram figures and feature vectors into separate sub-folders.
output_dir3 = "Outputs/Task3"
task3_folders = ["Histograms_Framewise","Feature_Vector"]

os.makedirs(output_dir3, exist_ok=True)
for model in task3_folders:
    task3_subfolder = os.path.join(output_dir3, model)
    os.makedirs(task3_subfolder, exist_ok=True)

# Run the three tasks on the collected videos and store what they return.
# Only the first two entries of video_paths are processed by this loop.
for video_path in video_paths[:2]:
    print(f"Processing {video_path}...")

    # Output files are named after the video file name up to its first dot.
    video_name = os.path.basename(video_path).split('.')[0]

    # Task1: one feature tensor per hooked layer, saved in that layer's folder
    for model in task1_models:
        output_tensor = task1(video_path, model)
        if output_tensor is None:
            continue
        print(f"Task 1 ({model})....")
        output_path = os.path.join(output_dir1, model, f"{video_name}_features.pt")
        torch.save(output_tensor, output_path)
        print(f"Saved Task 1 ({model}) features to {output_path}")

    # Task2: one histogram vector per descriptor type, saved as a CSV line
    for model in task2_models:
        output_2 = Task2(video_path,model)
        if output_2 is None:
            continue
        print("Task 2....")
        output_path = os.path.join(output_dir2, model, f"{video_name}.csv")
        save_vector_to_file(output_2, output_path)
        print(f"Saved Task 2 features to {output_path}")

    # Task3: task3 saves its own files; only a summary is printed here
    feature_folder = os.path.join(output_dir3, "Feature_Vector")
    output_3, histograms = task3(
        video_path, 4, 12,
        frames_folder='frames_fold',
        feature_folder=feature_folder,
    )
    if output_3 is not None:
        print(f"Video feature vector shape: {output_3.shape}")
        print(f"Histograms saved: {histograms}")

Processing ../Assets/hmdb51_org/target_videos\cartwheel\(Rad)Schlag_die_Bank!_cartwheel_f_cm_np1_le_med_0.avi...
Task 1 (layer3)....
Saved Task 1 (layer3) features to Outputs/Task1\layer3\(Rad)Schlag_die_Bank!_cartwheel_f_cm_np1_le_med_0_features.pt


KeyboardInterrupt: 

: 