In [1]:
import os
import numpy as np
import pickle

In [2]:
%pwd


'/teamspace/studios/this_studio/src/notebooks'

In [3]:
# Move from <root>/src/notebooks up to the project root so that the relative
# dataset paths defined below resolve. The guard keeps the cell idempotent:
# re-running it from the root must not climb two more directories.
# NOTE(review): assumes the notebook lives in a directory named "notebooks"
# two levels below the project root - confirm if the layout changes.
if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir("../../")

In [4]:
%pwd


'/teamspace/studios/this_studio'

In [7]:


# Dataset locations - replace these with your actual dataset paths.
# Everything hangs off BASE_PATH, which is relative to the current working directory.
BASE_PATH = "raw"

# Directory holding the pre-extracted per-video feature files
FEATURES_PATH = os.path.join(BASE_PATH, "features")
# Directory the pickled MIL bags are written to
OUTPUT_PATH = os.path.join(BASE_PATH, "mil_bags")

# Train / test split listings
TRAIN_SPLIT_PATH, TEST_SPLIT_PATH = (
    os.path.join(BASE_PATH, split_file)
    for split_file in (
        "Action_Regnition_splits/train_001.txt",
        "Action_Regnition_splits/test_001.txt",
    )
)

In [26]:
def parse_split_file(split_file_path, class_names=None):
    """Read a split file and keep the videos whose category is a known class.

    Every line is expected to look like ``<category>/<video path>``. The text
    before the first ``/`` is the category; everything after it is returned as
    the video path (so nested paths keep their file name). Lines without a
    ``/`` and lines whose category is not in ``class_names`` are skipped.

    Args:
        split_file_path: Path of the text split file to read.
        class_names: Optional ordered sequence of category names. A category's
            position in the sequence is its integer label. Defaults to
            ``["Normal", "Abuse", "Arrest", "Arson", "Assault"]``.

    Returns:
        A ``(videos, labels)`` tuple of two parallel lists: the video paths
        (strings) and their integer class labels.

    Raises:
        OSError: If ``split_file_path`` cannot be opened.
    """
    if class_names is None:
        # Categories not enabled by default:
        # "Burglary", "Explosion", "Fighting", "RoadAccidents", "Robbery",
        # "Shooting", "Shoplifting", "Stealing", "Vandalism"
        # NOTE(review): confirm that normal videos really use the literal
        # category prefix "Normal" in the split files; otherwise class 0
        # never matches any line.
        class_names = ["Normal", "Abuse", "Arrest", "Arson", "Assault"]
    class_map = {name: idx for idx, name in enumerate(class_names)}
    print(f"Class map: ===> {class_map}")

    videos = []
    labels = []
    with open(split_file_path, 'r') as f:
        for line in f:
            # Split on the first "/" only, so "Cat/sub/clip.mp4" yields
            # "sub/clip.mp4" rather than the intermediate directory "sub".
            category, sep, video_path = line.strip().partition("/")
            if not sep:
                # No category separator on this line (blank or malformed).
                continue
            if category in class_map:
                videos.append(video_path)
                labels.append(class_map[category])

    return videos, labels


Class map: ===> {'Normal': 0, 'Abuse': 1, 'Arrest': 2, 'Arson': 3, 'Assault': 4}
('Abuse013_x264.mp4', 1)


In [None]:


# Organize features into MIL bags
def _load_bags(videos, labels, split_name):
    """Load the saved feature array of every listed video that has one.

    Args:
        videos: Video paths; only the base name without extension is used to
            locate ``<FEATURES_PATH>/<video id>.npy``.
        labels: Integer class labels, parallel to ``videos``.
        split_name: Word used in the progress message ("training"/"testing").

    Returns:
        A ``(bags, bag_labels)`` tuple of parallel lists containing the loaded
        arrays and their labels. Videos without a feature file are reported
        with a warning and left out.
    """
    bags = []
    bag_labels = []
    total = len(videos)

    for i, (video_name, label) in enumerate(zip(videos, labels)):
        # Get video ID/name from the path
        video_id = os.path.splitext(os.path.basename(video_name))[0]

        # Load pre-extracted features for this video
        feature_path = os.path.join(FEATURES_PATH, f"{video_id}.npy")

        if os.path.exists(feature_path):
            features = np.load(feature_path)
            bags.append(features)
            bag_labels.append(label)
            print(f"Added {split_name} bag {i+1}/{total}: {video_id}, shape: {features.shape}")
        else:
            print(f"Warning: Features not found for {video_id}")

    return bags, bag_labels


def organize_into_bags():
    """Build the train/test MIL bags from the split files and pickle them.

    Parses ``TRAIN_SPLIT_PATH`` and ``TEST_SPLIT_PATH``, loads the ``.npy``
    feature file of each listed video from ``FEATURES_PATH``, and writes
    ``train_bags.pkl`` and ``test_bags.pkl`` into ``OUTPUT_PATH``. Each pickle
    holds a ``(bags, labels)`` tuple of parallel lists. Progress and the
    per-class video counts are printed; nothing is returned.
    """
    # Parse split files
    train_videos, train_labels = parse_split_file(TRAIN_SPLIT_PATH)
    test_videos, test_labels = parse_split_file(TEST_SPLIT_PATH)

    print(f"Found {len(train_videos)} training videos and {len(test_videos)} testing videos")

    # Make sure the output directory exists before the (potentially long)
    # loading step, so a missing folder cannot fail the run at save time.
    os.makedirs(OUTPUT_PATH, exist_ok=True)

    # Create training and testing bags
    train_bags, train_bag_labels = _load_bags(train_videos, train_labels, "training")
    test_bags, test_bag_labels = _load_bags(test_videos, test_labels, "testing")

    # Save the organized bags
    print("Saving organized MIL bags...")

    with open(os.path.join(OUTPUT_PATH, 'train_bags.pkl'), 'wb') as f:
        pickle.dump((train_bags, train_bag_labels), f)

    with open(os.path.join(OUTPUT_PATH, 'test_bags.pkl'), 'wb') as f:
        pickle.dump((test_bags, test_bag_labels), f)

    print("Data organization complete!")
    print(f"Created {len(train_bags)} training bags and {len(test_bags)} testing bags")
    print(f"Saved to {OUTPUT_PATH}")

    # Print class distribution. A name's position in this list is its label.
    # NOTE(review): parse_split_file only assigns labels for the classes it is
    # configured with, so categories beyond those are reported as 0 videos.
    class_names = ["Normal", "Abuse", "Arrest", "Arson", "Assault", "Burglary",
                   "Explosion", "Fighting", "RoadAccidents", "Robbery",
                   "Shooting", "Shoplifting", "Stealing", "Vandalism"]

    for heading, bag_labels in (("Training", train_bag_labels),
                                ("Testing", test_bag_labels)):
        print(f"\n{heading} class distribution:")
        for class_idx, name in enumerate(class_names):
            print(f"{name}: {bag_labels.count(class_idx)} videos")

# Entry point: build and save the MIL bags. The guard keeps the pipeline from
# running if this code is imported as a module instead of executed directly.
if __name__ == "__main__":
    organize_into_bags()