In [1]:
%%capture
!pip install torch tqdm torchvision av ipywidgets imageio[ffmpeg] numpy pillow

## Preprocessing

In [2]:
import torch
from torchvision import transforms
from torchvision.io import read_video, read_video_timestamps
import os
from tqdm import tqdm
from PIL import Image
import io
import zlib
import os
import torch
from torchvision import transforms
from torchvision.io import read_video, read_video_timestamps
from tqdm import tqdm

In [None]:
def permute_tensor(tensor):
    """
    Reorder the axes of a 3-D tensor so that the last axis comes first.

    Args:
    - tensor (torch.Tensor): Tensor with three dimensions.

    Returns:
    - torch.Tensor: View of the input with axes ordered (2, 0, 1).
    """
    last_axis_first = (2, 0, 1)
    return tensor.permute(*last_axis_first)

def extract_and_store_frames(video_path, tensor_output_folder, frame_rate=30, chunk_size=1000):
    """
    Extract frames from a video, convert them to tensors, and store them with frame IDs.

    The video is decoded in chunks of ``chunk_size`` frames to bound peak memory.
    Sampling is anchored to the absolute frame index, so frames
    0, frame_rate, 2 * frame_rate, ... are kept no matter where chunk boundaries
    fall. All sampled frames are collected in one dictionary keyed by
    ``"<video name>_frame_<index>"`` and written with ``torch.save`` to
    ``<tensor_output_folder>/<video name>_frames.pt``.

    Args:
    - video_path (str): Path to the video file.
    - tensor_output_folder (str): Folder to save the tensor files.
    - frame_rate (int): Interval of frames to be extracted (e.g., every 30th frame).
    - chunk_size (int): Number of frames to process at a time.

    Raises:
    - ValueError: If ``frame_rate`` or ``chunk_size`` is smaller than 1.
    """
    if frame_rate < 1:
        raise ValueError(f"frame_rate must be a positive integer, got {frame_rate}")
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size}")

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(tensor_output_folder, exist_ok=True)

    # Set device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # splitext strips only the final extension, so dotted names such as
    # "cam.1.mp4" keep their full stem instead of collapsing to "cam".
    video_name = os.path.splitext(os.path.basename(video_path))[0]

    # Get video metadata: one presentation timestamp (in seconds) per frame.
    pts, _ = read_video_timestamps(video_path, pts_unit='sec')
    total_frames = len(pts)

    # Define transformation: channels-first layout, float16, per-channel normalization.
    transform = transforms.Compose([
        transforms.Lambda(permute_tensor),  # Custom permutation
        transforms.ConvertImageDtype(torch.float16),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Process frames in chunks
    frame_dict = {}
    for start_frame in tqdm(range(0, total_frames, chunk_size), desc="Processing chunks"):
        end_frame = min(start_frame + chunk_size, total_frames)
        video_chunk, _, _ = read_video(
            video_path,
            start_pts=pts[start_frame],
            end_pts=pts[end_frame - 1],
            pts_unit='sec',
        )

        # First position inside this chunk whose absolute index is a multiple of
        # frame_rate. Starting every chunk at 0 would break the sampling interval
        # whenever chunk_size is not a multiple of frame_rate.
        first_offset = (-start_frame) % frame_rate

        for i in range(first_offset, video_chunk.shape[0], frame_rate):
            frame_index = start_frame + i
            if frame_index >= total_frames:
                break
            frame_id = f"{video_name}_frame_{frame_index}"

            # Transform, then move the tensor to the selected device (CPU or CUDA).
            frame_dict[frame_id] = transform(video_chunk[i]).to(device)

        # Clear memory held by the decoded chunk before reading the next one.
        del video_chunk
        if device.type == 'cuda':
            torch.cuda.empty_cache()

    # Save the dictionary
    tensor_path = os.path.join(tensor_output_folder, f"{video_name}_frames.pt")
    torch.save(frame_dict, tensor_path)
    print(f"Processed {len(frame_dict)} frames from {video_path}")

    # Print total number of frames generated
    print(f"Total frames generated: {len(frame_dict)}")



In [None]:
def test_preprocess_single_video(video_path, tensor_output_folder, frame_rate=60):
    """
    Run the frame-extraction step on one video as a quick end-to-end check.

    This is a thin wrapper that forwards its arguments to
    ``extract_and_store_frames``.

    Args:
    - video_path (str): Path to the video file.
    - tensor_output_folder (str): Folder to save the tensor files.
    - frame_rate (int): Interval of frames to be extracted (e.g., every 60th frame).
    """
    extract_and_store_frames(
        video_path,
        tensor_output_folder,
        frame_rate=frame_rate,
    )

In [None]:
# Create the folder that receives the saved tensors; exist_ok makes re-running this cell safe.
os.makedirs("output", exist_ok = True)

In [None]:
# Preprocess the sample clip, keeping every 60th frame, and write the tensors under "output".
test_preprocess_single_video('/kaggle/input/sample-bengaluru-mobility/18th_Crs_BsStp_JN_FIX_1_000.mp4', 'output', frame_rate=60)

## YOLOv8

### Reading tensors

In [None]:
import numpy as np
def read_frames_from_file(file_path, frame_shape=(3, 90, 160)):
    """
    Read frames from a length-prefixed binary file into a dictionary of tensors.

    The file is parsed as a sequence of records, each laid out as:
    a 4-byte big-endian frame-ID length, the UTF-8 encoded frame ID, a 4-byte
    big-endian payload size, and then that many raw ``uint8`` bytes. Each payload
    is reshaped to ``frame_shape`` and scaled to ``float32`` values in [0, 1].

    Note: this is a custom byte layout, not the format written by ``torch.save``;
    files produced with ``torch.save`` must be read with ``torch.load`` instead.

    Args:
    - file_path (str): Path to the binary frame file.
    - frame_shape (tuple of int): Shape each payload is reshaped to. Defaults to
      (3, 90, 160).

    Returns:
    - dict: Dictionary with frame IDs as keys and float32 tensors as values.

    Raises:
    - ValueError: If the file ends in the middle of a record, or a payload size
      does not match ``frame_shape``.
    """
    def _read_exact(stream, count, what):
        # A short read means the file was truncated mid-record.
        data = stream.read(count)
        if len(data) != count:
            raise ValueError(
                f"Unexpected end of file while reading {what}: "
                f"expected {count} bytes, got {len(data)}"
            )
        return data

    expected_size = int(np.prod(frame_shape))
    frame_dict = {}

    with open(file_path, 'rb') as f:
        while True:
            frame_id_length_bytes = f.read(4)
            if not frame_id_length_bytes:
                # Clean end of file: no further records.
                break
            if len(frame_id_length_bytes) != 4:
                raise ValueError("Unexpected end of file while reading frame ID length")

            frame_id_length = int.from_bytes(frame_id_length_bytes, byteorder='big')
            frame_id = _read_exact(f, frame_id_length, "frame ID").decode('utf-8')

            tensor_size = int.from_bytes(_read_exact(f, 4, "tensor size"), byteorder='big')
            if tensor_size != expected_size:
                raise ValueError(
                    f"Frame {frame_id!r} has {tensor_size} bytes, which does not "
                    f"match frame_shape {tuple(frame_shape)} ({expected_size} bytes)"
                )

            tensor_data = _read_exact(f, tensor_size, "tensor data")
            # frombuffer returns a read-only view of the bytes; copy so the tensor
            # is built from writable memory.
            tensor_array = np.frombuffer(tensor_data, dtype=np.uint8).reshape(frame_shape).copy()

            tensor = torch.tensor(tensor_array, dtype=torch.float32) / 255.0
            frame_dict[frame_id] = tensor

    return frame_dict

In [None]:
# The *_frames.pt file is written by extract_and_store_frames with torch.save, so it
# has to be read back with torch.load. read_frames_from_file parses a different,
# length-prefixed raw-byte layout and cannot decode a torch.save archive.
# map_location='cpu' lets the file load even if the tensors were saved from a GPU.
fdict = torch.load("/kaggle/working/output/18th_Crs_BsStp_JN_FIX_1_000_frames.pt", map_location='cpu')

In [None]:
# Show a compact summary (frame count and the first few shapes) instead of dumping
# the contents of every tensor into the cell output.
{"num_frames": len(fdict), "first_shapes": {frame_id: tuple(frame.shape) for frame_id, frame in list(fdict.items())[:5]}}

## Ultralytics

In [3]:
%%capture
!pip install ultralytics

In [None]:
import torch
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
from ultralytics import YOLO
import os

# Preprocessing applied to PIL frames before they are stored as tensors (used by
# process_and_save_video): resize to 640x640, convert to a tensor with ToTensor,
# then normalize each channel with the mean/std below.
# NOTE(review): the same mean/std are used in extract_and_store_frames, but that
# function does not resize, so tensors from the two save paths may differ in
# spatial size. The constants are presumably the usual ImageNet statistics.
preprocess = transforms.Compose([
    transforms.Resize((640, 640)),  # Ensure this matches the preprocessing used during saving
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

def tensor_to_pil_image(tensor):
    """
    Turn an image tensor into a PIL Image.

    All size-1 dimensions (such as a leading batch dimension) are dropped and
    the data is moved to the CPU before conversion.

    Args:
    - tensor (torch.Tensor): Tensor to convert.

    Returns:
    - PIL.Image.Image: Converted PIL Image.
    """
    to_pil = transforms.ToPILImage()
    cpu_tensor = tensor.squeeze().cpu()
    return to_pil(cpu_tensor)

def load_tensors(tensor_file_path):
    """
    Read a saved tensor dictionary back from disk.

    The file is deserialized with ``torch.load`` and every tensor is mapped to
    the CPU, regardless of the device it was saved from.

    Args:
    - tensor_file_path (str): Path to the file containing the saved tensors.

    Returns:
    - dict: Dictionary with frame IDs as keys and tensors as values.

    Raises:
    - FileNotFoundError: If ``tensor_file_path`` is not an existing file.
    """
    # NOTE(security): torch.load unpickles the file, so only open trusted files.
    if os.path.isfile(tensor_file_path):
        return torch.load(tensor_file_path, map_location='cpu')

    raise FileNotFoundError(f"The file {tensor_file_path} does not exist.")

def segment_and_visualize_frames(tensor_dict, denormalize=True,
                                 mean=(0.485, 0.456, 0.406),
                                 std=(0.229, 0.224, 0.225)):
    """
    Run YOLO model on frames and visualize the results.

    The frame tensors saved by this notebook are channel-normalized
    (``Normalize(mean, std)``), so by default the normalization is undone and
    values are clamped to [0, 1] before the frame is turned into an image.
    Feeding normalized values straight to the image conversion would produce
    distorted colors.

    Args:
    - tensor_dict (dict): Dictionary with frame IDs as keys and tensors as values.
    - denormalize (bool): If True, floating-point frames whose leading dimension
      matches ``len(mean)`` are mapped back with ``x * std + mean``. Pass False
      for tensors that are already in the [0, 1] range.
    - mean (sequence of float): Per-channel mean used when the frames were saved.
    - std (sequence of float): Per-channel std used when the frames were saved.
    """
    model = YOLO('yolov8s.pt')  # Load the YOLO model

    # Shaped (C, 1, 1) so they broadcast over a (C, H, W) frame.
    mean_t = torch.tensor(mean, dtype=torch.float32).view(-1, 1, 1)
    std_t = torch.tensor(std, dtype=torch.float32).view(-1, 1, 1)

    for frame_id, frame_tensor in tensor_dict.items():
        # Drop any batch dimension and work on the CPU.
        frame = frame_tensor.squeeze().cpu()

        if (
            denormalize
            and frame.is_floating_point()
            and frame.dim() == 3
            and frame.shape[0] == mean_t.shape[0]
        ):
            # Compute in float32 (frames may be stored as float16) and clamp so the
            # later 0-255 conversion cannot wrap around.
            frame = (frame.float() * std_t + mean_t).clamp(0.0, 1.0)

        # Convert tensor to PIL image
        frame_pil = tensor_to_pil_image(frame)

        # Run YOLO model on the image
        results = model(frame_pil)

        # plot() returns the annotated image in BGR channel order; matplotlib
        # expects RGB, so reverse the channel axis before showing it.
        result_image = results[0].plot()[..., ::-1]

        # Display image
        plt.imshow(result_image)
        plt.title(f"Segmented Frame ID: {frame_id}")
        plt.axis('off')
        plt.show()

# Example usage
# NOTE: 'path/to/your/tensors.pt' is a placeholder. load_tensors raises
# FileNotFoundError unless it is replaced with the path of an existing file
# (for example a *_frames.pt file written by extract_and_store_frames).
tensor_file_path = 'path/to/your/tensors.pt'
tensor_dict = load_tensors(tensor_file_path)
segment_and_visualize_frames(tensor_dict)


In [None]:
# Run the YOLO visualization on fdict, the frame dictionary loaded in the "Reading tensors" section.
segment_and_visualize_frames(fdict)

## Attempt 2: preprocess every frame with imageio and save one stacked tensor

In [5]:
def process_and_save_video(video_path, output_path):
    """
    Preprocess every frame of a video and save the result as one stacked tensor.

    Each frame is converted to a PIL image and passed through the notebook-level
    ``preprocess`` transform. The per-frame tensors are stacked along a new
    leading batch dimension and written to ``output_path`` with ``torch.save``.
    All frames are held in memory until the save, so long videos need a
    correspondingly large amount of RAM.

    Args:
    - video_path (str): Path to the video file to read.
    - output_path (str): Destination file for the stacked tensor.

    Raises:
    - RuntimeError: If no frames could be read from the video.
    """
    # imageio is installed by the first cell but not imported by any import cell,
    # so import it here to keep the function usable on a fresh kernel.
    import imageio

    # Open the video file
    video_reader = imageio.get_reader(video_path)
    try:
        # Get total number of frames (used only for the progress bar)
        num_frames = video_reader.count_frames()

        # Preprocess frame by frame; tqdm shows progress.
        frame_tensors = [
            preprocess(Image.fromarray(frame))
            for frame in tqdm(video_reader, total=num_frames, desc="Processing Frames")
        ]
    finally:
        # Always release the reader, even if preprocessing fails part-way.
        video_reader.close()

    if not frame_tensors:
        raise RuntimeError(f"No frames could be read from {video_path}")

    # Stack along a new batch dimension: the result has shape (num_frames, ...).
    all_tensors = torch.stack(frame_tensors, dim=0)

    # Save the tensor to disk
    torch.save(all_tensors, output_path)
    print(f"Saved {len(frame_tensors)} frames to {output_path}")

In [None]:
# Preprocess every frame of the sample clip and save the stacked tensor to output_tensors.pt.
process_and_save_video('/kaggle/input/sample-bengaluru-mobility/18th_Crs_BsStp_JN_FIX_1_000.mp4', 'output_tensors.pt')

Processing Frames: 100%|██████████| 22500/22500 [10:18<00:00, 36.37it/s]
