# **SlowFast Network Model with APD Framework: Detailed Explanation of the Code Process**

# **Step 1: Library Installation and Imports**
# Install necessary libraries for object detection (YOLOv5), video processing (OpenCV), and deep learning (PyTorch).
# Import additional modules for lane detection, motion analysis, and video generation.

# **Step 2: Lane Detection**
# Uses Canny edge detection and Hough Line Transform to detect lane lines in video frames.
# This enhances the frame by highlighting the road lanes.

# **Step 3: Object Detection and Bounding Boxes**
# YOLOv5 model detects objects (e.g., cars, pedestrians) in each frame.
# Draws bounding boxes around detected objects and labels them with class names and confidence scores.

# **Step 4: Absolute Pixel Difference - Image Similarity Assessment**
# Calculates frame similarity using frame difference and thresholds.
# Frames with minimal changes are skipped to optimize processing time and computational resources.

# **Step 5: SlowFast Model Input Preparation**
# Prepares frames for the SlowFast model by creating "fast" and "slow" pathways.
# This facilitates detailed analysis of video content across multiple temporal scales.

# **Step 6: Video Processing Workflow**
# Reads video frames, applies lane detection, object detection, and motion analysis.
# Writes processed frames to the output video, skipping frames whose similarity to the last written frame reaches a fixed threshold (`adp_threshold`).

# **Step 7: Performance Metrics**
# Calculates and displays metrics like the total number of frames processed, skipped frame percentage, and FPS.
# These metrics provide insights into the efficiency of the video processing pipeline.

# **Step 8: Dynamic Video Generation**
# Saves the processed video with highlighted lanes, objects, and dynamically adapted frame rates.
# Output video is optimized for use in applications like traffic monitoring or surveillance.


In [None]:
# Input/output locations used by the final `process_video(...)` call.
# NOTE(review): the `/content/...` prefix suggests a Google Colab runtime — confirm before running elsewhere.
video_path = '/content/00067cfb-e535423e.mov'  # Source video that will be read frame by frame
output_video_path = '/content/processed_APD_video.mp4'  # Destination for the annotated, frame-skipped video

# Install third-party dependencies. The leading `!` is IPython/Jupyter shell syntax,
# so this line only works inside a notebook cell, not in a plain Python script.
!pip install torch torchvision pytorchvideo opencv-python scikit-image ultralytics

import torch
import torchvision.transforms as transforms
import cv2
import numpy as np
from pytorchvideo.models.hub import slowfast_r50  # SlowFast
from ultralytics import YOLO  # For YOLOv5
import time  # To calculate FPS

# Object detector: Ultralytics YOLO wrapper initialised from the "yolov5s.pt" weights.
yolo_model = YOLO("yolov5s.pt")

# SlowFast R50 with pretrained weights, switched to inference mode
# (nn.Module.eval() returns the module itself, so the chained form is equivalent).
model = slowfast_r50(pretrained=True).eval()

# Per-frame preprocessing for SlowFast input:
# ndarray -> PIL image -> 256x256 resize -> 224 centre crop -> tensor -> normalise.
transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize((256, 256)),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        # Same mean/std applied to each of the three channels.
        transforms.Normalize([0.45] * 3, [0.225] * 3),
    ]
)

# Lane overlay: Canny edges restricted to a road-shaped region, then probabilistic Hough lines
def detect_lanes(frame):
    """Draw detected lane-line segments onto ``frame`` and return it.

    The frame is treated as a BGR image. It is converted to grayscale,
    smoothed with a 5x5 Gaussian blur and run through Canny (thresholds
    50/150). Edges outside a trapezoidal region of interest are discarded;
    the trapezoid spans 10%-90% of the width along the bottom edge and
    45%-55% of the width at 60% of the height. Segments found by
    ``cv2.HoughLinesP`` are drawn in green with thickness 5.

    Args:
        frame: BGR image array. It is modified in place.

    Returns:
        The same ``frame`` object, with any detected segments drawn on it.
    """
    rows, cols = frame.shape[:2]

    # Edge map: grayscale -> blur -> Canny.
    edge_map = cv2.Canny(
        cv2.GaussianBlur(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY), (5, 5), 0),
        50,
        150,
    )

    # Trapezoid covering the lower part of the image; corner coordinates are
    # truncated to integers by the int32 dtype.
    trapezoid = np.array(
        [[
            (0.1 * cols, rows),
            (0.9 * cols, rows),
            (0.55 * cols, 0.6 * rows),
            (0.45 * cols, 0.6 * rows),
        ]],
        dtype=np.int32,
    )
    roi_mask = np.zeros_like(edge_map)
    cv2.fillPoly(roi_mask, trapezoid, 255)

    segments = cv2.HoughLinesP(
        cv2.bitwise_and(edge_map, roi_mask),
        1,
        np.pi / 180,
        threshold=50,
        minLineLength=50,
        maxLineGap=25,
    )
    # HoughLinesP yields None when no segment is found.
    if segments is None:
        return frame

    for segment in segments:
        xa, ya, xb, yb = segment[0]
        cv2.line(frame, (xa, ya), (xb, yb), (0, 255, 0), 5)
    return frame

# Annotate a frame with one rectangle and one caption per detection
def draw_bounding_boxes(frame, results):
    """Draw a box and a "<class>: <confidence>%" caption for each detection.

    Args:
        frame: BGR image array. It is modified in place.
        results: Iterable of detection objects, each exposing ``cls``,
            ``xyxy`` and ``conf`` that are indexable at ``[0]``
            (presumably an Ultralytics ``Boxes`` collection, as passed by
            ``detect_and_crop`` — verify for other callers).

    Returns:
        The same ``frame`` object with the annotations drawn on it.
    """
    names = yolo_model.names
    for box in results:
        # Class index -> readable name via the module-level YOLO model.
        name = names[int(box.cls[0])]
        left, top, right, bottom = (int(v) for v in box.xyxy[0].tolist())
        score_pct = box.conf[0] * 100

        cv2.rectangle(frame, (left, top), (right, bottom), (255, 0, 0), 2)
        # Caption sits 10 px above the box's top-left corner.
        cv2.putText(
            frame,
            f"{name}: {score_pct:.2f}%",
            (left, top - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.6,
            (255, 255, 255),
            2,
        )
    return frame

# Run YOLO on an image and return an annotated copy (despite the name, nothing is cropped)
def detect_and_crop(image):
    """Detect objects with the module-level YOLO model and draw them on a copy.

    Args:
        image: BGR image array. It is left untouched; drawing happens on a copy.

    Returns:
        A copy of ``image`` with boxes and captions for the detections in the
        first result returned by ``yolo_model``.
    """
    detections = yolo_model(image)
    annotated = image.copy()
    # draw_bounding_boxes draws in place and hands back the same array.
    return draw_bounding_boxes(annotated, detections[0].boxes)

# Function to score how similar two frames are using thresholded absolute pixel difference
def calculate_frame_difference(frame1, frame2, motion_threshold=30):
    """Return a similarity score for two frames (1.0 means no significant change).

    Despite its name, the function returns ``1 - change_ratio``: the fraction
    of array elements whose absolute difference does NOT exceed
    ``motion_threshold``. For multi-channel frames every channel value is
    counted separately, because the count and ``frame1.size`` both cover the
    whole array.

    Args:
        frame1: First image array.
        frame2: Second image array; must be compatible with ``frame1`` for
            ``cv2.absdiff``.
        motion_threshold: Per-element absolute difference above which an
            element counts as changed. Defaults to 30.

    Returns:
        Similarity score; higher values indicate less motion between frames.
    """
    # Element-wise absolute difference between the two frames.
    diff = cv2.absdiff(frame1, frame2)

    # Binarise: elements above the threshold become 255, everything else 0,
    # so low-intensity changes are ignored.
    _, changed = cv2.threshold(diff, motion_threshold, 255, cv2.THRESH_BINARY)
    change_ratio = np.count_nonzero(changed) / frame1.size

    return 1 - change_ratio  # Higher values indicate lower motion

# Function to build the [slow, fast] pathway tensors for SlowFast from a list of frame tensors
def prepare_slowfast_inputs(frames, slow_rate=16):
    """Pack per-frame tensors into the two-pathway input list used by SlowFast.

    Frames are stacked along a new time axis placed after the channel axis,
    so each pathway has layout ``(1, C, T, H, W)`` (batch, channels, time,
    height, width). The fast pathway keeps every frame; the slow pathway keeps
    every ``slow_rate``-th frame, starting with the first.

    Args:
        frames: Non-empty list of equally shaped ``(C, H, W)`` tensors in
            temporal order.
        slow_rate: Temporal stride used to subsample the slow pathway.
            NOTE(review): confirm this matches the fast/slow frame ratio the
            loaded SlowFast model expects.

    Returns:
        ``[slow_tensor, fast_tensor]``, each with a leading batch dimension of 1.
    """
    fast_pathway = frames
    slow_pathway = frames[::slow_rate]
    # dim=1 puts time after channels: (C, T, H, W); unsqueeze adds the batch axis.
    fast_tensor = torch.stack(fast_pathway, dim=1).unsqueeze(0)
    slow_tensor = torch.stack(slow_pathway, dim=1).unsqueeze(0)
    return [slow_tensor, fast_tensor]

# Process video with lane detection, YOLO detection, and APD-based frame skipping
def process_video(input_path, output_path, adp_threshold=0.8750):
    """Annotate a video and write only frames that differ enough from the last written one.

    Every frame read from ``input_path`` is passed through ``detect_lanes`` and
    ``detect_and_crop``. The first frame is always written. Each later frame is
    scored against the most recently written frame with
    ``calculate_frame_difference``; it is written when the similarity score is
    below ``adp_threshold`` and skipped otherwise. Summary metrics are printed
    once the input is exhausted.

    No SlowFast inference is run here; frames are only annotated and filtered.

    Args:
        input_path: Path of the video to read.
        output_path: Path of the ``mp4v``-encoded video to write.
        adp_threshold: Similarity score at or above which a frame is skipped.

    Raises:
        OSError: If the input video cannot be opened or the output writer
            cannot be created.
    """
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        cap.release()
        raise OSError(f"Could not open input video: {input_path}")

    out = None
    frame_count = 0
    skipped_frames = 0
    processed_frames = 0

    start_time = time.time()

    try:
        # Keep the source frame rate as a float: truncating e.g. 29.97 to 29
        # would change the playback speed of the output.
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            raise OSError(f"Could not open output video for writing: {output_path}")

        prev_frame = None  # Last frame that was written (annotated version)
        last_processed_index = -1  # Index of the last written frame, for logging

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # NOTE(review): detection runs on every frame, including frames that
            # end up skipped, and the similarity check below compares annotated
            # frames, so the drawn overlays contribute to the score.
            frame_with_lanes = detect_lanes(frame)
            frame_with_objects = detect_and_crop(frame_with_lanes)

            if prev_frame is None:
                # First frame: nothing to compare against, always keep it.
                keep_frame = True
            else:
                # Similarity with the last written frame (1.0 = no significant change).
                difference = calculate_frame_difference(prev_frame, frame_with_objects)
                print(f"Comparing frame {last_processed_index} with frame {frame_count}: similarity score = {difference:.4f}")
                # Keep the frame only when it is sufficiently different.
                keep_frame = difference < adp_threshold

            if keep_frame:
                prev_frame = frame_with_objects
                out.write(frame_with_objects)
                processed_frames += 1
                last_processed_index = frame_count
            else:
                # prev_frame is left unchanged so the next frame is still
                # compared with the last written frame.
                print(f"Skipping frame {frame_count}, difference: {difference:.4f}")
                skipped_frames += 1

            frame_count += 1
    finally:
        # Release the capture and writer even if processing fails part-way.
        cap.release()
        if out is not None:
            out.release()

    end_time = time.time()
    total_time = end_time - start_time
    # Written frames per wall-clock second of processing.
    video_fps = processed_frames / total_time if total_time > 0 else 0

    total_frames = frame_count  # Count of all frames read
    metrics = {
        'Total Frames': total_frames,
        'No of Frames Skipped': skipped_frames,
        '% of Frames Skipped': (skipped_frames / total_frames) * 100 if total_frames > 0 else 0,
        'FPS': video_fps,
    }

    print("Metrics:")
    print(f"Total Frames = {metrics['Total Frames']}")
    print(f"No of Frames Skipped = {metrics['No of Frames Skipped']}")
    print(f"% of Frames Skipped = {metrics['% of Frames Skipped']:.2f}%")
    print(f"FPS = {metrics['FPS']:.2f}")

# Entry point of the cell: process `video_path` and write the annotated result to
# `output_video_path`, using the default `adp_threshold`.
process_video(video_path, output_video_path)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
0: 384x640 8 persons, 6 cars, 2 traffic lights, 351.7ms
Speed: 3.5ms preprocess, 351.7ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)
Comparing frame 114 with frame 115: similarity score = 0.9000
Skipping frame 115, difference: 0.9000

0: 384x640 13 persons, 5 cars, 2 traffic lights, 340.7ms
Speed: 5.2ms preprocess, 340.7ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)
Comparing frame 114 with frame 116: similarity score = 0.8277

0: 384x640 13 persons, 6 cars, 2 traffic lights, 349.6ms
Speed: 3.2ms preprocess, 349.6ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)
Comparing frame 116 with frame 117: similarity score = 0.8925
Skipping frame 117, difference: 0.8925

0: 384x640 12 persons, 6 cars, 4 traffic lights, 498.3ms
Speed: 3.4ms preprocess, 498.3ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)
Comparing frame 116 with frame 118: similarity sc