# In [4]:
import cv2
import numpy as np
from moviepy import VideoFileClip
import math

# Configuration
# Smallest bounding box (in pixels) that is accepted as a detection.
min_contour_width, min_contour_height = 40, 40
line_orientation = "vertical"  # Direction of the counting line: "horizontal" or "vertical"
line_position = 550  # Pixel coordinate of the counting line (x if vertical, y if horizontal)
offset = 10  # Half-width in pixels of the band around the line that registers a crossing
tracking_duration_threshold = 1  # Minimum "frames" value a track needs before it may be counted

# Running state
vehicles = frame_id = 0  # Vehicles counted so far / index of the frame being processed

# Noise metrics
db_levels = list()  # One audio level per processed frame, in processing order

# Tracking
active_tracks = dict()  # Track key -> state dict with "centroid" and "frames" entries

# Initialize video capture
cap = cv2.VideoCapture("Timeline1.mp4")  # Replace with your video file path
if not cap.isOpened():
    print("Error: Could not open video.")
    exit()

# Some containers/backends report 0 (or NaN) for the frame rate. Every timing
# computation below divides by fps, so stop early with a clear message instead
# of failing later with ZeroDivisionError. `not fps > 0` also catches NaN.
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    print("Error: Video reports an invalid frame rate.")
    cap.release()
    exit()

frame_duration = 1 / fps  # Duration of each frame in seconds
# The frame count is container metadata and may be missing; never let it go negative.
total_frames = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0)
video_duration = total_frames / fps  # Total video duration in seconds

# Vehicles per hour initialization
vehicles_per_hour = 0

# Extract audio from the video.
# The whole track is decoded into memory once so that per-frame slices are cheap;
# the clip (and its underlying readers) is closed as soon as the array exists.
video_clip = VideoFileClip("Timeline1.mp4")
try:
    audio_clip = video_clip.audio
    if audio_clip is None:
        # Video without an audio track: keep the pipeline running with an empty
        # sample array so every per-frame slice is simply empty.
        print("Warning: Video has no audio track; audio levels will not be meaningful.")
        audio_fps = 44100
        audio_array = np.zeros((0, 2))
    else:
        audio_fps = audio_clip.fps
        audio_array = audio_clip.to_soundarray(fps=audio_fps)
finally:
    video_clip.close()

# Function to compute dB levels
def calculate_db(audio_segment):
    """Return the negated RMS level of *audio_segment* in decibels.

    The level is ``20 * log10(rms)`` of all samples (all channels pooled),
    clipped to a floor of -100 dB and then multiplied by -1. For samples
    normalised to [-1, 1] this yields 0 for a full-scale signal and larger
    positive numbers for quieter signals, up to 100 for silence.

    Args:
        audio_segment: Array-like of audio samples of any shape. May be empty.

    Returns:
        The negated, floor-clipped level as a float. Empty or all-zero input
        returns 100.
    """
    floor_db = -100

    # Work in float64: squaring integer PCM samples in their own dtype overflows
    # and silently produces a wrong (or NaN) level.
    samples = np.asarray(audio_segment, dtype=np.float64)

    # An empty slice (e.g. a frame past the end of the audio track) has no RMS;
    # report the floor instead of letting np.mean warn and return NaN.
    if samples.size == 0:
        return float(-floor_db)

    rms = np.sqrt(np.mean(np.square(samples)))
    db = 20 * math.log10(rms) if rms > 0 else -float("inf")
    return -max(db, floor_db)  # Clip to -100 dB as minimum for visualization

# Prime the frame-differencing pipeline with the first two frames of the video.
frame1 = cap.read()[1]
ret, frame2 = cap.read()

# Without a successful second read there is no frame pair to compare, so stop here.
if not ret:
    print("Error: Could not read frames from video.")
    cap.release()
    exit()

def get_centroid(x, y, w, h):
    """Return the centre point of a bounding box as an ``(x, y)`` tuple of ints.

    ``x``/``y`` are the box's top-left corner and ``w``/``h`` its size; the
    fractional part of each coordinate is truncated by ``int``.
    """
    center_x = x + w / 2
    center_y = y + h / 2
    return int(center_x), int(center_y)

# Processing loop
#
# Each iteration compares two consecutive frames, turns the difference into a
# binary motion mask, extracts the moving blobs and associates them with the
# tracks in `active_tracks`. A track is counted at most once, when its centroid
# enters the +/- `offset` band around the counting line or jumps across the line
# between two frames. The audio level of the frame is recorded alongside.

# Tracker tuning (heuristic values; adjust for the footage at hand).
max_match_distance = 75  # Max centroid movement (pixels) between frames for the same object
max_missed_frames = 10  # Frames a track may go undetected before it is dropped
next_track_id = 0  # Monotonic key for new entries in active_tracks

# Index into an (x, y) centroid of the coordinate that is compared with the line.
# None (unknown orientation) disables counting, as no line is drawn either.
axis_index = {"horizontal": 1, "vertical": 0}.get(line_orientation)

# Loop-invariant structuring elements.
dilate_kernel = np.ones((3, 3))
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))

while ret:
    # Calculate frame difference
    d = cv2.absdiff(frame1, frame2)
    grey = cv2.cvtColor(d, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(grey, (5, 5), 0)

    # Thresholding and morphological operations
    _, th = cv2.threshold(blur, 20, 255, cv2.THRESH_BINARY)
    dilated = cv2.dilate(th, dilate_kernel, iterations=2)
    closing = cv2.morphologyEx(dilated, cv2.MORPH_CLOSE, kernel)

    # Find contours. Only outer contours are wanted: holes nested inside a blob
    # would otherwise show up as additional detections of the same object.
    contours, _ = cv2.findContours(closing, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Collect the centroids of all sufficiently large blobs in this frame.
    detections = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)

        # Filter contours by size
        if w < min_contour_width or h < min_contour_height:
            continue

        centroid = get_centroid(x, y, w, h)
        detections.append(centroid)
        cv2.rectangle(frame1, (x, y), (x + w, y + h), (255, 0, 0), 2)
        cv2.circle(frame1, centroid, 5, (0, 255, 0), -1)

    # Associate every detection with the nearest track not yet matched in this
    # frame; detections with no track close enough start a new track.
    unmatched_ids = set(active_tracks)
    for centroid in detections:
        best_id = None
        best_distance = None
        for track_id in unmatched_ids:
            tx, ty = active_tracks[track_id]["centroid"]
            distance = math.hypot(centroid[0] - tx, centroid[1] - ty)
            if distance <= max_match_distance and (best_distance is None or distance < best_distance):
                best_id, best_distance = track_id, distance

        if best_id is None:
            active_tracks[next_track_id] = {
                "centroid": centroid,
                "previous": centroid,
                "frames": 1,
                "last_seen": frame_id,
                "counted": False,
            }
            next_track_id += 1
        else:
            track = active_tracks[best_id]
            track["previous"] = track["centroid"]
            track["centroid"] = centroid
            track["frames"] += 1
            track["last_seen"] = frame_id
            unmatched_ids.discard(best_id)

    # Count tracks seen in this frame that meet the tracking threshold and have
    # reached the line. The "counted" flag keeps a vehicle that lingers in the
    # band from being counted again on later frames.
    if axis_index is not None:
        for track in active_tracks.values():
            if track["counted"] or track["last_seen"] != frame_id:
                continue
            if track["frames"] < tracking_duration_threshold:
                continue
            position = track["centroid"][axis_index]
            previous = track["previous"][axis_index]
            in_band = (line_position - offset) < position < (line_position + offset)
            # Fast objects can skip the band entirely between two frames.
            jumped_line = (previous - line_position) * (position - line_position) < 0
            if in_band or jumped_line:
                vehicles += 1
                track["counted"] = True

    # Drop tracks that have not been detected recently so the dictionary stays small.
    stale_ids = [
        track_id
        for track_id, track in active_tracks.items()
        if frame_id - track["last_seen"] > max_missed_frames
    ]
    for track_id in stale_ids:
        del active_tracks[track_id]

    # Extract audio segment corresponding to the current frame
    audio_start_index = int((frame_id / fps) * audio_fps)
    audio_end_index = int(((frame_id + 1) / fps) * audio_fps)
    audio_segment = audio_array[audio_start_index:audio_end_index]

    # Calculate and record dB levels
    db_level = calculate_db(audio_segment)
    db_levels.append(db_level)

    # Draw the counting line
    if line_orientation == "horizontal":
        cv2.line(frame1, (0, line_position), (frame1.shape[1], line_position), (0, 255, 0), 2)
    elif line_orientation == "vertical":
        cv2.line(frame1, (line_position, 0), (line_position, frame1.shape[0]), (0, 255, 0), 2)

    # Display vehicle count and audio level
    cv2.putText(frame1, f"Total Vehicles: {vehicles}", (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
    cv2.putText(frame1, f"Audio Level (dB): {db_level:.2f}", (20, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)

    # Display the frame
    cv2.imshow("Vehicle Detection and Audio Analysis", frame1)

    # Mask to the low byte: some platforms return extra modifier bits in the key code.
    if cv2.waitKey(1) & 0xFF == 27:  # ESC key
        break

    # Update frames
    frame1 = frame2
    ret, frame2 = cap.read()
    frame_id += 1

# Release resources first so the capture and the preview window are freed even
# if the reporting code below fails.
cap.release()
cv2.destroyAllWindows()

# Calculate statistics
# NOTE(review): calculate_db returns the level multiplied by -1, so a larger
# value here means a quieter frame - confirm the max/min labels match the intent.
if db_levels:
    max_db = max(db_levels)
    min_db = min(db_levels)
    average_db = sum(db_levels) / len(db_levels)
else:
    max_db = min_db = average_db = float("nan")

# Calculate vehicles per hour over the footage that was actually analysed.
# One dB level is recorded per processed frame, so this stays correct when the
# run is stopped early (ESC) or when the container reports no frame count.
analysed_duration = len(db_levels) * frame_duration  # seconds
total_hours = analysed_duration / 3600  # Convert analysed duration to hours
vehicles_per_hour = vehicles / total_hours if total_hours > 0 else 0.0

# Report summary
print("\n==== Video Analysis Report ====")
print(f"Total Vehicles Detected: {vehicles}")
print(f"Vehicles Per Hour: {vehicles_per_hour:.2f}")
print(f"Maximum Noise Level (dB): {max_db:.2f}")
print(f"Minimum Noise Level (dB): {min_db:.2f}")
print(f"Average Noise Level (dB): {average_db:.2f}")
print("================================")


# Notebook output: KeyboardInterrupt