In [18]:
import librosa
import numpy as np

def get_audio_peaks(audio_path):
    """Return the onset-peak timestamps of an audio file.

    The file is loaded with ``librosa.load`` (default arguments), its onset
    strength envelope is computed, and peaks are picked from that envelope.

    Args:
        audio_path: Path of the audio file to analyse.

    Returns:
        NumPy array of peak times in seconds, rounded to two decimals.
    """
    signal, sample_rate = librosa.load(audio_path)

    # Onset strength envelope of the loaded signal.
    envelope = librosa.onset.onset_strength(y=signal, sr=sample_rate)

    # Peak-picking settings, gathered in one place.
    pick_settings = dict(pre_max=3, post_max=3, pre_avg=3, post_avg=3, delta=0.2, wait=5)
    peak_frames = librosa.util.peak_pick(envelope, **pick_settings)

    # Frame indices -> seconds, rounded to two decimals.
    return np.round(librosa.frames_to_time(peak_frames, sr=sample_rate), 2)


# Example usage: extract the peak timestamps (in seconds) of one sample audio file.
audio_peaks = get_audio_peaks('/kaggle/input/audio-only/audio_only_ID_1.wav')


In [19]:
# Display the peak timestamps detected for the sample audio file.
audio_peaks

array([ 1.65,  2.76,  4.88,  5.83,  8.1 ,  8.89, 11.33, 11.91, 14.58,
       14.88, 17.81])

The code above uses librosa to detect significant sound events (peaks) in an audio file. It loads the audio, computes an onset-strength envelope (representing energy changes), and picks peaks from that envelope with `librosa.util.peak_pick()`. The peak frame indices are then converted to timestamps in seconds and rounded to two decimal places. The function is used to locate impact sounds, i.e., the ball hitting an edge, so that they can later be matched with the corresponding video events.

In [5]:
import cv2
import numpy as np

In [8]:
def detect_frame_boundaries(frame):
    """Locate a rectangular outline in a BGR frame.

    The frame is converted to grayscale, Canny edges are extracted, and the
    external contours of the edge map are scanned in the order OpenCV
    returns them. The first contour whose polygon approximation has exactly
    four vertices wins.

    Args:
        frame: BGR image as returned by ``cv2.VideoCapture.read``.

    Returns:
        ``(x, y, w, h)`` bounding box of the first four-vertex contour, or
        ``None`` when no contour qualifies.
    """
    grayscale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    edge_map = cv2.Canny(grayscale, 50, 150)
    outlines, _ = cv2.findContours(edge_map, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    def _to_polygon(outline):
        # Approximation tolerance is 1% of the closed contour's perimeter.
        perimeter = cv2.arcLength(outline, True)
        return cv2.approxPolyDP(outline, 0.01 * perimeter, True)

    # map() is lazy, so scanning stops at the first four-vertex polygon.
    quad = next((poly for poly in map(_to_polygon, outlines) if len(poly) == 4), None)
    if quad is None:
        return None

    left, top, width, height = cv2.boundingRect(quad)
    return left, top, width, height


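This function detects the boundaries of the rectangular frame inside which the ball moves. It returns the bounding box `(x, y, w, h)` of the first contour that approximates to a four-cornered polygon, or `None` if no such contour is found. These boundaries are then used to check whether the ball hits an edge during its motion.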

In [28]:
def detect_collisions_with_frame(video_path, edge_padding=6, min_gap=0.25,
                                 default_rect=(50, 34, 341, 223)):
    """Return the timestamps at which the blue ball reaches the frame edges.

    The rectangular frame is located once, on the first frame read from the
    video, via ``detect_frame_boundaries``; when that detection fails,
    ``default_rect`` is used instead and processing continues. In every
    frame the largest blue region (HSV threshold) is assumed to be the ball.
    A collision is recorded when the center of the ball's bounding box,
    widened by ``edge_padding`` pixels, reaches the left edge, the right
    edge, or the ``y_rect`` edge of the frame rectangle.

    Args:
        video_path: Path of the video file to analyse.
        edge_padding: Pixels added around the ball center before comparing
            it with the frame edges (the center itself never reaches an edge).
        min_gap: Minimum spacing, in seconds, between two recorded impacts.
        default_rect: ``(x, y, w, h)`` rectangle used when no frame
            rectangle can be detected.

    Returns:
        List of impact times in seconds (rounded to two decimals), in
        ascending order. An empty list is returned when the video cannot be
        opened or reports a non-positive frame rate.
    """
    cap = cv2.VideoCapture(video_path)
    impacts = []  # Timestamps of collisions

    try:
        if not cap.isOpened():
            print(f"Error: Unable to open video file at {video_path}")
            return []

        fps = cap.get(cv2.CAP_PROP_FPS)  # Frames per second
        if not fps > 0:
            # Timestamps are frame_count / fps, so a zero, negative or NaN
            # frame rate cannot be used.
            print(f"Error: Invalid frame rate ({fps}) for video file at {video_path}")
            return []

        # HSV range for the blue ball; constant for the whole video, so it is
        # built once instead of once per frame. Adjust these values if needed.
        lower_blue = np.array([100, 150, 50])
        upper_blue = np.array([140, 255, 255])

        rect_boundaries = None  # Frame rectangle, resolved on the first frame
        frame_count = 0

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if rect_boundaries is None:
                rect_boundaries = detect_frame_boundaries(frame)
                if rect_boundaries is None:
                    # Fall back to the default rectangle and keep going;
                    # leaving the loop here would discard the whole video.
                    rect_boundaries = default_rect

            x_rect, y_rect, w_rect, _ = rect_boundaries

            # HSV separates hue from brightness, which makes the color
            # threshold more reliable than in BGR.
            hsv_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
            mask = cv2.inRange(hsv_frame, lower_blue, upper_blue)
            contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            if contours:
                # The largest blue region is assumed to be the ball.
                largest_contour = max(contours, key=cv2.contourArea)
                x_ball, y_ball, w_ball, h_ball = cv2.boundingRect(largest_contour)

                ball_center_x = x_ball + w_ball // 2
                ball_center_y = y_ball + h_ball // 2

                # NOTE(review): y_rect is the top of the bounding box in image
                # coordinates (y grows downward); the edge at y_rect + h is
                # not checked. Confirm that this is intended.
                touches_edge = (
                    ball_center_x - edge_padding <= x_rect                # left edge
                    or ball_center_x + edge_padding >= x_rect + w_rect    # right edge
                    or ball_center_y - edge_padding <= y_rect             # y_rect edge
                )

                if touches_edge:
                    impact_time = round(frame_count / fps, 2)

                    # Consecutive frames of one bounce collapse into a single
                    # impact: keep it only if it is min_gap seconds after the
                    # previously recorded one.
                    if not impacts or impact_time - impacts[-1] >= min_gap:
                        impacts.append(impact_time)

            frame_count += 1
    finally:
        # Release the capture even if frame processing raises.
        cap.release()

    return impacts

# Example usage: detect the collision timestamps (in seconds) of one sample video
# and print them.
video_path = '/kaggle/input/video-only/video_only_ID_1.mp4'
collisions = detect_collisions_with_frame(video_path)
print("Collision timestamps:", collisions)


Collision timestamps: [0.32, 1.65, 2.98, 4.32, 5.65, 6.98, 8.32, 9.65, 10.98, 12.32, 13.65, 14.98, 16.32, 17.65, 18.98]


The provided code, `detect_collisions_with_frame`, identifies timestamps when a ball collides with the edges of a rectangular frame in a video. Here's a concise explanation:

1. **Video Processing**: The function reads the video frame-by-frame using OpenCV. If the rectangular frame boundaries are not detected in the first frame, default boundaries (`[50, 34, 341, 223]`) are used.

2. **Ball Detection**: Each frame is converted to HSV color space to isolate the blue ball using a predefined HSV range. A binary mask is created, and the largest contour in the mask is assumed to be the ball.

3. **Collision Detection**: The center of the ball is calculated, and its position is checked against the rectangular frame's boundaries (left, right, and bottom edges; in the code the third check compares against `y_rect`, which in OpenCV image coordinates, where y increases downward, is the top of the bounding box). If a collision is detected, the timestamp (in seconds) is calculated and added to the `impacts` list, ensuring at least 0.25 seconds between consecutive timestamps.
   Note: a padding of 6 px is added around the center of the ball, because the exact center of the ball never reaches an edge. Without this padding no collisions were detected, and 6 px gave the best results.
4. **Output**: The function returns a list of rounded timestamps (`impacts`) indicating when collisions occurred.

In [21]:
import pandas as pd

In [22]:
# Load the audio-to-video mapping table. Per the df.info() output, AUDIO is
# fully populated and VIDEO is entirely empty (float64).
df = pd.read_csv("/kaggle/input/soln-map/submit_solution_mapping.csv")

In [24]:
# Inspect the column dtypes and non-null counts of the mapping table.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45 entries, 0 to 44
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   AUDIO   45 non-null     object 
 1   VIDEO   0 non-null      float64
dtypes: float64(1), object(1)
memory usage: 848.0+ bytes


In [25]:
# Preview the first rows of the mapping table.
df.head()

  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


Unnamed: 0,AUDIO,VIDEO
0,audio_only_ID_6.wav,
1,audio_only_ID_34.wav,
2,audio_only_ID_27.wav,
3,audio_only_ID_29.wav,
4,audio_only_ID_41.wav,


In [29]:
import os

# Function to generate audio timestamps for all audio files
def generate_audio_timestamps(audio_folder):
    """Map every ``.wav`` file in ``audio_folder`` to its peak timestamps.

    Args:
        audio_folder: Directory to scan (non-recursive, in ``os.listdir`` order).

    Returns:
        Dict of ``{filename: get_audio_peaks(<full path>)}`` for each entry
        whose name ends with ``.wav``.
    """
    wav_names = (name for name in os.listdir(audio_folder) if name.endswith('.wav'))
    return {
        name: get_audio_peaks(os.path.join(audio_folder, name))
        for name in wav_names
    }

# Function to generate video timestamps for all video files
def generate_video_timestamps(video_folder):
    """Map every ``.mp4`` file in ``video_folder`` to its collision timestamps.

    Args:
        video_folder: Directory to scan (non-recursive, in ``os.listdir`` order).

    Returns:
        Dict of ``{filename: detect_collisions_with_frame(<full path>)}`` for
        each entry whose name ends with ``.mp4``.
    """
    mp4_names = (name for name in os.listdir(video_folder) if name.endswith('.mp4'))
    return {
        name: detect_collisions_with_frame(os.path.join(video_folder, name))
        for name in mp4_names
    }

# Example usage: build the timestamp dictionaries for every audio and video file.
audio_folder = '/kaggle/input/audio-only'
video_folder = '/kaggle/input/video-only'

# Each dictionary is keyed by filename (not by full path).
audio_timestamps_dict = generate_audio_timestamps(audio_folder)
video_timestamps_dict = generate_video_timestamps(video_folder)

# Sanity check: number of audio and video files that were processed.
print(len(audio_timestamps_dict))
print(len(video_timestamps_dict))


45
45


Here we generate the timestamps for all audio and video files; the printed counts confirm that 45 files of each type were processed.

In [39]:
import numpy as np

def compute_matching_cost(audio_timestamps, video_timestamps):
    """
    Compute the cost of matching an audio file with a video file.

    The cost is the sum, over all audio timestamps, of the absolute distance
    to the closest video timestamp. Lower is better.

    Args:
        audio_timestamps: Sequence (list or NumPy array) of audio event times.
        video_timestamps: Sequence (list or NumPy array) of video event times.

    Returns:
        The summed distance, or ``float('inf')`` when either sequence is
        empty or ``None`` (no meaningful comparison is possible).
    """
    # len() is used instead of truthiness because bool() of a NumPy array
    # with more than one element raises ValueError.
    if video_timestamps is None or len(video_timestamps) == 0:
        return float('inf')

    # An empty audio sequence would otherwise sum to 0 and look like a
    # perfect match for every video.
    if audio_timestamps is None or len(audio_timestamps) == 0:
        return float('inf')

    return sum(
        min(abs(v_time - a_time) for v_time in video_timestamps)
        for a_time in audio_timestamps
    )


In [36]:
def one_to_one_matching(audio_timestamps_dict, video_timestamps_dict):
    """
    Greedily pair audio files with video files by ascending matching cost.

    Every (audio, video) combination is scored with ``compute_matching_cost``.
    The combinations are then visited from cheapest to most expensive, and a
    pair is accepted whenever neither of its files has been used yet.

    Returns a dictionary where keys are audio filenames and values are the
    matched video filenames.
    """
    # Score the full cross product of audio and video files.
    candidates = [
        (audio_name, video_name, compute_matching_cost(audio_times, video_times))
        for audio_name, audio_times in audio_timestamps_dict.items()
        for video_name, video_times in video_timestamps_dict.items()
    ]

    # Cheapest first; the sort is stable, so ties keep their insertion order.
    ranked = sorted(candidates, key=lambda triple: triple[2])

    matches = {}
    taken_videos = set()
    for audio_name, video_name, _cost in ranked:
        # Skip pairs that would reuse an already assigned file.
        if audio_name in matches or video_name in taken_videos:
            continue
        matches[audio_name] = video_name
        taken_videos.add(video_name)

    return matches


In [37]:
import pandas as pd

def update_dataframe_with_matches(df, matches_dict):
    """
    Update the VIDEO column of the DataFrame with matched video filenames.

    Rows whose AUDIO value is a key of ``matches_dict`` receive the mapped
    video filename; all other rows keep their current VIDEO value. The
    DataFrame is modified in place and also returned.

    Args:
        df: DataFrame with an ``AUDIO`` column (a ``VIDEO`` column is created
            if it is missing).
        matches_dict: Mapping of audio filename -> video filename.

    Returns:
        The same DataFrame object, with ``VIDEO`` filled in.
    """
    if 'VIDEO' not in df.columns:
        df['VIDEO'] = float('nan')

    # A VIDEO column that is loaded entirely empty has dtype float64. Cast it
    # to object first so that filenames can be stored without relying on an
    # implicit dtype upcast during assignment.
    df['VIDEO'] = df['VIDEO'].astype(object)

    has_match = df['AUDIO'].isin(list(matches_dict))
    if has_match.any():
        # to_numpy() makes the assignment positional, so it does not depend
        # on index alignment.
        df.loc[has_match, 'VIDEO'] = df.loc[has_match, 'AUDIO'].map(matches_dict).to_numpy()

    return df


In [40]:

# Pair every audio file with a video file (greedy, lowest cost first).
matches_dict = one_to_one_matching(audio_timestamps_dict, video_timestamps_dict)

# Update DataFrame with matches (df is modified in place and rebound to the same name)
df = update_dataframe_with_matches(df, matches_dict)

# Save updated DataFrame to CSV (relative path: written to the current working directory)
df.to_csv('audio_video_matches.csv', index=False)

print("Updated DataFrame:")
print(df)


Updated DataFrame:
                   AUDIO                 VIDEO
0    audio_only_ID_6.wav  video_only_ID_32.mp4
1   audio_only_ID_34.wav  video_only_ID_27.mp4
2   audio_only_ID_27.wav  video_only_ID_40.mp4
3   audio_only_ID_29.wav  video_only_ID_10.mp4
4   audio_only_ID_41.wav  video_only_ID_37.mp4
5   audio_only_ID_13.wav  video_only_ID_14.mp4
6   audio_only_ID_32.wav  video_only_ID_41.mp4
7   audio_only_ID_45.wav  video_only_ID_25.mp4
8   audio_only_ID_36.wav  video_only_ID_13.mp4
9   audio_only_ID_44.wav  video_only_ID_36.mp4
10  audio_only_ID_40.wav   video_only_ID_5.mp4
11   audio_only_ID_9.wav  video_only_ID_17.mp4
12   audio_only_ID_2.wav  video_only_ID_16.mp4
13  audio_only_ID_30.wav  video_only_ID_28.mp4
14  audio_only_ID_19.wav  video_only_ID_35.mp4
15  audio_only_ID_20.wav  video_only_ID_39.mp4
16  audio_only_ID_43.wav  video_only_ID_21.mp4
17  audio_only_ID_11.wav   video_only_ID_7.mp4
18   audio_only_ID_7.wav   video_only_ID_2.mp4
19  audio_only_ID_23.wav   video_only_ID_

In [41]:
output_file_path = '/kaggle/working/audio_video_matches.csv'  # Path to save the file

# Save the DataFrame to a CSV file (same data as the earlier to_csv call,
# this time written to an explicit absolute path)
df.to_csv(output_file_path, index=False)

print(f"DataFrame saved successfully to {output_file_path}.")


DataFrame saved successfully to /kaggle/working/audio_video_matches.csv.
