# Sony Video Camera Data: Segmentation, Image Extraction, and Preparation

In [16]:
## Required python libraries
import pandas as pd
import numpy as np
import scipy as sp
import scipy.signal
import os
import math
import sys
from scipy.interpolate import griddata

# to make this notebook's output stable across runs
np.random.seed(42)

pd.plotting.register_matplotlib_converters()


import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)
%matplotlib inline
import seaborn as sns
import librosa
import matplotlib.font_manager as font_manager

#---------------opencv------------------------
import cv2
from IPython.display import Image, display
from tqdm import tqdm

In [5]:
from moviepy.editor import VideoFileClip

In [8]:
# Where to save the figures, and dataset locations
PROJECT_ROOT_DIR = "../"

# Root folder of the multimodal dataset. The original absolute path is kept as
# the default; set the LDED_DATASET_PATH environment variable to run this
# notebook on another machine without editing the cell.
Multimodal_dataset_PATH = os.environ.get(
    "LDED_DATASET_PATH",
    "/home/chenlequn/Dataset/LDED_acoustic_visual_monitoring_dataset",
)

# Folder that holds the raw Sony camera recordings
Video_path = os.path.join(Multimodal_dataset_PATH, "Sony Camera")
# Folder that receives the figures written by this notebook
IMAGE_PATH = os.path.join(PROJECT_ROOT_DIR, "result_images", "Sony-Camera-Image")

os.makedirs(IMAGE_PATH, exist_ok=True)

## Helper that stores the current diagram/graph in the IMAGE_PATH folder
def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current pyplot figure as ``<IMAGE_PATH>/<fig_id>.<fig_extension>``.

    Parameters
    ----------
    fig_id : str
        Base name of the output file (without extension).
    tight_layout : bool
        When truthy, ``plt.tight_layout()`` is called before saving.
    fig_extension : str
        File extension; also forwarded to ``plt.savefig`` as ``format``.
    resolution : int
        Forwarded to ``plt.savefig`` as ``dpi``.
    """
    file_name = ".".join([fig_id, fig_extension])
    destination = os.path.join(IMAGE_PATH, file_name)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(destination, format=fig_extension, dpi=resolution)

# Ignore useless warnings (see SciPy issue #5998)
import warnings
warnings.filterwarnings(action="ignore", message="^internal gelsd")

In [31]:
# Define function to save a single video frame as an image file
def save_frames(frame, sample_index, counter, image_output_folder, fig_extension="png"):
    """Write one frame to ``<image_output_folder>/sample_<sample_index>_<counter>.<fig_extension>``.

    Parameters
    ----------
    frame
        Image data handed to ``cv2.imwrite`` unchanged.
    sample_index
        Sample identifier embedded in the file name.
    counter
        Frame counter embedded in the file name.
    image_output_folder : str
        Destination folder; it is not created here.
    fig_extension : str
        File extension used for the output file.

    Returns
    -------
    bool
        True when ``cv2.imwrite`` reports success, False otherwise.
    """
    filename = f'sample_{sample_index}_{counter}.{fig_extension}'
    path = os.path.join(image_output_folder, filename)
    # cv2.imwrite reports a failed write through its return value instead of
    # raising, so the status is checked here rather than silently dropped.
    written = bool(cv2.imwrite(path, frame))
    if not written:
        print(f"Warning: could not write frame to {path}")
    return written

# Function to display video information
def display_video_info(video_path):
    """Print the frame rate, frame count and duration of a video file.

    Parameters
    ----------
    video_path : str
        Path handed to ``cv2.VideoCapture``.

    Returns
    -------
    None
        Everything is reported through ``print``. If the video cannot be
        opened, or it reports a non-positive frame rate, an error line is
        printed instead of the duration.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print("Error: Could not open video.")
            return

        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Release the capture on every path, including the early return above
        cap.release()

    print(f"Video FPS: {fps}")
    print(f"Total Frames: {total_frames}")

    # A frame rate of 0 (or NaN) would make the duration computation below
    # divide by zero, so stop with a message instead.
    if not fps > 0:
        print("Error: Video reports a non-positive FPS; duration cannot be computed.")
        return

    total_duration = total_frames / fps
    # Convert total duration to minutes and seconds
    total_duration_min = int(total_duration // 60)
    total_duration_sec = int(total_duration % 60)

    print(f"Total Duration (seconds): {total_duration}")
    print(f"Total Duration: {total_duration_min} min {total_duration_sec} seconds")

# Function to crop video by time and save frames
def crop_video_and_save_frames(video_path, image_output_folder, start_time, end_time, sample_index, target_fps=25):
    """Extract the frames between two timestamps and save them as images.

    Frames are read sequentially starting at ``start_time``. Every
    ``frame_interval``-th frame is passed to ``save_frames``, where
    ``frame_interval = int(video_fps / target_fps)`` (at least 1). The counter
    embedded in the file names starts at 1 for the first frame of the segment
    and advances with every frame read, saved or not.

    Parameters
    ----------
    video_path : str
        Path handed to ``cv2.VideoCapture``.
    image_output_folder : str
        Folder forwarded to ``save_frames``.
    start_time, end_time : float
        Segment boundaries in seconds.
    sample_index
        Sample identifier forwarded to ``save_frames``.
    target_fps : float
        Desired output frame rate. A value above the video's own frame rate
        results in every frame being kept.

    Returns
    -------
    None
    """
    cap = cv2.VideoCapture(video_path)
    pbar = None
    try:
        if not cap.isOpened():
            print("Error: Could not open video.")
            return

        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0 or not target_fps > 0:
            # Without valid rates the frame positions cannot be computed
            print("Error: Invalid frame rate.")
            return

        start_frame = int(start_time * fps)
        end_frame = int(end_time * fps)
        # Clamp to 1: int(fps / target_fps) is 0 when target_fps > fps, which
        # previously caused a division by zero further down.
        frame_interval = max(1, int(fps / target_fps))

        # Frames 1, 1 + interval, 1 + 2 * interval, ... are saved, so the number
        # of saved frames is the ceiling (not the floor) of the division.
        frames_in_segment = max(0, end_frame - start_frame)
        total_frames_to_process = (frames_in_segment + frame_interval - 1) // frame_interval
        pbar = tqdm(total=total_frames_to_process, desc="Processing frames")

        counter = 1  # Initialize frame counter
        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

        while True:
            ret, frame = cap.read()
            if not ret or cap.get(cv2.CAP_PROP_POS_FRAMES) > end_frame:
                break  # Stop if the end of the segment is reached or on read error

            if (counter - 1) % frame_interval == 0:
                save_frames(frame, sample_index, counter, image_output_folder)
                pbar.update(1)

            counter += 1
    finally:
        # Always free the progress bar and the capture, even if saving fails
        if pbar is not None:
            pbar.close()
        cap.release()

## Segment MTS video file

In [49]:
# Sample (recording) processed by the cells below
sample_index = 21

# ----------------Define input video file path---------------
raw_video_file = os.path.join(Video_path, f'{sample_index}.MTS')

# Output folders
# - image_output_folder: frames exactly as extracted from the video
# - sony_camera_image_folder: destination of the frames once their indices are shifted
sample_output_root = os.path.join(Multimodal_dataset_PATH, '25Hz', str(sample_index))
image_output_folder = os.path.join(sample_output_root, 'sony-images')
sony_camera_image_folder = os.path.join(sample_output_root, 'sony_camera_images')

# Create output folders if they don't exist (exist_ok avoids the
# check-then-create race and matches how IMAGE_PATH is created)
for output_folder in (image_output_folder, sony_camera_image_folder):
    os.makedirs(output_folder, exist_ok=True)

# Display video information
display_video_info(raw_video_file)

Video FPS: 25.0
Total Frames: 7384
Total Duration (seconds): 295.36
Total Duration: 4 min 55 seconds


In [46]:
# Time window of the recording to extract, in seconds.
# Notes per sample:
#   Sample 21: 0 - 176.76 seconds, in total, 4419 images (initial several layers are missing in the sony videos)
#   Sample 22: 0 - 178 seconds, in total (image count was not recorded -- TODO)
# NOTE(review): the value used below (180 s) is larger than both windows noted above -- confirm this is intended.
start_time = 0  # seconds
end_time = 180  # seconds

# Extract that window and save its frames at 25 Hz
crop_video_and_save_frames(
    raw_video_file,
    image_output_folder,
    start_time,
    end_time,
    sample_index,
    target_fps=25,
)

Processing frames:   0%|                               | 0/4500 [00:00<?, ?it/s][h264 @ 0x5ddd780] Missing reference picture, default is 2147483647
[h264 @ 0x5c2b800] reference picture missing during reorder
[h264 @ 0x5c2b800] Missing reference picture, default is 2147483647
Processing frames: 100%|████████████████████| 4500/4500 [02:40<00:00, 27.97it/s]


## Fix the frame indices to match the coaxial image stream

In [50]:
import os

# Offset added to every extracted frame index so that the file names line up
# with the target numbering.
# Sample 21: offset is 5030 - 4417
# Sample 22: offset is 4623 - (second term missing in the original notes -- TODO)
index_offset = 5030 - 4417

# Get all PNG files in the directory
image_files = [f for f in os.listdir(image_output_folder) if f.endswith(".png")]

# Names that do not follow 'sample_<sample_index>_<current_index>.png'; they are
# left untouched and reported at the end instead of being skipped silently.
skipped_files = []

# Loop through all files in the directory with a progress bar
for filename in tqdm(image_files, desc="Renaming images"):
    parts = filename.split('_')
    if len(parts) != 3:
        skipped_files.append(filename)
        continue

    # Only the integer parse is guarded, so a ValueError raised anywhere else
    # cannot be swallowed by accident.
    try:
        current_index = int(parts[2].split('.')[0])  # numeric part of '<current_index>.png'
    except ValueError:
        skipped_files.append(filename)
        continue

    new_filename = f'sample_{sample_index}_{current_index + index_offset}.png'
    # Move the file into the destination folder under its shifted index
    os.rename(
        os.path.join(image_output_folder, filename),
        os.path.join(sony_camera_image_folder, new_filename),
    )

if skipped_files:
    print(f"Skipped {len(skipped_files)} file(s) with an unexpected name, e.g. {skipped_files[:5]}")

Renaming images: 100%|███████████████████| 4500/4500 [00:00<00:00, 88076.16it/s]
