# MultiSensor Dataset Preparation 
- Experiment data: March 2024. Aluminum, Laser-Wire DED
- Author: Chen Lequn

### Notebook 1: Video Segmentation

In [1]:
## Required python libraries
import pandas as pd
import numpy as np
import scipy as sp
import scipy.signal
import os
import math
import sys
from scipy.interpolate import griddata

# Seed NumPy's global random number generator so that any NumPy-based
# randomness in this notebook gives the same results across runs.
np.random.seed(42)

# Register pandas' formatters/converters with matplotlib.
pd.plotting.register_matplotlib_converters()


import matplotlib as mpl
import matplotlib.pyplot as plt
# Default font sizes applied to every figure: axis labels and x/y tick labels.
mpl.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})
%matplotlib inline
import seaborn as sns
import librosa
import matplotlib.font_manager as font_manager

#---------------opencv------------------------
import cv2
from IPython.display import Image, display

In [20]:
import scaleogram as scg 
from glob import glob
import scipy
from scipy.signal import welch
import wave                    # library handles the parsing of WAV file headers
import pywt
import soundfile as sf
import glob
import re
from tqdm.notebook import tqdm
import numpy as np
import torch
from PIL import Image

In [3]:
# ------------------- plotly visualization ----------------------------------
# from PIL import Image
import plotly.io as pio
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from skimage import data

from glob import glob
import glob
import scipy
from scipy.signal import welch
import wave                    # library handles the parsing of WAV file headers
import pywt
import soundfile as sf
import matplotlib.font_manager as font_manager

In [4]:
## Audio signal processing libraries
import librosa
import librosa.display

import IPython.display as ipd
import plotly.express as px
import plotly.io as pio
# import nussl
import natsort

In [5]:
from moviepy.editor import VideoFileClip

In [12]:
# Where to save the figures, and dataset locations
PROJECT_ROOT_DIR = "../"

# Change to the location of the raw dataset on your machine.
# A raw string is used because the Windows path contains a backslash: in a
# normal string literal "\L" is an invalid escape sequence, which Python
# reports with a warning. The resulting path value is unchanged.
Multimodal_dataset_PATH = r"F:\Laser-Wire-DED-ThermalAudio-Dataset"

# Sub-folders of the dataset root, plus the folder for figures produced here.
Dataset_path = os.path.join(Multimodal_dataset_PATH, '25Hz')       # segmented output
Video_path = os.path.join(Multimodal_dataset_PATH, 'Raw_Video')    # raw input videos
Audio_path = os.path.join(Multimodal_dataset_PATH, 'Audio')        # extracted audio
IMAGE_PATH = os.path.join(PROJECT_ROOT_DIR, "result_images", 'pre-processing')

# Create the output folders up front (no-op when they already exist).
# Video_path is deliberately not created: it holds the raw input data.
os.makedirs(IMAGE_PATH, exist_ok=True)
os.makedirs(Dataset_path, exist_ok=True)
os.makedirs(Audio_path, exist_ok=True)

## function for automatically save the diagram/graph into the folder 
def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure into the IMAGE_PATH folder.

    Parameters
    ----------
    fig_id : str
        File name (without extension) of the saved figure.
    tight_layout : bool
        When true, ``plt.tight_layout()`` is applied before saving.
    fig_extension : str
        File extension, also passed to matplotlib as the output format.
    resolution : int
        Resolution of the saved image, passed to matplotlib as ``dpi``.
    """
    file_name = ".".join((fig_id, fig_extension))
    destination = os.path.join(IMAGE_PATH, file_name)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(destination, dpi=resolution, format=fig_extension)

# Ignore useless warnings (see SciPy issue #5998)
import warnings
warnings.filterwarnings(action="ignore", message="^internal gelsd")

In [7]:
def simple_visualization(sound, sr=44100, alpha = 1, fig_size = (10,2)):
    """Plot the time-domain waveform of an audio signal on a new figure.

    Parameters
    ----------
    sound : audio samples, passed unchanged to librosa's waveform plotter.
    sr : sampling rate of ``sound``, used for the time axis.
    alpha : transparency of the plotted waveform.
    fig_size : (width, height) of the figure, passed to ``plt.subplots``.
    """
    fig, axs = plt.subplots(1, 1, tight_layout=True, figsize=fig_size)

    # librosa.display.waveplot was removed in librosa 0.10 in favour of
    # waveshow; prefer waveshow and fall back to waveplot on older versions.
    if hasattr(librosa.display, 'waveshow'):
        librosa.display.waveshow(sound, sr=sr, alpha=alpha, label='original signal', ax=axs)
    else:
        # The axes created above is the current axes, which waveplot draws on.
        librosa.display.waveplot(sound, sr=sr, alpha=alpha, label='original signal')

    axs.set_xlabel('Time [sec]', fontsize=14)
    axs.set_ylabel('Amplitude', fontsize=14)
    
def plot_spectrogram(spec, title=None, ylabel='Frequency bins', aspect='auto', xmax=None):
    """Show a power spectrogram on a decibel scale, with a colorbar.

    Parameters
    ----------
    spec : power spectrogram; converted with ``librosa.power_to_db`` before display.
    title : figure title; defaults to 'Spectrogram (db)' when empty or None.
    ylabel : label of the y axis.
    aspect : aspect setting forwarded to ``imshow``.
    xmax : optional upper limit of the x axis (in frames); when falsy the
        x limits are left as set by ``imshow``.
    """
    fig, axs = plt.subplots(1, 1)
    axs.set_title(title or 'Spectrogram (db)')
    axs.set_ylabel(ylabel)
    axs.set_xlabel('frame')
    im = axs.imshow(librosa.power_to_db(spec), origin='lower', aspect=aspect)
    if xmax:
        axs.set_xlim((0, xmax))
    # The colorbar belongs to every spectrogram, not only to those with an
    # explicit xmax (it was previously nested under the `if` by mistake).
    fig.colorbar(im, ax=axs)

In [9]:
# sys.path.insert(0, 'utils')

# import visualization

# Segment AVI video file

Methodologies:
- __FFmpeg__: extracts the audio stream from the input MP4 file (__not recommended__) --> results in a corrupted file
- __moviepy__: extract audio from mp4 video stream. (`verified approach`)
- __OpenCV__: extract the image frames from a video file (`verified approach`)
- The procedure __iterates over each frame__, using FFmpeg/moviepy to extract the corresponding audio segment and OpenCV to extract the corresponding video frame.
- The audio and image segments are then saved to their respective output folders with the desired names.

## Single file segmentation

In [11]:
# Index of the experiment whose video is segmented in the cells below.
sample_index = 25

# ----------------Define input video file path---------------
raw_video_file = os.path.join(Video_path,  f'Exp_{sample_index}.avi')

# Output folders: <dataset root>/25Hz/<sample_index>/{raw_audio, images}
sample_output_root = os.path.join(Multimodal_dataset_PATH, '25Hz', str(sample_index))
raw_audio_output_folder = os.path.join(sample_output_root, 'raw_audio')
image_output_folder = os.path.join(sample_output_root, 'images')

# Create the output folders. exist_ok=True makes this a no-op when they are
# already present and avoids the check-then-create race of os.path.exists().
os.makedirs(image_output_folder, exist_ok=True)
os.makedirs(raw_audio_output_folder, exist_ok=True)

In [13]:
### ------------------Extract audio (wav) from video (MP4/avi) ------------------
### ---------For single video file -----------
# Open the clip in a `with` block so that the underlying readers are closed
# even if writing the audio fails (previously the clip was never closed).
with VideoFileClip(raw_video_file) as video_clip:
    # Extract the audio
    audio = video_clip.audio
    if audio is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(f"No audio track found in video file: {raw_video_file}")
    # Save the audio to a file
    audio.write_audiofile(f'{Audio_path}/{sample_index}_raw.wav')
# NOTE: `audio` belongs to the clip closed above; reload the saved WAV file
# for any further processing.

MoviePy - Writing audio in F:\Laser-Wire-DED-ThermalAudio-Dataset\Audio/25_raw.wav


                                                                                                                       

MoviePy - Done.




### Testing on a single video file

In [16]:
### -----------------For single video file ---------------
# Load the audio and calculate the segment length in samples.
# sr=None keeps the file's native sampling rate instead of resampling.
y_raw, sr = librosa.load(f'{Audio_path}/{sample_index}_raw.wav', sr=None)

segment_duration = 0.04  # 0.04 seconds. 25 Hz
segment_length = int(segment_duration * sr)
print (f"sampling rate: {sr}")
# NOTE: despite its wording, the next message reports the number of audio
# samples in ONE segment (segment_length), not a count of segments.
print ("the total number of samples segmented from this video is: " + str(segment_length))
# Durations are derived from the loaded sampling rate rather than a
# hard-coded 44100, so they stay correct for recordings at other rates.
print ("Each sample length in second is: " + str(segment_length/sr))
print ("total duration of audio signal: " + str(len(y_raw)/sr))
# Fractional value: (signal length - one segment) expressed in segment lengths.
print ("total number of images: " + str((len(y_raw) - segment_length)/segment_length))

sampling rate: 44100
the total number of samples segmented from this video is: 1764
Each sample length in second is: 0.04
total duration of audio signal: 29.29
total number of images: 731.25


In [17]:
# Length of the loaded signal minus one segment length, in samples.
len(y_raw) - segment_length

1289925

In [18]:
# Number of audio samples in one segment: int(segment_duration * sr).
segment_length

1764

### Method: direct segmentation
- No past data was used
- Only the current time frame

In [21]:
# Initialize a counter for audio and image file names
counter = 1
segment_length_samples = int(0.04 * sr)  # 40ms corresponds to 0.04 seconds, 25 Hz
raw_seg = []

# Start offsets (in samples) of every complete audio segment. The "+ 1" on the
# stop value keeps the final segment when the signal length is an exact
# multiple of the segment length (a plain exclusive stop would drop it).
segment_starts = range(0, len(y_raw) - segment_length_samples + 1, segment_length_samples)

# Total for tqdm, taken from the range itself so that the progress bar always
# matches the real number of iterations.
total_iterations = len(segment_starts)

# Initialize video capture once before the loop
cap = cv2.VideoCapture(raw_video_file)
try:
    if not cap.isOpened():
        # Without this check every frame read would fail one by one.
        raise IOError(f"Cannot open video file: {raw_video_file}")
    fps = cap.get(cv2.CAP_PROP_FPS)

    # Iterate over the audio and save each segment
    for i in tqdm(segment_starts, total=total_iterations):
        # Extract the audio segment. The slice uses this cell's own
        # segment_length_samples, not a variable left over from another cell.
        segment_raw = y_raw[i:i+segment_length_samples]
        raw_seg.append(segment_raw)

        sf.write(f'{raw_audio_output_folder}/sample_{sample_index}_{counter}.wav', segment_raw, samplerate = sr)

        # Use OpenCV to extract the video frame at the segment's start time
        frame_index = round(i / sr * fps)  # Rounding might give a closer frame
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
        ret, frame = cap.read()
        if ret:  # Check if frame read successfully
            cv2.imwrite(f'{image_output_folder}/sample_{sample_index}_{counter}.jpg', frame)
        else:
            # The audio segment has already been written at this point, so a
            # failure here leaves a .wav file without a matching .jpg.
            print(f"Failed to read frame for counter: {counter}")

        # Increment the counter
        counter += 1
finally:
    # Release the capture even if the loop is interrupted or raises.
    cap.release()

  0%|          | 0/731 [00:00<?, ?it/s]

Failed to read frame for counter: 695
Failed to read frame for counter: 696
Failed to read frame for counter: 697
Failed to read frame for counter: 704
Failed to read frame for counter: 705
Failed to read frame for counter: 706
Failed to read frame for counter: 707
Failed to read frame for counter: 708
Failed to read frame for counter: 709
Failed to read frame for counter: 713
Failed to read frame for counter: 714
Failed to read frame for counter: 715
Failed to read frame for counter: 716
Failed to read frame for counter: 717
Failed to read frame for counter: 718
Failed to read frame for counter: 719
Failed to read frame for counter: 720
Failed to read frame for counter: 721
Failed to read frame for counter: 722
Failed to read frame for counter: 723
Failed to read frame for counter: 724
Failed to read frame for counter: 725
Failed to read frame for counter: 726
Failed to read frame for counter: 727
Failed to read frame for counter: 728
Failed to read frame for counter: 729
Failed to re