# Multisensor fusion project
- Experiment data: June 2022. Maraging Steel 300
- Experiment number (single bead wall samples): 21-26
- Recorded data: position, velocity, coaxial CCD features, acoustic features, thermal features
- Defect generated: keyhole pores, cracks, defect-free

### Notebook 1: Vision acoustic segmentation

In [1]:
## Required python libraries
import pandas as pd
import numpy as np
import scipy as sp
import scipy.signal
import os
import math
import sys
from scipy.interpolate import griddata

# to make this notebook's output stable across runs
np.random.seed(42)

pd.plotting.register_matplotlib_converters()


import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)
%matplotlib inline
import seaborn as sns
import librosa
import matplotlib.font_manager as font_manager

#---------------opencv------------------------
import cv2

In [2]:
import scaleogram as scg 
from glob import glob
import scipy
from scipy.signal import welch
import wave                    # library handles the parsing of WAV file headers
import pywt
import soundfile as sf
import glob
import re

In [3]:
# ------------------- plotly visualization ----------------------------------
from PIL import Image
import plotly.io as pio
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from skimage import data

from glob import glob
import glob
import scipy
from scipy.signal import welch
import wave                    # library handles the parsing of WAV file headers
import pywt
import soundfile as sf
import matplotlib.font_manager as font_manager

In [4]:
## Audio signal processing libraries
import librosa
import librosa.display
import IPython.display as ipd
import plotly.express as px
import plotly.io as pio
# import nussl
import natsort

In [6]:
# ---------------------------------------------------------------------------
# Locations of the input datasets and of the figures written by this notebook
# ---------------------------------------------------------------------------
PROJECT_ROOT_DIR = "../"

# Root folder of the audio dataset and its per-processing-stage wave folders
Audio_ROOT = "C:\\Users\\Asus\\OneDrive_Chen1470\\OneDrive - Nanyang Technological University\\Dataset\\LDED_audio_dataset"

Audio_PATH_original = os.path.join(Audio_ROOT, "wave_file", "original")
Audio_PATH_equalized = os.path.join(Audio_ROOT, "wave_file", "equalized")
Audio_PATH_bandpassed = os.path.join(Audio_ROOT, "wave_file", "bandpassed")
Audio_PATH_denoised = os.path.join(Audio_ROOT, "wave_file", "denoised")

# Root folder of the acoustic/visual dataset and its sub-folders
Multimodal_dataset_PATH = "C:\\Users\\Asus\\OneDrive_Chen1470\\OneDrive - Nanyang Technological University\\Dataset\\Multimodal_AM_monitoring\\LDED_Acoustic_Visual_Dataset"

Audio_segmented_30Hz_PATH = os.path.join(Multimodal_dataset_PATH, "Audio_signal_all_30Hz")
Audio_segmented_30Hz_PATH_24 = os.path.join(Multimodal_dataset_PATH, "Audio_signal_all_30Hz_24")
CCD_Image_30Hz_path_22 = os.path.join(Multimodal_dataset_PATH, "Coaxial_CCD_images_30Hz")
CCD_Image_30Hz_path_24 = os.path.join(Multimodal_dataset_PATH, "coaxial_meltpool_images_24_30Hz")
Video_path = os.path.join(Multimodal_dataset_PATH, "Video")

# Output folder for figures; created on first run
IMAGE_PATH = os.path.join(PROJECT_ROOT_DIR, "result_images", "pre-processing")
os.makedirs(IMAGE_PATH, exist_ok=True)


## Helper that saves the current matplotlib figure into the IMAGE_PATH folder
def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure as ``IMAGE_PATH/<fig_id>.<fig_extension>``.

    Parameters
    ----------
    fig_id : str
        Base name of the output file (without extension); also echoed in the
        progress message.
    tight_layout : bool
        When true, ``plt.tight_layout()`` is applied before saving.
    fig_extension : str
        File extension; also handed to ``plt.savefig`` as ``format``.
    resolution : int
        Handed to ``plt.savefig`` as ``dpi``.
    """
    file_name = fig_id + "." + fig_extension
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    destination = os.path.join(IMAGE_PATH, file_name)
    plt.savefig(destination, format=fig_extension, dpi=resolution)

# Silence warnings whose message starts with "internal gelsd" (see SciPy issue #5998)
import warnings

warnings.filterwarnings("ignore", message="^internal gelsd")

In [130]:
def simple_visualization(sound, sr=44100, alpha = 1):
    """Plot the time-domain waveform of an audio signal on a new 6x4 figure.

    Parameters
    ----------
    sound : array-like
        Audio samples, passed straight to librosa's waveform plotting routine.
    sr : int
        Sampling rate forwarded to librosa (default 44100).
    alpha : float
        Opacity of the plotted waveform.

    Notes
    -----
    The y-axis is fixed to [-0.28, 0.28] with ticks at -0.25, 0 and 0.25.
    Nothing is returned; the figure is left as the current matplotlib figure.
    """
    fig, axs = plt.subplots(1, 1, tight_layout=True, figsize=(6, 4))

    # ``librosa.display.waveplot`` was removed in librosa 0.10 in favour of
    # ``waveshow``; prefer the new function and fall back on older releases.
    draw_waveform = getattr(librosa.display, "waveshow", None)
    if draw_waveform is None:
        draw_waveform = librosa.display.waveplot

    # Draw explicitly on the axes created above instead of relying on the
    # implicit "current axes".
    draw_waveform(sound, sr=sr, alpha=alpha, label='original signal', ax=axs)

    axs.set_xlabel('Time [sec]', fontsize=14)
    axs.set_ylabel('Amplitude', fontsize=14)  # label was misspelled "Amplitute"
    axs.set_ylim([-0.28, 0.28])
    axs.set_yticks([-0.25, 0, 0.25])

    # fig.suptitle("Time-domain visualisation", fontsize = 16)

## Segment MP4 video file

- uses __FFmpeg__ to extract the audio stream from the input MP4 file
- uses __OpenCV__ to extract the video frames
- __iterates over each frame__ and uses FFmpeg to extract the corresponding audio segment; OpenCV to extract the corresponding video segment
- The audio and image segments are then saved to their respective output folders with the desired names.

In [None]:
import os
import subprocess

import cv2

# Split one MP4 recording into per-frame image files and matching audio clips.
# For every video frame i (1-based file names) this writes
#   <image_output_folder>/<i>.jpg  - the frame grabbed at i / fps seconds
#   <audio_output_folder>/<i>.wav  - the audio covering that frame's 1 / fps s

# Define input video file path and output folders
input_file = os.path.join(Video_path, '22_denoised_video.mp4')
image_output_folder = os.path.join(Video_path, 'image_segments')
audio_output_folder = os.path.join(Video_path, 'audio_segments')

# Create output folders if they don't exist
os.makedirs(image_output_folder, exist_ok=True)
os.makedirs(audio_output_folder, exist_ok=True)

# Options shared by every FFmpeg call: overwrite stale outputs from earlier
# runs instead of prompting, and keep the notebook output free of banners.
ffmpeg_base = ['ffmpeg', '-y', '-hide_banner', '-loglevel', 'error']

# Use OpenCV to read the video properties and frames
cap = cv2.VideoCapture(input_file)
if not cap.isOpened():
    raise IOError(f'Cannot open video file: {input_file}')

try:
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if fps <= 0:
        raise ValueError(f'Invalid frame rate ({fps}) reported for {input_file}')

    # One audio segment per video frame. The previous hard-coded 1/10 s did
    # not match the 1/fps step used for the start times.
    segment_duration = 1 / fps

    # Use FFmpeg to extract the whole audio track once, decoded to PCM.
    # The earlier command stream-copied the MP4 audio into a file literally
    # named "%d.wav" (the WAV muxer does not expand number patterns).
    full_audio_file = os.path.join(audio_output_folder, 'full_audio.wav')
    audio_available = subprocess.call(
        ffmpeg_base + ['-i', input_file, '-vn', '-acodec', 'pcm_s16le', full_audio_file]
    ) == 0
    if not audio_available:
        print('Warning: audio extraction failed; only image segments will be written')

    # Iterate over the video frames and save each segment
    for i in range(frame_count):
        start_time = i / fps

        # Use OpenCV to grab the frame at the start of the segment
        cap.set(cv2.CAP_PROP_POS_MSEC, start_time * 1000)
        ret, frame = cap.read()
        if not ret:
            # CAP_PROP_FRAME_COUNT is only an estimate for some containers;
            # skip unreadable frames so no audio clip is left without an image.
            print(f'Warning: could not read frame {i + 1}; skipping')
            continue

        # Use FFmpeg to cut the matching audio segment from the decoded track.
        # Re-encoding to PCM (rather than '-acodec copy') lets FFmpeg cut at
        # the requested time instead of at packet boundaries.
        if audio_available:
            subprocess.call(
                ffmpeg_base + [
                    '-ss', f'{start_time:.6f}',
                    '-t', f'{segment_duration:.6f}',
                    '-i', full_audio_file,
                    '-acodec', 'pcm_s16le',
                    os.path.join(audio_output_folder, f'{i+1}.wav'),
                ]
            )

        cv2.imwrite(os.path.join(image_output_folder, f'{i+1}.jpg'), frame)
finally:
    # Release resources even if an error interrupts the loop
    cap.release()