In [1]:
import librosa
import librosa.display
import scipy as sp
import IPython.display as ipd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
audio_path = "./ESC50/audio/1-1791-A-26.wav"
ipd.Audio(audio_path)

In [3]:
audio_path = "./ESC50/audio/1-13572-A-46.wav"
ipd.Audio(audio_path)

In [4]:
audio_path_1 = "./ESC50/audio/1-18631-A-23.wav"
ipd.Audio(audio_path_1)

In [None]:
signal, sr = librosa.load(audio_path_1)

In [None]:
signal

In [None]:
signal.shape

In [None]:
time = np.linspace(0, len(signal) / sr, num=len(signal))

# Plot the waveform
plt.figure(figsize=(12, 4))
plt.plot(time, signal)
plt.title("Audio Signal Waveform")
plt.xlabel("Time (s)")
plt.ylabel("Amplitude")
plt.grid(True)
plt.show()

In [None]:
!pip install -i https://test.pypi.org/simple/ nnAudio2

In [None]:
import torch
import librosa
from nnAudio2 import Spectrogram
import matplotlib.pyplot as plt
import torch
import os
import time

**1. MEL Spectrogram feature extraction**

In [None]:
audio_1,_ = librosa.load(audio_path_1,sr=1.46*22050)

In [None]:
features=librosa.feature.melspectrogram(y=audio_1, sr=1.46*22050,n_fft=1024, hop_length=256, window='hann', center=True, pad_mode='constant', power=2.0, n_mels=95,fmin=0,fmax=None)
log_mel_spectrogram = librosa.power_to_db(features)

In [None]:
plt.figure(figsize=(10, 6))
plt.imshow(log_mel_spectrogram, aspect='auto', origin='lower', cmap='viridis')
plt.colorbar(format="%+2.0f dB")
plt.title("melspectrogram Feature Extraction")
plt.xlabel("Time Frames")
plt.ylabel("Melspectrogram Bins")
plt.show()
# Optionally, you can convert it to a NumPy array if needed
melspectrogram_features_numpy =log_mel_spectrogram
print(f"Melspectrogram Features Shape: {melspectrogram_features_numpy.shape}")

In [None]:
def melspectrogram(file_path):
    """Return the log-scaled mel spectrogram of one audio file.

    The clip is loaded at 1.46 * 22050 Hz, converted to a 95-band power mel
    spectrogram restricted to 18-4186 Hz, and expressed in dB relative to its
    own maximum.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        The array produced by ``librosa.power_to_db`` (mel bands x frames).
    """
    sample_rate = 1.46*22050
    waveform, _ = librosa.load(file_path, sr=sample_rate)

    # Analysis settings kept together so they can be read at a glance.
    mel_settings = dict(
        sr=sample_rate,
        n_fft=1024,
        hop_length=256,
        window='hann',
        center=True,
        pad_mode='constant',
        power=2.0,
        n_mels=95,
        fmin=18,
        fmax=4186,
    )
    power_spec = librosa.feature.melspectrogram(y=waveform, **mel_settings)

    # ref=np.max puts the loudest bin at 0 dB.
    return librosa.power_to_db(power_spec, ref=np.max)

In [None]:
import os
import numpy as np
import librosa
# NOTE: `from librosa.feature import melspectrogram` was removed from this cell on purpose.
# It rebound the name `melspectrogram` and hid the notebook's own
# `melspectrogram(file_path)` helper, so the call below would have passed a file path
# to librosa's function. Use `librosa.feature.melspectrogram` when the library function is needed.

INPUT_FOLDER = "./ESC50/audio"  # Replace with your folder path
OUTPUT_FOLDER = "./audio_mel"  # Where features will be saved

# Create output directory if it doesn't exist
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

def process_dataset_mel(input_folder, output_folder):
    """Save a log-mel spectrogram for every .wav file directly inside input_folder.

    Each clip is converted with the notebook's ``melspectrogram(file_path)`` helper
    and written as ``<output_folder>/general/<stem>.npy``. Sub-directories and
    non-wav entries are reported and skipped.

    Args:
        input_folder: Flat directory containing the .wav files.
        output_folder: Directory under which the ``general`` folder is created.
    """
    # All features go into one class folder because the input has no class sub-folders.
    output_class_folder = os.path.join(output_folder, "general")
    os.makedirs(output_class_folder, exist_ok=True)

    for file_name in os.listdir(input_folder):
        file_path = os.path.join(input_folder, file_name)

        # Process only .wav files, ignoring directories
        if not (file_name.endswith('.wav') and os.path.isfile(file_path)):
            print(f"Skipping {file_name}, not a valid file.")
            continue

        print(f"Processing {file_name}")
        mel_spec = melspectrogram(file_path)
        if mel_spec is None:
            continue

        # splitext swaps only the trailing extension; str.replace would rewrite
        # every ".wav" occurring anywhere in the name.
        stem = os.path.splitext(file_name)[0]
        output_file_path = os.path.join(output_class_folder, stem + '.npy')
        np.save(output_file_path, mel_spec)
        print(f"Saved Mel spectrogram: {output_file_path}")

In [None]:
# Process the dataset
process_dataset_mel(INPUT_FOLDER, OUTPUT_FOLDER)

In [None]:
file_path = 'D:/sound_classification/audio_mel/general/1-137-A-32.npy'

# Load the Mel spectrogram features
mel_features = np.load(file_path)
print(mel_features.shape)
# Example visualization (optional)
import matplotlib.pyplot as plt
plt.figure(figsize=(10, 4))
plt.imshow(mel_features, aspect='auto', origin='lower', cmap='viridis')
plt.colorbar(label='Amplitude')
plt.title('Mel Spectrogram')
plt.xlabel('Time Frames')
plt.ylabel('Mel Bands')
plt.show()

In [None]:
file_path="D:/sound_classification/audio_mel/general/1-137-A-32.npy"
mel_features = np.load(file_path)
print(mel_features.shape)

**2. Gammatonegram feature extraction**

In [None]:
import numpy as np

In [None]:
gammatonegram = Spectrogram.Gammatonegram(
    sr=1.46*22050,
    n_fft=1024,         # FFT size
    n_bins=95,          # Number of Gammatone filters (bins)
    hop_length=256,     # Hop length
    window='hann',      # Window type
    center=True,        # Center frames
    pad_mode='reflect', # Padding mode
    htk=False,          # Use HTK normalization (set to False here)
    fmin=18,          # Minimum frequency for Gammatone filter
    fmax=4186,          # Maximum frequency (None means Nyquist)
    norm=1,             # Normalization
    trainable_bins=False, # Non-trainable filter bins
    trainable_STFT=False, # Non-trainable STFT
    device='cpu'        # Force CPU usage instead of CUDA
)

In [None]:
def gammatogram_f(file_path):
    """Return the dB-scaled gammatonegram of one audio file.

    The clip is loaded at 1.46 * 22050 Hz, passed through the module-level
    ``gammatonegram`` transform as a float tensor, converted to dB relative to
    its maximum, and stripped of singleton dimensions.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        The squeezed dB-scaled array.
    """
    waveform, _ = librosa.load(file_path, sr=1.46*22050)
    spectrum = gammatonegram(torch.tensor(waveform).float())
    # np.squeeze drops the singleton axes left by the transform.
    return np.squeeze(librosa.power_to_db(spectrum, ref=np.max))

In [None]:
gammatogan_feature=gammatogram_f('D:\\Underwater_Data\\Inc_2000_Exc_4000\\test\\tanker\\2.wav')

In [None]:
print(gammatogan_feature.shape)
import matplotlib.pyplot as plt
plt.figure(figsize=(10, 4))
plt.imshow(gammatogan_feature, aspect='auto', origin='lower', cmap='viridis')
plt.colorbar(label='Amplitude')
plt.title('Gamma Spectrogram')
plt.xlabel('Time Frames')
plt.ylabel('Gamma Bands')
plt.show()

In [None]:
INPUT_FOLDER = "D:\\Underwater_Data\\Inc_4000_Exc_6000"  # Replace with your folder path
OUTPUT_FOLDER = "D:\\Underwater_Data\\GAMM_46"  # Where features will be saved
os.makedirs(OUTPUT_FOLDER, exist_ok=True)
def process_dataset_gamma(input_folder, output_folder):
    """Compute and save a gammatonegram for every .wav file in a split/class tree.

    Reads ``input_folder/<split>/<class>/*.wav`` for the splits ``test``,
    ``train`` and ``validation`` and mirrors that layout under
    ``output_folder``, writing one ``.npy`` file per clip.

    Args:
        input_folder: Root directory containing the three split folders.
        output_folder: Root directory that receives the feature files.

    Raises:
        FileNotFoundError: If one of the split folders is missing.
    """
    for split in ['test', 'train', 'validation']:
        split_path = os.path.join(input_folder, split)
        output_split_folder = os.path.join(output_folder, split)
        os.makedirs(output_split_folder, exist_ok=True)

        for class_name in os.listdir(split_path):  # Iterate over class folders
            class_path = os.path.join(split_path, class_name)
            if not os.path.isdir(class_path):  # Ignore stray files next to the class folders
                continue

            print(f"Processing {split}/{class_name}")
            output_class_folder = os.path.join(output_split_folder, class_name)
            os.makedirs(output_class_folder, exist_ok=True)

            for file_name in os.listdir(class_path):
                if not file_name.endswith('.wav'):
                    continue
                gamma_spec = gammatogram_f(os.path.join(class_path, file_name))
                if gamma_spec is None:
                    continue

                # splitext swaps only the trailing extension; str.replace would
                # rewrite every ".wav" occurring anywhere in the name.
                stem = os.path.splitext(file_name)[0]
                output_file_path = os.path.join(output_class_folder, stem + '.npy')
                np.save(output_file_path, gamma_spec)
                print(f"Saved gammatonegram: {output_file_path}")


In [None]:
process_dataset_gamma(INPUT_FOLDER, OUTPUT_FOLDER)

**3. CQT feature extraction**

In [None]:
audio_path='D:\\Underwater_Data\\Inc_2000_Exc_4000\\test\\tanker\\2.wav'
y, sr = librosa.load(audio_path, sr=44100)  # Load with a target sampling rate (e.g., 44100 Hz)
cqt = librosa.cqt(
    y,
    sr=sr,
    hop_length=512,      # Number of samples between successive frames
    fmin=32.7,           # Minimum frequency (C1 in musical scale)           # Maximum frequency
    n_bins=84,           # Total number of frequency bins
    bins_per_octave=12,  # Number of bins per octave
    window="hann"        # Type of window
)
cqt_db = librosa.amplitude_to_db(np.abs(cqt), ref=np.max)

In [None]:
def cqt_features(file_path):
    """Return the dB-scaled constant-Q transform magnitude of one audio file.

    The clip is loaded at 1.46 * 22050 Hz and analysed with 95 bins at
    12 bins per octave starting from 18 Hz, using a hop of 256 samples and a
    Hann window. Magnitudes are expressed in dB relative to the maximum.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        The array produced by ``librosa.amplitude_to_db`` (CQT bins x frames).
    """
    waveform, sample_rate = librosa.load(file_path, sr=1.46*22050)
    transform = librosa.cqt(
        waveform,
        sr=sample_rate,
        hop_length=256,      # samples between successive frames
        fmin=18,             # lowest analysed frequency in Hz
        n_bins=95,           # total number of frequency bins
        bins_per_octave=12,  # semitone resolution
        window="hann",
    )
    magnitude = np.abs(transform)
    return librosa.amplitude_to_db(magnitude, ref=np.max)


In [None]:
cqt_feture=cqt_features(audio_path)

In [None]:
import matplotlib.pyplot as plt
plt.figure(figsize=(10, 4))
plt.imshow(cqt_feture, aspect='auto', origin='lower', cmap='viridis')
plt.colorbar(label='Amplitude')
plt.title('CQT Spectrogram')
plt.xlabel('Time Frames')
plt.ylabel('CQT Bands')
plt.show()

In [None]:
INPUT_FOLDER = "D:\\Underwater_Data\\Inc_4000_Exc_6000"  # Replace with your folder path
OUTPUT_FOLDER = "D:\\Underwater_Data\\CQT_46"  # Where features will be saved
os.makedirs(OUTPUT_FOLDER, exist_ok=True)
def process_dataset_cqt(input_folder, output_folder):
    """Compute and save CQT features for every .wav file in a split/class tree.

    Walks ``input_folder/<split>/<class>/*.wav`` for the splits ``test``,
    ``train`` and ``validation``, runs ``cqt_features`` on each clip and stores
    the result as ``output_folder/<split>/<class>/<stem>.npy``.

    Args:
        input_folder: Root directory containing the three split folders.
        output_folder: Root directory that receives the feature files.

    Raises:
        FileNotFoundError: If one of the split folders is missing.
    """
    for split in ['test', 'train', 'validation']:  # Process each split
        split_path = os.path.join(input_folder, split)
        output_split_folder = os.path.join(output_folder, split)
        os.makedirs(output_split_folder, exist_ok=True)

        class_dirs = [
            name for name in os.listdir(split_path)
            if os.path.isdir(os.path.join(split_path, name))  # class folders only
        ]
        for class_name in class_dirs:
            class_path = os.path.join(split_path, class_name)
            print(f"Processing {split}/{class_name}")
            output_class_folder = os.path.join(output_split_folder, class_name)
            os.makedirs(output_class_folder, exist_ok=True)

            for file_name in os.listdir(class_path):
                if not file_name.endswith('.wav'):
                    continue
                cqt_spec = cqt_features(os.path.join(class_path, file_name))
                if cqt_spec is None:
                    continue

                # Replace only the trailing extension (str.replace would touch
                # every ".wav" substring in the file name).
                stem, _ = os.path.splitext(file_name)
                output_file_path = os.path.join(output_class_folder, stem + '.npy')
                np.save(output_file_path, cqt_spec)
                print(f"Saved CQT spectrogram: {output_file_path}")

In [None]:
process_dataset_cqt(INPUT_FOLDER, OUTPUT_FOLDER)

**4. MFCC feature extraction**

In [None]:
def extract_mfcc_1d(file_path, n_mfcc=45):
    """Return the MFCC matrix of one audio file flattened to a 1-D vector.

    The clip is loaded at 22050 Hz; MFCCs are computed from a 50-band mel
    spectrogram with a 2048-sample FFT and a hop of 512 samples.

    Args:
        file_path: Path of the audio file to load.
        n_mfcc: Number of cepstral coefficients per frame.

    Returns:
        The flattened MFCC array; its length therefore depends on the clip
        duration.
    """
    waveform, sample_rate = librosa.load(file_path, sr=22050)
    mfcc_settings = {
        "n_fft": 2048,
        "hop_length": 512,
        "n_mels": 50,
        "n_mfcc": n_mfcc,
    }
    coefficients = librosa.feature.mfcc(y=waveform, sr=sample_rate, **mfcc_settings)
    return coefficients.flatten()

In [None]:
INPUT_FOLDER = 'D:\\Underwater_Data\\Inc_2000_Exc_4000'  # Replace with your folder path
OUTPUT_FOLDER = 'D:\\Underwater_Data\\MFCC'  # Where features will be saved
os.makedirs(OUTPUT_FOLDER, exist_ok=True)
def process_dataset_mfcc(input_folder, output_folder):
    """Compute and save flattened MFCC features for every .wav file in a split/class tree.

    Walks ``input_folder/<split>/<class>/*.wav`` for the splits ``test``,
    ``train`` and ``validation``, runs ``extract_mfcc_1d`` on each clip and
    stores the result as ``output_folder/<split>/<class>/<stem>.npy``.

    Args:
        input_folder: Root directory containing the three split folders.
        output_folder: Root directory that receives the feature files.

    Raises:
        FileNotFoundError: If one of the split folders is missing.
    """
    for split in ['test', 'train', 'validation']:  # Process each split
        split_path = os.path.join(input_folder, split)
        output_split_folder = os.path.join(output_folder, split)
        os.makedirs(output_split_folder, exist_ok=True)
        for class_name in os.listdir(split_path):  # Iterate over class folders
            class_path = os.path.join(split_path, class_name)
            if not os.path.isdir(class_path):  # Ensure it's a directory
                continue
            print(f"Processing {split}/{class_name}")
            output_class_folder = os.path.join(output_split_folder, class_name)
            os.makedirs(output_class_folder, exist_ok=True)
            for file_name in os.listdir(class_path):
                if not file_name.endswith('.wav'):
                    continue
                mfcc = extract_mfcc_1d(os.path.join(class_path, file_name))
                # The guard must test the value just computed; the previous
                # code referenced an undefined name here.
                if mfcc is None:
                    continue
                # splitext swaps only the trailing extension.
                stem = os.path.splitext(file_name)[0]
                output_file_path = os.path.join(output_class_folder, stem + '.npy')
                np.save(output_file_path, mfcc)
                print(f"Saved MFCC features: {output_file_path}")

In [None]:
process_dataset_mfcc(INPUT_FOLDER, OUTPUT_FOLDER)

**6. STFT feature extraction technique**

In [None]:
def extract_stft_features(audio_path,sr=22050,n_fft=2024,hop_length=512):
    """Return the dB-scaled STFT magnitude of one audio file.

    Args:
        audio_path: Path of the audio file to load.
        sr: Sample rate the clip is resampled to on load.
        n_fft: FFT window length in samples.
            NOTE(review): 2024 may be a typo for 2048 - confirm before changing,
            since it determines the number of frequency rows in saved features.
        hop_length: Samples between successive frames.

    Returns:
        The array produced by ``librosa.amplitude_to_db`` with the maximum
        magnitude as the 0 dB reference.
    """
    samples, _ = librosa.load(audio_path, sr=sr)
    magnitude = np.abs(librosa.stft(samples, n_fft=n_fft, hop_length=hop_length))
    return librosa.amplitude_to_db(magnitude, ref=np.max)

In [None]:
INPUT_FOLDER = 'D:\\Underwater_Data\\Inc_2000_Exc_4000'
OUTPUT_FOLDER = 'D:\\Underwater_Data\\STFT'
os.makedirs(OUTPUT_FOLDER, exist_ok=True)
def process_dataset_stft(input_folder, output_folder):
    """Compute and save STFT features for every .wav file in a split/class tree.

    Walks ``input_folder/<split>/<class>/*.wav`` for the splits ``test``,
    ``train`` and ``validation``, runs ``extract_stft_features`` on each clip
    and stores the result as ``output_folder/<split>/<class>/<stem>.npy``.

    Args:
        input_folder: Root directory containing the three split folders.
        output_folder: Root directory that receives the feature files.

    Raises:
        FileNotFoundError: If one of the split folders is missing.
    """
    for split in ['test', 'train', 'validation']:
        split_path = os.path.join(input_folder, split)
        output_split_folder = os.path.join(output_folder, split)
        os.makedirs(output_split_folder, exist_ok=True)
        for class_name in os.listdir(split_path):
            class_path = os.path.join(split_path, class_name)
            if not os.path.isdir(class_path):  # skip stray files beside the class folders
                continue
            print(f"Processing {split}/{class_name}")
            output_class_folder = os.path.join(output_split_folder, class_name)
            os.makedirs(output_class_folder, exist_ok=True)
            wav_names = [name for name in os.listdir(class_path) if name.endswith('.wav')]
            for file_name in wav_names:
                stft_spec = extract_stft_features(os.path.join(class_path, file_name))
                if stft_spec is None:
                    continue
                # splitext swaps only the trailing extension; str.replace would
                # rewrite every ".wav" occurring anywhere in the name.
                stem = os.path.splitext(file_name)[0]
                output_file_path = os.path.join(output_class_folder, stem + '.npy')
                np.save(output_file_path, stft_spec)
                print(f"Saved STFT spectrogram: {output_file_path}")

In [None]:
process_dataset_stft(INPUT_FOLDER, OUTPUT_FOLDER)

In [None]:
pip install kymatio librosa numpy matplotlib

**7. WST feature extraction**

In [None]:
from kymatio.numpy import Scattering1D

In [None]:
def extract_wst_features(audio_path, sr=22050, J=4, Q=6,T=32768):
    """Return the 1-D wavelet scattering transform of one audio file.

    The clip is loaded at ``sr`` and forced to exactly ``T`` samples, because
    the ``Scattering1D`` object is built for a fixed input length: longer clips
    are cropped to their first ``T`` samples, shorter ones are zero-padded at
    the end.

    Args:
        audio_path: Path of the audio file to load.
        sr: Sample rate the clip is resampled to on load.
        J: Passed to ``Scattering1D`` as ``J``.
        Q: Passed to ``Scattering1D`` as ``Q``.
        T: Number of samples fed to the transform.

    Returns:
        The output of ``Scattering1D`` applied to the length-``T`` signal.
    """
    y, sr = librosa.load(audio_path, sr=sr)
    # Crop first: np.pad rejects the negative width that a clip longer than T would produce.
    y = y[:T]
    y = np.pad(y, (0, T - len(y)), mode='constant')
    scattering = Scattering1D(J=J, shape=(T,), Q=Q)
    return scattering(y)

In [None]:
S = extract_wst_features("D:\\Underwater_Data\\Inc_2000_Exc_4000\\test\\tanker\\2.wav")
S.shape

In [None]:
stft_db = librosa.amplitude_to_db(S, ref=np.max)
plt.figure(figsize=(8, 6))  # 4x4 inches * 56 DPI = 224x224
plt.imshow(stft_db, aspect="auto",origin="lower", cmap="viridis")
plt.show()

In [None]:
import os

In [None]:
INPUT_FOLDER = 'D:\\Underwater_Data\\Inc_2000_Exc_4000'  # Replace with your folder path
OUTPUT_FOLDER = 'D:\\Underwater_Data\\WST'  # Where features will be saved
os.makedirs(OUTPUT_FOLDER, exist_ok=True)
def process_dataset_wst(input_folder, output_folder):
    """Compute and save wavelet-scattering features for every .wav file in a split/class tree.

    Walks ``input_folder/<split>/<class>/*.wav`` for the splits ``test``,
    ``train`` and ``validation``, runs ``extract_wst_features`` on each clip
    and stores the result as ``output_folder/<split>/<class>/<stem>.npy``.

    Args:
        input_folder: Root directory containing the three split folders.
        output_folder: Root directory that receives the feature files.

    Raises:
        FileNotFoundError: If one of the split folders is missing.
    """
    for split in ['test', 'train', 'validation']:  # Process each split
        split_path = os.path.join(input_folder, split)
        output_split_folder = os.path.join(output_folder, split)
        os.makedirs(output_split_folder, exist_ok=True)
        for class_name in os.listdir(split_path):  # Iterate over class folders
            class_path = os.path.join(split_path, class_name)
            if not os.path.isdir(class_path):  # Ensure it's a directory
                continue
            print(f"Processing {split}/{class_name}")
            output_class_folder = os.path.join(output_split_folder, class_name)
            os.makedirs(output_class_folder, exist_ok=True)
            for file_name in os.listdir(class_path):
                if not file_name.endswith('.wav'):
                    continue
                wst = extract_wst_features(os.path.join(class_path, file_name))
                if wst is None:
                    continue
                # Swap only the trailing extension (str.replace would touch
                # every ".wav" substring in the file name).
                stem = os.path.splitext(file_name)[0]
                output_file_path = os.path.join(output_class_folder, stem + '.npy')
                np.save(output_file_path, wst)
                print(f"Saved WST features: {output_file_path}")

In [None]:
process_dataset_wst(INPUT_FOLDER, OUTPUT_FOLDER)

**Concatenation of Mel spectrogram, CQT, and Gammatone features**

In [None]:
import os
import numpy as np
# Feature roots to merge; each contains test/train/validation/<class>/<file>.npy.
input_folders = ["D:\\Underwater_Data\\CQT_46", "D:\\Underwater_Data\\GAMM_46","D:\\Underwater_Data\\MELSPECTROGRAM_46"]  # These contain test, train, validation
output_folder = "D:\\Underwater_Data\\COMBINED_THREE_46"  # New merged folder
os.makedirs(output_folder, exist_ok=True)
dataset_types = ["test", "train", "validation"]  # Subdirectories in each folder
classes = ["background", "cargo", "passengership", "tanker", "tug"]  # Class names
for dataset_type in dataset_types:
    dataset_path = os.path.join(output_folder, dataset_type)
    os.makedirs(dataset_path, exist_ok=True)  # Create test/train/validation in output folder
    for class_name in classes:
        class_path = os.path.join(dataset_path, class_name)
        os.makedirs(class_path, exist_ok=True)  # Create class folder
        # The first input folder decides which file names are merged; the same
        # name must exist in every other input folder or np.load raises.
        reference_folder = os.path.join(input_folders[0], dataset_type, class_name)
        feature_files = sorted(os.listdir(reference_folder))  # Sorted file list
        for feature_file in feature_files:
            features_list = [
                np.load(os.path.join(folder, dataset_type, class_name, feature_file))
                for folder in input_folders
            ]
            # Stack along a new leading axis: one channel per feature type.
            # np.stack requires all arrays to have identical shapes.
            combined_features = np.stack(features_list, axis=0)
            save_path = os.path.join(class_path, feature_file)
            np.save(save_path, combined_features)
# Report the folder that was actually written instead of a hard-coded name.
print(f"Feature files successfully combined and saved in '{output_folder}' folder!")

**Deep learning model design**

In [None]:
pip install torch torchvision numpy

In [None]:
pip install livelossplot

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision.models import resnet18
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os
from torchsummary import summary
import copy
from torch.optim import lr_scheduler
from livelossplot import PlotLosses
import json
import time

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

In [None]:
WEIGHTS_DIR ='Resnet'
#[MODEL]
MODEL_NAME = 'Combined'
#N_RESBLOCKS = 4

SEED = 11
LR = 0.001
BS = 8
EPOCHS =40
MOMENTUM = 0.8
LR_DECAY = 0.1
LR_PATIENCE =3

In [None]:
class MelSpectrogramDataset(Dataset):
    """Dataset of pre-computed ``.npy`` feature files laid out as ``data_dir/<class>/<file>.npy``.

    Class names are the sorted sub-directory names of ``data_dir`` and the
    label of a sample is the index of its class in that list.

    Attributes are set in ``__init__``:
        data_dir: Root directory that was scanned.
        classes: Sorted list of class (sub-directory) names.
        file_paths: Path of every ``.npy`` file found.
        labels: Class index for each entry of ``file_paths``.
    """

    def __init__(self, data_dir):
        """Scan ``data_dir`` and index every ``.npy`` file by class."""
        self.data_dir = data_dir
        # Filter BEFORE enumerating: a `.ipynb_checkpoints` folder or a stray
        # file must not take a label index, otherwise every real class is shifted.
        self.classes = sorted(
            name for name in os.listdir(data_dir)
            if name != ".ipynb_checkpoints" and os.path.isdir(os.path.join(data_dir, name))
        )
        self.file_paths = []
        self.labels = []

        for class_idx, class_name in enumerate(self.classes):
            class_path = os.path.join(data_dir, class_name)
            # Sorted so the sample order is the same on every run and platform.
            for file in sorted(os.listdir(class_path)):
                # Check if it's a valid `.npy` file
                if file.endswith(".npy"):
                    self.file_paths.append(os.path.join(class_path, file))
                    self.labels.append(class_idx)

    def __len__(self):
        """Return the number of indexed feature files."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            ``(features, label)`` where ``features`` is a float32 tensor and
            ``label`` a long tensor. 1-D and 2-D arrays get a leading channel
            axis; arrays that already have three or more dimensions (e.g.
            features stacked channel-first) are returned unchanged so they stay
            valid input for a 2-D convolution.
        """
        mel_spec = torch.tensor(np.load(self.file_paths[idx]), dtype=torch.float32)
        if mel_spec.dim() < 3:
            mel_spec = mel_spec.unsqueeze(0)  # add the channel axis
        label = torch.tensor(self.labels[idx], dtype=torch.long)

        return mel_spec, label

In [None]:
# Build the three datasets and their loaders.
# NOTE(review): the train split is read from an absolute D:\ path while validation and test
# use paths relative to the working directory under a differently named root - confirm that
# all three point at the same feature export.
train_dataset = MelSpectrogramDataset("D:\\Underwater_Data\\Combined_three_features_Ver2_3500\\train")
val_dataset = MelSpectrogramDataset("Combined_three_features/validation")
test_dataset=MelSpectrogramDataset("Combined_three_features/test")
# Only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)
# The test loader yields one sample per batch.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
# Lookups keyed by phase name, as consumed by train_model.
dataloaders = {'train': train_loader, 'validation': val_loader}
dataset_sizes = {'train': len(train_dataset), 'validation': len(val_dataset)}

In [None]:
backbone=resnet18(pretrained=True)

In [None]:
PRETRAINED = True
FINETUNE = False
if PRETRAINED and not FINETUNE:
    for param in backbone.parameters():
        param.requires_grad = False

In [None]:
class ResNetAudio(nn.Module):
    """Classifier that wraps a ResNet-style backbone with a new stem and head.

    The backbone passed in is modified in place: its ``conv1`` is replaced by a
    fresh 3-channel 3x3 stride-1 convolution and its ``fc`` by a three-layer
    MLP (``in_features`` -> 256 -> 128 -> ``num_classes``) with ReLU and 50%
    dropout between the layers.

    Args:
        backbone: Module exposing ``conv1`` and ``fc`` (with ``in_features``).
        num_classes: Number of output classes.
    """

    def __init__(self, backbone,num_classes):
        super().__init__()
        # Read the feature width before the original head is replaced.
        feature_dim = backbone.fc.in_features
        self.resnet = backbone

        # New stem: 3x3 / stride 1 convolution on 3-channel input.
        backbone.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)

        # New head: two hidden layers with dropout, then the class logits.
        head_layers = [
            nn.Linear(feature_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, num_classes),
        ]
        backbone.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the modified backbone and return the class logits."""
        logits = self.resnet(x)
        return logits

In [None]:
model=ResNetAudio(backbone,num_classes=5).to(device)  # 5 classes

In [None]:
summary(model,(3,224,224))

In [None]:
import torch.optim as optim
from torch.optim.lr_scheduler import ExponentialLR
from torchvision import models

In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
scheduler = ExponentialLR(optimizer, gamma=0.95)

# Training function
def train_model(model, criterion, optimizer, scheduler, dataloaders, dataset_sizes,batch_size,num_epochs=40):
    """Train ``model`` and restore the weights with the best validation accuracy.

    Every epoch runs a ``train`` phase followed by a ``validation`` phase. The
    per-epoch loss and accuracy of both phases are written to
    ``Resnet/SGD_training_history.json`` and the best weights to
    ``Resnet/SGD_best_model.pth``.

    Args:
        model: Network to train; batches are moved to the module-level ``device``.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer updating ``model``'s parameters.
        scheduler: Learning-rate scheduler, stepped once per epoch after
            validation. ``ReduceLROnPlateau`` receives the validation loss;
            any other scheduler is stepped without arguments.
        dataloaders: Mapping with ``'train'`` and ``'validation'`` iterables
            of ``(inputs, labels)`` batches.
        dataset_sizes: Mapping with the number of samples of each phase.
        batch_size: Kept for backward compatibility; the number of batches is
            now read from the loader itself.
        num_epochs: Number of epochs to run.

    Returns:
        ``model`` with the best validation-accuracy weights loaded.
    """
    since = time.time()
    # state_dict() returns references to the live tensors; deep-copy so that
    # later optimizer steps cannot overwrite the snapshot.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []
    # Only ReduceLROnPlateau takes a metric in step(); for other schedulers a
    # positional argument is interpreted as an epoch index.
    steps_on_metric = isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau)

    for epoch in range(num_epochs):
        for phase in ['train', 'validation']:
            if phase == 'train':
                model.train()
            else:
                model.eval()
            running_loss = 0.0
            running_corrects = 0
            # len() counts the final partial batch, unlike size // batch_size.
            n_batches = len(dataloaders[phase])
            for it, (inputs, labels) in enumerate(dataloaders[phase]):
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                # The loss is a batch mean: weight it by the batch size.
                running_loss += loss.item() * inputs.size(0)
                # Plain int accumulator, so an empty phase cannot fail on a tensor method.
                running_corrects += torch.sum(preds == labels.data).item()
                print(
                    f"Epoch: {epoch+1}/{num_epochs} Iter: {it+1}/{n_batches}",
                    end="\r",
                    flush=True,
                )
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]
            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
            if phase == 'train':
                train_losses.append(epoch_loss)
                train_accuracies.append(epoch_acc)
            if phase == 'validation':
                val_losses.append(epoch_loss)
                val_accuracies.append(epoch_acc)
                if steps_on_metric:
                    scheduler.step(epoch_loss)  # Reduce LR if val_loss does not improve
                else:
                    scheduler.step()
                # Read the rate from the optimizer; this works for every scheduler type.
                last_lr = [group['lr'] for group in optimizer.param_groups]
                print(f"Epoch {epoch+1}, Learning Rate: {last_lr}")
            if phase == 'validation' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
    history = {
        "train_loss": train_losses,
        "val_loss": val_losses,
        "train_acc": train_accuracies,
        "val_acc": val_accuracies
    }
    # Make sure the output folder exists so a finished run is not lost on save.
    os.makedirs("Resnet", exist_ok=True)
    with open("Resnet/SGD_training_history.json", "w") as f:
        json.dump(history, f)
    torch.save(best_model_wts, "Resnet/SGD_best_model.pth")
    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best validation accuracy: {best_acc:.4f}')
    model.load_state_dict(best_model_wts)
    return model

In [None]:
trained_model = train_model(model, criterion, optimizer,scheduler,dataloaders,dataset_sizes,batch_size=8,num_epochs=100)

In [None]:
with open("Resnet/SGD_training_history.json", "r") as f:
    history = json.load(f)

In [None]:
plt.figure(figsize=(5, 3))
plt.plot(history["train_loss"], label="Train Loss")
plt.plot(history["val_loss"], label="Validation Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Training & Validation Loss")
plt.legend()
plt.show()

# Plot training and validation accuracy
plt.figure(figsize=(5, 3))
plt.plot(history["train_acc"], label="Train Accuracy")
plt.plot(history["val_acc"], label="Validation Accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.title("Training & Validation Accuracy")
plt.legend()
plt.show()

In [None]:
backbone=resnet18(pretrained=True)

In [None]:
model=ResNetAudio(backbone,num_classes=5).to(device)  # 5 classes

In [None]:
model_weights ='Resnet/SGD_best_model.pth'
model.load_state_dict(torch.load(model_weights, map_location=device))
model.eval()

In [None]:
def test_model(device, model, test_loader, idx2class=None):
    y_pred_list = []
    y_true_list = []
    y_pred_prob=[]
    with torch.no_grad():
        for x_batch, y_batch in test_loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            y_test_pred = model(x_batch)
            _, y_pred_tag = torch.max(y_test_pred, dim = 1)
            y_pred_list.append(y_pred_tag.cpu().numpy())
            y_true_list.append(y_batch.cpu().numpy())
            y_pred_prob.append(y_test_pred.cpu().numpy())
    y_pred_list = [i[0] for i in y_pred_list]
    y_true_list = [i[0] for i in y_true_list]

    return y_true_list, y_pred_list,y_pred_prob

In [None]:
print('*' * 50)
print('Testing started:')
y_true, y_pred,y_pred_prob = test_model(device=device, model=model, test_loader=test_loader)
resulting_array = np.vstack(y_pred_prob)

In [None]:
class_labels=['Background', 'Cargo', 'Passengership', 'Tanker', 'Tug']

In [None]:
# Imported here because this cell runs before the sklearn import cell further down,
# and ConfusionMatrixDisplay is not imported anywhere else in the notebook.
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix

# Fix the label set so the matrix always has one row/column per display label,
# even when a class is absent from the test predictions.
cm = confusion_matrix(y_true, y_pred, labels=list(range(len(class_labels))))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)
disp.plot(cmap=plt.cm.Blues)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()

In [None]:
# Imported here because this cell runs before the sklearn import cell below.
from sklearn.metrics import classification_report

print(classification_report(y_true, y_pred))

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import roc_auc_score

In [None]:
from sklearn.preprocessing import label_binarize

# One indicator column per model output, so the columns of y_true_bin line up with
# the columns of resulting_array even if a class is missing from the test labels.
y_true_bin = label_binarize(y_true, classes=np.arange(resulting_array.shape[1]))
# With an indicator-matrix target, roc_auc_score averages the per-class AUCs
# (its default is the macro average).
roc_auc = roc_auc_score(y_true_bin, resulting_array, multi_class='ovr')
print("ROC AUC:", roc_auc)

In [None]:
from sklearn.metrics import auc, roc_curve

# One ROC curve per class, scored one-vs-rest on that class's output column.
for i in range(resulting_array.shape[1]):
    fpr, tpr, _ = roc_curve(y_true_bin[:, i], resulting_array[:, i])
    class_auc = auc(fpr, tpr)  # separate name: keeps the overall `roc_auc` value intact
    plt.plot(fpr, tpr, label=f"Class {class_labels[i]} (AUC = {class_auc:.2f})")

# Diagonal = chance level.
plt.plot([0, 1], [0, 1], color='navy', linestyle='--')
plt.xlabel('False Positive Rate',fontsize=10)
plt.ylabel('True Positive Rate',fontsize=10)
plt.title('ROC Curve',fontsize=10)
plt.legend(loc="lower right",fontsize=10)
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
#plt.savefig('/content/drive/MyDrive/font_images/nalvgg_100_ROC_AUC.png',dpi=600)

plt.show()

**Quantum Models**

In [None]:
!pip install pennylane

In [None]:
pip install pennylane pennylane-lightning pennylane-lightning-gpu --upgrad

In [None]:
#[QUANTUM]
QUANTUM = True
L2_NORM =True
N_QBITS = 5
N_VQC = 1
Q_DEPTH =1

In [None]:
import pennylane as qml
from pennylane import numpy as np

In [None]:
wires = 5
dev = qml.device("lightning.qubit", wires=wires)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
def H_layer(nqubits):
    """Apply a Hadamard gate to each of the wires 0 .. nqubits-1."""
    for wire in range(nqubits):
        qml.Hadamard(wires=wire)


def RY_layer(w):
    """Apply RY(w[i]) to wire i for every angle in w."""
    for wire, angle in enumerate(w):
        qml.RY(angle, wires=wire)


def RX_layer(w):
    """Apply RX(w[i]) to wire i for every angle in w."""
    for wire, angle in enumerate(w):
        qml.RX(angle, wires=wire)


def RZ_layer(w):
    """Apply RZ(w[i]) to wire i for every angle in w."""
    for wire, angle in enumerate(w):
        qml.RZ(angle, wires=wire)


def entangling_layer(nqubits):
    """Apply CNOTs between neighbouring wires in two passes.

    The first pass couples the pairs starting on even wires (0-1, 2-3, ...),
    the second the pairs starting on odd wires (1-2, 3-4, ...).
    """
    for first_control in (0, 1):
        for control in range(first_control, nqubits - 1, 2):
            qml.CNOT(wires=[control, control + 1])

In [None]:
@qml.qnode(dev, interface="torch")
def quantum_net(q_input_features, q_weights_flat, q_depth=4, n_qubits=4):
    """Variational circuit: Hadamards, RY data encoding, then q_depth trainable layers.

    Args:
        q_input_features: One rotation angle per wire, used for the RY encoding.
        q_weights_flat: Flat weight vector of length ``q_depth * 2 * n_qubits``.
        q_depth: Number of (RY rotation + entangling) layers.
        n_qubits: Number of wires that are prepared and measured.

    Returns:
        Tuple with the Pauli-Z expectation value of each wire.
    """
    layer_weights = q_weights_flat.reshape(q_depth, 2, n_qubits)

    # State preparation and data encoding.
    H_layer(n_qubits)
    RY_layer(q_input_features)

    # Trainable layers. Only slot 0 of each layer's weights is used; slot 1 was
    # read only by RZ layers that are currently disabled.
    for depth_idx in range(q_depth):
        RY_layer(layer_weights[depth_idx][0])
        entangling_layer(n_qubits)

    return tuple(qml.expval(qml.PauliZ(wire)) for wire in range(n_qubits))

In [None]:
class QuantumLayer(nn.Module):
    """Quantum mapping layer.

    Squashes each input row into the range (-pi/2, pi/2), evaluates the
    ``quantum_net`` circuit on it with this layer's trainable weights, and
    returns the expectation values as a ``(batch, n_qubits)`` float tensor.

    Args:
        q_depth: Number of variational layers of the circuit.
        n_qubits: Number of qubits, i.e. features per row in and out.
        q_delta: Standard deviation scale of the initial random weights.
    """
    def __init__(self,q_depth=4, n_qubits=5, q_delta=0.01):
        super().__init__()
        self.q_depth = q_depth
        self.n_qubits = n_qubits
        # Two weight slots per qubit and layer, matching the reshape in quantum_net.
        self.q_params = nn.Parameter(q_delta * torch.randn(q_depth *2*n_qubits))

    def forward(self, x):
        """Evaluate the circuit on every row of ``x``; the output lives on ``x``'s device."""
        q_in = torch.tanh(x) * np.pi / 2.0
        # The circuit is evaluated one sample at a time. Collect the rows and
        # stack once instead of re-allocating the output with torch.cat per row.
        rows = [
            torch.hstack(quantum_net(elem, self.q_params, self.q_depth, self.n_qubits)).float()
            for elem in q_in
        ]
        if not rows:
            # Empty batch: keep the (0, n_qubits) shape downstream layers expect.
            return torch.zeros(0, self.n_qubits, device=x.device)
        # Follow the input's device rather than a notebook-level global.
        return torch.stack(rows).to(x.device)

In [None]:
class QuantumImagenetTransferLearning(nn.Module):
    """Hybrid classifier: CNN feature extractor, parallel quantum layers, linear head.

    The backbone (minus its last child) extracts features, a linear layer
    reduces them to ``n_qubits * n_qlayers`` values, each chunk of ``n_qubits``
    values goes through its own ``QuantumLayer``, and the concatenated quantum
    outputs feed a linear classifier.

    Args:
        num_target_classes: Number of output classes.
        backbone: Feature network; its last child module is dropped.
        q_depth: Variational depth of every quantum layer.
        n_qubits: Qubits per quantum layer; must equal the module-level ``wires``.
        n_qlayers: Number of quantum layers run side by side.

    Raises:
        Exception: If ``n_qubits`` differs from ``wires``.
    """
    def __init__(self, num_target_classes, backbone,q_depth=3, n_qubits=4,n_qlayers = 4):
        super().__init__()
        if n_qubits != wires:
            print('[WARNING]: Number of qubits: {} must be the same number of wires: {}'.format(n_qubits, wires))
            raise Exception("Please set a correct number of wires in .py file or change the number of qubits")
        self.last_layer=n_qubits*n_qlayers
        self.num_target_classes = num_target_classes
        self.n_qlayers = n_qlayers
        # Take the feature width from the backbone's own head when it exposes
        # one; fall back to 512 otherwise (e.g. when `fc` was already replaced).
        head = getattr(backbone, 'fc', None)
        feature_dim = head.in_features if hasattr(head, 'in_features') else 512
        # Everything except the last child acts as the feature extractor.
        self.resnet = nn.Sequential(*list(backbone.children())[:-1])
        self.final_reduction=nn.Linear(feature_dim,self.last_layer)
        q_layers_list = [QuantumLayer(q_depth=q_depth, n_qubits=n_qubits)for _ in range(n_qlayers)]
        self.q_layers = torch.nn.Sequential(*q_layers_list)
        self.q_classifier = nn.Linear(self.last_layer, self.num_target_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_target_classes)``."""
        x = self.resnet(x)
        x = torch.flatten(x, 1)
        x=self.final_reduction(x)
        # One chunk of n_qubits features per quantum layer.
        features_split = torch.split(x, self.last_layer // self.n_qlayers, dim=1)
        q_features = [q_layer(feature) for q_layer, feature in zip(self.q_layers, features_split)]
        q_features = torch.cat(q_features, dim=1)
        y =  self.q_classifier(q_features)
        return y

In [None]:
backbone=resnet18(pretrained=True)

In [None]:
PRETRAINED = True
FINETUNE = False
if PRETRAINED and not FINETUNE:
    for param in backbone.parameters():
        param.requires_grad = False

In [None]:
model = QuantumImagenetTransferLearning(num_target_classes=5, backbone=backbone,q_depth=Q_DEPTH,
            n_qubits=N_QBITS,n_qlayers=N_VQC)
model = model.to(device)

In [None]:
summary(model,(3,224,224))

In [None]:
dataloaders = {'train': train_loader, 'validation': val_loader}
dataset_sizes = {'train': len(train_dataset), 'validation': len(val_dataset)}

In [None]:
device

In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3, factor=0.5, verbose=True)
trained_model = train_model(model, criterion, optimizer,scheduler,dataloaders,dataset_sizes,batch_size=8,num_epochs=50)

In [None]:
import os
import shutil
import random
# Copy a random subset of at most MAX_FILES_PER_CLASS feature files per class
# into a smaller training folder.
source_dir = "D:\\Underwater_Data\\Combined_three_features_Ver2\\train"
destination_dir = "D:\\Underwater_Data\\Combined_three_features_Ver2_3500\\train"
MAX_FILES_PER_CLASS = 3500
# Dedicated seeded generator: re-running the cell selects the same files.
subset_rng = random.Random(11)
os.makedirs(destination_dir, exist_ok=True)
for class_name in sorted(os.listdir(source_dir)):
    class_path = os.path.join(source_dir, class_name)
    new_class_path = os.path.join(destination_dir, class_name)
    if os.path.isdir(class_path):
        os.makedirs(new_class_path, exist_ok=True)
        # Sorted so the seeded draw does not depend on the directory listing order.
        npy_files = sorted(f for f in os.listdir(class_path) if f.endswith('.npy'))
        # random.sample raises ValueError when asked for more items than exist,
        # so cap the request for classes with fewer files.
        n_selected = min(MAX_FILES_PER_CLASS, len(npy_files))
        selected_files = subset_rng.sample(npy_files, n_selected)
        for file in selected_files:
            shutil.copy(os.path.join(class_path, file), os.path.join(new_class_path, file))

In [None]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
import pywt
import cv2

In [None]:
def extract_cwt_features(audio_path, sr=22050, scales=np.arange(1, 129), wavelet='cmor'):
    """Return the continuous wavelet transform coefficients of one audio file.

    Args:
        audio_path: Path of the audio file to load.
        sr: Sample rate the clip is resampled to on load. The original
            signature had no value here (a syntax error); 22050 matches the
            default of the notebook's other extractors.
        scales: Wavelet scales passed to ``pywt.cwt``.
        wavelet: Wavelet name passed to ``pywt.cwt``.

    Returns:
        The coefficient array returned by ``pywt.cwt``; the frequencies it
        also returns are discarded.
    """
    y, sr = librosa.load(audio_path, sr=sr)
    # sampling_period is the time between samples at the loaded sample rate.
    coefficients, _ = pywt.cwt(y, scales, wavelet, sampling_period=1/sr)
    return coefficients

In [None]:
cwt=extract_cwt_features("D:\\Underwater_Data\\Inc_2000_Exc_4000\\test\\tanker\\2.wav")

In [None]:
cwt.shape

In [None]:
import os

In [None]:
INPUT_FOLDER = 'D:\\Underwater_Data\\Inc_2000_Exc_4000'  # Replace with your folder path
OUTPUT_FOLDER = 'D:\\Underwater_Data\\STFT'  # Where features will be saved
os.makedirs(OUTPUT_FOLDER, exist_ok=True)
def process_dataset_stft(input_folder, output_folder):
    """Compute and save STFT features for every .wav file in a split/class tree.

    Walks ``input_folder/<split>/<class>/*.wav`` for the splits ``test``,
    ``train`` and ``validation``, runs ``extract_stft_features`` on each clip
    and stores the result as ``output_folder/<split>/<class>/<stem>.npy``.

    NOTE(review): this re-defines a function of the same name from an earlier
    cell. This cell follows the CWT helper, so a CWT export may have been
    intended here - confirm before relying on it.

    Args:
        input_folder: Root directory containing the three split folders.
        output_folder: Root directory that receives the feature files.

    Raises:
        FileNotFoundError: If one of the split folders is missing.
    """
    for split in ['test', 'train', 'validation']:  # Process each split
        split_path = os.path.join(input_folder, split)
        output_split_folder = os.path.join(output_folder, split)
        os.makedirs(output_split_folder, exist_ok=True)
        for class_name in os.listdir(split_path):  # Iterate over class folders
            class_path = os.path.join(split_path, class_name)
            if not os.path.isdir(class_path):  # Ensure it's a directory
                continue
            print(f"Processing {split}/{class_name}")
            output_class_folder = os.path.join(output_split_folder, class_name)
            os.makedirs(output_class_folder, exist_ok=True)
            for file_name in os.listdir(class_path):
                if not file_name.endswith('.wav'):
                    continue
                stft_spec = extract_stft_features(os.path.join(class_path, file_name))
                if stft_spec is None:
                    continue
                # splitext swaps only the trailing extension; str.replace would
                # rewrite every ".wav" occurring anywhere in the name.
                stem = os.path.splitext(file_name)[0]
                output_file_path = os.path.join(output_class_folder, stem + '.npy')
                np.save(output_file_path, stft_spec)
                print(f"Saved STFT spectrogram: {output_file_path}")

In [None]:
process_dataset_stft(INPUT_FOLDER, OUTPUT_FOLDER)