# Import Requirements

In [None]:
%load_ext autoreload
%autoreload 2
import os
# Manual toggle: leave as True to pin the CUDA environment variables below,
# which are set before torch is imported in the next cell.
if True:
    os.environ.update({
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": "0",
    })

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchaudio
import torchvision.transforms as transforms
import os
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import torchvision.models as models
import torch.nn.functional as F
import time
import librosa
from skopt import gp_minimize
from skopt.space import Real, Categorical
from skopt.utils import use_named_args

# Pick the compute device once: CUDA when torch reports it available, CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
import torch
import torch.nn as nn
from collections import defaultdict


class Add(nn.Module):
    """Element-wise sum of exactly two tensors with an optional activation.

    Args:
        activation: optional callable applied to the summed tensor. When
            ``None`` (the default) the raw sum is returned.

    Attributes:
        digital: set to ``True`` on construction. ``model_summary`` reads this
            flag to decide whether the module is counted under its plain class
            name or under a ``CART_``-prefixed name.
    """

    def __init__(self, activation=None):
        super().__init__()
        self.activation = activation
        self.digital = True

    def forward(self, x):
        """Return ``x[0] + x[1]``, passed through ``activation`` when one is set.

        Args:
            x: sequence of exactly two tensors; they are stacked along a new
                leading axis and summed, so their shapes must match.

        Raises:
            ValueError: if ``x`` does not contain exactly two tensors.
        """
        if len(x) != 2:
            # A bare ``raise`` here would surface as an unrelated RuntimeError;
            # raise an explicit error that carries the offending count.
            raise ValueError(f"Add expects exactly 2 tensors, got {len(x)}")

        total = torch.stack(x, dim=0).sum(dim=0)
        if self.activation is not None:
            return self.activation(total)
        return total
        
def model_summary(M, pt_191=False):
    """Count how often each module class occurs in ``M`` and report its size.

    Counting rules, applied to every entry of ``M.named_modules()``:

    * ``nn.Conv2d`` whose direct parent module has the class name ``CART`` is
      counted as ``CART_<ClassName>``; any other ``nn.Conv2d`` is counted
      under its own class name.
    * ``nn.ReLU`` / ``Add`` instances that carry a ``digital`` attribute are
      counted under their class name when ``digital`` is truthy and as
      ``CART_<ClassName>`` otherwise.
    * Everything else (including the root module) is counted under its class
      name.

    Args:
        M: the ``nn.Module`` to inspect.
        pt_191: accepted for backward compatibility; this implementation does
            not consult it.

    Returns:
        dict mapping class-name keys to usage counts, plus a ``'Parameters'``
        entry holding the total parameter count in millions as a string
        (for example ``'1.5M'``).
    """
    counts = defaultdict(int)

    for name, module in M.named_modules(remove_duplicate=True):
        key = module.__class__.__name__

        if isinstance(module, nn.Conv2d):
            # rpartition returns '' as the parent path of a top-level module,
            # and get_submodule('') is the root itself, so a Conv2d sitting
            # directly under a CART root is attributed to CART as well.
            parent = M.get_submodule(name.rpartition('.')[0])
            if parent.__class__.__name__ == 'CART':
                key = 'CART_' + key
        elif isinstance(module, (nn.ReLU, Add)) and hasattr(module, 'digital'):
            if not module.digital:
                key = 'CART_' + key

        counts[key] += 1

    n_params = sum(p.numel() for p in M.parameters())
    counts['Parameters'] = str(n_params / 1e6) + 'M'

    return dict(counts)

# Class AudioDataset

In [None]:
class AudioDataset(Dataset):
    """Fixed-length raw-waveform dataset read from a folder-per-class tree.

    ``directory`` is expected to contain one sub-directory per class. Class
    indices follow the sorted sub-directory names, and every regular file
    inside a class directory is treated as one audio clip. Hidden entries
    (names starting with ``.``) and non-directory entries at the top level
    are skipped.

    Args:
        directory: root folder holding the class sub-directories.
        desired_duration: clip length in seconds that every item is padded or
            truncated to.
        sample_rate: sampling rate (Hz) every clip is converted to.
    """

    def __init__(self, directory, desired_duration, sample_rate=44100):
        self.directory = directory
        self.desired_duration = desired_duration
        self.sample_rate = sample_rate

        self.classes = sorted(
            entry for entry in os.listdir(directory)
            if not entry.startswith('.')
            and os.path.isdir(os.path.join(directory, entry))
        )

        # List of (file path, class index) pairs, in a deterministic order.
        self.audio_files = []
        for label, class_name in enumerate(self.classes):
            class_path = os.path.join(directory, class_name)
            for file_name in sorted(os.listdir(class_path)):
                file_path = os.path.join(class_path, file_name)
                if file_name.startswith('.') or not os.path.isfile(file_path):
                    continue
                self.audio_files.append((file_path, label))

    @property
    def _target_length(self):
        """Number of samples every returned waveform has."""
        return int(round(self.desired_duration * self.sample_rate))

    def __len__(self):
        return len(self.audio_files)

    def __getitem__(self, idx):
        """Load clip ``idx`` and return ``(float32 waveform tensor, label)``."""
        audio_file, label = self.audio_files[idx]
        # sr=None keeps the file's native rate; it is handed on so that the
        # resampling step below knows the true source rate.
        waveform, orig_sr = librosa.load(audio_file, sr=None)
        waveform = self._process_waveform(waveform, orig_sr=orig_sr)
        return torch.as_tensor(waveform, dtype=torch.float32), label

    def _process_waveform(self, waveform, orig_sr=None):
        """Resample, then zero-pad or truncate ``waveform`` to the target length.

        Args:
            waveform: 1-D array of audio samples.
            orig_sr: sampling rate of ``waveform``. When ``None`` the samples
                are taken to already be at ``self.sample_rate`` and no
                resampling happens.

        Returns:
            1-D numpy array with exactly
            ``desired_duration * sample_rate`` samples.
        """
        waveform = np.asarray(waveform)

        if orig_sr is not None and orig_sr != self.sample_rate:
            waveform = librosa.resample(
                waveform, orig_sr=orig_sr, target_sr=self.sample_rate
            )

        target = self._target_length
        n_samples = len(waveform)
        if n_samples < target:
            # Zero-pad at the end so short clips keep their original timing.
            waveform = np.pad(waveform, (0, target - n_samples))
        elif n_samples > target:
            waveform = waveform[:target]

        return waveform

## Define data directories

In [None]:
# Root folders of the train / validation / test splits; each one is handed to
# AudioDataset below.
train_dir, validation_dir, test_dir = (
    '/noise_identification/data/train',
    '/noise_identification/data/validate',
    '/noise_identification/data/test',
)

## Load datasets

In [None]:
desired_duration = 6  # Duration in seconds

# One dataset per split, all cut/padded to the same clip length.
train_dataset = AudioDataset(train_dir, desired_duration=desired_duration)
validation_dataset = AudioDataset(validation_dir, desired_duration=desired_duration)
test_dataset = AudioDataset(test_dir, desired_duration=desired_duration)

# Only the training loader shuffles; validation and test keep dataset order.
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
)
validation_loader = DataLoader(
    validation_dataset,
    batch_size=8,
    shuffle=False,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=1,
    shuffle=False,
)

# Class RawNet

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

from torch.utils import data
from collections import OrderedDict
from torch.nn.parameter import Parameter


class Residual_block(nn.Module):
    """Pre-activation 1-D residual block followed by max-pooling.

    Data flow: ``[BN -> LeakyReLU(0.3)] -> Conv1d(k=3) -> BN -> LeakyReLU(0.3)
    -> Conv1d(k=3)``, added to the (optionally 1x1-projected) input, then
    ``MaxPool1d(3)``. The bracketed pre-activation is skipped when ``first``
    is true.

    Args:
        nb_filts: two-element sequence ``[in_channels, out_channels]``. When
            the two differ, the skip connection is projected with a 1x1
            convolution so it can be added to the main path.
        first: when true the block has no ``bn1`` and feeds its input straight
            into the first convolution.
    """

    def __init__(self, nb_filts, first=False):
        super().__init__()
        in_channels, out_channels = nb_filts[0], nb_filts[1]
        self.first = first

        if not self.first:
            self.bn1 = nn.BatchNorm1d(num_features=in_channels)
        self.lrelu = nn.LeakyReLU()
        self.lrelu_keras = nn.LeakyReLU(negative_slope=0.3)

        self.conv1 = nn.Conv1d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            padding=1,
            stride=1,
        )
        self.bn2 = nn.BatchNorm1d(num_features=out_channels)
        self.conv2 = nn.Conv1d(
            in_channels=out_channels,
            out_channels=out_channels,
            padding=1,
            kernel_size=3,
            stride=1,
        )

        self.downsample = in_channels != out_channels
        if self.downsample:
            # 1x1 projection that brings the skip path to out_channels.
            self.conv_downsample = nn.Conv1d(
                in_channels=in_channels,
                out_channels=out_channels,
                padding=0,
                kernel_size=1,
                stride=1,
            )
        self.mp = nn.MaxPool1d(3)

    def forward(self, x):
        """Apply the block to ``x``; the last axis shrinks by a factor of 3.

        The input is passed to ``Conv1d``/``BatchNorm1d`` directly, so it is
        expected in the layout those layers accept (channels on axis 1 for a
        batched input).
        """
        identity = x
        out = x
        if not self.first:
            out = self.lrelu_keras(self.bn1(out))

        # Feed the pre-activated tensor (not the raw input) into conv1 so that
        # bn1 actually takes part in the forward pass.
        out = self.conv1(out)
        out = self.lrelu_keras(self.bn2(out))
        out = self.conv2(out)

        if self.downsample:
            identity = self.conv_downsample(identity)

        out = out + identity
        return self.mp(out)

class RawNet(nn.Module):
    """RawNet-style classifier that works directly on raw waveforms.

    Pipeline: strided ``Conv1d`` front-end -> two stacks of ``Residual_block``
    (2 blocks at 128 channels, 4 blocks going 128 -> 256) -> single-layer GRU
    -> 1024-d fully-connected embedding -> linear classifier.

    Args:
        num_classes: number of output logits. Defaults to 3.
    """

    def __init__(self, num_classes=3):
        super().__init__()
        self.first_conv = nn.Conv1d(
            in_channels=1,
            out_channels=128,
            kernel_size=3,
            padding=0,
            stride=3,
        )
        self.first_bn = nn.BatchNorm1d(num_features=128)
        self.lrelu = nn.LeakyReLU()
        self.lrelu_keras = nn.LeakyReLU(negative_slope=0.3)

        self.block0 = self._make_layer(nb_blocks=2, nb_filts=[128, 128], first=True)
        self.block1 = self._make_layer(nb_blocks=4, nb_filts=[128, 256])

        self.bn_before_gru = nn.BatchNorm1d(num_features=256)
        self.gru = nn.GRU(
            input_size=256,
            hidden_size=1024,
            num_layers=1,
            batch_first=True,
        )
        self.fc1_gru = nn.Linear(in_features=1024, out_features=1024)
        self.fc2_gru = nn.Linear(in_features=1024, out_features=num_classes, bias=True)

    def forward(self, x, y=0, is_test=False):
        """Compute class logits (or the embedding) for a batch of waveforms.

        Args:
            x: waveform batch; a channel axis is inserted at dim 1 before the
                first convolution (presumably ``(batch, samples)`` -- confirm
                against the data loader).
            y: unused; kept so existing call sites stay valid.
            is_test: when true, return the ``fc1_gru`` embedding instead of
                the logits.

        Returns:
            ``fc2_gru`` logits, or the 1024-d embedding when ``is_test`` is
            true.
        """
        x = x.unsqueeze(1)
        x = self.lrelu_keras(self.first_bn(self.first_conv(x)))

        x = self.block0(x)
        x = self.block1(x)

        x = self.lrelu_keras(self.bn_before_gru(x))
        x = x.permute(0, 2, 1)  # (batch, filt, time) >> (batch, time, filt)
        x, _ = self.gru(x)
        code = self.fc1_gru(x[:, -1, :])  # keep only the last time step
        if is_test:
            return code

        # Rescale every embedding to an L2 norm of 10 before classification.
        code_norm = code.norm(p=2, dim=1, keepdim=True) / 10.
        code = torch.div(code, code_norm)
        # TODO: the hard-negative (h_loss) / bc_loss variants that would use
        # ``y`` are not implemented.
        return self.fc2_gru(code)

    def _make_layer(self, nb_blocks, nb_filts, first=False):
        """Stack ``nb_blocks`` residual blocks into an ``nn.Sequential``.

        Only the first block maps ``nb_filts[0] -> nb_filts[1]`` (and receives
        the ``first`` flag); the remaining blocks keep ``nb_filts[1]``
        channels. The caller's ``nb_filts`` list is left unmodified.
        """
        in_channels, out_channels = nb_filts[0], nb_filts[1]
        layers = []
        for i in range(nb_blocks):
            layers.append(Residual_block(
                nb_filts=[in_channels, out_channels],
                first=first and i == 0,
            ))
            in_channels = out_channels
        return nn.Sequential(*layers)

    def summary(self, input_size, batch_size=-1, device="cuda", print_fn=None):
        """Print a per-layer table of output shapes and parameter counts.

        A random batch of two samples is pushed through the model with forward
        hooks attached. The pass runs in eval mode without gradient tracking,
        and every module's training flag is restored afterwards, so BatchNorm
        running statistics are not touched.

        Args:
            input_size: shape of one sample without the batch axis, or a list
                of such shapes (one per positional forward argument).
            batch_size: value shown in the batch position of the printed
                shapes and used for the input-size estimate.
            device: ``'cuda'`` or ``'cpu'``. Only used to place the dummy
                input when the model has no parameters; otherwise the input is
                created on the device of the model's first parameter.
            print_fn: callable that receives each output line; defaults to
                ``print``.

        Raises:
            ValueError: if ``device`` is neither ``'cuda'`` nor ``'cpu'``.
        """
        if print_fn is None:
            print_fn = print

        device = device.lower()
        if device not in ("cuda", "cpu"):
            raise ValueError(
                "Input device is not valid, please specify 'cuda' or 'cpu'"
            )

        model = self
        records = OrderedDict()
        hooks = []

        def hook(module, inputs, output):
            class_name = str(module.__class__).split(".")[-1].split("'")[0]
            key = "%s-%i" % (class_name, len(records) + 1)

            entry = OrderedDict()
            entry["input_shape"] = list(inputs[0].size())
            entry["input_shape"][0] = batch_size
            if isinstance(output, (list, tuple)):
                entry["output_shape"] = [
                    [-1] + list(o.size())[1:] for o in output
                ]
            else:
                entry["output_shape"] = list(output.size())
                if entry["output_shape"]:
                    entry["output_shape"][0] = batch_size

            # Count the parameters owned directly by this module; this also
            # covers layers such as GRU whose tensors are not called
            # ``weight`` / ``bias``.
            own_params = list(module.parameters(recurse=False))
            entry["nb_params"] = sum(p.numel() for p in own_params)
            entry["trainable_params"] = sum(
                p.numel() for p in own_params if p.requires_grad
            )
            records[key] = entry

        def register_hook(module):
            # Containers and the model itself would only duplicate rows.
            if isinstance(module, (nn.Sequential, nn.ModuleList)) or module is model:
                return
            hooks.append(module.register_forward_hook(hook))

        if isinstance(input_size, tuple):
            input_size = [input_size]

        first_param = next(model.parameters(), None)
        if first_param is not None:
            target_device = first_param.device
        elif device == "cuda" and torch.cuda.is_available():
            target_device = torch.device("cuda")
        else:
            target_device = torch.device("cpu")
        x = [torch.rand(2, *in_size, device=target_device) for in_size in input_size]

        training_flags = [(m, m.training) for m in model.modules()]
        model.eval()
        try:
            model.apply(register_hook)
            with torch.no_grad():
                model(*x)
        finally:
            for h in hooks:
                h.remove()
            for m, flag in training_flags:
                m.training = flag

        print_fn("----------------------------------------------------------------")
        line_new = "{:>20}  {:>25} {:>15}".format("Layer (type)", "Output Shape", "Param #")
        print_fn(line_new)
        print_fn("================================================================")
        total_params = 0
        total_output = 0
        trainable_params = 0
        for layer, info in records.items():
            line_new = "{:>20}  {:>25} {:>15}".format(
                layer,
                str(info["output_shape"]),
                "{0:,}".format(info["nb_params"]),
            )
            total_params += info["nb_params"]
            trainable_params += info["trainable_params"]

            # Multi-output layers store one shape per output; add them up.
            shape = info["output_shape"]
            shapes = shape if shape and isinstance(shape[0], list) else [shape]
            total_output += sum(abs(int(np.prod(s))) for s in shapes)
            print_fn(line_new)

        # assume 4 bytes/number (float on cuda).
        total_input_size = abs(np.prod(input_size) * batch_size * 4. / (1024 ** 2.))
        total_output_size = abs(2. * total_output * 4. / (1024 ** 2.))  # x2 for gradients
        total_params_size = abs(total_params * 4. / (1024 ** 2.))
        total_size = total_params_size + total_output_size + total_input_size

        print_fn("================================================================")
        print_fn("Total params: {0:,}".format(total_params))
        print_fn("Trainable params: {0:,}".format(trainable_params))
        print_fn("Non-trainable params: {0:,}".format(total_params - trainable_params))
        print_fn("----------------------------------------------------------------")
        print_fn("Input size (MB): %0.2f" % total_input_size)
        print_fn("Forward/backward pass size (MB): %0.2f" % total_output_size)
        print_fn("Params size (MB): %0.2f" % total_params_size)
        print_fn("Estimated Total Size (MB): %0.2f" % total_size)
        print_fn("----------------------------------------------------------------")
        return

In [None]:
# Function to calculate the number of parameters in the model
def count_parameters(model):
    """Return the number of elements in the model's trainable parameters."""
    n_trainable = 0
    for param in model.parameters():
        if param.requires_grad:
            n_trainable += param.numel()
    return n_trainable

# Train Function

In [None]:
def _run_epoch(model, loader, criterion, optimizer=None):
    """Run one pass over ``loader``: train when ``optimizer`` is given, else evaluate.

    Returns:
        ``(mean per-sample loss, accuracy)`` over the samples seen.

    Raises:
        ValueError: if the loader yields no samples.
    """
    training = optimizer is not None
    model.train(training)
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            if training:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()
            # Weight the batch loss by batch size so the epoch mean is per sample.
            loss_sum += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            n_seen += labels.size(0)
            n_correct += (predicted == labels).sum().item()

    if n_seen == 0:
        raise ValueError("data loader produced no samples")
    return loss_sum / n_seen, n_correct / n_seen


def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10,
                state_dict_path='Rawnet.pth', model_path='Rawnet.pt'):
    """Train ``model``, validate after every epoch, plot the curves and save it.

    Args:
        model: network to train; it is expected to live on the module-level
            ``device`` that the batches are moved to.
        train_loader: loader yielding ``(inputs, labels)`` training batches.
        val_loader: loader yielding ``(inputs, labels)`` validation batches.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per training batch.
        num_epochs: number of passes over ``train_loader``.
        state_dict_path: where ``model.state_dict()`` is saved; ``None`` skips
            it.
        model_path: where the whole pickled model is saved; ``None`` skips it.

    Returns:
        The trained ``model`` (the same object that was passed in).
    """
    train_loss_history = []
    train_acc_history = []
    val_loss_history = []
    val_acc_history = []

    start_time = time.time()

    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs}")

        epoch_train_loss, epoch_train_acc = _run_epoch(
            model, train_loader, criterion, optimizer
        )
        train_loss_history.append(epoch_train_loss)
        train_acc_history.append(epoch_train_acc)
        print(f"Train Loss: {epoch_train_loss:.4f}, Train Accuracy: {epoch_train_acc:.4f}")

        epoch_val_loss, epoch_val_acc = _run_epoch(model, val_loader, criterion)
        val_loss_history.append(epoch_val_loss)
        val_acc_history.append(epoch_val_acc)
        print(f"Validation Loss: {epoch_val_loss:.4f}, Validation Accuracy: {epoch_val_acc:.4f}")

    training_time = time.time() - start_time
    print(f"Training Time: {training_time:.2f} seconds")

    # Plot all four curves in one 2x2 figure.
    epochs = range(1, num_epochs + 1)
    panels = [
        (train_loss_history, 'Train Loss', 'Loss', 'Training Loss'),
        (val_loss_history, 'Validation Loss', 'Loss', 'Validation Loss'),
        (train_acc_history, 'Train Accuracy', 'Accuracy', 'Training Accuracy'),
        (val_acc_history, 'Validation Accuracy', 'Accuracy', 'Validation Accuracy'),
    ]
    plt.figure(figsize=(10, 8))
    for position, (series, label, ylabel, title) in enumerate(panels, start=1):
        plt.subplot(2, 2, position)
        plt.plot(epochs, series, label=label)
        plt.xlabel('Epoch')
        plt.ylabel(ylabel)
        plt.legend()
        plt.title(title)
    plt.tight_layout()
    plt.show()

    # Model size in MiB from the actual byte width of each parameter tensor.
    model_size_mb = sum(
        p.numel() * p.element_size() for p in model.parameters()
    ) / (1024 * 1024)
    num_parameters = count_parameters(model)
    print(f"Model Size: {model_size_mb:.2f} MB")
    print(f"Number of Parameters: {num_parameters}")

    # Save the trained model
    if state_dict_path is not None:
        torch.save(model.state_dict(), state_dict_path)
    if model_path is not None:
        # NOTE: this pickles the whole module, so loading it back requires the
        # model classes to be importable under the same names.
        torch.save(model, model_path)

    return model


## Test function

In [None]:

def evaluate_model(model, test_loader, class_names=None):
    """Evaluate ``model`` on ``test_loader`` and print/plot the usual metrics.

    Prints the inference time, accuracy, confusion matrix and classification
    report, then shows the confusion matrix and the report as heatmaps.

    Args:
        model: trained network; it is expected to live on the module-level
            ``device`` that the inputs are moved to.
        test_loader: loader yielding ``(inputs, labels)`` batches.
        class_names: display names indexed by class id. Defaults to
            ``test_loader.dataset.classes`` when the dataset has that
            attribute; otherwise the plots and report fall back to numeric
            labels.

    Raises:
        ValueError: if ``test_loader`` yields no samples.
    """
    if class_names is None:
        class_names = getattr(test_loader.dataset, 'classes', None)
    # Pin the label set so every class gets a row/column even when it is
    # missing from this split's targets or predictions.
    label_ids = None if class_names is None else list(range(len(class_names)))

    model.eval()
    y_true = []
    y_pred = []
    inference_start_time = time.time()

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs.to(device))
            _, predicted = torch.max(outputs, 1)
            y_true.extend(labels.tolist())
            y_pred.extend(predicted.cpu().tolist())

    inference_time = time.time() - inference_start_time
    print(f"Inference Time: {inference_time:.4f} seconds")

    if not y_true:
        raise ValueError("test_loader produced no samples")

    correct = sum(truth == guess for truth, guess in zip(y_true, y_pred))
    test_accuracy = correct / len(y_true)
    print('Test Accuracy:', test_accuracy)

    # Confusion matrix
    conf_matrix = confusion_matrix(y_true, y_pred, labels=label_ids)
    print("Confusion Matrix:")
    print(conf_matrix)

    # Classification report
    print("Classification Report:")
    print(classification_report(y_true, y_pred, labels=label_ids, target_names=class_names))

    # Plot confusion matrix
    tick_labels = class_names if class_names is not None else 'auto'
    plt.figure(figsize=(8, 6))
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
                xticklabels=tick_labels, yticklabels=tick_labels)
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.title('Confusion Matrix (Test)')
    plt.show()

    # Plot classification report; transpose so rows are classes and columns
    # are metrics, matching the axis labels below.
    report = classification_report(
        y_true, y_pred, labels=label_ids, target_names=class_names, output_dict=True
    )
    report_frame = pd.DataFrame.from_dict(report).T
    plt.figure(figsize=(8, 6))
    sns.heatmap(report_frame, annot=True, cmap='Blues')
    plt.xlabel('Metrics')
    plt.ylabel('Classes')
    plt.title('Classification Report (Test)')
    plt.show()

# Bayesian optimization

In [None]:
def validate_model(model, val_loader):
    """Return the fraction of ``val_loader`` samples that ``model`` classifies correctly.

    The model is switched to eval mode and run without gradient tracking;
    batches are moved to the module-level ``device`` first.
    """
    model.eval()
    n_hits = 0
    n_samples = 0
    with torch.no_grad():
        for batch, targets in val_loader:
            batch = batch.to(device)
            targets = targets.to(device)
            guesses = torch.max(model(batch), 1)[1]
            n_samples += targets.size(0)
            n_hits += (guesses == targets).sum().item()
    return n_hits / n_samples

In [None]:
# Define hyperparameter space for Bayesian optimization.
# The learning-rate range spans four orders of magnitude, so it is sampled on
# a log scale; a uniform prior would draw about 90% of its samples from
# [1e-3, 1e-2] and almost never try the small rates.
search_space = [
    Real(1e-6, 1e-2, prior='log-uniform', name='learning_rate')
]
sample_rate= 44100

# Each optimisation trial appends one line with its settings and score here.
results_file = "optimization_results_rawnet.txt"

# Perform Bayesian optimization
@use_named_args(search_space)
def optimize_model(learning_rate):
    """Objective for ``gp_minimize``: train a fresh RawNet and score it.

    A new model is trained for 50 epochs with Adam at ``learning_rate`` and a
    fixed weight decay; the trial's settings and validation accuracy are
    printed and appended to ``results_file``.

    Returns:
        The negated validation accuracy, because ``gp_minimize`` minimises.
    """
    weight_decay = 0.0001

    net = RawNet().to(device)
    loss_fn = torch.nn.CrossEntropyLoss()
    adam = torch.optim.Adam(
        net.parameters(),
        lr=learning_rate,
        weight_decay=weight_decay,
    )

    fitted = train_model(net, train_loader, validation_loader, loss_fn, adam, num_epochs=50)
    val_accuracy = validate_model(fitted, validation_loader)

    trial_line = f"Learning Rate: {learning_rate}, weight_decay: {weight_decay}, Validation Accuracy: {val_accuracy}"
    print(trial_line)
    with open(results_file, 'a') as f:
        f.write(trial_line + "\n")

    return -val_accuracy

# Run the search: 15 objective evaluations with a fixed seed.
res_gp = gp_minimize(
    optimize_model,
    search_space,
    n_calls=15,
    random_state=1016,
)

# res_gp.x holds the best value found for each dimension, in search-space order.
best_params = {'learning_rate': res_gp.x[0]}
print("Best hyperparameters:", best_params)

# The objective returns the negated accuracy, so flip the sign back.
best_accuracy = -res_gp.fun
print("Best accuracy:", best_accuracy)

# Create the model

In [None]:
# Build a fresh RawNet, move it to the selected device and print its module tree.
model = RawNet().to(device)
print(model)

In [None]:
# Tally the module classes used by the model plus its parameter count (returns a dict).
model_summary(model)

# Initialize model, criterion, and optimizer

In [None]:
# Loss and optimizer for the final run; the learning rate is the best value
# found by the Bayesian search (first entry of res_gp.x).
weight_decay = 0.0001
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    model.parameters(),
    weight_decay=weight_decay,
    lr=res_gp.x[0],
)


# Train the model

In [None]:
# Final 50-epoch training run on the model and optimizer created above.
train_model(model, train_loader, validation_loader, criterion, optimizer, num_epochs=50)

# Evaluate the model

In [None]:
# Report accuracy, confusion matrix and classification report on the test split.
evaluate_model(model, test_loader)