In [None]:
!pip install snntorch

In [1]:
# imports
import snntorch as snn
from snntorch import surrogate
from snntorch import backprop
from snntorch import functional as SF
from snntorch import utils
from snntorch import spikeplot as splt

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR

import matplotlib.pyplot as plt
import numpy as np
import itertools

  from snntorch import backprop


In [2]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# importing sound classification models from torchaudio
import torchaudio

import matplotlib.pyplot as plt
import tqdm

In [None]:
# List the available CUDA devices.
# At most MAX_GPUS devices are used, matching the two-GPU setup this notebook
# was written for.
MAX_GPUS = 2
gpu_count = torch.cuda.device_count()

print("Available devices:")
print(gpu_count)
print("List of devices:")
# Only query the devices that actually exist: asking for the name of a missing
# device raises an error on machines with fewer than two GPUs.
for gpu_index in range(min(gpu_count, MAX_GPUS)):
    print(torch.cuda.device(gpu_index))
    print(torch.cuda.get_device_name(gpu_index))

# Device indices later passed to nn.DataParallel ([0, 1] on a two-GPU machine).
device_ids = list(range(min(gpu_count, MAX_GPUS)))

In [None]:
# Expose GPUs 0 and 1 to CUDA for this process (to use both GPUs).
# Single-device alternative, currently disabled:
#   device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#   print(device)
# NOTE(review): this is set after the previous cell already queried CUDA --
# confirm the variable still takes effect at this point.
os.environ.update(CUDA_VISIBLE_DEVICES="0,1")

## Dataset

In [None]:
# Display the entries of the Kaggle input directory
os.listdir('/kaggle/input/')

In [None]:
# Dataset location.
# Layout on disk:
#   dataset_folder -> class folder -> segmented sub-folder -> audio_file.wav
dataset_folder = '/kaggle/input/TrainSet'

# One directory of segmented audio files per class
animals_folder = f'{dataset_folder}/Animals/animals_segmented'
music_folder = f'{dataset_folder}/Music/instruments_segmented'
sot_folder = f'{dataset_folder}/SoT/sound_of_things_segmented'

print(animals_folder)

In [None]:
# Folder holding the audio files of each class (insertion order = class order)
class_folders = {
    'animals': animals_folder,
    'music': music_folder,
    'sot': sot_folder,
}

# key: class name, value: list of full paths of the files found in its folder
data_paths = {
    label: [os.path.join(folder, entry) for entry in os.listdir(folder)]
    for label, folder in class_folders.items()
}

# Integer index assigned to each class name (despite the name, it covers all classes)
animals_dict = dict(animals=0, music=1, sot=2)

# Number of files per class, followed by the overall total
class_counts = [len(data_paths[label]) for label in ('animals', 'music', 'sot')]
for count in class_counts:
    print(count)
print("Total number of files: ", sum(class_counts))

print(data_paths['animals'][:5])

In [None]:
# Class names -> tensor of class indices
def name_tuple_to_float_tensor(tuple):
    """Convert a sequence of class names into a float32 tensor of class indices.

    Every name is looked up in the module-level ``animals_dict``; a name that
    is not one of its keys raises ``KeyError``.
    """
    class_indices = [animals_dict[name] for name in tuple]
    return torch.tensor(class_indices, dtype=torch.float32)

In [None]:
import torch
from torch.utils.data import Dataset
num_classes = 3
class AudioDataset(Dataset):
    """Dataset of audio files grouped by class.

    Args:
        data_paths: dict mapping a class name to the list of audio file paths
            belonging to that class.
        transform: optional callable applied to the loaded waveform (for
            example an MFCC transform). When ``None`` the raw waveform is used.

    Each item is ``(features, sample_rate, class_name)``. When the
    ``printPath`` attribute is set to ``True`` the file path is appended as a
    fourth element.
    """

    def __init__(self, data_paths, transform=None):
        self.data_paths = data_paths
        self.classes = list(data_paths.keys())
        # Flat list of every file, class after class, in ``self.classes`` order.
        self.files = sum([data_paths[cls] for cls in self.classes], [])
        self.transform = transform
        self.printPath = False

    def __len__(self):
        return len(self.files)

    def _locate(self, idx):
        """Resolve a flat index into ``(file_path, class_name)``.

        Negative indices count from the end, as for a list.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        total = len(self)
        if idx < 0:
            # Without this, a negative index would satisfy ``idx < len(...)``
            # for the first class and silently return one of its files.
            idx += total
        if not 0 <= idx < total:
            raise IndexError('Index out of range')

        # Walk the classes in order, consuming each class's share of the index.
        for cls in self.classes:
            class_files = self.data_paths[cls]
            if idx < len(class_files):
                return class_files[idx], cls
            idx -= len(class_files)
        raise IndexError('Index out of range')

    def __getitem__(self, idx):
        file, classe = self._locate(idx)

        waveform, sample_rate = torchaudio.load(file)
        # Fall back to the raw waveform when no transform is configured, so
        # that the averaging below always has an input.
        mfcc = self.transform(waveform) if self.transform is not None else waveform

        # Average over dim 0 (presumably the audio-channel axis -- TODO confirm)
        # and detach the result from any autograd graph.
        mfcc = torch.mean(mfcc, dim=0).detach()

        if self.printPath:
            return mfcc, sample_rate, classe, file
        return mfcc, sample_rate, classe

In [None]:
# Import necessary libraries
from torchvision.transforms import Compose, RandomApply
n_mfcc_values = 20

# Keyword arguments shared by every MFCC transform below
mfcc_kwargs = dict(sample_rate=48000, n_mfcc=n_mfcc_values)

# Plain Mel-frequency cepstral coefficients (MFCC) transform.
# NOTE(review): this rebinds the name `transforms`, which the import cells
# bound to the torchvision transforms module.
transforms = torchaudio.transforms.MFCC(**mfcc_kwargs)

# Augmented pipeline: each augmentation is applied at random with the given
# probability, and MFCC extraction comes last.
# NOTE(review): frequency/time masking run on the input before the MFCC step --
# confirm this ordering is intended.
augmentation_steps = [
    # Pitch shift, applied with probability 0.4
    RandomApply([torchaudio.transforms.PitchShift(sample_rate=48000, n_steps=2)], p=0.4),
    # Frequency masking (SpecAugment), applied with probability 0.2
    RandomApply([torchaudio.transforms.FrequencyMasking(freq_mask_param=15)], p=0.2),
    # Time masking (SpecAugment), applied with probability 0.2
    RandomApply([torchaudio.transforms.TimeMasking(time_mask_param=35)], p=0.2),
]
transforms_2 = Compose(augmentation_steps + [torchaudio.transforms.MFCC(**mfcc_kwargs)])

# Sliding-window cepstral mean normalization (CMN) followed by MFCC.
# NOTE(review): the normalization runs before the MFCC step -- confirm the order.
cmn_step = torchaudio.transforms.SlidingWindowCmn(
    cmn_window=600, min_cmn_window=100, center=False, norm_vars=False)
transform_norm_and_MFFC = Compose([cmn_step, torchaudio.transforms.MFCC(**mfcc_kwargs)])

In [None]:
# One dataset per preprocessing pipeline, all built from the same file lists
dataset1, dataset2, dataset3 = (
    AudioDataset(data_paths, transform=pipeline)
    for pipeline in (transforms, transforms_2, transform_norm_and_MFFC)
)

# Inspect the first sample of the plain-MFCC dataset
mfcctensor, sample_rate, classe = dataset1[0]
print(mfcctensor.shape, sample_rate, classe)

print(mfcctensor)

In [None]:
import librosa.display
import matplotlib.pyplot as plt

# The dataset already averages over dim 0, so the whole tensor is plotted
mfcc_channel_1 = mfcctensor.detach().numpy()
print(mfcc_channel_1.shape)

# Draw the MFCC matrix with a time axis and a colour scale
fig, ax = plt.subplots(figsize=(10, 4))
mesh = librosa.display.specshow(mfcc_channel_1, x_axis='time', ax=ax)
fig.colorbar(mesh, ax=ax)
ax.set_title('MFCC')
fig.tight_layout()
plt.show()

In [None]:
import multiprocessing

# Number of CPU cores reported for this machine
num_cores = multiprocessing.cpu_count()
print('Number of CPU cores: {}'.format(num_cores))

In [None]:
# Create the dataloaders
BATCH_SIZE = 32
SPLIT_SEED = 42  # fixed seed so the train/val/test membership is the same on every run

# Split sizes: 80 % training, 15 % validation, remainder for testing.
# Only dataset1 (plain MFCC) is split; dataset2 and dataset3 are not used here.
dataset_size = len(dataset1)
train_size = int(0.8 * dataset_size)
val_size = int(0.15 * dataset_size)
test_size = dataset_size - train_size - val_size

# From here on every dataset item also carries its file path (4-tuple items)
dataset1.printPath = True
dataset2.printPath = True
dataset3.printPath = True

# A seeded generator makes the random split reproducible; without it a
# different set of files would be held out on every run.
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
    dataset1, [train_size, val_size, test_size], generator=split_generator)
print(len(train_dataset), len(val_dataset), len(test_dataset))
print(len(train_dataset) + len(val_dataset) + len(test_dataset))

# Only the training loader is shuffled; evaluation results do not depend on
# batch order, so the validation and test loaders keep a fixed order.
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=1, pin_memory=True)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=1, pin_memory=True)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=1, pin_memory=True)

In [None]:
# Pull one batch from the training loader to inspect its contents
dataiter = iter(train_dataloader)
first_batch = next(dataiter)
mfcctensor, sample_rate, classe, file = first_batch
print(mfcctensor.shape, sample_rate, classe)

## Models

In [None]:
# Tensor dtype and batch size made available to the model code
dtype = torch.float32  # torch.float is an alias of this dtype

# Same value as the one the dataloaders were built with
batch_size = BATCH_SIZE

In [None]:
# Simulation length and neuron settings
num_steps = 50  # how many times the network is called per input in forward_pass
beta = 0.5      # passed as `beta` to every snn.Leaky layer
spike_grad = surrogate.fast_sigmoid(slope=25)  # passed as `spike_grad` to every snn.Leaky layer

In [None]:
def forward_pass(net, num_steps, data):
    """Call ``net`` on the same input ``num_steps`` times and record its outputs.

    Args:
        net: network whose call returns ``(spikes, membrane_potential)``.
        num_steps: number of times the network is called.
        data: input batch, fed unchanged at every step.

    Returns:
        ``(spk_rec, mem_rec)``: the per-step spikes and membrane potentials,
        each stacked along a new leading (step) dimension.
    """
    utils.reset(net)  # resets hidden states for all LIF neurons in the network

    spk_rec = []
    mem_rec = []
    for _ in range(num_steps):
        spk_out, mem_out = net(data)
        spk_rec.append(spk_out)
        mem_rec.append(mem_out)

    # The stacked tensors stay on the device the network produced them on.
    # Forcing them to CUDA was a no-op on GPU and an error on CPU-only machines.
    return torch.stack(spk_rec), torch.stack(mem_rec)

In [None]:
def batch_accuracy(train_dataloader, net, num_steps):
    """Return the accuracy of ``net`` over every batch of a dataloader.

    Args:
        train_dataloader: loader yielding ``(data, sample_rate, class_names, path)``
            batches (any split can be passed, despite the parameter name).
        net: network to evaluate; it is switched to eval mode.
        num_steps: number of steps handed to ``forward_pass``.

    Returns:
        The per-batch ``SF.accuracy_rate`` values averaged with each batch
        weighted by its size.

    Raises:
        ZeroDivisionError: if the loader yields no batches.
    """
    net.eval()
    weighted_correct = 0
    seen = 0

    with torch.no_grad():
        for data, _, targets, _ in train_dataloader:
            # Move to GPU and add the channel dimension expected by the conv layers
            inputs = data.cuda().unsqueeze(1)
            labels = name_tuple_to_float_tensor(targets).cuda().long()
            spk_rec, _ = forward_pass(net, num_steps, inputs)

            batch_len = spk_rec.size(1)
            weighted_correct += SF.accuracy_rate(spk_rec, labels) * batch_len
            seen += batch_len

    return weighted_correct / seen

## Simple Model

In [None]:
class CSNN(nn.Module):
    """Convolutional spiking network with three output neurons.

    Structure: conv -> max-pool -> LIF, conv -> max-pool -> LIF,
    flatten -> linear -> LIF.

    The network is meant to be called once per time step (as ``forward_pass``
    does). Every LIF layer therefore keeps its own membrane potential between
    calls (``init_hidden=True``), and ``utils.reset(net)`` clears that state
    before a new sequence. Re-initialising the membranes inside ``forward``
    would discard the state at every step.
    """

    def __init__(self):
        super().__init__()

        # First convolutional layer: 1 input channel, 12 output channels, kernel size of 5
        self.conv1 = nn.Conv2d(1, 12, 5)
        # First leaky integrate-and-fire (LIF) neuron layer
        self.lif1 = snn.Leaky(beta=beta, spike_grad=spike_grad, init_hidden=True)
        # Second convolutional layer: 12 input channels, 64 output channels, kernel size of 3
        self.conv2 = nn.Conv2d(12, 64, 3)
        # Second LIF neuron layer
        self.lif2 = snn.Leaky(beta=beta, spike_grad=spike_grad, init_hidden=True)
        # Fully connected layer: 38208 input features, 3 output features.
        # The input feature count is tied to the input size -- TODO confirm it
        # matches the MFCC tensors fed to the network.
        self.fc1 = nn.Linear(38208, 3)
        # Output LIF layer; output=True makes it return (spikes, membrane potential)
        self.lif3 = snn.Leaky(beta=beta, spike_grad=spike_grad, init_hidden=True, output=True)

    def forward(self, x):
        """Advance the network by one time step.

        Args:
            x: input batch with a single channel dimension.

        Returns:
            ``(spk3, mem3)``: output spikes and membrane potential of the last
            LIF layer for this step.
        """
        # First stage: convolution, max pooling, LIF
        cur1 = F.max_pool2d(self.conv1(x), 2)
        spk1 = self.lif1(cur1)
        # Second stage: convolution, max pooling, LIF
        cur2 = F.max_pool2d(self.conv2(spk1), 2)
        spk2 = self.lif2(cur2)
        # Flatten per sample using the actual batch dimension, so a smaller
        # final batch works too, then apply the fully connected layer
        cur3 = self.fc1(spk2.flatten(start_dim=1))
        spk3, mem3 = self.lif3(cur3)

        return spk3, mem3

# Instantiate the network and move it to the GPU
net = CSNN().cuda()

In [None]:
# Initialize the network as a plain layer stack - better for using 2 GPUs.
# Each LIF layer keeps its own hidden state (init_hidden=True); the last one
# also returns its membrane potential (output=True).
# NOTE(review): 76544 is the flattened feature count for the input size used --
# confirm it matches the MFCC tensors.
layers = [
    nn.Conv2d(1, 12, 5),
    nn.MaxPool2d(2),
    snn.Leaky(beta=beta, spike_grad=spike_grad, init_hidden=True),
    nn.Conv2d(12, 64, 5),
    nn.MaxPool2d(2),
    snn.Leaky(beta=beta, spike_grad=spike_grad, init_hidden=True),
    nn.Flatten(),
    nn.Linear(76544, 3),
    snn.Leaky(beta=beta, spike_grad=spike_grad, init_hidden=True, output=True),
]

# NOTE(review): DataParallel replicates the module for each forward call --
# confirm the LIF hidden state persists across time steps on multiple GPUs.
net = nn.DataParallel(nn.Sequential(*layers), device_ids=device_ids).cuda()

In [None]:
# Loss applied to the recorded output spikes and the integer targets in the training loop
loss_fn = SF.ce_rate_loss()

In [None]:
# Clear the CUDA cache before training starts
torch.cuda.empty_cache()

In [None]:
optimizer = optim.Adam(net.parameters(), lr=1e-5, betas=(0.9, 0.999))
# Multiply the learning rate by 0.95 every 10 scheduler steps (stepped once per epoch)
scheduler = StepLR(optimizer, step_size=10, gamma=0.95)

num_epochs = 20
loss_hist = []       # training loss of every iteration
test_acc_hist = []   # accuracy on val_dataloader, recorded every 50 iterations
counter = 0          # global iteration counter

# Outer training loop
for epoch in tqdm.trange(num_epochs):
    # Training loop
    for data, _, targets, _ in train_dataloader:
        # Move to GPU and add the channel dimension expected by the conv layers
        data = data.cuda().unsqueeze(1)
        targets = name_tuple_to_float_tensor(targets).cuda().long()

        # forward pass (train mode is restored here after each evaluation below)
        net.train()
        spk_rec, _ = forward_pass(net, num_steps, data)

        # loss over the recorded output spikes
        loss_val = loss_fn(spk_rec, targets)

        # Gradient calculation + weight update
        optimizer.zero_grad()
        loss_val.backward()
        optimizer.step()

        # Store loss history for future plotting
        loss_hist.append(loss_val.item())

        # Periodic evaluation on the validation loader (printed as "Test Acc").
        # batch_accuracy switches to eval mode and disables gradients itself.
        if counter % 50 == 0:
            test_acc = batch_accuracy(val_dataloader, net, num_steps)
            print(f"Iteration {counter}, Test Acc: {test_acc * 100:.2f}%\n")
            test_acc_hist.append(test_acc.item())
        counter += 1

    # Step the scheduler once per epoch. Stepping it after every batch decays
    # the learning rate every 10 iterations and shrinks it to almost nothing
    # long before training ends.
    scheduler.step()

In [None]:
# Plot the accuracy history recorded during training.
# test_acc_hist holds one value per evaluation on val_dataloader, taken every
# 50 training iterations, so the x-axis counts evaluations, not epochs.
fig, ax = plt.subplots(facecolor="w")
ax.plot(test_acc_hist)
ax.set_title("Validation Set Accuracy")
ax.set_xlabel("Evaluation (every 50 iterations)")
ax.set_ylabel("Accuracy")
plt.show()

In [None]:
# Clear the CUDA cache again now that training is done
torch.cuda.empty_cache()

In [None]:
# Take the first batch of the test loader: features, sample rates, class names, file paths
dataiter = iter(test_dataloader)
data, sample_rate, classe, file = next(dataiter)
print(data.shape, sample_rate, classe, file)

# Move the features to the GPU and add the channel dimension
data = data.cuda().unsqueeze(1)

In [None]:
# Run the network on the current `data` batch for num_steps steps, keeping the
# recorded spikes and membrane potentials
spk_rec, mem_rec = forward_pass(net, num_steps, data)

In [None]:
from IPython.display import HTML

# Animated histogram of the output spike counts for one sample of the batch
idx = 0  # position of the sample inside the batch
labels = ['animals', 'music', 'sot']
print(spk_rec.shape)

fig, ax = plt.subplots(facecolor='w', figsize=(12, 7))
# plt.rcParams['animation.ffmpeg_path'] = 'C:\\path\\to\\your\\ffmpeg.exe'

# Spikes of the selected sample over all steps, moved to the CPU for plotting
sample_spikes = spk_rec[:, idx].detach().cpu()
anim = splt.spike_count(sample_spikes, fig, ax, labels=labels,
                        animate=True, interpolate=4)
# anim.save("spike_bar.mp4")
HTML(anim.to_html5_video())

In [None]:
# The sample inspected here is element `idx` of the test batch loaded above,
# so its label comes from that batch (`classe`). `targets` still holds the
# labels of the last training batch and does not describe this sample.
print(f"The target label is: {classe[idx]}")

# Read the audio file of the same sample
waveform, sample_rate = torchaudio.load(file[idx])
print(waveform.shape, sample_rate)

# Audio player widget for the loaded waveform
import IPython.display as ipd
ipd.Audio(waveform, rate=sample_rate)

In [None]:
# Confusion matrix
from sklearn.metrics import confusion_matrix
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

# Get the predictions for the test set
y_pred = []
y_true = []

net.eval()
with torch.no_grad():
    # `batch_names` avoids overwriting the `labels` list used for plotting
    for data, _, batch_names, _ in test_dataloader:
        data = data.cuda().unsqueeze(1)
        batch_targets = name_tuple_to_float_tensor(batch_names).long()
        # Decode the prediction as batch_accuracy does: reset the network
        # state, run all num_steps steps and pick the class with the highest
        # spike count. A single net(data) call only looks at one step and
        # starts from whatever state the previous call left behind.
        test_spk_rec, _ = forward_pass(net, num_steps, data)
        _, predicted = test_spk_rec.sum(dim=0).max(1)
        y_pred += predicted.tolist()
        y_true += batch_targets.tolist()

# Passing the label list keeps the matrix num_classes x num_classes even when
# a class never appears in the targets or predictions
class_ids = list(range(num_classes))
conf_matrix = confusion_matrix(y_true, y_pred, labels=class_ids)

# Create a dataframe from the confusion matrix
df_cm = pd.DataFrame(conf_matrix, index=class_ids, columns=class_ids)

plt.figure(figsize=(10, 7))
# fmt='d' prints the counts as plain integers instead of scientific notation
sns.heatmap(df_cm, annot=True, fmt='d', cmap='Blues')

# Print the classification report
from sklearn.metrics import classification_report
class_names = [f'Class {i}' for i in class_ids]
print(classification_report(y_true, y_pred, labels=class_ids, target_names=class_names))

In [None]:
# Save the model weights to 'csnn_simple_model.pth'.
# NOTE(review): `net` is the nn.DataParallel wrapper created above, so the saved
# keys are presumably prefixed with "module." -- confirm this matches how the
# checkpoint will be loaded.
torch.save(net.state_dict(), 'csnn_simple_model.pth')