<a href="https://colab.research.google.com/github/Faisal-NSU/CSE465/blob/main/Making%20pickle%20obvi.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Utils


In [1]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import sys


def plot_images(images, nrows = None, ncols = None, figsize = None, ax = None,
                axis_style = 'on', bgr2rgb = True):
    '''
    Plots a given list of images and returns the axes they were drawn on
    
    Parameters
    -----------
    images: list or np.ndarray
            A list of images to plot
            
    nrows: int
           Number of rows to arrange images into
    
    ncols: int
           Number of columns to arrange images into
    
    figsize: tuple
             Plot size (width, height) in inches
           
    ax: axes.Axes object or array of axes.Axes objects
        The axis (or axes) to plot the images on, new ones will be created 
        with plt.subplots(nrows, ncols) if None
        
    axis_style: str
                'off' if axis are not to be displayed
    
    bgr2rgb: bool
             If truthy, every image is converted with cv2.COLOR_BGR2RGB 
             before being shown. The caller's images are never modified.
    
    Returns
    --------
    The ax argument if it was given, otherwise the axes object(s) returned 
    by plt.subplots
    
    Raises
    --------
    AttributeError: if images is not a list or numpy array
    
    ValueError: if there are fewer axes than images to draw
    '''
    # Validate the container before calling len() on it, so that a wrong
    # argument produces the descriptive message instead of a TypeError
    if not isinstance(images, (list, np.ndarray)):
        raise AttributeError("The images parameter should be a list of images, "
                             "if you want to plot a single image, pass it as a "
                             "list of single image")
    N = len(images)

    # Setting nrows and ncols as per parameter input
    if nrows is None:
        if ncols is None:
            nrows = N
            ncols = 1
        else:
            nrows = int(np.ceil(N / ncols))
    else:
        if ncols is None:
            ncols = int(np.ceil(N / nrows))
    
    if ax is None:
        _, ax = plt.subplots(nrows, ncols, figsize = figsize)
    
    # Flatten whatever we have (single Axes, 1d or 2d array of Axes) in
    # row-major order, so that one flat index works for every layout
    axes_flat = np.asarray(ax, dtype = object).ravel()
    n_cells = nrows * ncols
    
    if len(axes_flat) < min(N, n_cells):
        raise ValueError("Not enough axes to plot the images: got {} axes for "
                         "{} images".format(len(axes_flat), min(N, n_cells)))
    
    for idx, cell in enumerate(axes_flat[:n_cells]):
        if idx < N:
            img = images[idx]
            
            # Convert into a local variable, the input list is left untouched
            if bgr2rgb:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            
            cell.imshow(img)
        
        # Cells without an image still get the requested axis style
        cell.axis(axis_style)
    
    return ax


def drawProgressBar(current, total, string = '', barLen = 20):
    '''
    Draws a progress bar, something like [====>    ] 20%
    
    Parameters
    ------------
    current: int/float
             Current progress
    
    total: int/float
           The total from which the current progress is made, a total of 
           zero (or less) is drawn as a completed bar
             
    string: str
            Additional details to write along with progress
    
    barLen: int
            Length of progress bar
    '''
    # Guard against division by zero and keep the fraction inside [0, 1] so 
    # that an overshooting counter cannot make the bar wider than barLen
    if total <= 0:
        percent = 1.0
    else:
        percent = min(max(current / total, 0.0), 1.0)
    
    # The arrow head is dropped once the bar is full
    arrow = "" if percent == 1 else ">"
    bar = "=" * int(barLen * percent) + arrow
    
    # Carriage return, returns to the beginning of the line to overwrite it
    sys.stdout.write("\r")
    sys.stdout.write("Progress: [{:<{}}] {}/{}".format(bar, barLen, current, total) + string)
    sys.stdout.flush()

# Imports


In [2]:
import IPython.display as ipd
import librosa
import librosa.display

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Dataset Download


In [3]:
# Download the dataset archive from Google Drive using its file id
!gdown --id 1k-afEJdwz5Tf4-bsuKOJzP7xn-KQTlkM

Downloading...
From: https://drive.google.com/uc?id=1k-afEJdwz5Tf4-bsuKOJzP7xn-KQTlkM
To: /content/SUBESCO.zip
100% 1.65G/1.65G [00:14<00:00, 110MB/s]


In [4]:
import zipfile

# Archive written by the download cell above
dataset_directory = '/content/SUBESCO.zip'

# The context manager closes the archive even if the extraction fails part-way
with zipfile.ZipFile(dataset_directory, 'r') as zip_ref:
    zip_ref.extractall('/content')

In [5]:
import torch

# Use the GPU when one is visible, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Asking for the GPU name raises on CPU-only runtimes, so only query it when 
# CUDA was actually selected
torch.cuda.get_device_name(0) if device.type == 'cuda' else 'cpu'

'Tesla K80'

# Getting Features


## Define Functions

In [6]:
def get_fixed_audio_len(wav, sr, audio_len):
    '''
    Converts a time-series audio to a fixed length either by padding or trimming
    
    Parameters
    -------------
    wav: Audio time-series (1d numpy array)
    
    sr: Sample rate
    
    audio_len: The fixed audio length needed in seconds, may be fractional
    
    Returns
    -------------
    The audio time-series with exactly round(audio_len * sr) samples
    '''
    # Pad widths and slice bounds have to be integers, resolving the target 
    # length once also makes fractional durations (e.g. 2.5 seconds) work
    target_len = int(round(audio_len * sr))
    
    missing = target_len - wav.shape[0]
    if missing > 0:
        # A scalar pad width pads BOTH ends by that amount; rounding up may 
        # overshoot by one sample, which the trim below removes
        wav = np.pad(wav, int(np.ceil(missing / 2)), mode = 'reflect')
    
    return wav[:target_len]

def get_melspectrogram_db(wav, sr, audio_len = 4, n_fft = 2048, hop_length = 512, 
                          n_mels = 128, fmin = 20, fmax = 8300, top_db = 80):
    '''
    Decomposes the audio sample into different frequencies using fourier transform 
    and converts frequencies to mel scale and amplitude to decibel scale.
    
    Parameters
    -------------------
    wav: Audio time-series
    
    sr: Sample rate
    
    audio_len: The fixed length of audio in seconds
    
    n_fft: Length of the Fast Fourier Transform window
    
    hop_length: Number of samples between successive frames
    
    n_mels: Number of mel filters, which make the height of spectrogram image
    
    fmin: Lowest frequency
    
    fmax: Highest frequency
    
    top_db: Threshold of the decibel scale output
    
    Returns
    -------------------
    The mel spectrogram in decibels, as returned by librosa.power_to_db
    '''
    # Pad / trim first so that every spectrogram has the same number of frames
    wav = get_fixed_audio_len(wav, sr, audio_len)
    
    # The waveform is passed by keyword: recent librosa releases no longer 
    # accept it positionally, and y= works on older releases as well
    spec = librosa.feature.melspectrogram(y = wav, sr = sr, n_fft = n_fft, 
                                          hop_length = hop_length, n_mels = n_mels, 
                                          fmin = fmin, fmax = fmax)
    
    spec = librosa.power_to_db(spec, top_db = top_db)
    return spec

def spec_to_image(spec):
    '''
    Converts the spectrogram to an 8-bit grayscale image
    
    Parameters
    -------------
    spec: Spectrogram (numpy array)
    
    Returns
    -------------
    A np.uint8 array of the same shape with values scaled to [0, 255]; a 
    spectrogram with no variation at all maps to an all-zero image
    '''
    eps = 1e-6
    
    # Z-score normalization, eps keeps the division finite when std is zero
    mean = spec.mean()
    std = spec.std()
    spec_norm = (spec - mean) / (std + eps)
    spec_min, spec_max = spec_norm.min(), spec_norm.max()
    
    # A constant spectrogram (e.g. pure silence) has zero range, scaling it 
    # would compute 0 / 0 and push NaNs through the uint8 cast
    value_range = spec_max - spec_min
    if value_range == 0:
        return np.zeros(spec_norm.shape, dtype = np.uint8)
    
    # Min-max scaling
    spec_scaled = 255 * (spec_norm - spec_min) / value_range
    spec_scaled = spec_scaled.astype(np.uint8)
    
    return spec_scaled

## Splitting Val

# Training an Artificial Neural Network on time-series audio data

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data_utils

# Defining dataset batch size, passed as batch_size to the DataLoaders 
# created later in this notebook
BATCH_SIZE = 32

def load_data(df, id_col, label_col = None, data_path = './', audio_len = 4):
    '''
    Loads the audio time-series data
    
    Parameters
    -------------
    df: The dataframe that contains the file name and corresponding label
    
    id_col: The column name that contains the file name
    
    label_col: The column name that contains the label, labels are not 
               collected when None
    
    data_path: Prefix that is concatenated in front of every file name, so a 
               directory has to end with a path separator
    
    audio_len: The fixed length in seconds every audio is padded / trimmed to
    
    Returns
    -------------
    A tuple (audio_time_series, sample_rates, labels) of numpy arrays, labels 
    is empty when label_col is None
    
    Raises
    -------------
    ValueError: if not a single file could be loaded
    '''
    audio_time_series = []
    sample_rates = []
    labels = []
    
    tot = len(df)
    curr = 0
    
    for idx in df.index:
        # Honour the id_col argument instead of a hard-coded column name
        file_name = str(df[id_col][idx])
        
        try:
            wav, sr = librosa.load(data_path + file_name)
            wav = get_fixed_audio_len(wav, sr, audio_len)
            
            # Resolve the label BEFORE storing anything, a failing lookup must 
            # not leave the audio and label lists with different lengths
            label = None if label_col is None else df[label_col][idx]
        
        except KeyboardInterrupt:
            print('KeyBoardInterrupt')
            break
        
        except Exception as err:
            # Skip the unreadable file but say why it was skipped
            print("Couldn't read file", file_name, "-", repr(err))
            curr += 1
            continue
        
        audio_time_series.append(wav)
        sample_rates.append(sr)
        if label_col is not None:
            labels.append(label)
        
        curr += 1
        drawProgressBar(curr, tot, barLen = 40)
            
    print('\n')
    
    if not audio_time_series:
        raise ValueError("No audio file could be loaded from " + str(data_path))
    
    return np.stack(audio_time_series, axis = 0), np.array(sample_rates), np.array(labels)

In [8]:
# CSV files of each split, read with pd.read_csv and handed to load_data 
# (which is called with the 'filename' and 'label_id' columns)
TRAIN_CSV = '/content/SUBESCO/train/train.csv'
TEST_CSV = '/content/SUBESCO/test/test.csv'
VALID_CSV = '/content/SUBESCO/valid/valid.csv'

# Directory prefixes passed to load_data as data_path; the trailing slash 
# matters because the file name is appended by plain string concatenation
TRAIN_PATH = '/content/SUBESCO/train/'
TEST_PATH = '/content/SUBESCO/test/'
VALID_PATH = '/content/SUBESCO/valid/'


In [16]:
import time
import pandas as pd
# Wall-clock reference used to report how long loading takes
start_time = time.time()

# Each call returns (audio, sample_rates, labels); the sample rates are 
# discarded here
train_time_series, _, train_labels = load_data(pd.read_csv(TRAIN_CSV), 'filename', 'label_id', TRAIN_PATH)

val_time_series, _, val_labels = load_data(pd.read_csv(VALID_CSV), 'filename', 'label_id', VALID_PATH)

test_time_series, _, test_labels = load_data(pd.read_csv(TEST_CSV), 'filename', 'label_id', TEST_PATH)

# Elapsed time for loading all three splits
print("--- %s seconds ---" % (time.time() - start_time))



--- 536.0539813041687 seconds ---


In [9]:
# Colab only: mount Google Drive at /content/gdrive, the pickled dataset is 
# copied there in a later cell
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [18]:
#saving pickle
import bz2
import pickle

# Bundle every split into one dictionary so it can be restored in one step
dataDic = { 'train_time_series': train_time_series, 'train_labels': train_labels,
            'val_time_series': val_time_series, 'val_labels': val_labels,
            'test_time_series': test_time_series, 'test_labels': test_labels,}

filename = 'dataDic'

# The context manager flushes and closes the file even if pickling fails
with open(filename, 'wb') as outfile:
    pickle.dump(dataDic, outfile)

In [19]:
# Only unpickle files you created yourself, pickle.load can execute arbitrary 
# code. The context manager closes the file even if loading fails.
with open(filename, 'rb') as infile:
    dataDic = pickle.load(infile)

In [20]:
# Sanity check of the restored arrays: shape of the train / val / test audio
dataDic['train_time_series'].shape,dataDic['val_time_series'].shape,dataDic['test_time_series'].shape

((4900, 88200), (700, 88200), (1400, 88200))

In [21]:
# Copy the pickled dataset into the CSE465 folder of the mounted Google Drive
!cp -r '/content/dataDic' /content/gdrive/MyDrive/CSE465

In [42]:
# Every split goes through the same three steps: the numpy arrays restored 
# from the pickle become torch tensors (labels cast to int64), the tensors are 
# paired in a TensorDataset, and the dataset is batched by a shuffling loader

# Train split
train_labels = torch.from_numpy(dataDic['train_labels']).long()
train_time_series = data_utils.TensorDataset(
    torch.from_numpy(dataDic['train_time_series']), train_labels)
train_loader = data_utils.DataLoader(
    train_time_series, shuffle = True, batch_size = BATCH_SIZE)

# Validation split
val_labels = torch.from_numpy(dataDic['val_labels']).long()
val_time_series = data_utils.TensorDataset(
    torch.from_numpy(dataDic['val_time_series']), val_labels)
val_loader = data_utils.DataLoader(
    val_time_series, shuffle = True, batch_size = BATCH_SIZE)

# Test split
test_labels = torch.from_numpy(dataDic['test_labels']).long()
test_time_series = data_utils.TensorDataset(
    torch.from_numpy(dataDic['test_time_series']), test_labels)
test_loader = data_utils.DataLoader(
    test_time_series, shuffle = True, batch_size = BATCH_SIZE)


In [43]:
# Class index -> emotion name, the position in the list is the class index
classes = dict(enumerate([
    'ANGRY',
    'DISGUST',
    'FEAR',
    'HAPPY',
    'NEUTRAL',
    'SAD',
    'SURPRISE',
]))

In [45]:
# Defining training parameters
# NOTE(review): the optimizer cell further down passes lr=0.0001 as a literal, 
# LEARNING_RATE is not referenced anywhere else in the visible notebook
LEARNING_RATE = 0.001
# NOTE(review): EPOCHS is reassigned to 30 in the training cell before the 
# training loop runs
EPOCHS = 10
NUM_CLASSES = len(classes)

# Number of samples in one waveform: first item of the dataset, first element 
# of the (waveform, label) pair
N_FEATURES = train_time_series[0][0].shape[0]
N_FEATURES

88200

In [46]:
def evaluate(model, test_loader, loss_fn = None):
    '''
    Returns the accuracy and loss of a model
    
    Parameters
    --------------
    model: A PyTorch neural network
    
    test_loader: The test dataset in the form of torch DataLoader
    
    loss_fn: Callable computing the loss from (outputs, labels), 
             nn.CrossEntropyLoss() is used when None
    
    Returns
    --------------
    A tuple (accuracy, test_loss): the fraction of correctly classified 
    samples and the loss averaged over the batches
    '''
    # The loss used to be read from a global name that no cell defines, it is 
    # now an explicit (optional) argument
    if loss_fn is None:
        loss_fn = nn.CrossEntropyLoss()
    
    # Batches have to live on the same device as the weights of the model
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None
    
    model.eval()
    num_test_batches = len(test_loader)
    with torch.no_grad():
        correct = 0
        total = 0
        total_loss = 0
        for i, batch in enumerate(test_loader):
            inputs, labels = batch
            if target_device is not None:
                inputs, labels = inputs.to(target_device), labels.to(target_device)
            
            outputs = model(inputs)
            
            # Index of the highest score is the predicted class
            _, predicted = torch.max(outputs, dim = 1)
            loss = loss_fn(outputs, labels)
            total_loss += loss.item()

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # Printing progress
            drawProgressBar((i+1), num_test_batches)
        
        accuracy = correct/total
        test_loss = total_loss/num_test_batches
    
    return accuracy, test_loss

# Training a Convolutional neural network on spectrogram images

In [47]:
# Sample rate (Hz) assumed for every split when computing spectrograms.
# NOTE(review): consistent with the 88200-sample clips shown earlier at the 
# default audio_len of 4 seconds (88200 / 4 = 22050); load_data also returns 
# the real sample rates, but they are discarded where it is called
train_sr = 22050
val_sr = 22050
test_sr = 22050

def get_spec_loader(audio_time_series, sr, batch_size, shuffle = False):
    '''
    Returns data loader of spectrogram images
    
    Parameters
    ------------
    audio_time_series: Tensor Dataset with wav, label iterables
    
    sr: Sample rate
    
    batch_size: The batch size of data loader
    
    shuffle: Whether the returned data loader reshuffles the data on every pass
    
    Returns
    ------------
    A torch DataLoader yielding (image, label) batches, the images are float 
    tensors scaled to [0, 1] with a leading channel axis of size 1 and the 
    labels are int64
    '''
    audio_spec_img = []
    labels = []
    curr = 0
    tot = len(audio_time_series)

    for wav, label in audio_time_series:
        spec_img = spec_to_image(get_melspectrogram_db(wav.numpy(), sr))
        
        # Add the channel axis expected by the convolution layers
        audio_spec_img.append(np.expand_dims(spec_img, axis = 0))
        
        # Keep plain Python ints so that the label tensor is built as int64 
        # directly instead of going through float32
        labels.append(int(label))

        curr += 1
        drawProgressBar(curr, tot, barLen = 40)

    # Stack once in numpy and hand the buffer to torch; building a tensor from 
    # a Python list of arrays converts element by element and is very slow
    if audio_spec_img:
        audio_spec_img = torch.from_numpy(np.stack(audio_spec_img, axis = 0)).float()
    else:
        audio_spec_img = torch.empty(0)
    audio_spec_img = audio_spec_img / 255
    
    labels = torch.tensor(labels, dtype = torch.long)

    audio_spec_img = data_utils.TensorDataset(audio_spec_img, labels)
    audio_loader = data_utils.DataLoader(audio_spec_img, batch_size = batch_size, shuffle = shuffle)
    
    return audio_loader

In [48]:
# Getting the spectrogram image for each audio in the train, validation and 
# test sets. This rebinds train_loader / val_loader / test_loader from the 
# waveform loaders to spectrogram loaders, and relies on `time` having been 
# imported by the data loading cell.
start_time = time.time()
train_loader = get_spec_loader(train_time_series, train_sr, BATCH_SIZE, shuffle = True)
val_loader = get_spec_loader(val_time_series, val_sr, BATCH_SIZE, shuffle = True)
test_loader = get_spec_loader(test_time_series, test_sr, BATCH_SIZE, shuffle = True)
print("--- %s seconds ---" % (time.time() - start_time))



## CNN Model

In [49]:
class ConvNet(nn.Module):
    '''
    Convolutional classifier for single channel spectrogram images
    
    Three convolutional stages are followed by four fully connected layers, 
    the forward pass returns the raw class scores (no softmax is applied). 
    The first fully connected layer is sized for inputs of shape 
    (1, 128, 173), as traced in the layer comments below.
    
    Parameters
    -------------
    num_classes: Number of output classes, the module level NUM_CLASSES is 
                 used when None
    '''
    def __init__(self, num_classes = None):
        super(ConvNet, self).__init__()
        
        # Fall back to the notebook wide constant only when nothing is passed, 
        # so the network can also be built without that global
        if num_classes is None:
            num_classes = NUM_CLASSES
        
        # Layer 1, Input shape (1, 128, 173) ->  Output shape (8, 62, 84)
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels = 1, out_channels = 8, kernel_size = (5, 6)), 
            nn.ReLU(), 
            nn.MaxPool2d(kernel_size = (2, 2)))
        
        # Layer 2, Input shape (8, 62, 84) -> Output shape (16, 30, 41)
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels = 8, out_channels = 16, kernel_size = (3, 3)), 
            nn.ReLU(), 
            nn.MaxPool2d(kernel_size = (2, 2)))
        
        # Layer 3, Input shape (16, 30, 41) -> Output shape (64, 10, 15)
        self.layer3 = nn.Sequential(
            nn.Conv2d(in_channels = 16, out_channels = 32, kernel_size = (6, 7)), 
            nn.ReLU(), 
            nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = (6, 6)), 
            nn.ReLU(), 
            nn.MaxPool2d(kernel_size = (2, 2)))
        
        # Fully Connected layer 1, Input features 64 * 10 * 15 -> Output features 512
        self.fc1 = nn.Linear(in_features = 64 * 10 * 15, out_features = 512)
        
        # Fully Connected layer 2, Input features 512 -> Output features 256
        self.fc2 = nn.Linear(in_features = 512, out_features = 256)
        
        # Fully Connected layer 3, Input features 256 -> Output features 128
        self.fc3 = nn.Linear(in_features = 256, out_features = 128)
        
        # Fully Connected layer 4, Input features 128 -> Output features num_classes
        self.fc4 = nn.Linear(in_features = 128, out_features = num_classes)
        
    def forward(self, x):
        '''
        Returns the class scores, one row per sample of the batch x
        '''
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        
        # Flatten everything except the batch axis for the linear layers
        x = x.view(-1, self.num_flat_features(x))
        
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        
        return x
    
    def num_flat_features(self, x):
        '''
        Returns the number of features per sample, i.e. the product of all 
        dimensions of x except the first (batch) one
        '''
        size = x.size()[1:]
        n_features = 1
        for s in size:
            n_features = n_features * s
        
        return n_features

In [58]:
# Building the network and moving its weights to the selected device
# (the loss and the optimizer are defined in the next cell)
NUM_CLASSES = len(classes)
model = ConvNet().to(device)

In [66]:
from torch import nn
# NOTE(review): same value as the BATCH_SIZE defined earlier; the data loaders 
# are created in earlier cells, so this assignment does not change them
BATCH_SIZE = 32
# Cross-entropy on the raw class scores returned by the model
criterion = nn.CrossEntropyLoss()
# NOTE(review): the learning rate is a literal here, the LEARNING_RATE 
# constant (0.001) defined earlier is not used
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
# Learning rate schedule, currently disabled (lr_scheduler is not imported 
# anywhere in the visible notebook)
#step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1)

In [67]:
def train_loop(dataloader, model, loss_fn, optimizer):
    '''
    Trains the model for one pass over the data loader
    
    Parameters
    --------------
    dataloader: Training data in the form of torch DataLoader
    
    model: A PyTorch neural network
    
    loss_fn: Callable computing the loss from (predictions, labels)
    
    optimizer: The torch optimizer that updates the model parameters
    
    Returns
    --------------
    The model that was passed in, after its parameters were updated
    '''
    n_samples = len(dataloader.dataset)
    model.train()
    
    step = 0
    for features, targets in dataloader:
        # Batches have to be on the same (global) device as the model
        features = features.to(device)
        targets = targets.to(device)
        
        batch_loss = loss_fn(model(features), targets)

        # Backpropagation
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Report the loss every 50th batch
        if step % 50 == 0:
            seen = step * len(features)
            print(f"loss: {batch_loss.item():>7f}  [{seen:>5d}/{n_samples:>5d}]")
        
        step += 1
    
    return model
def test_loop(dataloader, model, loss_fn):
    '''
    Prints the accuracy and the average batch loss of the model on a data loader
    
    Parameters
    --------------
    dataloader: Evaluation data in the form of torch DataLoader
    
    model: A PyTorch neural network
    
    loss_fn: Callable computing the loss from (predictions, labels)
    '''
    model.eval()
    n_samples = len(dataloader.dataset)
    n_batches = len(dataloader)
    loss_sum = 0
    hits = 0

    # No gradients are needed while evaluating
    with torch.no_grad():
        for features, targets in dataloader:
            features, targets = features.to(device), targets.to(device)
            scores = model(features)
            
            loss_sum += loss_fn(scores, targets).item()
            
            # The class with the highest score is the prediction
            matches = (scores.argmax(1) == targets).type(torch.float)
            hits += matches.sum().item()

    # Loss is averaged per batch, accuracy per sample
    avg_loss = loss_sum / n_batches
    accuracy = hits / n_samples
    print(f"Test Error: \n Accuracy: {(100*accuracy):>0.1f}%, Avg loss: {avg_loss:>8f} \n")

In [68]:
import math
import time
# Overrides the EPOCHS = 10 set in the training parameters cell
EPOCHS = 30
start = time.time()
for t in range(EPOCHS):
    print(f"Epoch {t+1}\n-------------------------------")
    # train_loop returns the model object it was given, updated in place
    model = train_loop(train_loader, model, criterion, optimizer)
    # Accuracy and average loss on the validation loader after every epoch
    test_loop(val_loader, model, criterion)
# Elapsed wall-clock time in minutes, rounded up when printed
final = (time.time() - start)/60
print(f"Done for all {EPOCHS} epochs in {math.ceil(final)} minutes\n")

Epoch 1
-------------------------------
loss: 0.851983  [    0/ 4900]
loss: 0.469104  [ 1600/ 4900]
loss: 0.816316  [ 3200/ 4900]
loss: 0.833359  [ 4800/ 4900]
Test Error: 
 Accuracy: 44.3%, Avg loss: 2.133436 

Epoch 2
-------------------------------
loss: 0.912570  [    0/ 4900]
loss: 0.682967  [ 1600/ 4900]
loss: 0.753245  [ 3200/ 4900]
loss: 0.745694  [ 4800/ 4900]
Test Error: 
 Accuracy: 43.4%, Avg loss: 2.028360 

Epoch 3
-------------------------------
loss: 0.703111  [    0/ 4900]
loss: 0.940002  [ 1600/ 4900]
loss: 0.517040  [ 3200/ 4900]
loss: 0.599516  [ 4800/ 4900]
Test Error: 
 Accuracy: 45.4%, Avg loss: 2.402766 

Epoch 4
-------------------------------
loss: 0.878561  [    0/ 4900]
loss: 0.569081  [ 1600/ 4900]
loss: 0.558859  [ 3200/ 4900]
loss: 0.492488  [ 4800/ 4900]
Test Error: 
 Accuracy: 46.3%, Avg loss: 2.103584 

Epoch 5
-------------------------------
loss: 0.671384  [    0/ 4900]
loss: 0.616357  [ 1600/ 4900]
loss: 0.712727  [ 3200/ 4900]
loss: 0.846066  [ 4800

In [None]:
def test_single_epoch(model, dataloader, device):
  correct = 0
  size = len(dataloader.dataset)

  model.eval()
  for input,target in dataloader:
        input, target = input.to(device), target.to(device)
        # calculate loss
        prediction = model(input)
        correct += (prediction.argmax(1) == target).type(torch.float).sum().item()
  correct /= size
  print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}% \n")

In [None]:
# Accuracy of the trained model on the test loader
test_single_epoch(model,test_loader,device)