## Google Colab, Drive Configuration & Imports


In [None]:
from google.colab import drive

import torch 
import torch.nn as nn
import torchvision
import torchvision.transforms as tf

import matplotlib.pyplot as plt
import matplotlib.image as img

from PIL import Image
import scipy.io as sio
import scipy

import os
import numpy
import numpy as np

#for loading and visualizing audio files
import librosa
import librosa.display

#to play audio
import IPython.display as ipd
import skimage.io

# Mount Google Drive at /content/drive (this only works inside Google Colab).
# NOTE(review): force_remount=True presumably forces a fresh mount when the
# drive is already mounted -- confirm against the google.colab docs.
drive.mount('/content/drive', force_remount=True)
# Project folders on the shared drive. Each path ends with '/' because the
# cells below build file names by plain string concatenation.
# data_path    -> root under which the 'SG_mat/' .mat files are written
# spec_path    -> root of the 'original/' and 'resized/' spectrogram PNGs
# results_path -> not used by the cells visible in this part of the notebook
data_path = '/content/drive/Shareddrives/DeepLearning/Projecte_Final/Data/'
spec_path = '/content/drive/Shareddrives/DeepLearning/Projecte_Final/Spectograms/'
results_path = '/content/drive/Shareddrives/DeepLearning/Projecte_Final/Results/'

Mounted at /content/drive


In [None]:
def pad_audio(aud_arr, size, axis = 1):
    """Pad ``aud_arr`` with ones along ``axis`` until it has ``size`` entries.

    Parameters
    ----------
    aud_arr : np.ndarray
        Array to pad. Any number of dimensions is accepted.
    size : int
        Desired length of ``aud_arr`` along ``axis``.
    axis : int, optional
        Axis to extend (default 1).

    Returns
    -------
    np.ndarray
        ``aud_arr`` itself (same object, never truncated) when it is already
        at least ``size`` long along ``axis``; otherwise a new array whose
        trailing entries along ``axis`` are filled with 1.0.
    """
    missing = size - aud_arr.shape[axis]
    if missing <= 0:
        return aud_arr

    # Build the whole padding block once, shaped like the input except along
    # `axis`. This makes every axis / rank work (a fixed (rows, 1) column only
    # fits axis=1 of a 2-D array) and avoids re-copying the array once per
    # missing element.
    pad_shape = list(aud_arr.shape)
    pad_shape[axis] = missing
    # np.ones is float64, so integer inputs are promoted to float when padded.
    return np.concatenate((aud_arr, np.ones(pad_shape)), axis)

In [None]:
import pandas as pd

def drop_sg_outliers(data_mat,shapes):
  """Keep the entries of ``data_mat`` whose shape is not an IQR outlier.

  A value is an outlier when it lies outside
  ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]`` of its column.

  Parameters
  ----------
  data_mat : sequence
      Items to filter; ``data_mat[i]`` is described by ``shapes[i]``.
  shapes : sequence
      One scalar, or one tuple of numbers, per item (anything that
      ``pd.DataFrame`` turns into one row per item).

  Returns
  -------
  list
      Items of ``data_mat``, in the original order and each at most once,
      whose row in ``shapes`` has no missing value and no outlier in any
      column.
  """
  if len(shapes) == 0:
    return []

  df = pd.DataFrame(shapes)
  q1 = df.quantile(0.25)
  q3 = df.quantile(0.75)
  iqr = q3 - q1

  # Cell-wise inlier mask. Missing values are never inliers.
  inlier = df.notnull() & ~((df < (q1 - 1.5 * iqr)) | (df > (q3 + 1.5 * iqr)))

  # Reduce per row. Looking at individual cells would return a row once per
  # inlier column and keep rows that are outliers in only one column.
  kept_rows = np.flatnonzero(inlier.all(axis=1).values).tolist()
  return [data_mat[i] for i in kept_rows]


## Cleaning Spectrograms
Scan all spectrograms, drop the width outliers of each word (IQR rule) and resize the inliers so that all spectrograms have the same size.

In [None]:
words = ['catapulta', 'hola', 'iu', 'mar', 'taula', 'victor']
dts = ['Train', 'Test']

originals_path = spec_path + "original/"
resized_path = spec_path + "resized/"

spec_height = 128   # height (pixels) every resized spectrogram is given

word_max = []       # widest inlier spectrogram (pixels) per word, in `words` order
word_index = {}     # word -> inlier positions within that word's Train+Test scan
word_inliers = {}   # word -> [(dt, file number), ...] of the inlier spectrograms


def _iqr_inlier_positions(values):
    """Return the positions of `values` inside [Q1 - 1.5*IQR, Q3 + 1.5*IQR].

    NaN entries are ignored when computing the quartiles and are never
    reported as inliers.
    """
    series = pd.Series(values, dtype=float)
    if series.notna().sum() == 0:
        return []
    q1 = series.quantile(0.25)
    q3 = series.quantile(0.75)
    iqr = q3 - q1
    # Both bounds are inclusive; NaN compares as False.
    inlier = series.between(q1 - 1.5 * iqr, q3 + 1.5 * iqr)
    return np.flatnonzero(inlier.values).tolist()


for word in words:
    print("------", word)
    durations = []   # width of every scanned image; NaN when it cannot be read
    sources = []     # (dt, 1-based file number), parallel to `durations`
    for dt in dts:

        directory = originals_path + word + "/" + dt + "/"
        n_spec = len(os.listdir(directory))

        for i in range(n_spec):
            # Files are expected to be named 1.png ... n_spec.png.
            file_name = directory + '%d.png' % (i + 1)
            sources.append((dt, i + 1))
            try:
                image = img.imread(file_name)  # Convert images to matrix
                durations.append(image.shape[1])
            except Exception:
                # Some images are corrupted: record NaN so they neither skew
                # the quartiles nor get selected for resizing.
                durations.append(np.nan)

            if (i % 50 == 0):
                print("Vamo por la : ", i, "/", n_spec)

    # Drop spectrograms following the IQR
    kept = _iqr_inlier_positions(durations)

    word_index[word] = kept
    # Remember which split each inlier came from, so a position in the
    # combined Train+Test list is never mistaken for a file number.
    word_inliers[word] = [sources[k] for k in kept]
    word_max.append(max((int(durations[k]) for k in kept), default=0))

print("word maxes:", word_max)

# PIL sizes are (width, height); computed once instead of once per image.
target_size = (max(word_max), spec_height)

for word in words:              # Resize all spectrograms to the largest one
    print("------", word)
    for dt in dts:
        os.makedirs(resized_path + word + "/" + dt, exist_ok=True)

    for dt, file_no in word_inliers[word]:
        relative = word + "/" + dt + '/%d.png' % file_no
        try:
            with Image.open(originals_path + relative) as image:
                # Image.ANTIALIAS was an alias of LANCZOS, removed in Pillow 10.
                resized = image.resize(target_size, Image.LANCZOS)
            resized.save(resized_path + relative)
        except Exception as err:
            # Best effort: report the file and keep going.
            print("Could not resize", relative, "->", err)


------ catapulta
Vamo por la :  0 / 447
Vamo por la :  50 / 447
Vamo por la :  100 / 447
Vamo por la :  150 / 447
Vamo por la :  200 / 447
Vamo por la :  250 / 447
Vamo por la :  300 / 447
Vamo por la :  350 / 447
Vamo por la :  400 / 447
Vamo por la :  0 / 125
Vamo por la :  50 / 125
Vamo por la :  100 / 125
------ hola
Vamo por la :  0 / 1000
Vamo por la :  50 / 1000
Vamo por la :  100 / 1000
Vamo por la :  150 / 1000
Vamo por la :  200 / 1000
Vamo por la :  250 / 1000
Vamo por la :  300 / 1000
Vamo por la :  350 / 1000
Vamo por la :  400 / 1000
Vamo por la :  450 / 1000
Vamo por la :  500 / 1000
Vamo por la :  550 / 1000
Vamo por la :  600 / 1000
Vamo por la :  650 / 1000
Vamo por la :  700 / 1000
Vamo por la :  750 / 1000
Vamo por la :  800 / 1000
Vamo por la :  850 / 1000
Vamo por la :  900 / 1000
Vamo por la :  950 / 1000
Vamo por la :  0 / 184
Vamo por la :  50 / 184
Vamo por la :  100 / 184
Vamo por la :  150 / 184
------ iu
Vamo por la :  0 / 676
Vamo por la :  50 / 676
Vamo p

## Creating matrices
Read all resized spectrograms and build the Train and Test matrices (`X`, `y`) for each word, saving them as `.mat` files.

In [None]:
words = ['catapulta', 'hola', 'iu', 'mar', 'taula', 'victor']
dts = ['Train', 'Test']
# Integer class label stored in 'y' for every sample of a word.
word2num = {'catapulta': 0, 
            'hola': 1, 
            'iu': 2, 
            'mar': 3, 
            'taula': 4, 
            'victor': 5}

# Common spectrogram width; `word_max` is filled by the cleaning cell above,
# so that cell must have been run first.
leng = max(word_max) 

for dt in dts:
    for word in words:
        print("-----",word)

        directory = spec_path + "resized/" + word + "/" + dt + "/"
        # os.listdir order is arbitrary; sort (lexicographically) so the
        # sample order inside X is the same on every run.
        img_list = sorted(os.listdir(directory))
        n_images = len(img_list)

        # X is (height, width, sample); y is a column with one label per sample.
        data_mat = np.zeros( (128, leng, n_images) )
        n_skipped = 0
        for i, img_name in enumerate(img_list):
            image = img.imread(directory + img_name)
            try:
                data_mat[:,:,i] = image
            except ValueError:
                # The image does not have shape (128, leng); its slice stays
                # all zeros, as before, but is now reported.
                n_skipped += 1
        if n_skipped:
            print("  %d of %d images did not match (128, %d) and were left as zeros"
                  % (n_skipped, n_images, leng))

        labels_mat = np.array( [word2num[word]] * n_images )
        labels_mat = np.reshape( labels_mat, (n_images, 1) )

        word_dict = {'X': data_mat, 'y': labels_mat}
        out_dir = data_path + 'SG_mat/' + word + "/" + dt + "/"
        # savemat does not create missing folders.
        os.makedirs(out_dir, exist_ok=True)
        scipy.io.savemat(out_dir + "%s_%s.mat"%(word,dt), word_dict)

----- catapulta
----- hola
----- iu
----- mar
----- taula
----- victor
----- catapulta
----- hola
----- iu
----- mar
----- taula
----- victor
