In [37]:
!pip install python-levenshtein

Collecting python-levenshtein
[?25l  Downloading https://files.pythonhosted.org/packages/42/a9/d1785c85ebf9b7dfacd08938dd028209c34a0ea3b1bcdb895208bd40a67d/python-Levenshtein-0.12.0.tar.gz (48kB)
[K     |██████▊                         | 10kB 15.3MB/s eta 0:00:01[K     |█████████████▌                  | 20kB 15.1MB/s eta 0:00:01[K     |████████████████████▏           | 30kB 9.2MB/s eta 0:00:01[K     |███████████████████████████     | 40kB 8.8MB/s eta 0:00:01[K     |████████████████████████████████| 51kB 2.8MB/s 
Building wheels for collected packages: python-levenshtein
  Building wheel for python-levenshtein (setup.py) ... [?25l[?25hdone
  Created wheel for python-levenshtein: filename=python_Levenshtein-0.12.0-cp36-cp36m-linux_x86_64.whl size=144794 sha256=8475090836418b86f654804cbed497daa72b1cb4b596236e9be5213797a625c7
  Stored in directory: /root/.cache/pip/wheels/de/c2/93/660fd5f7559049268ad2dc6d81c4e39e9e36518766eaf7e342
Successfully built python-levenshtein
Installi

In [38]:
import librosa
import numpy as np
import pandas as pd
import pdb
import string
from Levenshtein import distance
import tensorflow as tf
from tensorflow import keras
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from tensorflow.keras import optimizers

In [45]:
def wav2feat(wavfile):
    '''
    Compute the magnitude spectrogram of an audio file.

    Input: audio wav file name
    Output: absolute value of the STFT matrix (frequency bins x frames)
    '''
    # Decode as a single channel, resampled to 44.1 kHz.
    signal, rate = librosa.load(wavfile, mono=True, sr=44100)
    stft_args = dict(
        n_fft=1024,
        hop_length=int(0.01 * rate),  # 10 ms step between frames
        win_length=int(0.02 * rate),  # 20 ms analysis window
        window='hann',
        center=True,
        pad_mode='reflect',
    )
    spectrum = librosa.stft(signal, **stft_args)
    # Drop the phase; only the magnitude is used downstream.
    return np.abs(spectrum)

In [10]:
def read_csv(filename):
    '''
    Read a two-column comma-separated file into a dict.

    Input: path of a text file whose lines look like
           '176787-5-0-27.wav,engine_idling'
    Output: dict mapping the first field of every line to its second field.
            A header line, if the file has one, is stored like any other row.
            Blank lines are skipped.
    Raises: IndexError for a non-blank line that contains no comma.
    '''
    id_label = {}
    with open(filename, 'r') as fid:
        for line in fid:  # '176787-5-0-27.wav,engine_idling\n'
            line = line.strip()
            if not line:
                # An empty line (typically a trailing newline at the end of
                # the file) has no second field to read; ignore it.
                continue
            tokens = line.split(',')  # ['176787-5-0-27.wav', 'engine_idling']
            id_label[tokens[0]] = tokens[1]
    return id_label

In [46]:
def editDistance(gt, est):
    '''
    Edit distance between two label sequences.

    Both arguments are strings of labels joined by '-'.
    E.g. gt is "dog_bark-street_music-engine_idling"
    E.g. est is "street_music-engine_idling"

    Output: (editdist, score) where editdist is the Levenshtein distance
    between the two sequences, counted in whole labels, and
    score = 1 - editdist / (number of labels in gt).
    '''
    gttokens = gt.split('-')
    esttokens = est.split('-')
    # Give every distinct label its own single character so the character-level
    # Levenshtein distance counts whole labels. Code points are handed out
    # consecutively from 'A', so the first 26 labels still get 'A'..'Z' and any
    # further labels continue past 'Z' instead of running off the alphabet.
    tokenset = list(set(gttokens + esttokens))  # ['dog_bark', 'siren', 'street_music', 'engine_idling']
    first_code = ord('A')
    token_char = {token: chr(first_code + idx) for idx, token in enumerate(tokenset)}
    # Convert gt and est to strings of those characters.
    gtstr = ''.join(token_char[t] for t in gttokens)  # e.g. 'BCA'
    eststr = ''.join(token_char[t] for t in esttokens)
    # Compare
    editdist = distance(gtstr, eststr)
    # str.split always returns at least one token, so len(gtstr) >= 1.
    score = 1 - editdist / len(gtstr)
    return editdist, score

In [11]:
def unique_class(y):
  """Return the distinct values of ``y`` (as given by np.unique) and their count."""
  classes = np.unique(y)
  return classes, len(classes)

In [12]:
# one-hot encoding
def one_hot_encoding(y_tr,y_te):
  """One-hot encode ``y_te`` against the classes found in ``y_tr``.

  The columns follow the class order returned by unique_class(y_tr).
  Returns a float array of shape (len(y_te), number of classes); a row stays
  all zeros when its label does not occur in ``y_tr``.
  """
  classes, n_classes = unique_class(y_tr)
  encoded = np.zeros((len(y_te), n_classes))
  for row in range(len(y_te)):
    # Column of the first class equal to this label, if any.
    column = next((c for c in range(n_classes) if y_te[row] == classes[c]), None)
    if column is not None:
      encoded[row][column] = 1
  return encoded

In [13]:
# reshape input data
def reshape_X(X):
  """Resize ``X`` in place to (len(X), rows, cols, 1), where rows and cols are
  the first two dimensions of X[0]. Nothing is returned; the caller's array is
  modified.
  """
  n_items = X.shape[0]
  n_rows = X[0].shape[0]
  n_cols = X[0].shape[1]
  # Trailing axis of length 1, as expected by the 2-D convolution layers.
  X.resize((n_items, n_rows, n_cols, 1))

In [14]:
# class number -> class name map
def class_map(uniq_clas):
  """Return a dict mapping each position in ``uniq_clas`` to the item at that position."""
  return {idx: uniq_clas[idx] for idx in range(len(uniq_clas))}

In [15]:
# predicted class numbers -> class labels
def num_to_lable(y, mapping=None):
  """Translate class numbers into class labels.

  y:       iterable of class numbers (e.g. the model's predicted classes).
  mapping: lookup from class number to label. When omitted, the notebook-level
           variable named ``map`` is used, which is how this function has
           always found its lookup table.

  Returns a list with one label per element of ``y``.
  Raises NameError when no mapping is passed and no notebook-level ``map``
  has been defined (previously this fell through to the builtin ``map`` and
  failed with an unrelated TypeError).
  """
  if mapping is None:
    # globals() only holds notebook-level names, so the builtin map() is
    # never picked up here by accident.
    mapping = globals().get('map')
    if mapping is None:
      raise NameError("num_to_lable needs a class-number -> label mapping: "
                      "pass `mapping` or define the global `map`")
  return [mapping[i] for i in y]

In [16]:
# write csv file
def write_csv(aud_name,lable,filepath):
  """Write a two-column CSV with header 'name,class' to ``filepath``.

  aud_name fills the 'name' column and lable fills the 'class' column;
  the DataFrame index is not written.
  """
  columns = {'name': aud_name, 'class': lable}
  pd.DataFrame(columns).to_csv(filepath, index=False)

In [17]:
# Mount Google Drive at /content/drive; the data paths used by the cells below
# all live under /content/drive/MyDrive. Requires the Google Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [47]:
# Load the training set: one magnitude spectrogram and one label per audio file.
if __name__=="__main__":
  N_FRAMES=401  # frames kept per clip; 4 s of audio at the 10 ms hop used by wav2feat gives 401
  f='/content/drive/MyDrive/labels_train.csv'
  aud_lable=read_csv(f)  # file name -> label
  x_train=[]
  y=[]
  df = pd.read_csv(f, sep=',', header=None)
  aud_name = df[0].values
  aud_name = aud_name[1:]  # drop the first row of the label file
  for i in aud_name:
    w='/content/drive/MyDrive/audio_train_1ch/'+i
    U=wav2feat(w)
    # Bring every spectrogram to exactly N_FRAMES columns. ndarray.resize() must
    # not be used for this: it re-chunks the flattened buffer, which shifts every
    # frequency row whenever the clip does not already have N_FRAMES frames.
    n_frames=U.shape[1]
    if n_frames<N_FRAMES:
      # Short clip: append silent (all-zero) frames at the end.
      U=np.pad(U,((0,0),(0,N_FRAMES-n_frames)),mode='constant')
    else:
      # Long (or exact-length) clip: keep the first N_FRAMES frames.
      U=U[:,:N_FRAMES]
    x_train.append(U)
    y.append(aud_lable[i])
  x_train=np.array(x_train)
  y=np.array(y)

In [48]:
# Add the channel axis, encode the labels and fit the input normaliser.
reshape_X(x_train)
cl,num_cl=unique_class(y)
# Class number -> class name lookup. num_to_lable() reads it through the
# notebook-level name `map`, so it has to be bound under exactly that name
# (this shadows the builtin map() for the rest of the notebook).
map=class_map(cl)
y_train=one_hot_encoding(y,y)
# Feature-wise centring and scaling; the statistics are computed from x_train by fit().
norm_x=tf.keras.preprocessing.image.ImageDataGenerator(featurewise_center=True,featurewise_std_normalization=True)
norm_x.fit(x_train)

In [None]:
# Define the model: two 3x3 convolutions, 2x2 max-pooling, then a dense
# classifier with one softmax output per class.
input_shape=x_train[0].shape
model = Sequential([
    Convolution2D(32, kernel_size=(3,3), activation='relu', input_shape=input_shape),
    Convolution2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_cl, activation='softmax'),
])
# Compile with categorical cross-entropy, matching the one-hot encoded labels.
model.compile(
    optimizer=optimizers.Adam(),
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 511, 399, 32)      320       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 509, 397, 64)      18496     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 254, 198, 64)      0         
_________________________________________________________________
dropout (Dropout)            (None, 254, 198, 64)      0         
_________________________________________________________________
flatten (Flatten)            (None, 3218688)           0         
_________________________________________________________________
dense (Dense)                (None, 128)               411992192 
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0

In [None]:
# Train the model
BATCH_SIZE=5
# steps_per_epoch has to be a whole number of batches and is derived from the
# same batch size that is passed to flow(), so one epoch is one full pass over
# x_train (the last batch may be smaller).
STEPS_PER_EPOCH=int(np.ceil(len(x_train)/BATCH_SIZE))
model.fit(norm_x.flow(x_train, y_train, batch_size=BATCH_SIZE), steps_per_epoch=STEPS_PER_EPOCH, epochs=10, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f6752556400>

In [None]:
# Task 2: predict a '-'-joined label sequence for every test feature file and
# write the results to a CSV.
if __name__=="__main__":
  label_task2=[]
  f='/content/drive/MyDrive/output/data/data/test_task2/task2.csv'
  test1= pd.read_csv(f, sep=',', header=None)
  aud_test1 = test1[0].values
  for feat_name in aud_test1:
    w='/content/drive/MyDrive/output/data/data/test_task2/feats/'+feat_name
    V=np.array(np.load(w))
    # Number of candidate windows: frames/80, rounded down when the fractional
    # part is below 0.1 and up otherwise.
    # NOTE(review): windows start every 10 frames, so frames/80 windows only
    # reach the beginning of a long recording - confirm this is intended.
    T=V.shape[1]/80
    P=int(T)
    if (T-P)<0.1:
      T=P
    else:
      T=P+1
    # Slide a 401-frame window over the time axis in steps of 10 frames and
    # stop at the first window that would run past the end of the recording.
    W=[]
    for win in range(0,T):
      H=win*10
      Z=V[0:,H:H+401]
      if Z.shape[1]<401:
        break
      W.append(Z)
    if not W:
      # No complete window fits in this file, so there is nothing to predict
      # on; record an empty label sequence and keep the output rows aligned.
      label_task2.append('')
      continue
    # Stack the windows and add the channel axis: (windows, freq, 401, 1).
    W=np.array(W,dtype='float32')[...,np.newaxis]
    # Apply the same feature-wise standardisation the model saw during training.
    W=norm_x.standardize(W)
    # Most probable class per window (replaces Sequential.predict_classes,
    # which newer TensorFlow releases no longer provide).
    ynew2=np.argmax(model.predict(W),axis=-1)
    lab1=num_to_lable(ynew2)
    # Collapse runs of identical consecutive labels into a single label.
    collapsed=[lab for k,lab in enumerate(lab1) if k==0 or lab!=lab1[k-1]]
    label_task2.append('-'.join(collapsed))
  f_path='/content/drive/MyDrive/output/task2_labels_test.csv'
  write_csv(aud_test1,label_task2,f_path)