In [4]:
import librosa
import numpy as np
import pandas as pd
import pdb
import string
import tensorflow as tf
from tensorflow import keras
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from tensorflow.keras import optimizers

In [5]:
def wav2feat(wavfile):
    '''
    Compute the magnitude spectrogram of one audio file.

    Input: audio wav file name (loaded by librosa at sr=44100 as mono)
    Output: Magnitude spectrogram, i.e. the absolute value of the STFT taken
            with a 1024-point FFT, a 20 ms Hann window and a 10 ms hop
    '''
    signal, sample_rate = librosa.load(wavfile, sr=44100, mono=True)
    stft_kwargs = dict(
        n_fft=1024,
        hop_length=int(0.01 * sample_rate),  # 10ms
        win_length=int(0.02 * sample_rate),  # 20ms
        window='hann',
        center=True,
        pad_mode='reflect',
    )
    return np.abs(librosa.stft(signal, **stft_kwargs))

In [6]:
def read_csv(filename):
    '''
    Read a two-column, comma-separated text file into a dict.

    Input: path of a file holding one `<name>,<label>` pair per line
    Output: dict mapping the first field of each line to the second field;
            if a name occurs more than once, the last occurrence wins

    Every non-blank line is used, so a header row (if the file has one) ends
    up in the dict as an ordinary entry. Blank lines (for example extra
    newlines at the end of the file) are skipped instead of raising an
    IndexError. Lines are split on every comma; quoting is not interpreted.
    '''
    id_label = {}
    with open(filename, 'r') as fid:
        for line in fid:  # '176787-5-0-27.wav,engine_idling\n'
            line = line.strip()
            if not line:
                # Nothing to parse on an empty line; tokens[1] would not exist.
                continue
            tokens = line.split(',')  # ['176787-5-0-27.wav', 'engine_idling']
            id_label[tokens[0]] = tokens[1]
    return id_label

In [7]:
def unique_class(y):
  '''
  Input: array-like of class labels
  Output: tuple (array of the distinct labels as returned by np.unique,
          number of distinct labels)
  '''
  distinct = np.unique(y)
  return distinct, len(distinct)

In [8]:
#one hot encoding
def one_hot_encoding(y_tr,y_te):
  '''
  One-hot encode the labels in y_te against the classes present in y_tr.

  Input: y_tr - labels that define the class set; column j of the result
                corresponds to unique_class(y_tr)[0][j]
         y_te - labels to encode (indexed as y_te[0] .. y_te[len(y_te)-1])
  Output: array of shape (len(y_te), number of classes) filled with 0/1;
          a label that does not occur in y_tr yields an all-zero row
  '''
  classes, n_classes = unique_class(y_tr)
  encoded = np.zeros((len(y_te), n_classes))
  for row in range(len(y_te)):
    # Column of the first class equal to this label, or None when there is none.
    col = next((k for k in range(n_classes) if y_te[row] == classes[k]), None)
    if col is not None:
      encoded[row][col] = 1
  return encoded

In [26]:
#reshape input data
def reshape_X(X):
  '''
  Give a stack of 2-D arrays a trailing axis of length 1, in place.

  Input: numpy array X whose items X[i] have at least two axes
  Output: None; X itself is resized to
          (X.shape[0], X[0].shape[0], X[0].shape[1], 1)
  '''
  n_items = X.shape[0]
  item_shape = X[0].shape
  target = (n_items, item_shape[0], item_shape[1], 1)
  X.resize(*target)

In [10]:
#class number to classname map
def class_map(uniq_clas):
  '''
  Input: indexable sequence of class names
  Output: dict {position in uniq_clas: class name at that position}
  '''
  return {idx: uniq_clas[idx] for idx in range(len(uniq_clas))}

In [27]:
#y_pred num to label
def num_to_lable(y, mapping=None):
  '''
  Translate class numbers into class names.

  Input: y - iterable of class numbers
         mapping - optional lookup {class number: class name}, e.g. the result
                   of class_map(); when omitted, the notebook-level global
                   named `map` is used, which is how existing callers work
  Output: list with one class name per element of y
  Raises: TypeError when no mapping is given and the global `map` was never
          assigned (the name then still refers to the builtin map)
  '''
  if mapping is None:
    # Legacy path: rely on a global called `map` assigned by the caller.
    # That name shadows the builtin, so detect the "never assigned" case and
    # report it clearly instead of failing with "'type' object is not
    # subscriptable".
    mapping = map
    if isinstance(mapping, type):
      raise TypeError("num_to_lable: no class mapping available; pass "
                      "mapping=class_map(classes) or assign the global 'map' first")
  return [mapping[i] for i in y]

In [12]:
#write csv file
def write_csv(aud_name,lable,filepath):
  '''
  Input: aud_name - audio file names
         lable - class names, one per entry of aud_name
         filepath - destination handed to DataFrame.to_csv
  Output: None; writes the columns `name` and `class` without an index column
  '''
  columns = {'name': aud_name, 'class': lable}
  pd.DataFrame(columns).to_csv(filepath, index=False)

In [85]:
# Colab only: mount Google Drive at /content/drive; the dataset paths used in
# the following cells all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [28]:
 # Build the training set: one fixed-size magnitude spectrogram per clip listed
 # in the label CSV, plus the matching class name.
 N_FREQ_BINS, N_FRAMES = 513, 401  # feature size fed to the network
 f='/content/drive/MyDrive/labels_train.csv'
 aud_lable=read_csv(f)  # file name -> class name
 x_train=[]
 y=[]
 df = pd.read_csv(f, sep=',', header=None)
 aud_name = df[0].values
 aud_name = aud_name[1:]  # drop row 0 (presumably the CSV header row -- confirm)
 for i in aud_name:
   w='/content/drive/MyDrive/audio_train_1ch/'+i
   spec=wav2feat(w)
   # Zero-pad / crop each axis explicitly. ndarray.resize() works on the flat
   # memory buffer, so for a C-ordered spectrogram with a different number of
   # frames it would shift rows against each other instead of padding in time.
   U=np.zeros((N_FREQ_BINS, N_FRAMES), dtype=spec.dtype)
   n_rows=min(N_FREQ_BINS, spec.shape[0])
   n_cols=min(N_FRAMES, spec.shape[1])
   U[:n_rows, :n_cols]=spec[:n_rows, :n_cols]
   x_train.append(U)
   y.append(aud_lable[i])
 x_train=np.array(x_train)
 y=np.array(y)

In [29]:
# Add the trailing channel axis to x_train (in place), collect the class list
# and one-hot encode the training labels against it.
reshape_X(x_train)
cl, num_cl = unique_class(y)
y_train = one_hot_encoding(y, y)

# Feature-wise centring and std-normalisation; fit() computes the statistics
# from x_train.
norm_x = tf.keras.preprocessing.image.ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
)
norm_x.fit(x_train)

In [14]:
#Define Model
# Two 3x3 conv layers -> 2x2 max-pool -> dense classifier with one softmax
# output per class; dropout after the pooling and the dense layer.
input_shape = x_train[0].shape
model = Sequential([
    Convolution2D(32, kernel_size=(3,3), activation='relu', input_shape=input_shape),
    Convolution2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_cl, activation='softmax'),
])
#Compile
model.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=optimizers.Adam(),
    metrics=['accuracy'],
)

In [None]:
#Train The Model
# steps_per_epoch has to be a whole number of batches; len(x_train)/10 is a
# float, so round it up to an int explicitly.
# NOTE(review): with batch_size=5 this is only about half of the training set
# per epoch -- confirm whether len(x_train)/5 was intended.
steps_per_epoch = int(np.ceil(len(x_train) / 10))
model.fit(norm_x.flow(x_train, y_train, batch_size=5), steps_per_epoch=steps_per_epoch, epochs=10, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f6752556400>

In [37]:
if __name__=="__main__":
  # Predict a class name for every task-1 test clip and write the result as CSV.
  # No evaluation against reference labels is done in this cell.
  f='/content/drive/MyDrive/output/data/data/test_task1/task1.csv'
  test1= pd.read_csv(f, sep=',', header=None)
  aud_test1 = test1[0].values  # feature file names, e.g. 'a001.npy'
  x_test1=[]
  for i in aud_test1:
    w='/content/drive/MyDrive/output/data/data/test_task1/feats/'+i
    feat=np.asarray(np.load(w))
    if feat.ndim != 2:
      # assumes each file holds one 2-D (frequency, time) spectrogram like the
      # training features -- TODO confirm
      raise ValueError("expected a 2-D feature array in %s, got shape %r" % (w, feat.shape))
    # Zero-pad / crop each axis explicitly to the training feature size.
    # ndarray.resize() works on the flat memory buffer, so for a C-ordered
    # array with a different number of frames it would misalign the rows.
    V=np.zeros((513,401), dtype=feat.dtype)
    n_rows=min(513, feat.shape[0])
    n_cols=min(401, feat.shape[1])
    V[:n_rows, :n_cols]=feat[:n_rows, :n_cols]
    x_test1.append(V)
  x_test1=np.array(x_test1)
  reshape_X(x_test1)  # adds the trailing channel axis in place
  # The model was trained on batches from norm_x.flow(), i.e. on feature-wise
  # centred / std-normalised inputs, so apply the same statistics here.
  # standardize() modifies its argument in place, hence the float copy.
  x_test1=norm_x.standardize(x_test1.astype('float32'))
  # Sequential.predict_classes() no longer exists in newer TensorFlow releases;
  # for a softmax output the arg-max over the last axis is the equivalent.
  ynew = np.argmax(model.predict(x_test1), axis=-1)
  # num_to_lable() looks the mapping up in the global named `map`, so the name
  # (which shadows the builtin) has to stay as it is.
  map=class_map(cl)
  lable=num_to_lable(ynew)
  f_path='/content/drive/MyDrive/output/task1_labels_test.csv'
  write_csv(aud_test1,lable,f_path)

['a001.npy' 'a002.npy' 'a003.npy' 'a004.npy' 'a005.npy' 'a006.npy'
 'a007.npy' 'a008.npy' 'a009.npy' 'a010.npy' 'a011.npy' 'a012.npy'
 'a013.npy' 'a014.npy' 'a015.npy' 'a016.npy' 'a017.npy' 'a018.npy'
 'a019.npy' 'a020.npy' 'a021.npy' 'a022.npy' 'a023.npy' 'a024.npy'
 'a025.npy' 'a026.npy' 'a027.npy' 'a028.npy' 'a029.npy' 'a030.npy'
 'a031.npy' 'a032.npy' 'a033.npy' 'a034.npy' 'a035.npy' 'a036.npy'
 'a037.npy' 'a038.npy' 'a039.npy' 'a040.npy' 'a041.npy' 'a042.npy'
 'a043.npy' 'a044.npy' 'a045.npy' 'a046.npy' 'a047.npy' 'a048.npy'
 'a049.npy' 'a050.npy']
['drilling', 'children_playing', 'engine_idling', 'dog_bark', 'drilling', 'children_playing', 'drilling', 'drilling', 'children_playing', 'dog_bark', 'dog_bark', 'children_playing', 'children_playing', 'street_music', 'drilling', 'children_playing', 'children_playing', 'street_music', 'gun_shot', 'drilling', 'jackhammer', 'engine_idling', 'gun_shot', 'drilling', 'drilling', 'engine_idling', 'drilling', 'engine_idling', 'street_music', '