In [1]:
import pandas as pd
import librosa
import os
from datetime import datetime
import numpy as np
from recorder import Recorder

In [2]:
# Empty accumulator list; none of the cells visible in this notebook excerpt append to it.
features = []
# Recorder is imported from the local `recorder` module; what it does is not shown here.
r = Recorder()
# Fixed number of time-axis columns that the feature extractors below pad their output to.
max_pad_len = 174

In [6]:
def extract_feature_mel(file):
    """Load an audio file and return its mel spectrogram at a fixed width.

    The time axis (axis 1) is forced to exactly ``max_pad_len`` columns, a
    notebook-level constant: shorter clips are zero-padded on the right and
    longer clips are cropped to their first ``max_pad_len`` frames.

    Parameters
    ----------
    file : str or path-like
        Audio file to load; passed straight to ``librosa.load``.

    Returns
    -------
    numpy.ndarray or None
        Array of shape ``(n_mels, max_pad_len)``. ``None`` is returned (and
        the error is printed) if loading or feature extraction fails.
    """
    try:
        audio, sr = librosa.load(file, res_type='kaiser_fast')
        mel = librosa.feature.melspectrogram(y=audio, sr=sr)

        n_frames = mel.shape[1]
        if n_frames > max_pad_len:
            # np.pad rejects negative widths, so a clip longer than the target
            # must be cropped instead of padded.
            mel = mel[:, :max_pad_len]
        else:
            mel = np.pad(
                mel,
                pad_width=((0, 0), (0, max_pad_len - n_frames)),
                mode='constant',
            )
    except Exception as e:
        # Best-effort extraction: report which file failed and let the caller skip it.
        print(f'Error happened while parsing file {file}: {e}')
        return None

    return mel

In [7]:
# Smoke-test the mel extractor on a single clip; `result` is None if extraction failed.
result = extract_feature_mel('../../../downloaded_audio_clips/street_music.wav')

In [8]:
# The second dimension should equal max_pad_len (174) after padding.
print(result.shape)

(128, 174)


In [9]:
# Dump the matrix; the trailing zeros in each row are the constant padding added by np.pad.
print(result)

[[7.5772987e-03 3.2140876e-03 6.0848147e-04 ... 0.0000000e+00
  0.0000000e+00 0.0000000e+00]
 [5.7917740e-02 8.8473521e-02 1.6783181e-01 ... 0.0000000e+00
  0.0000000e+00 0.0000000e+00]
 [2.6846144e-01 1.0759641e+00 2.7837169e+00 ... 0.0000000e+00
  0.0000000e+00 0.0000000e+00]
 ...
 [2.3331563e-06 6.8978993e-06 4.3038081e-06 ... 0.0000000e+00
  0.0000000e+00 0.0000000e+00]
 [3.0570257e-07 1.0448880e-06 5.6614493e-07 ... 0.0000000e+00
  0.0000000e+00 0.0000000e+00]
 [2.6529024e-08 7.4417287e-08 3.4379720e-08 ... 0.0000000e+00
  0.0000000e+00 0.0000000e+00]]


# Alongside that, I also did the same for chroma_stft

In [10]:
def extract_feature_chroma_stft(file):
    """Load an audio file and return its STFT chromagram at a fixed width.

    The time axis (axis 1) is forced to exactly ``max_pad_len`` columns, a
    notebook-level constant: shorter clips are zero-padded on the right and
    longer clips are cropped to their first ``max_pad_len`` frames.

    Parameters
    ----------
    file : str or path-like
        Audio file to load; passed straight to ``librosa.load``.

    Returns
    -------
    numpy.ndarray or None
        Array of shape ``(n_chroma, max_pad_len)``. ``None`` is returned (and
        the error is printed) if loading or feature extraction fails.
    """
    try:
        audio, sr = librosa.load(file, res_type='kaiser_fast')
        chroma = librosa.feature.chroma_stft(y=audio, sr=sr)

        n_frames = chroma.shape[1]
        if n_frames > max_pad_len:
            # np.pad rejects negative widths, so a clip longer than the target
            # must be cropped instead of padded.
            chroma = chroma[:, :max_pad_len]
        else:
            chroma = np.pad(
                chroma,
                pad_width=((0, 0), (0, max_pad_len - n_frames)),
                mode='constant',
            )
    except Exception as e:
        # Best-effort extraction: report which file failed and let the caller skip it.
        print(f'Error happened while parsing file {file}: {e}')
        return None

    return chroma

In [13]:
# Smoke-test the chroma extractor on the same clip; `chroma_result` is None if extraction failed.
chroma_result = extract_feature_chroma_stft('../../../downloaded_audio_clips/street_music.wav')

In [14]:
# The second dimension should equal max_pad_len (174) after padding.
print(chroma_result.shape)

(12, 174)


In [15]:
# Dump the matrix; the trailing zeros in each row are the constant padding added by np.pad.
print(chroma_result)

[[0.0465768  0.07269613 0.13310069 ... 0.         0.         0.        ]
 [0.06030234 0.0291456  0.01146783 ... 0.         0.         0.        ]
 [0.23925596 0.07557968 0.04046364 ... 0.         0.         0.        ]
 ...
 [0.01421273 0.015153   0.01129387 ... 0.         0.         0.        ]
 [0.04114164 0.1045249  0.24759142 ... 0.         0.         0.        ]
 [0.08324129 0.3225774  1.         ... 0.         0.         0.        ]]


## Now, resuming from the part after the DataFrame (df) was built

(I downloaded this from Colab.)

In [16]:
# Load the precomputed feature table stored under key 'df' in the HDF5 file.
# Later cells read its `feature` and `class_label` columns.
df = pd.read_hdf('features_from_UrbanSound_for_cnn_for_only_mel.h5', 'df')

In [19]:
# Stack the per-row feature matrices into a single array (samples on axis 0).
X = np.array(df.feature.tolist())
# Sanity-check the overall shape and the shape of one sample.
print(X.shape)
print(X[0].shape)

(8732, 128, 174)
(128, 174)


Nice >>> next up: building and training the model

In [23]:
import tensorflow.keras as keras
from sklearn.model_selection import train_test_split

#convert the data and labels for understandable numerical data
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from datetime import datetime

In [24]:
def prepare_dataset(test_size, validation_size, random_state=None):
    """Build train/validation/test tensors from the notebook-level DataFrame ``df``.

    Features come from ``df.feature`` and labels from ``df.class_label``.
    Labels are integer-encoded with ``LabelEncoder`` and then one-hot encoded
    with ``to_categorical``. A trailing channel axis is appended to every
    feature array so it can be fed to ``Conv2D`` layers.

    Parameters
    ----------
    test_size : float or int
        Passed to ``train_test_split`` for the train/test split.
    validation_size : float or int
        Passed to ``train_test_split`` for the train/validation split, which
        is taken from what remains after the test split.
    random_state : int, RandomState instance or None, optional
        Seed forwarded to both splits. The default ``None`` keeps the original
        unseeded behaviour; pass an int for a reproducible partition.

    Returns
    -------
    tuple
        ``(X_train, X_validation, X_test, y_train, y_validation, y_test)``.
    """
    # load data
    X = np.array(df.feature.tolist())
    y = np.array(df.class_label.tolist())

    # convert string labels to integers, then to one-hot vectors
    le = LabelEncoder()
    y = to_categorical(le.fit_transform(y))

    # create train/test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # create train/validation split from the remaining training portion
    X_train, X_validation, y_train, y_validation = train_test_split(
        X_train, y_train, test_size=validation_size, random_state=random_state
    )

    # Conv2D expects each sample to be 3-D (rows, cols, channels), so append a
    # channel axis: (num_samples, rows, cols) -> (num_samples, rows, cols, 1)
    X_train = X_train[..., np.newaxis]
    X_validation = X_validation[..., np.newaxis]
    X_test = X_test[..., np.newaxis]

    return X_train, X_validation, X_test, y_train, y_validation, y_test

In [25]:
# Hold out 25% of the data for testing, then 20% of the remainder for validation.
X_train, X_validation, X_test, y_train, y_validation, y_test= prepare_dataset(0.25, 0.2)
# Sanity-check the split sizes and tensor shapes.
print(len(y_test))
print(y_test.shape)
print(X_train.shape)
print(X_test.shape)

2183
(2183, 10)
(5239, 128, 174, 1)
(2183, 128, 174, 1)


In [26]:
def build_model(input_shape, num_classes=10):
    """Build (but do not compile) a three-block CNN classifier.

    Each block is Conv2D -> MaxPool2D -> BatchNormalization. The blocks are
    followed by Flatten, a 64-unit dense layer, dropout, and a softmax output.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, without the batch axis.
    num_classes : int, optional
        Number of units in the softmax output layer. Defaults to 10, the
        value that was previously hard-coded.

    Returns
    -------
    keras.Sequential
        The uncompiled model.
    """
    # create model
    model = keras.Sequential()

    # 1st conv layer: the only layer that needs input_shape; later layers
    # infer their input shape from the layer before them
    model.add(keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(keras.layers.MaxPool2D((3, 3), strides=(2, 2), padding='same'))
    # batch normalisation normalises the activations, which speeds up training
    model.add(keras.layers.BatchNormalization())

    # 2nd conv layer (Conv2D params: filters, kernel_size)
    model.add(keras.layers.Conv2D(32, (3, 3), activation='relu'))
    # MaxPool2D params: pool_size, strides; padding='same' means zero padding
    model.add(keras.layers.MaxPool2D((3, 3), strides=(2, 2), padding='same'))
    model.add(keras.layers.BatchNormalization())

    # 3rd conv layer
    model.add(keras.layers.Conv2D(32, (2, 2), activation='relu'))
    model.add(keras.layers.MaxPool2D((2, 2), strides=(2, 2), padding='same'))
    model.add(keras.layers.BatchNormalization())

    # flatten the output and feed it into dense layer
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dropout(0.3))

    # output layer: one softmax unit per class
    model.add(keras.layers.Dense(num_classes, activation='softmax'))

    return model


# build the CNN net

The build, train, and fit steps are being done in Colab.

In [27]:
# Per-sample shape: axes 1-3 of X_train (the leading sample axis is dropped).
input_shape = (X_train.shape[1], X_train.shape[2], X_train.shape[3] ) 
model = build_model(input_shape)

#Compile the CNN
optimizer = keras.optimizers.Adam(learning_rate = 0.0001)
# categorical_crossentropy matches the one-hot labels produced by to_categorical in prepare_dataset.
model.compile(optimizer=optimizer, loss= "categorical_crossentropy",
metrics=['accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 126, 172, 32)      320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 63, 86, 32)        0         
_________________________________________________________________
batch_normalization (BatchNo (None, 63, 86, 32)        128       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 61, 84, 32)        9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 31, 42, 32)        0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 31, 42, 32)        128       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 30, 41, 32)        4