# DATA PREPROCESSING


In [None]:
# DATA PREPROCESSING
# MFCCs

import pandas as pd
import numpy as np
import tables  
import librosa
import librosa.display
# time taken to read data
chunk = pd.read_excel('ESC-50-master/meta/miniesc50.xlsx',usecols=['filename','category'])
classes = chunk['category']
audiofiles = chunk['filename']

mfccFeatures = []
i = 0
for wavFile in audiofiles:
    data, samplingRate = librosa.load(('ESC-50-master/audio/' + wavFile), sr=24050)

    #Grab the mfcc features through the mfcc algorithm
    mfcc = librosa.feature.mfcc(data,sr=samplingRate,n_mfcc=40)
    mfccs_scaled_features = np.mean(mfcc.T,axis=0)


    mfccFeatures.append([mfccs_scaled_features,classes[i]])
    i += 1
    if (i == 200):
        break
pd.set_option('display.max_colwidth', None)
mfccFeatures = pd.DataFrame(mfccFeatures, columns=['feature','class_category'])


# MODEL CREATION AND TESTING

In [None]:
# MODEL CREATION AND TESTING

import pandas as pd
import numpy as np
import tables  
import librosa
import librosa.display
from sklearn import metrics 
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, ShuffleSplit
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,Activation,Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from ann_visualizer.visualize import ann_viz;

# from tensorflow import keras 
from sklearn.model_selection import cross_val_score
from sklearn import metrics
import tensorflow as tf
from sklearn.tree import DecisionTreeClassifier
from numpy import load

X = load('X.npy')

Y = load('Y.npy')

print(X.shape)

#Encode target labels with value between 0 and n-1 classes
Label_encoder = LabelEncoder()
yy=to_categorical((Label_encoder.fit_transform(Y)))


#performing train test split on our data set.
X_train,X_test,Y_train,Y_test=train_test_split(X,yy,test_size=0.40,shuffle = True, random_state=2)

num_labels=yy.shape[1]

model=Sequential()
#first layer
model.add(Dense(256,input_shape=(40,)))
model.add(Activation('relu'))
model.add(Dropout(0.5))

#final layer
model.add(Dense(num_labels))
model.add(Activation('softmax'))

# To compile the model we need to define loss function which is categorical cross-entropy,
# accuracy metrics which is accuracy score, and an optimizer which is Adam.

model.compile(loss='categorical_crossentropy',metrics=['accuracy'],optimizer='adam')
model.summary()
# Calculate pre-training accuracy 
score = model.evaluate(X_test, Y_test, verbose=0)
accuracy = 400*score[1]

# TESTING BEFORE TRAINING
print(f'the accuracy {accuracy} before training')

# TRAINING
num_epochs = 200
num_batch_size = 32

checkpointer = ModelCheckpoint(filepath='./audio_classification.hdf5', 
                               verbose=1, save_best_only=True)

model.fit(X_train, Y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(X_test, Y_test), callbacks=[checkpointer] ,verbose=1)

# EVALUATING AFTER TRAINING
score = model.evaluate(X_train, Y_train, verbose=0)
print("Training Accuracy: {0:.2%}".format(score[1]))

score = model.evaluate(X_test, Y_test, verbose=0)
print("Testing Accuracy: {0:.2%}".format(score[1]))



# PREDICTION

# ONE PREDICTION

In [None]:
filename="output.wav"

#preprocess the audio file
audio, sample_rate = librosa.load(filename, res_type='kaiser_fast') 
mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
mfccs_scaled_features = np.mean(mfccs_features.T,axis=0)

#Reshape MFCC feature to 2-D array
mfccs_scaled_features=mfccs_scaled_features.reshape(1,-1)

x_predict=model.predict(mfccs_scaled_features) 
predicted_label=np.argmax(x_predict,axis=1)

prediction_class = Label_encoder.inverse_transform(predicted_label) 
print(prediction_class)

# 25 SAMPLES

In [None]:
# import json
# import base64
# import asyncio
# import pyaudio
# import websockets


# FRAMES_PER_BUFFER = 3200
# FORMAT = pyaudio.paInt16
# CHANNELS = 1
# RATE = 16000
# p = pyaudio.PyAudio()

# # starts recording
# stream = p.open(
#     format=FORMAT,
#     channels=CHANNELS,
#     rate=RATE,
#     input=True,
#     frames_per_buffer=FRAMES_PER_BUFFER
# )

# URL = "wss://api.assemblyai.com/v2/realtime/ws?sample_rate=16000"


# async def send_receive():
#     print(f'Connecting websocket to url ${URL}')
#     async with websockets.connect(
#             URL,
#             extra_headers=(("Authorization", "8209d9a235da421b9cee1305c901b2de"),),
#             ping_interval=5,
#             ping_timeout=20
#     ) as _ws:
#         await asyncio.sleep(0.1)
#         print("Receiving Session Begins ...")
#         session_begins = await _ws.recv()
#         print(session_begins)
#         print("Sending messages ...")

#         async def send():
#             while True:
#                 try:
#                     data = stream.read(FRAMES_PER_BUFFER)
#                     data = base64.b64encode(data).decode("utf-8")
#                     json_data = json.dumps({"audio_data": str(data)})
#                     await _ws.send(json_data)
#                 except websockets.exceptions.ConnectionClosedError as e:
#                     print(e)
#                     assert e.code == 4008
#                     break
#                 except Exception as e:
#                     assert False, "Not a websocket 4008 error"
#                 await asyncio.sleep(0.01)

#             return True

#         async def receive():
#             while True:
#                 try:
#                     result_str = await _ws.recv()
#                     print(json.loads(result_str)['text'])
#                 except websockets.exceptions.ConnectionClosedError as e:
#                     print(e)
#                     assert e.code == 4008
#                     break
#                 except Exception as e:
#                     assert False, "Not a websocket 4008 error"

#         send_result, receive_result = await asyncio.gather(send(), receive())

# # asyncio.run(send_receive())
# test,receive_result = await send_receive()


# filename="testSounds/sirening.wav"

# #preprocess the audio file
# audio, sample_rate = librosa.load(filename, res_type='kaiser_fast') 
# mfccs_features = librosa.feature.mfcc(y=test, sr=sample_rate, n_mfcc=40)
# mfccs_scaled_features = np.mean(mfccs_features.T,axis=0)

# #Reshape MFCC feature to 2-D array
# mfccs_scaled_features=mfccs_scaled_features.reshape(1,-1)

# x_predict=model.predict(mfccs_scaled_features) 
# predicted_label=np.argmax(x_predict,axis=1)

# prediction_class = Label_encoder.inverse_transform(predicted_label) 
# print(prediction_class)

# LIVE LISTENING

In [None]:
# 5 SECOND LISTENING

In [20]:
import pyaudio
import wave

CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 24050
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"

p = pyaudio.PyAudio()

stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK)

print("* recording")

frames = []

for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    frames.append(data)

#################

#################
print("* done recording")

stream.stop_stream()
stream.close()
p.terminate()

wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(b''.join(frames))
wf.close()


filename="output.wav"

#preprocess the audio file
audio, sample_rate = librosa.load(filename, res_type='kaiser_fast') 
mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
mfccs_scaled_features = np.mean(mfccs_features.T,axis=0)

#Reshape MFCC feature to 2-D array
mfccs_scaled_features=mfccs_scaled_features.reshape(1,-1)

x_predict=model.predict(mfccs_scaled_features) 
predicted_label=np.argmax(x_predict,axis=1)

prediction_class = Label_encoder.inverse_transform(predicted_label) 
print(prediction_class)

OSError: [Errno -9998] Invalid number of channels

In [None]:
# LIVE LISTENING

In [None]:
import pyaudio
import numpy as np
import librosa
import warnings
import threading

warnings.filterwarnings("ignore")
#This will ignore all DeprecationWarning warnings in your code.
CHUNKSIZE = 1024
SR = 24050

stream_active = True
stop_event = threading.Event()

def audio_callback(in_data, frame_count, time_info, status):
# if stream_active:
    audio_data = np.frombuffer(in_data, dtype=np.int16) / 32767.0 # scale audio data to [-1, 1]
    audio_data = librosa.resample(audio_data, SR, 16000) # resample audio data to 16 kHz
    mfccs_features = librosa.feature.mfcc(audio_data, sr=SR, n_mfcc=40) # compute MFCCs
    mfccs_scaled_features = np.mean(mfccs_features.T,axis=0)

    #Reshape MFCC feature to 2-D array
    mfccs_scaled_features=mfccs_scaled_features.reshape(1,-1)

    x_predict=model.predict(mfccs_scaled_features) 
    predicted_label=np.argmax(x_predict,axis=1)

    prediction_class = Label_encoder.inverse_transform(predicted_label) 
    print(prediction_class)

    # preprocess audio data and do sound classification here
    return (audio_data * 32767.0).astype(np.int16), pyaudio.paContinue

p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt16,
                channels=1,
                rate=SR,
                input=True,
                frames_per_buffer=CHUNKSIZE,
                stream_callback=audio_callback)

stream.start_stream()

# while stream.is_active:
#     pass
try:
    while not stop_event.is_set():
        # do other things here while audio is being recorded and processed
        pass
except KeyboardInterrupt:
    stop_event.set()

stream.stop_stream()
stream.close()
p.terminate()
