In [5]:
# Import libraries.
import pandas as pd
import numpy as np
import scipy
import scipy.io.wavfile
from scipy import signal
import matplotlib.pyplot as plt
import math
import os
import datetime

import warnings
warnings.filterwarnings("ignore")

%matplotlib inline

In [6]:
# Graphing helper function
def setup_graph(title='', x_label='', y_label='', fig_size=None):
    fig = plt.figure()
    if fig_size != None:
        fig.set_size_inches(fig_size[0], fig_size[1])
    ax = fig.add_subplot(111)
    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    
def setLabel(file_name):
    if(file_name.find("ruido")) >= 0: return 0
    else: return 1

In [17]:
# Load Training data.
sound_files = list()
with os.scandir("./Audios/training_set") as entries:
    for entry in entries:
        sound_files.append(entry.name)

labels = list()
for s in sound_files:
    labels.append(setLabel(s))
    
print("Audio files: ", sound_files)
print("Labels: ", labels)

Audio files:  ['ruido_1.wav', 'ruido_10.wav', 'ruido_2.wav', 'ruido_3.wav', 'ruido_4.wav', 'ruido_5.wav', 'ruido_6.wav', 'ruido_7.wav', 'ruido_8.wav', 'ruido_9.wav', 'sonido_1.wav', 'sonido_10.wav', 'sonido_2.wav', 'sonido_3.wav', 'sonido_4.wav', 'sonido_5.wav', 'sonido_6.wav', 'sonido_7.wav', 'sonido_8.wav', 'sonido_9.wav']
Labels:  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]


In [48]:
class NearestNeighbors():
    
    # Load data.
    def __init__(self, X, y, neighbors = 5):
        self.sounds = X
        self.labels = y
        self.neighbors = neighbors
        print("Classifier initialize!")
        
    def predict(self, sound):
        
        categories = [0, 0]
        signal_to_predict = self.processSound(sound)
        similar_vector = list()
        
        # Compute audio similarity.
        for s in self.sounds[0:2]:
            training_sound = self.processSound(s)
            fs = 10e3
            f, Cxy = scipy.signal.coherence(signal_to_predict, training_sound, fs, nperseg=1024)
            similar_vector.append(Cxy.mean())
            training_sound = None
            print(s, Cxy.mean())
            
        # Select K-nearest neigbors.
        temp = similar_vector.copy()
        nearest_neighbors = list()
        contador = 0
        
        print(temp)
        
        while(contador < 3):
            element = min(temp)
            nearest_neighbors.append(element)
            temp.remove(element)
            contador += 1
        
        temp = list()
            
        for k in nearest_neighbors:
            
            index = similar_vector.index(k)
            if labels[index] == 0: 
                categories[0] += 1
            else: 
                categories[1] += 1
                
        for c in range(0, len(categories)):
            categories[c] = categories[c] / len(self.sounds)
            
        temp = list()
        nearest_neighbors = list()
        contador = 0
        signal_to_predict = list()
        similar_vector = list()
        
        return categories
    
    # Process Sound.
    def processSound(self, sound):
        
        # Load audio.
        pathFile =  "./Audios/training_set/" + str(sound)
        (sample_rate, input_signal) = scipy.io.wavfile.read(pathFile, mmap = True)

        # Convert to mono.
        try:
            input_signal = input_signal[:,0]
        except: 
            pass
    
        # Calculate time array (max 10 seconds).
        time_array = np.arange(0, len(input_signal)/sample_rate, 1/sample_rate)   
        audio_duration_minutes = time_array[-1] / 60

        # Print audio duration.
        print("Sound ready! Duration in minutes: ", audio_duration_minutes)

        # Convert input signal to 4000 hz.
        input_signal_4000hz = [input_signal[i] for i in range(0, len(input_signal), sample_rate//4000)]
        
        # Return process audio.
        print(len(input_signal_4000hz))
        return input_signal_4000hz
        
    # Get Euclidean Distance between two points.
    def getEuclideanDistance(self, p1, p2):
        distance = 0
        for i in range(0, len(p1)):
            distance += (p1[i] - p2[i])**2
        return math.sqrt(distance)
    
    # Plot results. 
    def plotResults(self, res):
        plt.bar(["Ruido", "No Ruido"], res, width = 0.5, hatch = "/", color = "red", edgecolor = "black")
        plt.ylabel("Probability")
        plt.title("Sound Classification")
        plt.ylim(0, 1)
        plt.show()
        
        if res[0] > res[1]: print("Audio classified as NOISE.")
        elif res[1] > res[0]: print("Audio classified as SOUND.")
        

In [49]:
model = NearestNeighbors(sound_files, labels)

Classifier initialize!


In [50]:
res = model.predict(sound_files[8])

Sound ready! Duration in minutes:  0.16666628873771733
40091
Sound ready! Duration in minutes:  0.16833295540438398
40492
ruido_1.wav 0.013681208
Sound ready! Duration in minutes:  0.16666628873771733
40091
ruido_10.wav 0.01265207
[0.013681208, 0.01265207]


ValueError: min() arg is an empty sequence

In [None]:
model.plotResults(res)

In [None]:
# Graphing helper function
def setup_graph(title='', x_label='', y_label='', fig_size=None):
    fig = plt.figure()
    if fig_size != None:
        fig.set_size_inches(fig_size[0], fig_size[1])
    ax = fig.add_subplot(111)
    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)

In [None]:
pathFile =  "./Audios/ruido_traffic2.wav"
# Load audio file.
(sample_rate, input_signal) = scipy.io.wavfile.read(pathFile, mmap = True)
input_signal = input_signal[:,0]
time_array = np.arange(0, len(input_signal)/sample_rate, 1/sample_rate)   
audio_duration_minutes = time_array[-1] / 60
print("Sound ready! Duration in minutes: ", audio_duration_minutes)
input_signal_8000hz = [input_signal[i] for i in range(0, len(input_signal), sample_rate//8000)]

In [None]:
setup_graph(title=pathFile, x_label='time (in seconds)', y_label='amplitude', fig_size=(14,7))
_ = plt.plot(time_array, input_signal)

In [None]:
fft_out = np.fft.rfft(input_signal)
fft_mag = [np.sqrt(i.real**2 + i.imag**2)/len(fft_out) for i in fft_out]
num_samples = len(input_signal)
rfreqs = [(i*1.0/num_samples)*sample_rate for i in range(num_samples//2+1)]

setup_graph(title=pathFile, x_label='FFT Bins', y_label='magnitude', fig_size=(14,7))
_ = plt.plot(rfreqs[0:5000], fft_mag[0:5000])

In [None]:
#setup_graph(title='Spectrogram of diatonic scale ' + str(sample_rate) + " sample rate", x_label='time (in seconds)', y_label='frequency', fig_size=(14,8))
#_ = plt.specgram(input_signal, Fs=sample_rate)

input_signal_4000hz = [input_signal[i] for i in range(0, len(input_signal), sample_rate//4000)]
setup_graph(title='Spectrogram (4000Hz sample rate)', x_label='time (in seconds)', y_label='frequency', fig_size=(14,7))
_ = plt.specgram(input_signal_4000hz, Fs=4000)

In [None]:
f, t, Sxx = scipy.signal.spectrogram(input_signal)
plt.pcolormesh(t, f, Sxx)
plt.ylabel('Frequency [Hz]')
plt.xlabel('Time [sec]')
plt.show()