In [63]:
import librosa

%matplotlib inline
import matplotlib.pyplot as plt
import librosa.display
import numpy as np
import os
import pandas as pd

In [64]:
class AudioParser:
    """Load one audio file and derive a plot or a spectral feature vector from it.

    Attributes:
        filename: Path of the audio file passed to ``librosa.load``.
        sample_rate: Rate in Hz passed to ``librosa.load`` as ``sr``.
    """

    def __init__(self, filename, sample_rate = 22050):
        self.filename = filename
        # Store the caller's rate (not a fixed 22050) so non-default rates take effect.
        self.sample_rate = sample_rate

    def display_audio(self):
        """Draw the waveform and a log-frequency dB spectrogram in one figure."""
        audio_path = self.filename
        sample_rate = self.sample_rate

        x, sr = librosa.load(audio_path, sr=sample_rate)
        plt.figure(figsize=(28, 20))

        plt.subplot(2, 1, 1)
        # Newer librosa releases provide waveshow in place of waveplot;
        # use whichever one the installed version offers.
        draw_wave = getattr(librosa.display, 'waveshow', None)
        if draw_wave is None:
            draw_wave = librosa.display.waveplot
        draw_wave(x, sr=sr)
        plt.title('wave')

        plt.subplot(2, 1, 2)
        X = librosa.stft(x)
        Xdb = librosa.amplitude_to_db(abs(X))
        # Display-only clamp: everything below -15 dB is flattened to -30 dB.
        Xdb[Xdb < -15] = -30
        librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='log')
        plt.title('{0} spectrogram'.format(audio_path))
        plt.colorbar()

    def get_spec_feature(self,frames=100,threshold=0.8):
        """Return the mean thresholded spectrogram value for each frequency row.

        The dB spectrogram is min-max scaled to [0, 1], every value below
        ``threshold`` is set to 0, and the first ``frames`` STFT columns are
        averaged along axis 1.

        Args:
            frames: Number of leading STFT columns included in the average.
            threshold: Cut-off on the normalised scale; smaller values are zeroed.

        Returns:
            A 1-D numpy array with one entry per spectrogram row.
        """
        audio_path = self.filename
        sample_rate = self.sample_rate

        # STFT uses librosa's defaults (n_fft = 2048 per the original note).
        x, sr = librosa.load(audio_path, sr=sample_rate)
        X = librosa.stft(x)
        Xdb = librosa.amplitude_to_db(abs(X))

        # Min-max normalisation; values below the threshold are ignored.
        Xmin, Xmax = Xdb.min(), Xdb.max()
        if Xmax == Xmin:
            # A constant spectrogram (e.g. pure silence) would divide by zero
            # and fill the feature with NaN; report an all-zero feature instead.
            return np.zeros(Xdb.shape[0], dtype=Xdb.dtype)
        Xdb = (Xdb - Xmin) / (Xmax - Xmin)
        Xdb[Xdb < threshold] = 0

        return np.mean(Xdb[:, 0:frames], axis=1)

In [65]:
def get_audios(dir):
    """Return the sorted '<dir>/<name>' paths of entries in ``dir`` ending in ".wav".

    Only the immediate entries reported by ``os.listdir`` are considered, and
    the suffix match is case-sensitive.
    """
    wav_paths = []
    for entry in os.listdir(dir):
        if entry.endswith(".wav"):
            wav_paths.append(dir + '/' + entry)
    return sorted(wav_paths)

In [66]:
# Build the feature matrix and the label table from the audio folders.
# Each sub-directory of the current working directory is one category; every
# .wav file found by get_audios() in it contributes one feature row.

# Maps directory name -> numeric category id. The reserved 'next_id' entry
# holds the next unused id.
category_map = {'next_id': 0}

feature = []
category = []
category_name = []

# os.listdir() returns entries in arbitrary order; sorting keeps the
# directory -> id assignment identical from one run to the next.
for path in sorted(os.listdir()):
    if not os.path.isdir(path):
        continue
    for file in get_audios(path):
        ap = AudioParser(file)
        feature.append(ap.get_spec_feature())

        # Ids are handed out lazily, so directories without .wav files
        # never consume one.
        if path not in category_map:
            category_map[path] = category_map['next_id']
            category_map['next_id'] += 1
        category_id = category_map[path]

        category.append(category_id)
        category_name.append(path)

        print("{0} handled".format(file))

data = {'category':category,
        'category_name':category_name}

# to_csv does not create missing parent directories.
os.makedirs('dataset', exist_ok=True)

df = pd.DataFrame(data)
df.to_csv('dataset/data_map.csv',index=False)

df = pd.DataFrame(feature)
df.to_csv('dataset/data.csv',index=False,header=False)

YEWS300_5-6_50/0001项0_7-30℃_100%_噪音_001_1_1.wav handled
YEWS300_5-6_50/0001项0_7-30℃_100%_噪音_001_1_2.wav handled
YEWS300_5-6_50/0001项0_7-30℃_100%_噪音_002_1_3.wav handled
YEWS300_5-6_50/0001项0_7-30℃_100%_噪音_002_1_4.wav handled
YEWS300_5-6_50/0001项0_7-30℃_100%_噪音_003_1_5.wav handled
YEWS300_5-6_50/0001项0_7-30℃_100%_噪音_003_1_6.wav handled
YEWS300_5-6_50/0001项0_7-30℃_100%_噪音_004_1_7.wav handled
YEWS300_5-6_50/0001项0_7-30℃_100%_噪音_004_1_8.wav handled
YEWS300_5-6_50/0001项0_7-30℃_100%_噪音_005_1_1.wav handled
YEWS300_5-6_50/0001项0_7-30℃_100%_噪音_005_1_9.wav handled
YEWS300_5-6_50/0001项0_7-30℃_100%_噪音_006_1_2.wav handled
YEWS300_5-6_50/0001项0_7-30℃_100%_噪音_006_1_3.wav handled
YEWS300_5-6_50/0001项0_7-30℃_100%_噪音_007_1_3.wav handled
YEWS300_5-6_50/0001项0_7-30℃_100%_噪音_007_1_4.wav handled
YGWH300_4-6_50/0101_6.7-29.4℃_100%_噪音_001_1_1.wav handled
YGWH300_4-6_50/0101_6.7-29.4℃_100%_噪音_002_1_2.wav handled
YGWH300_4-6_50/0101_6.7-29.4℃_100%_噪音_003_1_3.wav handled
YGWH300_4-6_50/0101_6.7-29.4℃_100%_噪音_004_

In [67]:
## Classify the audio clips with a Gaussian naive Bayes model
from sklearn.naive_bayes import GaussianNB

## Load the labels and the feature rows from the dataset CSV files
df = pd.read_csv('dataset/data_map.csv')
df1 = pd.read_csv('dataset/data.csv',header=None)
target = df['category'].values
data = df1.values

# The model is fitted and evaluated on the same rows, so the count printed
# below is training-set error rather than a held-out estimate.
gnb = GaussianNB()
gnb.fit(data, target)
y_pred = gnb.predict(data)

n_points = data.shape[0]
n_mislabeled = (target != y_pred).sum()
print("Number of mislabeled points out of a total %d points : %d"  % (n_points, n_mislabeled))

Number of mislabeled points out of a total 66 points : 1
