## 引入库

In [1]:
import keras
from keras.layers import Activation, Dense, Dropout, Conv2D, Flatten, MaxPooling2D
from keras.models import Sequential
import librosa
import librosa.display
import numpy as np
import pandas as pd
import random

## 读入数据并写入路径

In [2]:
data = pd.read_csv('E:/Project/Data/UrbanSound8K/metadata/UrbanSound8K.csv')
valid_data = data[['slice_file_name', 'fold' ,'classID', 'class']][ data['end']-data['start'] >= 3 ]
valid_data['path'] = 'fold' + valid_data['fold'].astype('str') + '/' + valid_data['slice_file_name'].astype('str')

In [3]:
valid_data

Unnamed: 0,slice_file_name,fold,classID,class,path
1,100263-2-0-117.wav,5,2,children_playing,fold5/100263-2-0-117.wav
2,100263-2-0-121.wav,5,2,children_playing,fold5/100263-2-0-121.wav
3,100263-2-0-126.wav,5,2,children_playing,fold5/100263-2-0-126.wav
4,100263-2-0-137.wav,5,2,children_playing,fold5/100263-2-0-137.wav
5,100263-2-0-143.wav,5,2,children_playing,fold5/100263-2-0-143.wav
...,...,...,...,...,...
8724,99812-1-0-1.wav,7,1,car_horn,fold7/99812-1-0-1.wav
8725,99812-1-0-2.wav,7,1,car_horn,fold7/99812-1-0-2.wav
8726,99812-1-1-0.wav,7,1,car_horn,fold7/99812-1-1-0.wav
8727,99812-1-2-0.wav,7,1,car_horn,fold7/99812-1-2-0.wav


## 进行音频的特征提取

In [4]:
from tqdm import tnrange, notebook

D=[]

for row in notebook.tqdm(valid_data.itertuples()): 
    print(row.path)
    print(row.classID)
    y1, sr1 = librosa.load("E:/Project/Data/UrbanSound8K/audio/" + row.path, duration=2.97)  
    ps = librosa.feature.melspectrogram(y=y1, sr=sr1)
    if ps.shape != (128, 128): 
            continue
    D.append( (ps, row.classID) )

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for row in tqdm_notebook(valid_data.itertuples()):


0it [00:00, ?it/s]

fold5/100263-2-0-117.wav
2
fold5/100263-2-0-121.wav
2
fold5/100263-2-0-126.wav
2
fold5/100263-2-0-137.wav
2
fold5/100263-2-0-143.wav
2
fold5/100263-2-0-161.wav
2
fold5/100263-2-0-3.wav
2
fold5/100263-2-0-36.wav
2
fold2/100652-3-0-0.wav
3
fold2/100652-3-0-1.wav
3
fold2/100652-3-0-2.wav
3
fold2/100652-3-0-3.wav
3
fold10/100795-3-0-0.wav
3
fold10/100795-3-1-0.wav
3
fold10/100795-3-1-1.wav
3
fold10/100795-3-1-2.wav
3
fold5/100852-0-0-0.wav
0
fold5/100852-0-0-1.wav
0
fold5/100852-0-0-10.wav
0
fold5/100852-0-0-11.wav
0
fold5/100852-0-0-12.wav
0
fold5/100852-0-0-13.wav
0
fold5/100852-0-0-14.wav
0
fold5/100852-0-0-15.wav
0
fold5/100852-0-0-16.wav
0
fold5/100852-0-0-17.wav
0
fold5/100852-0-0-18.wav
0
fold5/100852-0-0-19.wav
0
fold5/100852-0-0-2.wav
0
fold5/100852-0-0-20.wav
0
fold5/100852-0-0-21.wav
0
fold5/100852-0-0-22.wav
0
fold5/100852-0-0-23.wav
0
fold5/100852-0-0-24.wav
0
fold5/100852-0-0-25.wav
0
fold5/100852-0-0-26.wav
0
fold5/100852-0-0-27.wav
0
fold5/100852-0-0-28.wav
0
fold5/100852-0

## 进行训练和测试集的划分以及类型的标注

In [5]:
from keras.utils import np_utils

In [6]:
dataset = D
random.shuffle(dataset)

train = dataset[:7000]
test = dataset[7000:]

X_train, y_train = zip(*train)
X_test, y_test = zip(*test)

X_train = np.array([x.reshape( (128, 128, 1) ) for x in X_train]) #图像形状统一化
X_test = np.array([x.reshape( (128, 128, 1) ) for x in X_test])


y_train = np.array(np_utils.to_categorical(y_train, 10))
y_test = np.array(np_utils.to_categorical(y_test, 10))

## 网络构建

In [7]:
model = Sequential()
input_shape=(128, 128, 1)

model.add(Conv2D(24, (5, 5), strides=(1, 1), input_shape=input_shape))
model.add(MaxPooling2D((4, 2), strides=(4, 2)))
model.add(Activation('relu'))

model.add(Conv2D(48, (5, 5), padding="valid"))
model.add(MaxPooling2D((4, 2), strides=(4, 2)))
model.add(Activation('relu'))

model.add(Conv2D(48, (5, 5), padding="valid"))
model.add(Activation('relu'))

model.add(Flatten())
model.add(Dropout(rate=0.5))

model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(rate=0.5))

model.add(Dense(10))
model.add(Activation('softmax'))

In [10]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 124, 124, 24)      624       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 31, 62, 24)       0         
 )                                                               
                                                                 
 activation (Activation)     (None, 31, 62, 24)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 27, 58, 48)        28848     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 6, 29, 48)        0         
 2D)                                                             
                                                                 
 activation_1 (Activation)   (None, 6, 29, 48)         0

## 模型训练

In [8]:
model.compile(
    optimizer="Adam",
    loss="categorical_crossentropy",
    metrics=['accuracy'])

model.fit(
    x=X_train, 
    y=y_train,
    epochs=12,
    batch_size=128,
    validation_data= (X_test, y_test))

score = model.evaluate(
    x=X_test,
    y=y_test)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


## 准确率和loss查看

In [9]:
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 1.0714404582977295
Test accuracy: 0.640256941318512
