# 鳥鳴特徵頻率辨識：分析頻率組成並做鳥種預測。
Step1_取鳥類鳴叫聲作為資料庫  
Step2_前處理將時域訊號做傅立葉轉換為頻域訊號  
Step3_前處理將目標(target)做結構化處理  
Step4_切分訓練集與測試集並執行模型訓練及測試    
Step5_準確率評估  

In [52]:
import os 
import array
import pandas as pd
import numpy as np
import shutil
from pydub import AudioSegment
import audio2numpy as a2n
from scipy import fft
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.neighbors import KNeighborsClassifier
import warnings
warnings.filterwarnings('ignore')

### Step1_取鳥類鳴叫聲作為資料庫
從Kaggle下載114種鳥種鳴叫音檔，但不是每種鳥種都有夠多的音檔可以拿來訓練，故選擇至少有30個音檔的鳥種作為訓練對象。  
1_1 走訪特定目錄下的所有資料夾(資料夾皆以鳥種名稱命名)，列出前5個音檔(.mp3)達30個以上的資料夾名稱及數量。  
1_2 取出該5種鳥種之音檔並放置於新建的data資料夾中  

In [39]:
# Walk every folder under 'Voice of Birds' (each folder is named after a bird
# species) and collect the first 5 folders that hold at least 30 audio files.
# Only files with an .mp3 extension are counted.
# os.listdir returns entries in arbitrary order, so the listing is sorted to
# make "the first 5" reproducible across machines.
allList = sorted(os.listdir('Voice of Birds'))
birdspecies = []

for dirname in allList:
    if len(birdspecies) >= 5:
        break
    subpath = os.path.join('Voice of Birds', dirname)
    if not os.path.isdir(subpath):
        # Stray files next to the species folders are not candidates.
        continue
    # splitext looks at the last dot, so names without a dot or with several
    # dots are handled without raising or being misclassified.
    count = sum(
        1
        for soundtrack in os.listdir(subpath)
        if os.path.splitext(soundtrack)[1].lower() == '.mp3'
    )
    # Decide once per folder, after counting, so each species is appended at
    # most once no matter how many recordings it has.
    if count >= 30:
        birdspecies.append(dirname)
print(birdspecies)


['Andean Guan_sound', 'Andean Tinamou_sound', 'Band-tailed Guan_sound', 'Bartletts Tinamou_sound', 'Black-capped Tinamou_sound']


In [51]:
# Copy every file of the selected species into a freshly created 'data'
# folder; any existing 'data' folder is removed first.
if os.path.exists('data'):
    shutil.rmtree('data')
os.mkdir('data')

for species_dir in birdspecies:
    src_dir = os.path.join('Voice of Birds', species_dir)
    for filename in os.listdir(src_dir):
        src = os.path.join(src_dir, filename)
        dst = os.path.join('data', filename)
        shutil.copyfile(src, dst)

### Step2_前處理將時域訊號做傅立葉轉換為頻域訊號
欲了解該鳥鳴的頻率成份，必須轉為頻域訊號才能做出區別，且錄音設備靈敏度或目標鳥種的距離遠近程度不一，會造成原始的聲壓大小無法相互比較，需先對聲壓大小做歸一化(Normalization)。  
2_1 收集target：由於音檔檔名包含鳥種名稱，故存取檔名做target集合   
2_2 收集data：將時域訊號做FFT，並收集各音檔之Octave Band頻域之歸一化數據  
2_3 將data及target合併，並存成csv檔  

In [89]:
# Collect targets: every file name embeds the species name, so the label is
# the part of the file name that ends two characters before its first '.'.
# NOTE(review): those two characters are presumably a per-recording suffix —
# confirm against the actual file names.
soundlist = os.listdir('data')
target = [s[0: s.index('.')-2] for s in soundlist]

# Collect data: normalized octave-band spectrum levels of every recording.
# Octave-band edges in Hz. Band k spans (edge[k], edge[k+1]]; the nominal
# centre frequencies are 16, 31.5, 63, 125, 250, 500, 1000, 2000, 4000, 8000 Hz.
OCTAVE_BAND_EDGES = (11, 22, 44, 88, 177, 355, 710, 1420, 2840, 5680, 11360)


def _octave_band_levels(amp, freq, f0):
    """Return a (1, 10) array of bandwidth-averaged amplitudes per octave band.

    ``amp`` and ``freq`` are the rfft magnitude spectrum and its matching
    frequency axis; ``f0`` is the factor (1 / duration) applied to each band
    sum before dividing by the band width.
    """
    levels = []
    for low, high in zip(OCTAVE_BAND_EDGES[:-1], OCTAVE_BAND_EDGES[1:]):
        in_band = (low < freq) & (freq <= high)
        # Vectorized sum over the bins inside the band (instead of the Python
        # builtin sum over a full-length, mostly-zero array).
        levels.append(amp[in_band].sum() * f0 / (high - low))
    return np.array([levels])


def _min_max_normalize(values):
    """Rescale ``values`` to [0, 1]; an all-equal input maps to zeros, not NaN."""
    lowest = values.min()
    span = values.max() - lowest
    if span == 0:
        return np.zeros_like(values, dtype=float)
    return (values - lowest) / span


# The first row is an all-zero placeholder, kept so that `octave_value` has the
# same layout as before (one leading zero row, then one row per recording).
band_rows = [np.zeros([1, 10])]

for s in soundlist:
    path = os.path.join('data', s)
    # Decode the file once with pydub to read both duration and frame rate.
    segment = AudioSegment.from_mp3(path)
    T = segment.duration_seconds
    Fs = segment.frame_rate
    # NOTE(review): audio_from_file is assumed to return (samples, sample_rate);
    # only element 0 is used here — confirm.
    x = a2n.audio_from_file(path)
    # Flatten (channel-major after the transpose) and keep the first T*Fs samples.
    x = np.ravel(x[0].transpose())[0:int(T*Fs)]
    X = fft.rfft(x)
    freq = fft.rfftfreq(len(x), d=1 / Fs)
    amp = np.abs(X)
    f0 = 1/T

    bandvalue = _octave_band_levels(amp, freq, f0)
    # Min-max normalize per recording so that recordings of different loudness
    # become comparable.
    nor_bandvalue = _min_max_normalize(bandvalue)
    band_rows.append(nor_bandvalue)

# Stack once at the end instead of re-allocating with np.append on every file.
octave_value = np.concatenate(band_rows, axis=0)

print(target)
print(len(target))
print(octave_value)
print(len(octave_value))


['Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Guan', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Andean Tinamou', 'Band-tai

In [99]:
# Merge data and target, then save the result as a CSV file.
cols = ['16Hz','31.5Hz','63Hz','125Hz','250Hz','500Hz','1000Hz','2000Hz','4000Hz','8000Hz']
# `octave_value` starts with an all-zero placeholder row that has no matching
# target. Keep only the trailing rows, one per entry of `target`, so features
# and labels line up. (`delete_octave_value` was previously used here without
# being defined in this notebook.)
n_placeholder = len(octave_value) - len(target)
delete_octave_value = octave_value[n_placeholder:]
df_octave_value = pd.DataFrame(delete_octave_value, columns=cols)
df_target = pd.DataFrame(np.array(target), columns=['target'])

# Column-wise concatenation: the 10 band columns followed by the target column.
res = pd.concat([df_octave_value, df_target], axis=1)
res.to_csv('birdsoundrecog.csv', header=True, index=False)

### Step3_前處理將目標(target)做結構化處理
已整理出包含數據(data)及目標(target)之可訓練數據，但目標為鳥種名稱，必須先將鳥名作結構化處理轉換為數值。  
3_1 前處理將目標(target)做結構化處理  

In [53]:
# Load the prepared table of normalized band levels and targets from disk.
df_data = pd.read_csv(filepath_or_buffer='birdsoundrecog.csv')
df_data

Unnamed: 0,16Hz,31.5Hz,63Hz,125Hz,250Hz,500Hz,1000Hz,2000Hz,4000Hz,8000Hz,target
0,0.000000,0.013836,0.139232,0.199144,0.206934,0.202467,1.000000,0.980490,0.212575,0.207851,Andean Guan
1,0.000000,0.013836,0.139232,0.199144,0.206934,0.202467,1.000000,0.980490,0.212575,0.207851,Andean Guan
2,0.000000,0.013836,0.139232,0.199144,0.206934,0.202467,1.000000,0.980490,0.212575,0.207851,Andean Guan
3,0.000000,0.000050,0.001638,0.067103,0.233409,0.360025,0.963711,1.000000,0.048460,0.017908,Andean Guan
4,0.000000,0.000050,0.001638,0.067103,0.233409,0.360025,0.963711,1.000000,0.048460,0.017908,Andean Guan
...,...,...,...,...,...,...,...,...,...,...,...
145,0.009033,0.026994,0.022312,0.043509,0.014654,0.013201,1.000000,0.393745,0.058640,0.000000,Black-capped Tinamou
146,0.036702,0.014212,0.005409,0.000840,0.000000,0.010328,1.000000,0.343887,0.056378,0.008303,Black-capped Tinamou
147,0.000376,0.000105,0.000000,0.000282,0.001807,0.053563,1.000000,0.354200,0.030050,0.005231,Black-capped Tinamou
148,0.000376,0.000105,0.000000,0.000282,0.001807,0.053563,1.000000,0.354200,0.030050,0.005231,Black-capped Tinamou


In [54]:
# Encode the target: replace each species name with its integer class code.
size_mapping = dict(zip(
    [
        'Andean Guan',
        'Andean Tinamou',
        'Band-tailed Guan',
        'Bartletts Tinamou',
        'Black-capped Tinamou',
    ],
    range(5),
))
df_data['target'] = df_data['target'].map(size_mapping)
df_data

Unnamed: 0,16Hz,31.5Hz,63Hz,125Hz,250Hz,500Hz,1000Hz,2000Hz,4000Hz,8000Hz,target
0,0.000000,0.013836,0.139232,0.199144,0.206934,0.202467,1.000000,0.980490,0.212575,0.207851,0
1,0.000000,0.013836,0.139232,0.199144,0.206934,0.202467,1.000000,0.980490,0.212575,0.207851,0
2,0.000000,0.013836,0.139232,0.199144,0.206934,0.202467,1.000000,0.980490,0.212575,0.207851,0
3,0.000000,0.000050,0.001638,0.067103,0.233409,0.360025,0.963711,1.000000,0.048460,0.017908,0
4,0.000000,0.000050,0.001638,0.067103,0.233409,0.360025,0.963711,1.000000,0.048460,0.017908,0
...,...,...,...,...,...,...,...,...,...,...,...
145,0.009033,0.026994,0.022312,0.043509,0.014654,0.013201,1.000000,0.393745,0.058640,0.000000,4
146,0.036702,0.014212,0.005409,0.000840,0.000000,0.010328,1.000000,0.343887,0.056378,0.008303,4
147,0.000376,0.000105,0.000000,0.000282,0.001807,0.053563,1.000000,0.354200,0.030050,0.005231,4
148,0.000376,0.000105,0.000000,0.000282,0.001807,0.053563,1.000000,0.354200,0.030050,0.005231,4


### Step4_切分訓練集與測試集並執行模型訓練及測試  
4_1 將數據(含鳥種名稱結構化)切分為訓練集與測試集  
4_2 取訓練集做模型訓練  
4_3 取測試集做模型測試  

In [55]:
# Split the data (species names already encoded as integers) into a training
# set and a test set; 30% of the rows go to the test set, with a fixed seed.
feature_columns = [
    '16Hz', '31.5Hz', '63Hz', '125Hz', '250Hz',
    '500Hz', '1000Hz', '2000Hz', '4000Hz', '8000Hz',
]
x = df_data[feature_columns]
y = df_data[['target']]
X_train, X_test, Y_train, Y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=100,
)

In [56]:
# Train the model on the training set.
## Build the logistic-regression model (the duplicated `model = model =`
## assignment has been removed).
model = linear_model.LogisticRegression()
## Fit the data. Y_train is a one-column DataFrame, so it is flattened to a
## 1-D array explicitly instead of relying on an implicit conversion whose
## warning is silenced by the global warnings filter.
model.fit(X_train, np.ravel(Y_train))

LogisticRegression()

In [57]:
# Run the trained model on the test set and show the true labels next to the
# predicted ones.
prediction = model.predict(X_test)

for caption, values in (('Real Result: ', Y_test), ('Model Predict: ', prediction)):
    print(caption, values)

Real Result:       target
128       4
11        0
118       3
15        0
123       4
135       4
32        1
1         0
116       3
45        1
40        1
115       3
26        0
28        0
145       4
97        3
62        2
77        2
122       4
112       3
125       4
31        1
146       4
29        0
69        2
149       4
75        2
20        0
73        2
120       4
81        2
99        3
119       3
12        0
16        0
51        1
46        1
89        2
136       4
114       3
41        1
90        3
102       3
109       3
37        1
Model Predict:  [4 0 3 2 4 4 1 0 3 1 2 3 4 4 4 2 3 3 4 3 4 1 4 0 2 4 2 0 2 4 1 3 3 4 1 1 1
 2 4 1 1 3 2 3 1]


### Step5_準確率評估。
5_1 評估模型表現  

In [58]:
# Evaluate model performance: score the model on the training and test sets
# and print both values as percentages.
score_train = model.score(X_train, Y_train)
score_test = model.score(X_test, Y_test)
print(f'Training Accuracy :{score_train * 100!s}%')
print(f'Testing Accuracy :{score_test * 100!s}%')

Training Accuracy :70.47619047619048%
Testing Accuracy :73.33333333333333%
