In [1]:
import librosa
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import scale, robust_scale, minmax_scale, maxabs_scale

In [2]:
SAMPLE_RATE = 44100

In [8]:
# Path
files_form = "*.wav"
back_path = "/Users/junhyuckwoo/capstone/TestFile/ProceedData/background_t_44100_1s/" + files_form
drone1_path = "/Users/junhyuckwoo/capstone/TestFile/ProceedData/syma_t_44100_1s/" + files_form
drone2_path = "/Users/junhyuckwoo/capstone/TestFile/ProceedData/mavic_t_44100_1s/" + files_form
drone3_path = "/Users/junhyuckwoo/capstone/TestFile/ProceedData/phantom2_t_44100_1s/" + files_form
print("Background Path: ", back_path)
print("Syma Path: ", drone1_path)
print("Mavic Path: ", drone2_path)
print("Phantom2 Path: ", drone3_path)

Background Path:  /Users/junhyuckwoo/capstone/TestFile/ProceedData/background_t_44100_1s/*.wav
Syma Path:  /Users/junhyuckwoo/capstone/TestFile/ProceedData/syma_t_44100_1s/*.wav
Mavic Path:  /Users/junhyuckwoo/capstone/TestFile/ProceedData/mavic_t_44100_1s/*.wav
Phantom2 Path:  /Users/junhyuckwoo/capstone/TestFile/ProceedData/phantom2_t_44100_1s/*.wav


In [9]:
back_files = glob.glob(back_path)
drone1_files = glob.glob(drone1_path)
drone2_files = glob.glob(drone2_path)
drone3_files = glob.glob(drone3_path)
files_num = min(len(back_files), len(drone1_files), len(drone2_files), len(drone3_files))
print("# Background: ", len(back_files))
print("# Syma: ", len(drone1_files))
print("# Mavic: ", len(drone2_files))
print("# Phantom2: ", len(drone3_files))
print("# Files: ", files_num)    

# Background:  4300
# Syma:  1245
# Mavic:  2475
# Phantom2:  5245
# Files:  1245


In [88]:
dump = [back_files, drone1_files, drone2_files, drone3_files]
input_data = [[],[],[],[]]

In [97]:
for i in range(4):
    for j in range(files_num):    
        raw, sr = librosa.load(dump[i][j], sr=SAMPLE_RATE)
        norm = maxabs_scale(raw)
        buf = librosa.feature.spectral_contrast(norm).T
        if j==0:
            input_data[i] = buf
        else:        
            input_data[i] = np.concatenate((input_data[i], buf))

In [98]:
print("Bee shape:  ", input_data[0].shape)
print("Drone shape1: ", input_data[1].shape)
print("Drone shape2: ", input_data[2].shape)
print("Drone shape3: ", input_data[3].shape)

Bee shape:   (108315, 7)
Drone shape1:  (108315, 7)
Drone shape2:  (108315, 7)
Drone shape3:  (108315, 7)


In [107]:
y = [[],[],[],[]]
for i in range(4):
    y[i] = np.ones(len(input_data[i]))*i
    print("y" ,i, y[i].shape)

y 0 (108315,)
y 1 (108315,)
y 2 (108315,)
y 3 (108315,)


In [148]:
train0, test0, tr_y0, te_y0 = train_test_split(input_data[0], y[0], test_size=0.2, shuffle=True)
train1, test1, tr_y1, te_y1 = train_test_split(input_data[1], y[1], test_size=0.2, shuffle=True)
train2, test2, tr_y2, te_y2 = train_test_split(input_data[2], y[2], test_size=0.2, shuffle=True)
train3, test3, tr_y3, te_y3 = train_test_split(input_data[3], y[3], test_size=0.2, shuffle=True)

In [149]:
train = train0
test = test0
tr_y = tr_y0
te_y = te_y0

In [150]:
train = np.concatenate((train, train1))
train = np.concatenate((train, train2))
train = np.concatenate((train, train3))

test = np.concatenate((test, test1))
test = np.concatenate((test, test2))
test = np.concatenate((test, test3))

tr_y = np.concatenate((tr_y, tr_y1))
tr_y = np.concatenate((tr_y, tr_y2))
tr_y = np.concatenate((tr_y, tr_y3))

te_y = np.concatenate((te_y, te_y1))
te_y = np.concatenate((te_y, te_y2))
te_y = np.concatenate((te_y, te_y3))

In [151]:
from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier()

In [152]:
rfc.fit(train, tr_y)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [156]:
x = test[87:175]
y = te_y[87:175]

In [157]:
prediction = rfc.predict(x)

In [158]:
from sklearn.metrics import precision_recall_fscore_support
p,r,f,s = precision_recall_fscore_support(y, prediction, average='micro')
print("F-Score:", round(f,3))
from sklearn.metrics import accuracy_score
print("Accuracy: ", accuracy_score(y, prediction))

from sklearn.metrics import classification_report
print(classification_report(y, prediction))
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y, prediction))

F-Score: 0.773
Accuracy:  0.7727272727272727
             precision    recall  f1-score   support

        0.0       1.00      0.77      0.87        88
        1.0       0.00      0.00      0.00         0
        2.0       0.00      0.00      0.00         0
        3.0       0.00      0.00      0.00         0

avg / total       1.00      0.77      0.87        88

[[68  1  2 17]
 [ 0  0  0  0]
 [ 0  0  0  0]
 [ 0  0  0  0]]


  'recall', 'true', average, warn_for)


In [159]:
from sklearn.externals import joblib

In [None]:
joblib.dump(rfc, "11012018_randomforest_drones_background_n_sc.pkl")