# Load Data

In [1]:
import numpy as np
import glob
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split



In [2]:
birds_labels = {
    "other": 0,
    "comcuc": 1,
    "cowpig1": 2,
    "eucdov": 3,
    "eueowl1": 4,
    "grswoo": 5,
    "tawowl1": 6
}

birds = [bird for bird in list(birds_labels.keys()) if bird != "other"]

In [3]:
def unique_rows(matrix):
    indexes = []
    labels = []
    
    for i, row in enumerate(matrix):
        if np.unique(row).size == 1:
            indexes.append(i)
            labels.append(np.unique(row)[0])
    
    indexes = np.array(indexes)
    labels = np.array(labels)
    
    return indexes, labels

In [4]:
def load_data(bird):
    labels = []
    features = []
    bird_id = birds_labels[bird]
    
    path = f'./data/{bird}/'
    labels_files = glob.glob(path + '*labels.npy')
    counter = None
    
    for i, file in enumerate(labels_files):
        print(f'{bird}: {i + 1}/{len(labels_files)}', end='\r')
        counter = i
        data_id = path + ''.join(file.split(".labels.npy")).split('/')[-1] + '.npy'
        
        annotations = np.load(file)
        feature = np.load(data_id)
        
        ind, label = unique_rows(annotations)
        
        if len(ind) == 0:
            continue
        
        labels.append(label)
        features.append(feature[ind])

    print('\n')
    labels = np.concatenate(labels)
    features = np.concatenate(features)
    
    return labels, features

In [5]:
X = []
y = []

for bird in birds:
    labels, features = load_data(bird)
    X.append(features)
    y.append(labels)
    
X = np.concatenate(X)
y = np.concatenate(y)

comcuc: 200/200

cowpig1: 200/200

eucdov: 200/200

eueowl1: 200/200

grswoo: 200/200

tawowl1: 200/200



### Feature Selection

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

clf = DecisionTreeClassifier()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

acc = accuracy_score(y_test, y_pred)

In [7]:
data_array = []

with open('./data/feature_names.txt', 'r') as file:
    for line in file:
        line = line.strip()  # Remove leading/trailing whitespaces
        if line:
            data_array.append(line)

data_array = np.array(data_array)
print(data_array)

['zcr_mean' 'zcr_std' 'yin_0' 'yin_1' 'yin_2' 'yin_3' 'yin_4' 'yin_5'
 'yin_6' 'yin_7' 'yin_8' 'yin_9' 'yin_10' 'yin_11' 'yin_12' 'yin_13'
 'raw_melspect_mean_0' 'raw_melspect_mean_1' 'raw_melspect_mean_2'
 'raw_melspect_mean_3' 'raw_melspect_mean_4' 'raw_melspect_mean_5'
 'raw_melspect_mean_6' 'raw_melspect_mean_7' 'raw_melspect_mean_8'
 'raw_melspect_mean_9' 'raw_melspect_mean_10' 'raw_melspect_mean_11'
 'raw_melspect_mean_12' 'raw_melspect_mean_13' 'raw_melspect_mean_14'
 'raw_melspect_mean_15' 'raw_melspect_mean_16' 'raw_melspect_mean_17'
 'raw_melspect_mean_18' 'raw_melspect_mean_19' 'raw_melspect_mean_20'
 'raw_melspect_mean_21' 'raw_melspect_mean_22' 'raw_melspect_mean_23'
 'raw_melspect_mean_24' 'raw_melspect_mean_25' 'raw_melspect_mean_26'
 'raw_melspect_mean_27' 'raw_melspect_mean_28' 'raw_melspect_mean_29'
 'raw_melspect_mean_30' 'raw_melspect_mean_31' 'raw_melspect_mean_32'
 'raw_melspect_mean_33' 'raw_melspect_mean_34' 'raw_melspect_mean_35'
 'raw_melspect_mean_36' 'raw_me

In [8]:
importances = clf.feature_importances_
k = 50
indexes = np.argsort(importances)[::-1][:k]

for feature in data_array[indexes]:
    print(feature)

cln_contrast_mean_3
raw_melspect_mean_6
raw_mfcc_d2_std_0
raw_melspect_mean_9
cln_melspect_mean_4
cln_contrast_mean_4
raw_power_std
raw_melspect_mean_13
cln_melspect_mean_6
raw_melspect_mean_5
raw_melspect_mean_4
raw_melspect_mean_7
raw_melspect_mean_45
cln_melspect_mean_8
raw_melspect_std_8
raw_contrast_mean_3
raw_mfcc_mean_15
raw_melspect_std_7
cln_mfcc_mean_0
raw_contrast_mean_6
cln_contrast_mean_2
raw_mfcc_mean_16
cln_melspect_mean_7
raw_mfcc_mean_9
raw_mfcc_mean_3
raw_mfcc_mean_10
raw_mfcc_mean_1
cln_melspect_mean_19
raw_power_mean
cln_contrast_mean_5
raw_melspect_mean_2
cln_melspect_mean_11
cln_melspect_mean_24
raw_melspect_mean_15
raw_contrast_mean_2
raw_melspect_mean_3
raw_mfcc_mean_2
raw_contrast_mean_4
raw_melspect_mean_0
cln_melspect_mean_9
raw_melspect_mean_1
raw_mfcc_d2_std_1
raw_mfcc_d_mean_0
cln_melspect_mean_5
cln_melspect_mean_12
raw_melspect_mean_11
cln_melspect_mean_10
raw_melspect_mean_8
raw_melspect_mean_30
raw_melspect_std_4


In [9]:
X_small = X[:, indexes]

In [10]:
import pandas as pd

In [11]:
%store X_small X y indexes

Stored 'X_small' (ndarray)
Stored 'X' (ndarray)
Stored 'y' (ndarray)
Stored 'indexes' (ndarray)
