In [1]:
# Install a blanket "ignore" filter so no warning is displayed by any later cell.
# NOTE(review): this also hides diagnostics emitted by the libraries used below;
# consider narrowing the filter to specific categories or modules.
import warnings
warnings.filterwarnings('ignore')

In [2]:
import concurrent.futures
import os
import threading

import librosa
import numpy as np

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [3]:
# Folder that holds the .wav recordings and their matching *REF.txt annotations.
# The location is machine-specific, so it can be overridden through the
# ADC2004_DATASET_PATH environment variable; the original path stays the default.
dataset_path = os.environ.get(
  "ADC2004_DATASET_PATH",
  "/home/adit/Downloads/EE798P/Datasets/Melody Estimation/adc2004_full_set",
)

In [4]:
# Collect every .wav recording in the dataset folder. os.scandir yields entries
# in arbitrary order, so the paths are sorted to make the file order (and
# everything derived from it) the same on every run.
with os.scandir(dataset_path) as entries:
  wav_files = sorted(
    os.path.join(dataset_path, entry.name)
    for entry in entries
    if entry.name.endswith('.wav')
  )

# Each recording "<name>.wav" is paired with "<name>REF.txt". Only the trailing
# extension is swapped; str.replace would also rewrite a ".wav" that happens to
# appear earlier in the path.
data_files = [wav_path[:-len(".wav")] + "REF.txt" for wav_path in wav_files]

In [5]:
# Shared accumulators filled by make_data: x_data receives one feature matrix
# per segment, y_data the label for the segment at the same position.
x_data, y_data = [], []

In [6]:
# Serialises the paired update of x_data / y_data. Each list.extend is atomic on
# its own, but the two calls together are not: without the lock another worker
# thread can slip in between them and the features and labels stop lining up.
_data_lock = threading.Lock()


def make_data(wav_file, data_file):
  """Extract per-segment MFCC features and binary labels for one recording.

  The annotation file is read as rows of whitespace-separated floats; blank
  lines are skipped. Column 0 of each row is used as a time stamp and column 1
  as the value that decides the label. Every pair of consecutive rows defines
  one segment running from the first row's time to the next row's time, so the
  last row only serves as an end boundary.

  For each segment the matching slice of audio is loaded with librosa (the
  times are passed as ``offset``/``duration``), 13 MFCCs are computed, and the
  transposed MFCC matrix is stored as the feature. The label is 0 when the
  segment's column-1 value truncates to 0 and 1 otherwise.
  NOTE(review): column 1 is presumably a pitch/frequency value where 0 means
  "no melody" -- confirm against the dataset description.

  Args:
    wav_file: Path of the audio file to read.
    data_file: Path of the annotation text file belonging to ``wav_file``.

  Returns:
    None. Features are appended to the module-level ``x_data`` and labels to
    ``y_data``, as one atomic step so the two lists stay index-aligned even
    when this function runs in several threads.

  Raises:
    ValueError: If a non-blank annotation field is not a valid float.
    IndexError: If a non-blank annotation row has fewer than two columns.
  """
  # Extracting the annotations
  rows = []
  with open(data_file, 'r') as f:
    for line in f:
      fields = line.split()
      if not fields:
        # A blank (often trailing) line carries no annotation; keeping it as
        # an empty row would raise IndexError when segments are built.
        continue
      rows.append([float(field) for field in fields])

  # Extracting the MFCCs
  # NOTE(review): the audio file is re-opened for every segment; loading it once
  # and slicing would be faster but could change values at segment edges.
  x_data_temp = []
  y_data_temp = []
  for current_row, next_row in zip(rows, rows[1:]):
    start_time, end_time = current_row[0], next_row[0]
    y, sr = librosa.load(wav_file, offset=start_time, duration=end_time - start_time)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    x_data_temp.append(mfcc.T)
    y_data_temp.append(0 if int(current_row[1]) == 0 else 1)

  # Publish both lists under the lock so no other thread can interleave its
  # own extend calls between ours.
  with _data_lock:
    x_data.extend(x_data_temp)
    y_data.extend(y_data_temp)

  print("[INFO] Done " + wav_file.split("/")[-1])

In [7]:
# Process all recordings in parallel, one make_data call per (wav, annotation) pair.
max_threads = 8
with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
  futures = [executor.submit(make_data, wav_file, data_file) for wav_file, data_file in zip(wav_files, data_files)]
  # concurrent.futures.wait() only blocks; it never re-raises what a worker
  # raised, so a failing file would silently be missing from the dataset.
  # Calling result() on every future surfaces the first such error here.
  for future in concurrent.futures.as_completed(futures):
    future.result()

[INFO] Done jazz4.wav
[INFO] Done opera_fem4.wav
[INFO] Done daisy4.wav
[INFO] Done midi2.wav
[INFO] Done daisy3.wav
[INFO] Done midi1.wav
[INFO] Done pop3.wav
[INFO] Done opera_male5.wav
[INFO] Done opera_fem2.wav
[INFO] Done jazz3.wav
[INFO] Done jazz2.wav
[INFO] Done daisy2.wav
[INFO] Done pop2.wav
[INFO] Done daisy1.wav
[INFO] Done pop1.wav
[INFO] Done opera_male3.wav
[INFO] Done midi3.wav
[INFO] Done midi4.wav
[INFO] Done jazz1.wav
[INFO] Done pop4.wav


In [8]:
# Turn the accumulated Python lists into arrays: every per-segment feature
# matrix is flattened to a single row, and the labels become a 1-D array.
x_data = np.array(
  [segment_features.flatten() for segment_features in np.array(x_data)]
)
y_data = np.array(y_data)

In [9]:
# Hold out 20% of the samples for evaluation; the fixed seed makes the split
# repeatable for a given ordering of the input arrays.
x_train, x_test, y_train, y_test = train_test_split(
  x_data,
  y_data,
  test_size=0.2,
  random_state=42,
)

In [10]:
from sklearn.ensemble import RandomForestClassifier

In [11]:
# Sweep the number of trees from 1 to 29 and report accuracy on the held-out set.
# NOTE(review): the same held-out set is used to compare settings, so the best
# score printed here is an optimistic estimate.
for i in range(1, 30):
  # A random forest is randomised (bootstrap samples, feature sub-sampling);
  # fixing random_state makes every run print the same accuracy, so the
  # differences between forest sizes are not just seed noise.
  model = RandomForestClassifier(n_estimators=i, n_jobs=-1, random_state=42)
  model.fit(x_train, y_train)
  y_pred = model.predict(x_test)
  accuracy = accuracy_score(y_test, y_pred)
  print(f"[{i}] Accuracy: {accuracy * 100:.2f}%")

[1] Accuracy: 82.88%
[2] Accuracy: 79.29%
[3] Accuracy: 85.82%
[4] Accuracy: 84.60%
[5] Accuracy: 86.58%
[6] Accuracy: 86.24%
[7] Accuracy: 87.12%
[8] Accuracy: 86.73%
[9] Accuracy: 87.50%
[10] Accuracy: 87.06%
[11] Accuracy: 87.41%
[12] Accuracy: 87.32%
[13] Accuracy: 87.84%
[14] Accuracy: 87.80%
[15] Accuracy: 87.75%
[16] Accuracy: 87.84%
[17] Accuracy: 87.81%
[18] Accuracy: 87.98%
[19] Accuracy: 88.07%
[20] Accuracy: 88.10%
[21] Accuracy: 87.87%
[22] Accuracy: 87.92%
[23] Accuracy: 88.14%
[24] Accuracy: 88.16%
[25] Accuracy: 88.15%
[26] Accuracy: 87.81%
[27] Accuracy: 88.16%
[28] Accuracy: 88.02%
[29] Accuracy: 88.17%


In [12]:
from sklearn.neighbors import KNeighborsClassifier

In [13]:
# Sweep the neighbour count from 1 to 29 and report accuracy on the held-out set.
for i in range(1, 30):
  model = KNeighborsClassifier(
    n_neighbors=i,
    p=1,
    weights='uniform',
    n_jobs=-1,
  ).fit(x_train, y_train)  # fit() hands back the fitted estimator itself
  y_pred = model.predict(x_test)
  accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
  print(f"[{i}] Accuracy: {accuracy * 100:.2f}%")

[1] Accuracy: 85.78%
[2] Accuracy: 83.48%
[3] Accuracy: 87.18%
[4] Accuracy: 86.17%
[5] Accuracy: 87.84%
[6] Accuracy: 87.33%
[7] Accuracy: 88.06%
[8] Accuracy: 87.83%
[9] Accuracy: 88.23%
[10] Accuracy: 87.73%
[11] Accuracy: 88.14%
[12] Accuracy: 87.92%
[13] Accuracy: 88.36%
[14] Accuracy: 88.28%
[15] Accuracy: 88.29%
[16] Accuracy: 88.15%
[17] Accuracy: 88.19%
[18] Accuracy: 88.18%
[19] Accuracy: 88.40%
[20] Accuracy: 88.39%
[21] Accuracy: 88.43%
[22] Accuracy: 88.43%
[23] Accuracy: 88.46%
[24] Accuracy: 88.47%
[25] Accuracy: 88.45%
[26] Accuracy: 88.46%
[27] Accuracy: 88.44%
[28] Accuracy: 88.47%
[29] Accuracy: 88.53%
