In [1]:
import concurrent.futures
import os
import pickle as pkl
import threading

import librosa
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

In [2]:
# Root of the dataset. Set the PTDB_TUG_DATA_DIR environment variable to point
# at another copy; otherwise the original local path is used.
# os.path.join(..., "") guarantees a trailing separator, which the string
# concatenations below (and in make_dataset) rely on.
base_path = os.path.join(
  os.environ.get("PTDB_TUG_DATA_DIR", "/home/adit/Downloads/archive/Data/"),
  "",
)

# Audio recordings and their matching annotation folders, per speaker group.
male_audio_folder = base_path + "Audio/Male/PTDB-TUG/"
male_data_folder = base_path + "Annotation/Male/PTDB-TUG/"
female_audio_folder = base_path + "Audio/Female/PTDB-TUG/"
female_data_folder = base_path + "Annotation/Female/PTDB-TUG/"

In [3]:
def get_data(file: str, window: float = 0.02):
  """Parse an annotation file and cut its intervals into fixed-length windows.

  Every line containing "intervals [" is treated as the header of a record
  whose next three lines hold, after "= ", the start time, the end time and
  the label text. The label is 0 when that text contains the character "0"
  and 1 otherwise.

  Each interval is then walked from its start in steps of `window` seconds.
  The final window of every interval (the one whose end coincides with the
  interval end, which is normally a clipped remainder) is discarded, so only
  windows lying strictly inside the interval are returned.

  Args:
    file: Path of the annotation file to read.
    window: Window length in seconds. Defaults to 0.02.

  Returns:
    A list of [start, end, label] lists; start and end are rounded to
    3 decimals, label is 0 or 1.

  Raises:
    ValueError: If `window` is not a positive number.
    IndexError: If an interval header is followed by fewer than three lines.
  """
  # A non-positive (or NaN) step would never advance through an interval.
  if not window > 0:
    raise ValueError("window must be positive")

  with open(file, 'r') as f:
    lines = f.readlines()

  raw_data = []
  for i, line in enumerate(lines):
    if "intervals [" in line:
      start = float(lines[i+1].split("= ")[1])
      end = float(lines[i+2].split("= ")[1])
      value = 0 if "0" in lines[i+3].split("= ")[1] else 1
      raw_data.append([start, end, value])

  processed_data = []
  for start, end, value in raw_data:
    current_start = start
    while current_start < end:
      current_end = min(current_start + window, end)
      # Skip the window that reaches the interval end (see docstring).
      if current_end != end:
        processed_data.append([round(current_start, 3), round(current_end, 3), value])
      current_start = current_end

  return processed_data

In [4]:
# Shared accumulators filled by make_dataset: one feature entry in x_data and
# the matching 0/1 label in y_data per audio segment.
x_data, y_data = [], []

In [5]:
# make_dataset runs on several worker threads at once. Features and labels live
# in two separate lists, so both must be published under one lock; otherwise
# another thread can slip in between the two writes and shift labels against
# features.
_dataset_lock = threading.Lock()


def make_dataset(audio_file: os.DirEntry, data_folder: str) -> None:
  """Extract MFCC features for every annotated segment of one audio file.

  The annotation file is looked up in `data_folder` under the audio file's
  name with ".wav" replaced by ".TextGrid". For each segment returned by
  get_data, that slice of the audio is loaded and 13 MFCCs are computed; the
  transposed MFCC matrix goes to the global `x_data` and the segment label to
  the global `y_data`.

  Results are collected locally and appended to the globals in a single
  locked step, so the two lists stay index-aligned and a file that fails
  part-way contributes nothing.

  Args:
    audio_file: Directory entry of the audio file (its `.name` and `.path`
      are used).
    data_folder: Folder containing the annotation files, with a trailing
      path separator.
  """
  data_file = data_folder + audio_file.name.split("/")[-1].split(".wav")[0] + ".TextGrid"
  segments = get_data(data_file)

  features = []
  labels = []
  for start_time, end_time, has_speech in segments:
    # NOTE(review): the file is re-opened for every segment; loading it once
    # and slicing would be faster but may change values at segment edges.
    y, sr = librosa.load(audio_file.path, offset=start_time, duration=end_time-start_time)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    features.append(mfccs.T)
    labels.append(has_speech)

  with _dataset_lock:
    x_data.extend(features)
    y_data.extend(labels)

In [6]:
max_threads = 8

# Run make_dataset over every entry of each audio folder on a thread pool.
# concurrent.futures.wait() never re-raises worker exceptions, so failures are
# collected from the futures afterwards and summarised instead of being lost.
# Processing stays best-effort: one bad file does not abort the whole run.
for label, audio_folder, data_folder in (
  ("male", male_audio_folder, male_data_folder),
  ("female", female_audio_folder, female_data_folder),
):
  audio_files = list(os.scandir(audio_folder))
  with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
    futures = [executor.submit(make_dataset, audio_file, data_folder) for audio_file in audio_files]
    concurrent.futures.wait(futures)

  failures = [
    (audio_file.name, future.exception())
    for audio_file, future in zip(audio_files, futures)
    if future.exception() is not None
  ]
  if failures:
    first_name, first_error = failures[0]
    print(f"{len(failures)} file(s) failed in {label} folder; first: {first_name}: {first_error!r}")
  print(f"Done {label} folder")



Done male folder
Done female folder


In [7]:
# Collapse each segment's MFCC matrix into a single 1-D feature row and turn
# both accumulators into NumPy arrays for scikit-learn.
x_data = np.array([segment_mfcc.reshape(-1) for segment_mfcc in np.array(x_data)])
y_data = np.array(y_data)

In [8]:
# Hold out 20% of the segments for evaluation; the seed is fixed so the split
# is the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
  x_data,
  y_data,
  test_size=0.2,
  random_state=42,
)

In [9]:
# Sweep the neighbour count from 1 to 29 (Manhattan distance, uniform weights)
# and print the hold-out accuracy for each setting.
# NOTE(review): accuracy is measured on x_test/y_test, the same split that is
# then used to pick the final neighbour count.
for i in range(1, 30):
  model = KNeighborsClassifier(
    n_neighbors=i,
    p=1,
    weights='uniform',
    n_jobs=-1,
  ).fit(x_train, y_train)
  y_pred = model.predict(x_test)
  accuracy = accuracy_score(y_test, y_pred)
  print(f"[{i}] Accuracy: {accuracy * 100:.2f}%")
  

[1] Accuracy: 93.39%
[2] Accuracy: 93.00%
[3] Accuracy: 94.47%
[4] Accuracy: 94.47%
[5] Accuracy: 94.85%
[6] Accuracy: 94.88%
[7] Accuracy: 95.00%
[8] Accuracy: 95.02%
[9] Accuracy: 95.07%
[10] Accuracy: 95.08%
[11] Accuracy: 94.97%
[12] Accuracy: 95.10%
[13] Accuracy: 95.01%
[14] Accuracy: 95.09%
[15] Accuracy: 95.10%
[16] Accuracy: 95.13%
[17] Accuracy: 95.14%
[18] Accuracy: 95.11%
[19] Accuracy: 95.14%
[20] Accuracy: 95.19%
[21] Accuracy: 95.16%
[22] Accuracy: 95.13%
[23] Accuracy: 95.13%
[24] Accuracy: 95.16%
[25] Accuracy: 95.16%
[26] Accuracy: 95.18%
[27] Accuracy: 95.15%
[28] Accuracy: 95.19%
[29] Accuracy: 95.15%


In [10]:
# Train the final classifier on the complete dataset (train and test portions
# together) with 20 neighbours, then persist it to model.pkl.
final_model = KNeighborsClassifier(
  n_neighbors=20,
  p=1,
  weights='uniform',
  n_jobs=-1,
).fit(x_data, y_data)

with open("model.pkl", "wb") as f:
  pkl.dump(final_model, f)