In [2]:
import glob
import numpy as np
import os
import random
import librosa

In [4]:
def bool_random(prob):
    r = random.random()
    if r <= prob:
        return True
    else:
        return False


def read_voice_files(dir_path):
    filenames = glob.glob(dir_path + "*.wav")
    name_to_files = dict()
    for i in range(len(filenames)):
        filepath = filenames[i]
        person_name = filepath.split("\\")[-1].split(".")[0].split("-")[:2]
        person_name = person_name[0] + "-" + person_name[1]
        if person_name in name_to_files.keys():
            name_to_files[person_name].append(filepath)
        else:
            name_to_files[person_name] = [filepath]
    return name_to_files


def train_validation_split(validation_size, name_to_files):
    X_t = dict()
    X_v = dict()
    for k, v in name_to_files.items():
        for f in v:
            belongs_to_validation = bool_random(validation_size)
            if belongs_to_validation:
                if k in X_v:
                    X_v[k].append(f)
                else:
                    X_v[k] = [f]
            else:
                if k in X_t:
                    X_t[k].append(f)
                else:
                    X_t[k] = [f]

    return X_t, X_v

In [6]:
# Returns list of lists: [correct number, student's name, matrix of a spectogram]
def process_data(X_train):
    ls = parse_features(X_train)
    for features in ls:
        features[2] = create_spectogram(features[2])
    return ls


# Creates matrix of a spectogram 
def create_spectogram(wav_file):
    signal, sampling_rate = librosa.load(wav_file)
    matrix = librosa.amplitude_to_db(np.abs(librosa.stft(signal)), ref=np.max)

    #matrix = mfcc(signal, sampling_rate, nfft=551)          WE MIGHT USE THIS OR ANOTHER ALGORITHM TO CREATE MATRIX (python_speech_features library)
    #librosa.display.specshow(mfcc_feat, y_axis='linear')    TO PLOT CREATED SPECTOGRAM IMAGE
    #plt.show()
    
    return matrix


# Returns list of lists: [correct number, student's name, path to audio file]
def parse_features(X_train) :
    ls = []
    for wav_name, wav_files in X_train.items():
        name = wav_name[wav_name.index('/') + 1:]
        
        for audio_file in wav_files:
            number = int(''.join(ch for ch in list(audio_file) if ch.isdigit()))
            ls.append([name, number, audio_file])
    
    return ls

In [11]:

VALIDATION_SET_SIZE = 0.2

files_dict = read_voice_files("data_cut/")
X_train, X_validation = train_validation_split(VALIDATION_SET_SIZE, files_dict)
print(X_train)
print(X_validation)

{'data_cut/gega-dara': ['data_cut/gega-dara-4a.wav', 'data_cut/gega-dara-2a.wav', 'data_cut/gega-dara-3d.wav', 'data_cut/gega-dara-1d.wav', 'data_cut/gega-dara-5.wav', 'data_cut/gega-dara-5d.wav', 'data_cut/gega-dara-3b.wav', 'data_cut/gega-dara-4e.wav', 'data_cut/gega-dara-3c.wav', 'data_cut/gega-dara-2e.wav', 'data_cut/gega-dara-3a.wav', 'data_cut/gega-dara-5e.wav', 'data_cut/gega-dara-5b.wav', 'data_cut/gega-dara-4c.wav', 'data_cut/gega-dara-1c.wav', 'data_cut/gega-dara-1b.wav', 'data_cut/gega-dara-2.wav', 'data_cut/gega-dara-2b.wav', 'data_cut/gega-dara-5c.wav', 'data_cut/gega-dara-1a.wav', 'data_cut/gega-dara-1.wav', 'data_cut/gega-dara-4b.wav', 'data_cut/gega-dara-4d.wav', 'data_cut/gega-dara-2d.wav'], 'data_cut/zviad-noza': ['data_cut/zviad-noza-5a.wav', 'data_cut/zviad-noza-5c.wav', 'data_cut/zviad-noza-3a.wav', 'data_cut/zviad-noza-5b.wav', 'data_cut/zviad-noza-1c.wav', 'data_cut/zviad-noza-4d.wav', 'data_cut/zviad-noza-5e.wav', 'data_cut/zviad-noza-4b.wav', 'data_cut/zviad-no

In [12]:
processed_data = process_data(X_train)

In [13]:
processed_data

[['gega-dara',
  4,
  array([[-61.915657, -54.92092 , -57.61151 , ..., -77.84877 , -58.981747,
          -50.71528 ],
         [-51.889275, -47.52514 , -49.61875 , ..., -69.25568 , -58.056763,
          -51.54586 ],
         [-43.10831 , -61.89044 , -45.266396, ..., -55.98145 , -55.848145,
          -46.582905],
         ...,
         [-80.      , -80.      , -80.      , ..., -80.      , -80.      ,
          -80.      ],
         [-80.      , -80.      , -80.      , ..., -80.      , -80.      ,
          -80.      ],
         [-80.      , -80.      , -80.      , ..., -80.      , -80.      ,
          -80.      ]], dtype=float32)],
 ['gega-dara',
  2,
  array([[-72.24462 , -72.88414 , -79.35991 , ..., -80.      , -68.98165 ,
          -62.196705],
         [-69.84151 , -70.06539 , -77.69521 , ..., -75.70672 , -74.324326,
          -60.158215],
         [-55.96306 , -55.871727, -56.158924, ..., -63.36372 , -60.3273  ,
          -58.545444],
         ...,
         [-80.      , -80.      