In [153]:
import pandas as pd
import numpy as np
import csv
import os

import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

import librosa
import gdown
import soundfile as sf
from scipy.signal import butter, filtfilt

from include import helpers

In [154]:
#important parameters for audio processing
base_folder_path = "/Users/gabrielalvesiervolino/Desktop/Coding/machineLearning/voice_recog_final_project/audio_processing/data"

audio_path = [base_folder_path+"/voice_gabe/"]

for i in audio_path:
    print(os.listdir(i))

['Kayıt (23).wav', 'Kayıt (19).wav', 'Kayıt (15).wav', 'Kayıt (14).wav', 'Kayıt (18).wav', 'Kayıt (22).wav', 'Kayıt (6).wav', 'Kayıt (29).wav', 'Kayıt (13).wav', 'Kayıt (25).wav', 'Kayıt (24).wav', 'Kayıt (12).wav', 'Kayıt (28).wav', 'Kayıt (7).wav', 'Kayıt (11).wav', 'Kayıt (4).wav', 'Kayıt (27).wav', 'Kayıt (8).wav', 'Kayıt (9).wav', 'Kayıt (26).wav', 'Kayıt (5).wav', 'Kayıt (10).wav', 'Kayıt.wav', 'Kayıt (21).wav', 'Kayıt (17).wav', 'Kayıt (2).wav', 'Kayıt (3).wav', 'Kayıt (16).wav', 'Kayıt (20).wav']


In [155]:
def std_len(data, target_length):
    print(f"Model input shape: {data.shape}")
    if len(data) < target_length:
        data = np.pad(data, (0, target_length - len(data)))
    else:
        data = data[:target_length]
    print(f"Model output shape: {data.shape}")
    return data

In [156]:
def resample_audio(path, sampling_rate=500):
    y, sr = librosa.load(path, sr=sampling_rate)
    y, _ = librosa.effects.trim(y)
    if sr != sampling_rate:
        librosa.resample(y, sr, sampling_rate)
    return y  

In [157]:
def lowpass(data, cutoff_freq, sample_rate, order=4):
    nyquist = 0.5 * sample_rate
    normal_cutoff = cutoff_freq / nyquist
    b, a = butter(order, normal_cutoff, btype='low', analog=False)
    filtered_data = filtfilt(b, a, data)

    print(f"Filtered audio shape: {filtered_data.shape}")

    return filtered_data
 

In [158]:
def compute_logmel_spectrogram(y, sr, n_mels=40, hop_length=512):
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, hop_length=hop_length)
    logmel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)
    return logmel_spectrogram

In [159]:
def resample_data(folders, sr):
    compute_list = []
    for i in folders:
        for j in os.listdir(i):
            print(i+j)
            final_audio = resample_audio(i+j, sr)
            final_audio = lowpass(final_audio, cutoff_freq= (sr/2)-1, sample_rate=sr) 
            final_audio = std_len(final_audio, 80000)

            sf.write(i+j, final_audio, samplerate=sr)
    return compute_list
            

In [160]:
comp_list = resample_data(audio_path, 16000)
with open("test.txt", "w") as f:
    print(comp_list, file=f)

/Users/gabrielalvesiervolino/Desktop/Coding/machineLearning/voice_recog_final_project/audio_processing/data/voice_gabe/Kayıt (23).wav
Filtered audio shape: (24064,)
Model input shape: (24064,)
Model output shape: (80000,)
/Users/gabrielalvesiervolino/Desktop/Coding/machineLearning/voice_recog_final_project/audio_processing/data/voice_gabe/Kayıt (19).wav
Filtered audio shape: (32768,)
Model input shape: (32768,)
Model output shape: (80000,)
/Users/gabrielalvesiervolino/Desktop/Coding/machineLearning/voice_recog_final_project/audio_processing/data/voice_gabe/Kayıt (15).wav
Filtered audio shape: (64000,)
Model input shape: (64000,)
Model output shape: (80000,)
/Users/gabrielalvesiervolino/Desktop/Coding/machineLearning/voice_recog_final_project/audio_processing/data/voice_gabe/Kayıt (14).wav
Filtered audio shape: (24576,)
Model input shape: (24576,)
Model output shape: (80000,)
/Users/gabrielalvesiervolino/Desktop/Coding/machineLearning/voice_recog_final_project/audio_processing/data/voic