In [1]:
import os
import sys

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import IPython.display as ipd
from IPython.display import Audio, display, Markdown
import librosa
import librosa.display

# Norlmalizado de audios a 16 kHz y mono

In [7]:
import os
import librosa
import soundfile as sf


input_folder = "AudioVoiceData/audDataStdrWav/Metod3Wav"  
output_folder = "AudioVoiceData/audDataStdrWav/met3norm16" 
target_sr = 16000  
mono = True  


def normalize_audio(input_path, output_path, target_sr, mono):
    
    y, sr = librosa.load(input_path, sr=target_sr, mono=mono)
    y_normalized = librosa.util.normalize(y)
    sf.write(output_path, y_normalized, target_sr)


for filename in os.listdir(input_folder):
    if filename.endswith(".wav"): 
        input_path = os.path.join(input_folder, filename)
        output_path = os.path.join(output_folder, filename)
        normalize_audio(input_path, output_path, target_sr, mono)
        print(f"Audio normalizado guardado: {output_path}")

print("Completado")

Audio normalizado guardado: AudioVoiceData/audDataStdrWav/met3norm16\AndresCalzada-03_01.wav
Audio normalizado guardado: AudioVoiceData/audDataStdrWav/met3norm16\AndresCalzada-03_10.wav
Audio normalizado guardado: AudioVoiceData/audDataStdrWav/met3norm16\AndresCalzada-03_2.wav
Audio normalizado guardado: AudioVoiceData/audDataStdrWav/met3norm16\AndresCalzada-03_3.wav
Audio normalizado guardado: AudioVoiceData/audDataStdrWav/met3norm16\AndresCalzada-03_4.wav
Audio normalizado guardado: AudioVoiceData/audDataStdrWav/met3norm16\AndresCalzada-03_5.wav
Audio normalizado guardado: AudioVoiceData/audDataStdrWav/met3norm16\AndresCalzada-03_6.wav
Audio normalizado guardado: AudioVoiceData/audDataStdrWav/met3norm16\AndresCalzada-03_7.wav
Audio normalizado guardado: AudioVoiceData/audDataStdrWav/met3norm16\AndresCalzada-03_8.wav
Audio normalizado guardado: AudioVoiceData/audDataStdrWav/met3norm16\AndresCalzada-03_9.wav
Audio normalizado guardado: AudioVoiceData/audDataStdrWav/met3norm16\ArathDani

# Data augmentation 

In [13]:
from audiomentations import Compose, AddGaussianNoise, PitchShift, TimeStretch


input_folder = "AudioVoiceData/audDataStdrWav/met1norm16"  
output_folder = "AudioVoiceData/audDataStdrWav/met1norm16"  
target_sr = 16000 
num_augmentations = 1 

augmenter = Compose([
    AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5), 
    PitchShift(min_semitones=-4, max_semitones=4, p=0.5),
    TimeStretch(min_rate=0.8, max_rate=1.2, p=0.5), 
])


def augment_audio(input_path, output_path):

    y, sr = librosa.load(input_path, sr=target_sr)
    y_augmented = augmenter(y, sr)
    sf.write(output_path, y_augmented, target_sr)


for filename in os.listdir(input_folder):
    if filename.endswith(".wav"):
        base_name = filename.split("_")[0]  
        audio_number = int(filename.split("_")[1].split(".")[0])  
        
      
        start_number = audio_number + 1
        
      
        for i in range(num_augmentations):
            
            new_audio_number = start_number + i
            output_filename = f"{base_name}_{new_audio_number:02d}.wav"  
            output_path = os.path.join(output_folder, output_filename)
            
            # Aplicar aumentación
            input_path = os.path.join(input_folder, filename)
            augment_audio(input_path, output_path)
            print(f"Audio aumentado guardado: {output_path}")

print("Completado")

Audio aumentado guardado: AudioVoiceData/audDataStdrWav/met1norm16\AndresCalzada-01_02.wav
Audio aumentado guardado: AudioVoiceData/audDataStdrWav/met1norm16\AndresCalzada-01_11.wav
Audio aumentado guardado: AudioVoiceData/audDataStdrWav/met1norm16\AndresCalzada-01_03.wav
Audio aumentado guardado: AudioVoiceData/audDataStdrWav/met1norm16\AndresCalzada-01_04.wav
Audio aumentado guardado: AudioVoiceData/audDataStdrWav/met1norm16\AndresCalzada-01_05.wav
Audio aumentado guardado: AudioVoiceData/audDataStdrWav/met1norm16\AndresCalzada-01_06.wav
Audio aumentado guardado: AudioVoiceData/audDataStdrWav/met1norm16\AndresCalzada-01_07.wav
Audio aumentado guardado: AudioVoiceData/audDataStdrWav/met1norm16\AndresCalzada-01_08.wav
Audio aumentado guardado: AudioVoiceData/audDataStdrWav/met1norm16\AndresCalzada-01_09.wav
Audio aumentado guardado: AudioVoiceData/audDataStdrWav/met1norm16\AndresCalzada-01_10.wav
Audio aumentado guardado: AudioVoiceData/audDataStdrWav/met1norm16\ArathDaniela-01_02.wav


# Extraccion de caracteristicas MFCC y etiquetado de los datos

In [16]:
import os
import librosa
import numpy as np
import pandas as pd


input_folder = "AudioVoiceData/audDataStdrWav/met1norm16"  
target_sr = 16000  
n_mfcc = 13  


X = []  
y = []  


def extract_mfcc(input_path):
    
    y, sr = librosa.load(input_path, sr=target_sr)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    mfcc_mean = np.mean(mfcc, axis=1)
    
    return mfcc_mean


for filename in os.listdir(input_folder):
    if filename.endswith(".wav"):  
      
        input_path = os.path.join(input_folder, filename)
        mfcc = extract_mfcc(input_path)
        X.append(mfcc)
        
      
        label = filename.split("-")[0]  
        y.append(label)


X = np.array(X)
y = np.array(y)


df = pd.DataFrame(X, columns=[f"MFCC_{i+1}" for i in range(n_mfcc)])
df["Label"] = y  
output_csv = "AudioVoiceData/mfcc_features.csv"
df.to_csv(output_csv, index=False)

print(f"MFCC guardados en: {output_csv}")

MFCC guardados en: AudioVoiceData/mfcc_features.csv
