# In [2]:
import pandas as pd
import librosa
import os
import numpy as np

def extract_mfcc(file_path, n_mfcc=128):
    """Return the per-coefficient mean of an audio file's MFCCs.

    Args:
        file_path: Path of the audio file; passed unchanged to
            ``librosa.load``.
        n_mfcc: Number of MFCC coefficients requested from
            ``librosa.feature.mfcc``. Defaults to 128, the value that was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        The MFCC matrix averaged along axis 1 with ``np.mean``
        (presumably one value per coefficient, averaged over time frames;
        see the librosa documentation for the matrix layout).
    """
    # librosa.load is called with its default arguments, so decoding and
    # sample-rate handling follow the library defaults.
    y, sr = librosa.load(file_path)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    return np.mean(mfcc, axis=1)

def load_data(data_folder):
    """Collect MFCC feature vectors and actor labels from a dataset folder.

    Walks ``<data_folder>/Song`` and then ``<data_folder>/Speech``. Every
    sub-directory found there is treated as one actor, and every entry
    inside it is passed to ``extract_mfcc``. Entries that fail to process
    are reported on stdout and skipped (best-effort loading).

    Args:
        data_folder: Root directory containing the ``Song`` and ``Speech``
            folders.

    Returns:
        A ``(data, labels)`` tuple of two equally long lists: ``data`` holds
        the value returned by ``extract_mfcc`` for each processed file and
        ``labels`` holds the last two characters of the corresponding actor
        folder name.

    Raises:
        OSError: If the ``Song`` or ``Speech`` folder cannot be listed.
    """
    data = []
    labels = []

    for group in ("Song", "Speech"):
        dataset_folder = os.path.join(data_folder, group)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # row order (and any seeded split made from it) reproducible.
        for actor in sorted(os.listdir(dataset_folder)):
            actor_folder = os.path.join(dataset_folder, actor)
            # Skip stray files (e.g. OS metadata) that sit next to the actor
            # folders; listing them would raise and abort the whole load.
            if not os.path.isdir(actor_folder):
                continue
            # Slice the folder name rather than the joined path so a short
            # name can never pick up the path separator.
            label = actor[-2:]
            for file_name in sorted(os.listdir(actor_folder)):
                file_path = os.path.join(actor_folder, file_name)
                try:
                    mfcc_mean = extract_mfcc(file_path)
                except Exception as e:
                    # Deliberately broad: one unreadable file must not stop
                    # the rest of the dataset from loading.
                    print(f"Error al procesar el archivo {file_path}: {str(e)}")
                    continue
                data.append(mfcc_mean)
                labels.append(label)

    return data, labels

# Extract one feature vector and one actor label per processed recording.
data, labels = load_data("sound_dataset")

# Build a single table: the 'actor' label column followed by the feature
# columns (which keep pandas' default integer column names).
data_np = np.array(data)
df_data = pd.DataFrame(data_np)
df = pd.concat(
    [pd.DataFrame(data=labels, columns=['actor']), df_data],
    axis=1,
)

# Seeded 70% sample for training; every remaining row goes to the test set.
train = df.sample(frac=0.7, random_state=0)
test = df.drop(index=train.index)

# Write both splits without the index column.
for _frame, _csv_name in ((train, 'train.csv'), (test, 'test.csv')):
    _frame.to_csv(_csv_name, index=False)
