In [9]:
# import os
# import pandas as pd
# import numpy as np
# from pydub import AudioSegment
# import glob

# # Definir una lista para almacenar los nombres de las carpetas base
# carpeta_base_list = []

# # Función para extraer secuencias de emociones continuas
# def extract_audio_emotions(emotions):
#     audio_emotions = []
#     current_emotion = emotions[0]
#     current_sequence = [current_emotion]

#     for emotion in emotions[1:]:
#         if emotion == current_emotion:
#             current_sequence.append(emotion)
#         else:
#             audio_emotions.append(current_sequence)
#             current_emotion = emotion
#             current_sequence = [current_emotion]

#     audio_emotions.append(current_sequence)  # Agregar la última secuencia
#     return audio_emotions

# # Función para leer el archivo de emociones y crear un DataFrame
# def read_emotions_file(txt_file, carpeta_base):
#     with open(txt_file, 'r') as file:
#         lines = file.readlines()[1:]  # Leer todas las líneas excepto la primera
#         emotions = [int(line.strip()) for line in lines]
    
#     # Agregar las emociones al DataFrame con el nombre de la carpeta base
#     emotions_df = pd.DataFrame({'emotion': emotions, 'carpeta_base': carpeta_base})
#     return emotions_df

# # Función para crear segmentos de audio basados en emociones
# def create_audio_segments(emotions, segment_length=5):
#     audio_segments = []
#     current_segment = []
#     last_emotion = None

#     for i, emotion in enumerate(emotions):
#         if last_emotion is None:
#             current_segment.append(emotion)
#         elif emotion == last_emotion:
#             current_segment.append(emotion)
#         else:
#             while len(current_segment) < segment_length:
#                 current_segment.append(last_emotion)
#             audio_segments.append(current_segment)
#             current_segment = [emotion]
#         last_emotion = emotion

#     if len(current_segment) == segment_length:
#         audio_segments.append(current_segment)

#     return audio_segments

# # Función para crear archivos de audio a partir de los segmentos
# def create_audio_files(audio_file, audio_segments, carpeta_base):
#     df = pd.DataFrame(columns=['audio_file', 'emotion', 'carpeta_base'])  # Agregar 'carpeta_base' al DataFrame
#     audio = AudioSegment.from_file(audio_file)
#     audio_name = os.path.splitext(os.path.basename(audio_file))[0]

#     if not os.path.exists(output_folder):
#         os.makedirs(output_folder)

#     for i, segment in enumerate(audio_segments):
#         segment_start = i * 1000  # Cada segmento representa 1 segundo (1000 milisegundos)
#         segment_end = (i + len(segment)) * 1000
#         output_file = os.path.join(output_folder, f"{audio_name}_{i + 1}.wav")
#         segment_audio = audio[segment_start:segment_end]
#         segment_audio.export(output_file, format="wav")
#         emotion = segment[0]  # Se asume que la primera emoción en el segmento representa todo el segmento
#         df.loc[len(df)] = [output_file, emotion, carpeta_base]
#     return df

# # Función para encontrar archivos de texto de emociones
# def find_txt_file(base_name, txt_folder):
#     txt_files = []
#     for dirpath, _, filenames in os.walk(txt_folder):
#         for filename in filenames:
#             if filename.startswith(base_name) and filename.endswith(".txt"):
#                 txt_files.append(os.path.join(dirpath, filename))
#     return txt_files

# # Definir rutas de carpetas y archivos
# if __name__ == "__main__":
#     txt_folder = r"D:\sentirobots\Datasets\AffWild2\Annotations\EXPR_Classification_Challenge"
#     audio_folder = r"D:\sentirobots\Datasets\AffWild2\audios"
#     output_folder = r"D:\sentirobots\Datasets\AffWild2\Audios_AffWild2\Audios cortados"

#     if not os.path.exists(output_folder):
#         os.makedirs(output_folder)

#     df = pd.DataFrame(columns=['audio_file', 'emotion', 'carpeta_base'])  # Agregar 'carpeta_base' al DataFrame

#     # Iterar sobre los archivos de audio en la carpeta de audio
#     for root, dirs, audio_files in os.walk(audio_folder):
#         for audio_file in audio_files:
#             if audio_file.endswith(".wav"):
#                 base_name = os.path.splitext(audio_file)[0]
#                 txt_files = find_txt_file(base_name, txt_folder)

#                 if not txt_files:
#                     print(f"Emotion file not found for {audio_file}. Skipping...")
#                     continue

#                 for txt_file in txt_files:
#                     carpeta_base = os.path.basename(os.path.dirname(txt_file))
#                     emotions_df = read_emotions_file(txt_file, carpeta_base)  # Pasar la carpeta_base a la función
#                     audio_emotions = extract_audio_emotions(emotions_df['emotion'])
#                     audio_segments = create_audio_segments(audio_emotions)
#                     audio_file_path = os.path.join(root, audio_file)
#                     segment_df = create_audio_files(audio_file_path, audio_segments, carpeta_base)
#                     df = pd.concat([df, segment_df], ignore_index=True)

#     df["nombre_archivo"] = df["audio_file"].apply(lambda x: os.path.basename(x.strip()))

In [10]:
import os
import pandas as pd
import numpy as np
from pydub import AudioSegment
import glob

# Accumulator for base-folder names; nothing in the visible cells reads or fills it.
carpeta_base_list = list()

# Group an emotion sequence into runs of consecutive equal labels.
def extract_audio_emotions(emotions):
    """Split ``emotions`` into runs of consecutive, equal values.

    Args:
        emotions: Any iterable of labels (list, pandas Series, generator...).
            Values are compared with ``==``.

    Returns:
        A list of lists. Each inner list holds one run of identical
        consecutive labels, in input order. An empty input yields ``[]``.
    """
    # Iterating (instead of indexing with ``emotions[0]``) keeps this safe for
    # empty inputs and for Series whose index does not contain the label 0.
    remaining = iter(emotions)
    try:
        current_emotion = next(remaining)
    except StopIteration:
        return []

    audio_emotions = []
    current_sequence = [current_emotion]

    for emotion in remaining:
        if emotion == current_emotion:
            current_sequence.append(emotion)
        else:
            # The label changed: close the current run and start a new one.
            audio_emotions.append(current_sequence)
            current_emotion = emotion
            current_sequence = [current_emotion]

    audio_emotions.append(current_sequence)  # flush the last run
    return audio_emotions

# Read an emotion annotation file into a DataFrame.
def read_emotions_file(txt_file, carpeta_base):
    """Load the integer labels stored one per line in ``txt_file``.

    The first line of the file is skipped and every remaining non-blank line
    is parsed with ``int``.

    Args:
        txt_file: Path of the annotation text file.
        carpeta_base: Value stored in the ``carpeta_base`` column of every row.

    Returns:
        A DataFrame with the columns ``emotion`` and ``carpeta_base``.

    Raises:
        ValueError: If a non-blank line after the first is not an integer.
    """
    with open(txt_file, 'r') as file:
        next(file, None)  # skip the first line; tolerate a completely empty file
        # Blank lines (e.g. a trailing newline) are ignored instead of
        # crashing ``int('')``.
        emotions = [int(line.strip()) for line in file if line.strip()]

    # ``carpeta_base`` is a scalar, so pandas repeats it on every row.
    emotions_df = pd.DataFrame({'emotion': emotions, 'carpeta_base': carpeta_base})
    return emotions_df

def create_audio_segments(emotions, segment_length=5):
    """Group consecutive equal items into segments of at least ``segment_length``.

    Consecutive items that compare equal are collected into one segment. When
    the item changes, the finished segment is padded with copies of its last
    item until it holds ``segment_length`` entries (longer segments are kept
    as they are) and is emitted.

    The trailing segment is never padded: it is emitted only when it already
    holds at least ``segment_length`` items. It used to be kept only when its
    length was exactly ``segment_length``, which silently dropped longer
    final runs.

    Args:
        emotions: Iterable of items comparable with ``==``.
        segment_length: Minimum number of entries per emitted segment.

    Returns:
        A list of segments, each a list of items.
    """
    audio_segments = []
    current_segment = []
    last_emotion = None

    for emotion in emotions:
        if last_emotion is None or emotion == last_emotion:
            current_segment.append(emotion)
        else:
            # Pad short segments; a non-positive count extends by nothing.
            missing = segment_length - len(current_segment)
            current_segment.extend([last_emotion] * missing)
            audio_segments.append(current_segment)
            current_segment = [emotion]
        last_emotion = emotion

    if len(current_segment) >= segment_length:
        audio_segments.append(current_segment)

    return audio_segments

# Build the per-segment metadata table for one audio file.
def create_audio_files(audio_file, audio_segments, carpeta_base):
    """Return one metadata row per segment of ``audio_file``.

    Despite its name, this version writes no audio: it only tabulates the
    segments.

    Args:
        audio_file: Value stored in the ``audio_file`` column of every row.
        audio_segments: Sequence of non-empty segments. The first item of each
            segment is taken as the label of the whole segment.
        carpeta_base: Value stored in the ``carpeta_base`` column of every row.

    Returns:
        A DataFrame with the columns ``audio_file``, ``emotion`` and
        ``carpeta_base``. It is empty, with those columns, when
        ``audio_segments`` is empty.

    Raises:
        IndexError: If a segment is empty.
    """
    segment_data = [[audio_file, segment[0], carpeta_base] for segment in audio_segments]
    return pd.DataFrame(segment_data, columns=['audio_file', 'emotion', 'carpeta_base'])

def find_txt_file(base_name, txt_folder):
    """Find the ``.txt`` files under ``txt_folder`` that belong to ``base_name``.

    A file matches when its name without extension is exactly ``base_name`` or
    is ``base_name`` followed by an underscore-separated suffix. A bare prefix
    test would also pair ``video1`` with ``video10.txt``.

    NOTE(review): assumes extra files for the same recording are named
    ``<base_name>_<suffix>.txt`` -- confirm against the annotation folder.

    Args:
        base_name: Audio file name without its extension.
        txt_folder: Root folder, searched recursively.

    Returns:
        A list with the full path of every matching file (possibly empty).
    """
    txt_files = []
    suffixed_prefix = base_name + "_"
    for dirpath, _, filenames in os.walk(txt_folder):
        for filename in filenames:
            stem, extension = os.path.splitext(filename)
            if extension != ".txt":
                continue
            if stem == base_name or stem.startswith(suffixed_prefix):
                txt_files.append(os.path.join(dirpath, filename))
    return txt_files


if __name__ == "__main__":
    # Dataset locations (absolute paths on the author's machine).
    txt_folder = r"D:\sentirobots\Datasets\AffWild2\Annotations\EXPR_Classification_Challenge"
    audio_folder = r"D:\sentirobots\Datasets\AffWild2\audios"
    output_folder = r"D:\sentirobots\Datasets\AffWild2\Audios_AffWild2\Audios cortados"

    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Collect the per-file tables and concatenate once at the end; calling
    # pd.concat inside the loop re-copies every accumulated row each time.
    segment_frames = []

    for root, dirs, audio_files in os.walk(audio_folder):
        for audio_file in audio_files:
            if not audio_file.endswith(".wav"):
                continue

            base_name = os.path.splitext(audio_file)[0]
            txt_files = find_txt_file(base_name, txt_folder)

            if not txt_files:
                print(f"Emotion file not found for {audio_file}. Skipping...")
                continue

            audio_file_path = os.path.join(root, audio_file)
            for txt_file in txt_files:
                # The folder that holds the annotation file names the split.
                carpeta_base = os.path.basename(os.path.dirname(txt_file))
                emotions_df = read_emotions_file(txt_file, carpeta_base)
                audio_emotions = extract_audio_emotions(emotions_df['emotion'])
                # The segmenter receives the list of runs, so each stored
                # "emotion" value is itself a list of labels.
                audio_segments = create_audio_segments(audio_emotions)
                segment_df = create_audio_files(audio_file_path, audio_segments, carpeta_base)
                segment_frames.append(segment_df)

    if segment_frames:
        df = pd.concat(segment_frames, ignore_index=True)
    else:
        # Nothing matched: keep the expected columns so later cells still run.
        df = pd.DataFrame(columns=['audio_file', 'emotion', 'carpeta_base'])

    df["nombre_archivo"] = df["audio_file"].apply(lambda x: os.path.basename(x.strip()))

Emotion file not found for 119-30-848x480.wav. Skipping...
Emotion file not found for 122-60-1920x1080-5.wav. Skipping...
Emotion file not found for 126-30-1080x1920.wav. Skipping...
Emotion file not found for 130-25-1280x720.wav. Skipping...
Emotion file not found for 134-30-1280x720.wav. Skipping...
Emotion file not found for 136-30-1920x1080.wav. Skipping...
Emotion file not found for 14-30-1920x1080.wav. Skipping...
Emotion file not found for 16-30-1920x1080.wav. Skipping...
Emotion file not found for 166.wav. Skipping...
Emotion file not found for 167.wav. Skipping...
Emotion file not found for 168.wav. Skipping...
Emotion file not found for 169.wav. Skipping...
Emotion file not found for 171.wav. Skipping...
Emotion file not found for 172.wav. Skipping...
Emotion file not found for 175.wav. Skipping...
Emotion file not found for 176.wav. Skipping...
Emotion file not found for 177.wav. Skipping...
Emotion file not found for 178.wav. Skipping...
Emotion file not found for 179.wav. 

Emotion file not found for video51.wav. Skipping...
Emotion file not found for video52.wav. Skipping...
Emotion file not found for video53.wav. Skipping...
Emotion file not found for video54.wav. Skipping...
Emotion file not found for video55.wav. Skipping...
Emotion file not found for video56.wav. Skipping...
Emotion file not found for video57.wav. Skipping...
Emotion file not found for video59.wav. Skipping...
Emotion file not found for video60.wav. Skipping...
Emotion file not found for video62.wav. Skipping...
Emotion file not found for video64.wav. Skipping...
Emotion file not found for video65.wav. Skipping...
Emotion file not found for video69.wav. Skipping...
Emotion file not found for video70.wav. Skipping...
Emotion file not found for video71.wav. Skipping...
Emotion file not found for video74.wav. Skipping...
Emotion file not found for video75.wav. Skipping...
Emotion file not found for video76.wav. Skipping...
Emotion file not found for video77.wav. Skipping...
Emotion file

In [11]:
df

Unnamed: 0,audio_file,emotion,carpeta_base,nombre_archivo
0,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...",Train_Set,1-30-1280x720.wav
1,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",Train_Set,1-30-1280x720.wav
2,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -...",Train_Set,1-30-1280x720.wav
3,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",Train_Set,1-30-1280x720.wav
4,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...",Train_Set,1-30-1280x720.wav
...,...,...,...,...
16305,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...",Train_Set,video93.wav
16306,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[-1, -1, -1, -1, -1, -1, -1, -1, -1]",Train_Set,video93.wav
16307,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",Train_Set,video93.wav
16308,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -...",Train_Set,video93.wav


In [18]:
# Short split labels, keyed by the annotation folder name.
mapeo_estados = dict(Train_Set='train', Validation_Set='val')

# Overwrites the column of `df` itself; values missing from the mapping stay as they are.
df['carpeta_base'] = df['carpeta_base'].replace(to_replace=mapeo_estados)

df

Unnamed: 0,audio_file,emotion,carpeta_base,nombre_archivo
0,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...",train,1-30-1280x720.wav
1,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",train,1-30-1280x720.wav
2,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -...",train,1-30-1280x720.wav
3,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",train,1-30-1280x720.wav
4,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...",train,1-30-1280x720.wav
...,...,...,...,...
16305,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...",train,video93.wav
16306,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[-1, -1, -1, -1, -1, -1, -1, -1, -1]",train,video93.wav
16307,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",train,video93.wav
16308,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -...",train,video93.wav


In [19]:
# Independent copy of the rows labelled as the training split.
es_train = df['carpeta_base'].eq('train')
df_train_emociones = df.loc[es_train].copy()
df_train_emociones

Unnamed: 0,audio_file,emotion,carpeta_base,nombre_archivo
0,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...",train,1-30-1280x720.wav
1,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",train,1-30-1280x720.wav
2,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -...",train,1-30-1280x720.wav
3,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",train,1-30-1280x720.wav
4,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...",train,1-30-1280x720.wav
...,...,...,...,...
16305,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...",train,video93.wav
16306,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[-1, -1, -1, -1, -1, -1, -1, -1, -1]",train,video93.wav
16307,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",train,video93.wav
16308,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -...",train,video93.wav


In [20]:
# Independent copy of the rows labelled as the validation split.
es_val = df['carpeta_base'].eq('val')
df_val_emociones = df.loc[es_val].copy()
df_val_emociones

Unnamed: 0,audio_file,emotion,carpeta_base,nombre_archivo
1690,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -...",val,117.wav
1691,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, ...",val,117.wav
1692,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -...",val,117.wav
1693,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, ...",val,117.wav
1694,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -...",val,117.wav
...,...,...,...,...
16267,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...",val,video73.wav
16268,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, ...",val,video73.wav
16269,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...",val,video73.wav
16270,D:\sentirobots\Datasets\AffWild2\audios\audiob...,"[6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...",val,video73.wav


In [21]:
# Destination files: the combined table first, then one file per split.
ruta_csv_train_val = r'D:\sentirobots\Datasets\AffWild2\Audios_AffWild2\emociones_AffWild2_train_val_Audios.csv'
ruta_csv_training = r'D:\sentirobots\Datasets\AffWild2\Audios_AffWild2\emociones_AffWild2_training_Audios.csv'
ruta_csv_validation = r'D:\sentirobots\Datasets\AffWild2\Audios_AffWild2\emociones_AffWild2_validation_Audios.csv'

# index=False leaves the row index out of the files.
df.to_csv(ruta_csv_train_val, index=False)
df_train_emociones.to_csv(ruta_csv_training, index=False)
df_val_emociones.to_csv(ruta_csv_validation, index=False)

In [212]:
import os
import pandas as pd
import shutil

# Folder the clips are read from and folder the selection is copied into.
ruta_carpeta_original = r"D:\sentirobots\Datasets\AffWild2\Audios_AffWild2\Audios cortados"
nueva_carpeta = r"D:\sentirobots\Datasets\AffWild2\Audios_AffWild2\Seleccion"

# Keep only the rows whose emotion sequence contains no -1 label.
sin_menos_uno = df["emotion"].map(lambda etiquetas: -1 not in etiquetas)
df_filtrado = df[sin_menos_uno].copy()

if not os.path.exists(nueva_carpeta):
    os.makedirs(nueva_carpeta)

for ruta_audio in (valor.strip() for valor in df_filtrado["audio_file"]):
    # NOTE(review): os.path.join discards ruta_carpeta_original when the second
    # argument is absolute; the displayed audio_file values look absolute -- confirm.
    ruta_original = os.path.join(ruta_carpeta_original, ruta_audio)
    ruta_nueva = os.path.join(nueva_carpeta, os.path.basename(ruta_audio))
    if os.path.exists(ruta_nueva):
        continue  # already copied
    shutil.copy2(ruta_original, ruta_nueva)

df_nuevo = df_filtrado.copy()

df_nuevo

Unnamed: 0,audio_file,emotion,carpeta_base,nombre_archivo
0,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...",Train_Set,1-30-1280x720_1.wav
1,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",Train_Set,1-30-1280x720_2.wav
3,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",Train_Set,1-30-1280x720_4.wav
4,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...",Train_Set,1-30-1280x720_5.wav
6,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",Train_Set,1-30-1280x720_7.wav
...,...,...,...,...
16301,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...",Train_Set,video93_1.wav
16303,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",Train_Set,video93_3.wav
16305,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...",Train_Set,video93_5.wav
16307,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",Train_Set,video93_7.wav


In [213]:
# True for the rows whose emotion sequence holds exactly one distinct label.
df_nuevo["todos_iguales"] = df_nuevo["emotion"].map(lambda etiquetas: len(set(etiquetas)) == 1)

df_nuevo

Unnamed: 0,audio_file,emotion,carpeta_base,nombre_archivo,todos_iguales
0,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...",Train_Set,1-30-1280x720_1.wav,True
1,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",Train_Set,1-30-1280x720_2.wav,True
3,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",Train_Set,1-30-1280x720_4.wav,True
4,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...",Train_Set,1-30-1280x720_5.wav,True
6,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",Train_Set,1-30-1280x720_7.wav,True
...,...,...,...,...,...
16301,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...",Train_Set,video93_1.wav,True
16303,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",Train_Set,video93_3.wav,True
16305,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...",Train_Set,video93_5.wav,True
16307,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",Train_Set,video93_7.wav,True


In [214]:
def obtener_valor_igual(lista):
    """Return the single value shared by every item of ``lista``.

    Args:
        lista: Indexable sequence of values comparable with ``==``.

    Returns:
        The first item when all items are equal to it; ``None`` when the items
        differ or when ``lista`` is empty.
    """
    # all() is vacuously true on an empty sequence, so the empty case must be
    # handled before reading lista[0].
    if len(lista) == 0:
        return None
    primero = lista[0]
    if all(valor == primero for valor in lista):
        return primero
    return None

# Collapse each emotion sequence into its single shared label.
df_nuevo["Emocion"] = df_nuevo["emotion"].map(obtener_valor_igual)

df_nuevo

Unnamed: 0,audio_file,emotion,carpeta_base,nombre_archivo,todos_iguales,Emocion
0,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...",Train_Set,1-30-1280x720_1.wav,True,4
1,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",Train_Set,1-30-1280x720_2.wav,True,0
3,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",Train_Set,1-30-1280x720_4.wav,True,0
4,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...",Train_Set,1-30-1280x720_5.wav,True,4
6,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",Train_Set,1-30-1280x720_7.wav,True,0
...,...,...,...,...,...,...
16301,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...",Train_Set,video93_1.wav,True,4
16303,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",Train_Set,video93_3.wav,True,1
16305,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...",Train_Set,video93_5.wav,True,4
16307,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",Train_Set,video93_7.wav,True,1


In [216]:
# Distinct labels found in the "Emocion" column.
unique_values = df_nuevo["Emocion"].unique()

# The messages name the column that is actually read ("Emocion", not "emotion").
print("Valores únicos en la columna 'Emocion':", unique_values)
print("Cantidad de valores únicos en la columna 'Emocion':", len(unique_values))

Valores únicos en la columna 'emotion': [4 0 6 3 5 7 2 1]
Cantidad de valores únicos en la columna 'emotion': 8


In [217]:
# Distinct file names found in the "nombre_archivo" column.
unique_valuess = df_nuevo["nombre_archivo"].unique()

# The messages were copied from the previous cell and named the wrong column.
print("Valores únicos en la columna 'nombre_archivo':", unique_valuess)
print("Cantidad de valores únicos en la columna 'nombre_archivo':", len(unique_valuess))

Valores únicos en la columna 'emotion': ['1-30-1280x720_1.wav' '1-30-1280x720_2.wav' '1-30-1280x720_4.wav' ...
 'video93_5.wav' 'video93_7.wav' 'video94_1.wav']
Cantidad de valores únicos en la columna 'emotion': 7961


In [None]:
# Training rows. Both the raw folder name and the short label are accepted,
# because an earlier cell rewrites df['carpeta_base'] to 'train'/'val' in
# place; matching 'Train_Set' alone gives an empty frame on a top-to-bottom run.
df_train = df_nuevo[df_nuevo['carpeta_base'].isin(['Train_Set', 'train'])].copy()
df_train['carpeta_base'] = "train"
df_train

In [222]:
# Validation rows. Both the raw folder name and the short label are accepted,
# because an earlier cell rewrites df['carpeta_base'] to 'train'/'val' in
# place; matching 'Validation_Set' alone gives an empty frame on a top-to-bottom run.
df_val = df_nuevo[df_nuevo['carpeta_base'].isin(['Validation_Set', 'val'])].copy()
df_val['carpeta_base'] = "val"
df_val

Unnamed: 0,audio_file,emotion,carpeta_base,nombre_archivo,todos_iguales,Emocion
1691,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, ...",val,117_2.wav,True,7
1693,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, ...",val,117_4.wav,True,7
1695,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, ...",val,117_6.wav,True,7
1697,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, ...",val,117_8.wav,True,7
1699,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, ...",val,117_10.wav,True,7
...,...,...,...,...,...,...
16267,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...",val,video73_41.wav,True,4
16268,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, ...",val,video73_42.wav,True,5
16269,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ...",val,video73_43.wav,True,4
16270,D:\sentirobots\Datasets\AffWild2\Audios_AffWil...,"[6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...",val,video73_44.wav,True,6


In [1]:
# Destination files for the filtered training and validation tables.
ruta_csv_audios_training = r'D:\sentirobots\Datasets\AffWild2\Audios_AffWild2\audios_training.csv'
ruta_csv_audios_validation = r'D:\sentirobots\Datasets\AffWild2\Audios_AffWild2\audios_validation.csv'

# Requires df_train and df_val from the cells above; index=False omits the row index.
df_train.to_csv(ruta_csv_audios_training, index=False)
df_val.to_csv(ruta_csv_audios_validation, index=False)

NameError: name 'df_train' is not defined