In [66]:
# Install required libraries
# !pip install librosa numpy pandas scikit-learn tensorflow matplotlib


# Audio split

## Imports

In [67]:
import wave
import os
import csv
import glob


## Function for splitting into parts

In [68]:

def split_wav(path, parts, output_location=None):
    """
    Split a .wav file into equal duration parts.
    
    Parameters:
    - path: str, path to the .wav file
    - parts: int, number of parts to split into
    - output_location: str, directory to save split files (default: same as input file)
    
    Returns:
    - List of paths to the split files
    """
    # Open the audio file
    
    with wave.open(path, 'rb') as audio:
        # Get audio parameters
        params = audio.getparams()
        n_channels = params.nchannels
        sampwidth = params.sampwidth
        framerate = params.framerate
        n_frames = params.nframes
        
        # Read all frames
        frames = audio.readframes(n_frames)
    
    # Calculate frames per part
    frames_per_part = n_frames // parts
    
    # Get base name and directory
    base_name = os.path.splitext(os.path.basename(path))[0]
    
    # Extract speaker ID (first 2 characters of filename)
    speaker_id = base_name[0:2]
    
    # Use output_location if provided, otherwise use input file directory
    if output_location:
        directory = output_location
        # Create output directory if it doesn't exist
        os.makedirs(directory, exist_ok=True)
    else:
        directory = os.path.dirname(path) or '.'
    
    # Split and save parts
    output_paths = []
    for i in range(parts):
        # Calculate start and end frames
        start_frame = i * frames_per_part
        
        # For the last part, include any remaining frames
        if i == parts - 1:
            end_frame = n_frames
        else:
            end_frame = (i + 1) * frames_per_part
        
        # Calculate byte positions (each frame has sampwidth bytes per channel)
        bytes_per_frame = n_channels * sampwidth
        start_byte = start_frame * bytes_per_frame
        end_byte = end_frame * bytes_per_frame
        
        # Extract the part
        part_frames = frames[start_byte:end_byte]
        
        # Create output path with speaker ID prefix
        output_path = os.path.join(directory, f"S{speaker_id}_{base_name}_{i+1}.wav")
        
        # Write the part to a new file
        with wave.open(output_path, 'wb') as part_audio:
            part_audio.setparams(params)
            part_audio.writeframes(part_frames)
        
        duration = (end_frame - start_frame) / framerate
        output_paths.append(output_path)
        print(f"Created: {output_path} (duration: {duration:.2f}s)")
    
    return output_paths


In [69]:
# audio_path = "/Users/adityakumar/Developer/Projects/mini_project/dataset/03a05Tc.wav"
# output_path = "/Users/adityakumar/Developer/Projects/mini_project/output"
# print(split_wav(audio_path, 3, output_path))


## Function to create CSV of output file duration

In [70]:

def create_audio_duration_csv(folder_location, csv_folder_location):
    """
    Create a CSV file with filename and duration for all .wav files in a folder.
    
    Parameters:
    - folder_location: str, path to folder containing .wav files
    - csv_folder_location: str, path to folder where CSV will be saved
    
    Returns:
    - Path to the created CSV file
    """
    # Get all .wav files in the folder
    wav_files = glob.glob(os.path.join(folder_location, "*.wav"))
    
    # Create csv folder if it doesn't exist
    os.makedirs(csv_folder_location, exist_ok=True)
    
    # Create CSV file path
    csv_path = os.path.join(csv_folder_location, "audio_durations.csv")
    
    # Collect file data
    file_data = []
    for wav_file in wav_files:
        # Get duration using wave module
        with wave.open(wav_file, 'rb') as audio:
            frames = audio.getnframes()
            rate = audio.getframerate()
            duration = frames / float(rate)
        
        filename = os.path.basename(wav_file)
        file_data.append({'filename': filename, 'duration(sec)': round(duration, 2)})
    
    # Write to CSV
    with open(csv_path, 'w', newline='') as csvfile:
        fieldnames = ['filename', 'duration(sec)']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        
        writer.writeheader()
        writer.writerows(file_data)
    
    print(f"CSV created at: {csv_path} with {len(file_data)} files")
    return csv_path


## Splitting the data into parts

In [71]:
# Split all .wav files in the dataset folder
dataset_folder = "/Users/adityakumar/Developer/Projects/mini_project/dataset"
output_folder = "/Users/adityakumar/Developer/Projects/mini_project/output"

# Get all .wav files
wav_files = os.listdir(dataset_folder)

print(f"Found {len(wav_files)} .wav files to split")

# Split each file based on duration
for wav_file in wav_files:
    # Get duration
    wav_file_path = os.path.join(dataset_folder, wav_file)
    with wave.open(wav_file_path, 'rb') as audio:
        frames = audio.getnframes()
        rate = audio.getframerate()
        duration = frames / float(rate)
    
    
    if duration >= 6:
        parts = 4
    elif duration >= 4:
        parts = 3
    elif duration >= 2:
        parts = 2
    else:
        parts = 1
    
    # Final safety check
    part_duration = duration / parts
    print(f"\nProcessing: {os.path.basename(wav_file)} (duration: {duration:.2f}s, parts: {parts}, part_duration: {part_duration:.2f}s)")
    
    if parts > 1:
        split_wav(wav_file_path, parts, output_folder)
    else:
        print("  Skipping (duration < 2 seconds)")


Found 535 .wav files to split

Processing: 16a02Lb.wav (duration: 2.08s, parts: 2, part_duration: 1.04s)
Created: /Users/adityakumar/Developer/Projects/mini_project/output/S16_16a02Lb_1.wav (duration: 1.04s)
Created: /Users/adityakumar/Developer/Projects/mini_project/output/S16_16a02Lb_2.wav (duration: 1.04s)

Processing: 14a07Wc.wav (duration: 2.06s, parts: 2, part_duration: 1.03s)
Created: /Users/adityakumar/Developer/Projects/mini_project/output/S14_14a07Wc_1.wav (duration: 1.03s)
Created: /Users/adityakumar/Developer/Projects/mini_project/output/S14_14a07Wc_2.wav (duration: 1.03s)

Processing: 10a07Ad.wav (duration: 1.60s, parts: 1, part_duration: 1.60s)
  Skipping (duration < 2 seconds)

Processing: 13a05Ea.wav (duration: 4.20s, parts: 3, part_duration: 1.40s)
Created: /Users/adityakumar/Developer/Projects/mini_project/output/S13_13a05Ea_1.wav (duration: 1.40s)
Created: /Users/adityakumar/Developer/Projects/mini_project/output/S13_13a05Ea_2.wav (duration: 1.40s)
Created: /Users/ad

## Making CSV table of filename and duration for output

In [72]:
info_path = "/Users/adityakumar/Developer/Projects/mini_project/info"
output_path = "/Users/adityakumar/Developer/Projects/mini_project/output"

print(create_audio_duration_csv(output_path, info_path))


CSV created at: /Users/adityakumar/Developer/Projects/mini_project/info/audio_durations.csv with 872 files
/Users/adityakumar/Developer/Projects/mini_project/info/audio_durations.csv


# Building the CSV label

In [74]:
import pandas as pd

def parse_filename(filename):
    """
    Parse filename to extract speaker_id, text_code, emotion_code, and version.
    Format: SXX_SSPPPEV_X.wav
    - SXX: Speaker prefix (e.g., S03)
    - SSPPPEV: Original filename
    - X: Part number
    Example: S03_03a05Tc_1.wav
    """
    # Remove .wav extension
    base_name = filename.replace('.wav', '')
    
    # Split by underscore
    parts = base_name.split('_')
    
    # First part is speaker ID (e.g., S03)
    speaker_prefix = parts[0] if parts else ''
    speaker_id = speaker_prefix[1:] if speaker_prefix.startswith('S') else speaker_prefix
    
    # Middle part is the original filename (e.g., 03a05Tc)
    if len(parts) >= 2:
        original_name = parts[1]
    else:
        original_name = base_name
    
    # Parse the original filename
    text_code = original_name[2:5] if len(original_name) > 5 else ''
    emotion_code = original_name[5] if len(original_name) > 5 else ''
    version = original_name[6] if len(original_name) > 6 else ''
    
    return speaker_id, text_code, emotion_code, version

def get_emotion_label(emotion_code):
    """
    Map German emotion code to emotion label.
    """
    emotion_map = {
        'W': 'Anger',      # Wut
        'L': 'Boredom',    # Langeweile
        'E': 'Disgust',    # Ekel
        'A': 'Anxiety',    # Angst
        'F': 'Happiness',  # Freude
        'T': 'Sadness',    # Traurigkeit
        'N': 'Neutral'     # Neutral
    }
    return emotion_map.get(emotion_code, 'Unknown')

def get_emotion_number(emotion_label):
    """
    Map emotion label to numeral.
    """
    emotion_number_map = {
        'Happiness': 0,
        'Neutral': 1,
        'Anger': 2,
        'Anxiety': 3,
        'Boredom': 4,
        'Disgust': 5,
        'Sadness': 6
    }
    return emotion_number_map.get(emotion_label, -1)

def create_label_csv(output_folder, info_folder):
    """
    Create a CSV file with labels for all audio files.
    Columns: ID, duration, wav, start, stop, spk_id, label
    """
    # Read the audio_durations.csv file
    durations_csv = os.path.join(info_folder, 'audio_durations.csv')
    df_durations = pd.read_csv(durations_csv)
    
    # List to store all rows
    rows = []
    
    # Process each file
    for idx, row in df_durations.iterrows():
        filename = row['filename']
        duration = row['duration(sec)']
        
        # Parse filename
        speaker_id, text_code, emotion_code, version = parse_filename(filename)
        emotion_label = get_emotion_label(emotion_code)
        emotion_number = get_emotion_number(emotion_label)
        
        # Get part number from filename
        # Format: SXX_filename_part.wav
        parts = filename.replace('.wav', '').split('_')
        if len(parts) >= 3:
            part_num = int(parts[-1])
            base_filename = parts[1]  # Original filename without speaker prefix and part number
        else:
            part_num = 1
            base_filename = parts[1] if len(parts) >= 2 else filename.replace('.wav', '')
        
        original_filename = base_filename + '.wav'
        
        # Calculate start and stop times
        # We need to determine how many parts the original file was split into
        # by checking how many files share the same base name (SXX_filename_)
        same_base_files = df_durations[df_durations['filename'].str.contains(f'_{base_filename}_')]
        total_parts = len(same_base_files)
        
        if total_parts == 0:
            # File wasn't split
            start_time = 0.0
            stop_time = duration
        else:
            # File was split - calculate based on part number
            # Get total duration of all parts
            total_duration = same_base_files['duration(sec)'].sum()
            
            # Calculate cumulative duration up to this part
            parts_before = same_base_files[same_base_files['filename'] < filename]
            start_time = parts_before['duration(sec)'].sum() if len(parts_before) > 0 else 0.0
            stop_time = start_time + duration
        
        # File location
        file_location = os.path.join(output_folder, filename)
        
        # Create row
        rows.append({
            'ID': f'S{speaker_id}--{filename.replace(".wav", "")}',
            'duration': duration,
            'wav': file_location,
            'start': 0,
            'stop': int(duration*16000),
            'spk_id': f'S{speaker_id}',
            'label': emotion_number
        })
    
    # Create DataFrame
    df_labels = pd.DataFrame(rows)
    
    # Save to CSV
    output_csv = os.path.join(info_folder, 'audio_labels.csv')
    df_labels.to_csv(output_csv, index=False)
    
    print(f"Label CSV created at: {output_csv}")
    print(f"Total entries: {len(df_labels)}")
    print(f"\nFirst few entries:")
    print(df_labels.head(10))
    
    return output_csv

# Create the label CSV
output_folder = "/Users/adityakumar/Developer/Projects/mini_project/output"
info_folder = "/Users/adityakumar/Developer/Projects/mini_project/info"
create_label_csv(output_folder, info_folder)


Label CSV created at: /Users/adityakumar/Developer/Projects/mini_project/info/audio_labels.csv
Total entries: 872

First few entries:
                   ID  duration  \
0  S14--S14_14b01Fc_1      1.27   
1  S10--S10_10b02Aa_2      1.23   
2  S12--S12_12b10Ld_2      1.51   
3  S13--S13_13b03Td_1      1.66   
4  S16--S16_16a04Ea_2      1.32   
5  S11--S11_11b02Ab_2      1.24   
6  S14--S14_14b02Fb_1      1.42   
7  S08--S08_08a07Wc_1      1.11   
8  S11--S11_11b03Wb_3      1.74   
9  S15--S15_15b03Nb_2      1.60   

                                                 wav  start   stop spk_id  \
0  /Users/adityakumar/Developer/Projects/mini_pro...      0  20320    S14   
1  /Users/adityakumar/Developer/Projects/mini_pro...      0  19680    S10   
2  /Users/adityakumar/Developer/Projects/mini_pro...      0  24160    S12   
3  /Users/adityakumar/Developer/Projects/mini_pro...      0  26560    S13   
4  /Users/adityakumar/Developer/Projects/mini_pro...      0  21120    S16   
5  /Users/adityaku

'/Users/adityakumar/Developer/Projects/mini_project/info/audio_labels.csv'

## Mapping the emotion to number