In [9]:
!pip install pydub librosa noisereduce numpy scipy soundfile




[notice] A new release of pip is available: 23.2.1 -> 25.0.1
[notice] To update, run: C:\Users\sath0\AppData\Local\Programs\Python\Python312\python.exe -m pip install --upgrade pip


In [10]:
!pip install --upgrade jupyter ipywidgets




[notice] A new release of pip is available: 23.2.1 -> 25.0.1
[notice] To update, run: C:\Users\sath0\AppData\Local\Programs\Python\Python312\python.exe -m pip install --upgrade pip


PRE-PROCESSING (NOISE AND SILENCE REMOVAL)

In [11]:
import os
from pydub import AudioSegment

# Convert every WAV file of the dataset to a single (mono) channel.
# The folder layout <root>/<language>/<emotion>/<file>.wav is mirrored under output_root.
input_root = "BESD"
output_root = "19_03_mono_BESD"
os.makedirs(output_root, exist_ok=True)  # Create root folder if not exists

converted_count = 0
failed_files = []  # "<language>/<emotion>/<filename>" entries that raised an error

# Loop through languages (ENGLISH, TELUGU)
for language in ["ENGLISH", "TELUGU"]:
    language_path = os.path.join(input_root, language)
    output_language_path = os.path.join(output_root, language)
    os.makedirs(output_language_path, exist_ok=True)  # Create language folder if not exists

    # Loop through emotion categories
    for emotion in ["ANGER", "DISGUST", "FEAR", "HAPPY", "SAD", "NEUTRAL"]:
        emotion_path = os.path.join(language_path, emotion)
        output_emotion_path = os.path.join(output_language_path, emotion)
        os.makedirs(output_emotion_path, exist_ok=True)  # Create emotion folder if not exists

        # A missing category folder must not abort the whole batch:
        # os.listdir would raise here, outside of the per-file try/except.
        if not os.path.isdir(emotion_path):
            print(f"⚠️ Skipping missing folder: {language}/{emotion}")
            continue

        # Process all WAV files in the emotion folder (sorted for a reproducible order;
        # the extension check is case-insensitive so ".WAV" files are not silently skipped)
        for filename in sorted(os.listdir(emotion_path)):
            if not filename.lower().endswith(".wav"):
                continue

            input_path = os.path.join(emotion_path, filename)
            output_path = os.path.join(output_emotion_path, filename)

            try:
                # Load audio
                audio = AudioSegment.from_wav(input_path)

                # Convert to mono
                audio_mono = audio.set_channels(1)

                # Save the processed file
                audio_mono.export(output_path, format="wav")
                converted_count += 1
                print(f"✅ Converted to mono: {language}/{emotion}/{filename}")

            except Exception as e:
                # Keep going on a bad file, but remember it for the final summary
                failed_files.append(f"{language}/{emotion}/{filename}")
                print(f"❌ Error processing {language}/{emotion}/{filename}: {e}")

# Only claim full success when nothing failed
if failed_files:
    print(f"⚠️ Converted {converted_count} file(s); {len(failed_files)} file(s) failed.")
else:
    print("🎉 All files converted to mono successfully!")

✅ Converted to mono: ENGLISH/ANGER/1.EF_12 Angry_1.wav
✅ Converted to mono: ENGLISH/ANGER/1.EF_12 Angry_2.wav
✅ Converted to mono: ENGLISH/ANGER/1.EF_12 Angry_3.wav
✅ Converted to mono: ENGLISH/ANGER/1.EF_12 Angry_4.wav
✅ Converted to mono: ENGLISH/ANGER/1.EF_12 Angry_5.wav
✅ Converted to mono: ENGLISH/ANGER/10.EF_6 Angry_1.wav
✅ Converted to mono: ENGLISH/ANGER/10.EF_6 Angry_2.wav
✅ Converted to mono: ENGLISH/ANGER/10.EF_6 Angry_3.wav
✅ Converted to mono: ENGLISH/ANGER/10.EF_6 Angry_4.wav
✅ Converted to mono: ENGLISH/ANGER/10.EF_6 Angry_5.wav
✅ Converted to mono: ENGLISH/ANGER/11.EM_13 Angry_1.wav
✅ Converted to mono: ENGLISH/ANGER/11.EM_13 Angry_2.wav
✅ Converted to mono: ENGLISH/ANGER/11.EM_13 Angry_3.wav
✅ Converted to mono: ENGLISH/ANGER/11.EM_13 Angry_4.wav
✅ Converted to mono: ENGLISH/ANGER/11.EM_13 Angry_5.wav
✅ Converted to mono: ENGLISH/ANGER/12.EF_7 Angry_1.wav
✅ Converted to mono: ENGLISH/ANGER/12.EF_7 Angry_2.wav
✅ Converted to mono: ENGLISH/ANGER/12.EF_7 Angry_3.wav
✅ Con

In [12]:
import os
import librosa
import numpy as np
import soundfile as sf
from scipy.signal import wiener
from pydub import AudioSegment
from pydub.silence import split_on_silence

# Source dataset root and destination root for the silence-removed, denoised copies.
# These rebind the input_root/output_root names already assigned by the mono-conversion cell.
input_root = "BESD"
output_root = "19_03_preprocessed_BESD"  # Separate tree, so files under input_root are never overwritten
os.makedirs(output_root, exist_ok=True)  # Create the output root if it does not already exist

def remove_silence(input_file_path, output_file_path, min_silence_len=500, silence_thresh=-40):
    """Strip silent stretches from a WAV file and write the result to a temporary file.

    Args:
        input_file_path: Path of the WAV file to read.
        output_file_path: Intended final output path. It is only used to derive
            the temporary file name (``<stem>_temp<ext>`` in the same folder).
        min_silence_len: Minimum length of a silent stretch, in milliseconds,
            for it to be treated as a split point.
        silence_thresh: Level in dBFS below which audio counts as silence.

    Returns:
        Path of the temporary WAV file containing the silence-removed audio.
        The caller is responsible for deleting it.
    """
    audio = AudioSegment.from_wav(input_file_path)

    # split_on_silence returns the non-silent chunks of the recording
    chunks = split_on_silence(audio,
                              min_silence_len=min_silence_len,
                              silence_thresh=silence_thresh)

    if chunks:
        # Join the non-silent chunks back to back
        audio_without_silence = sum(chunks[1:], chunks[0])
    else:
        # No non-silent chunk was found (the whole clip is below the threshold):
        # keep the original audio rather than exporting an empty file.
        audio_without_silence = audio

    # Derive the temp name from the extension only. str.replace(".wav", ...) would
    # rewrite every ".wav" occurrence in the path and would leave the path unchanged
    # (colliding with the final output file) when the extension differs, e.g. ".WAV".
    stem, ext = os.path.splitext(output_file_path)
    temp_output_path = f"{stem}_temp{ext or '.wav'}"
    audio_without_silence.export(temp_output_path, format="wav")

    return temp_output_path  # Return path of processed file

def reduce_noise(y):
    """Apply a Wiener filter to reduce background noise.

    Args:
        y: Array-like audio samples.

    Returns:
        A NumPy array of filtered samples with the same shape as ``y``. Empty
        input is returned as an (empty) copy. Samples for which the filter
        yields a non-finite value are passed through unchanged.
    """
    y = np.asarray(y)
    if y.size == 0:
        # Nothing to filter
        return y.copy()

    # The filter divides by the local variance; on constant stretches (e.g. digital
    # silence) that is a division by zero, so silence the expected warnings.
    with np.errstate(divide="ignore", invalid="ignore"):
        filtered = wiener(y)

    # When both the local variance and the estimated noise power are zero the
    # filter produces NaN (0/0). Fall back to the input sample there so that
    # NaNs are never written to disk.
    return np.where(np.isfinite(filtered), filtered, y)

# Batch pipeline for every recording: silence removal -> Wiener denoising -> save.
# The folder layout <root>/<language>/<emotion>/<file>.wav is mirrored under output_root.
processed_count = 0
failed_files = []  # "<language>/<emotion>/<filename>" entries that raised an error

# Loop through languages (ENGLISH, TELUGU)
for language in ["ENGLISH", "TELUGU"]:
    language_path = os.path.join(input_root, language)
    output_language_path = os.path.join(output_root, language)
    os.makedirs(output_language_path, exist_ok=True)  # Create language folder if not exists

    # Loop through emotion categories
    for emotion in ["ANGER", "DISGUST", "FEAR", "HAPPY", "SAD", "NEUTRAL"]:
        emotion_path = os.path.join(language_path, emotion)
        output_emotion_path = os.path.join(output_language_path, emotion)
        os.makedirs(output_emotion_path, exist_ok=True)  # Create emotion folder if not exists

        # A missing category folder must not abort the whole batch:
        # os.listdir would raise here, outside of the per-file try/except.
        if not os.path.isdir(emotion_path):
            print(f"⚠️ Skipping missing folder: {language}/{emotion}")
            continue

        # Process all WAV files in the emotion folder (sorted for a reproducible order;
        # case-insensitive extension check so ".WAV" files are not silently skipped)
        for filename in sorted(os.listdir(emotion_path)):
            if not filename.lower().endswith(".wav"):
                continue

            input_path = os.path.join(emotion_path, filename)
            output_path = os.path.join(output_emotion_path, filename)
            temp_processed_path = None

            try:
                # Remove silence first using pydub (writes a temporary file)
                temp_processed_path = remove_silence(input_path, output_path)

                # Load the silence-removed audio at its native sample rate (sr=None);
                # librosa.load down-mixes to mono by default
                y, sr = librosa.load(temp_processed_path, sr=None)

                # Reduce noise
                y_clean = reduce_noise(y)

                # Save final processed audio
                sf.write(output_path, y_clean, sr)
                processed_count += 1

                print(f"✅ Processed & saved: {language}/{emotion}/{filename}")

            except Exception as e:
                # Keep going on a bad file, but remember it for the final summary
                failed_files.append(f"{language}/{emotion}/{filename}")
                print(f"❌ Error processing {language}/{emotion}/{filename}: {e}")

            finally:
                # Always clean up the temporary file, even when a later step failed.
                # Never delete it if it happens to be the final output path itself.
                if (temp_processed_path
                        and temp_processed_path != output_path
                        and os.path.exists(temp_processed_path)):
                    try:
                        os.remove(temp_processed_path)
                    except OSError as cleanup_error:
                        print(f"⚠️ Could not remove temporary file {temp_processed_path}: {cleanup_error}")

# Only claim full success when nothing failed
if failed_files:
    print(f"⚠️ Processed {processed_count} file(s); {len(failed_files)} file(s) failed.")
else:
    print("🎉 All files processed successfully!")

✅ Processed & saved: ENGLISH/ANGER/1.EF_12 Angry_1.wav
✅ Processed & saved: ENGLISH/ANGER/1.EF_12 Angry_2.wav
✅ Processed & saved: ENGLISH/ANGER/1.EF_12 Angry_3.wav
✅ Processed & saved: ENGLISH/ANGER/1.EF_12 Angry_4.wav
✅ Processed & saved: ENGLISH/ANGER/1.EF_12 Angry_5.wav
✅ Processed & saved: ENGLISH/ANGER/10.EF_6 Angry_1.wav
✅ Processed & saved: ENGLISH/ANGER/10.EF_6 Angry_2.wav
✅ Processed & saved: ENGLISH/ANGER/10.EF_6 Angry_3.wav
✅ Processed & saved: ENGLISH/ANGER/10.EF_6 Angry_4.wav
✅ Processed & saved: ENGLISH/ANGER/10.EF_6 Angry_5.wav
✅ Processed & saved: ENGLISH/ANGER/11.EM_13 Angry_1.wav
✅ Processed & saved: ENGLISH/ANGER/11.EM_13 Angry_2.wav
✅ Processed & saved: ENGLISH/ANGER/11.EM_13 Angry_3.wav
✅ Processed & saved: ENGLISH/ANGER/11.EM_13 Angry_4.wav
✅ Processed & saved: ENGLISH/ANGER/11.EM_13 Angry_5.wav
✅ Processed & saved: ENGLISH/ANGER/12.EF_7 Angry_1.wav
✅ Processed & saved: ENGLISH/ANGER/12.EF_7 Angry_2.wav
✅ Processed & saved: ENGLISH/ANGER/12.EF_7 Angry_3.wav
✅ Pro

In [6]:
import IPython.display as ipd

# Point these at any matching pair of files: a raw clip and its processed counterpart
original_audio = "BESD/ENGLISH/ANGER/1.EF_12 Angry_1.wav"
processed_audio = "19_03_preprocessed_BESD/ENGLISH/ANGER/1.EF_12 Angry_1.wav"

# Render a labelled audio player for each clip, original first
for label, clip_path in (
    ("🔊 Original Audio:", original_audio),
    ("🔊 Processed Audio:", processed_audio),
):
    print(label)
    ipd.display(ipd.Audio(clip_path))

🔊 Original Audio:


🔊 Processed Audio:


In [8]:
def display_audio_details(file_path):
    """Print the sample rate, channel count and duration of an audio file.

    The file is loaded at its native sample rate (``sr=None``) and without
    down-mixing (``mono=False``) so the reported channel count reflects the file.
    """
    samples, rate = librosa.load(file_path, sr=None, mono=False)

    # A 1-D array means a single channel; otherwise the first axis indexes channels
    n_channels = 1 if samples.ndim == 1 else samples.shape[0]

    print(f"Sample Rate: {rate} Hz")
    print(f"Number of Channels: {n_channels}")
    duration = librosa.get_duration(y=samples, sr=rate)
    print(f"Duration: {duration:.2f} seconds")

# Example usage: inspect one recording from the original (unprocessed) BESD tree
file_path = "BESD/TELUGU/ANGER/1.TF_12_angry_1.wav"
display_audio_details(file_path)

Sample Rate: 48000 Hz
Number of Channels: 2
Duration: 2.65 seconds
