In [10]:
%pip install numpy pydub librosa soundfile 

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'c:\Users\aksha\AppData\Local\Programs\Python\Python39\python.exe -m pip install --upgrade pip' command.


In [11]:
import os
from pydub import AudioSegment
from pydub.silence import split_on_silence
import numpy as np
import librosa

In [12]:
def read_dataset_folder(dataset_folder):
    audio_files = {'English': [], 'Hindi': [], 'CodeMixed': []}

    for language in ['English', 'Hindi', 'CodeMixed']:
        language_folder = os.path.join(dataset_folder, language)
        for file_name in os.listdir(language_folder):
            if file_name.endswith('.wav'):
                file_path = os.path.join(language_folder, file_name)
                if language == 'English':
                    audio_files['English'].append(file_path)
                elif language == 'Hindi':
                    audio_files['Hindi'].append(file_path)
                elif language == 'CodeMixed':
                    audio_files['CodeMixed'].append(file_path)
    return audio_files

In [13]:
def denoise_audio(y, sr):
    stft = np.abs(librosa.stft(y))
    noise_thresh = np.mean(stft, axis=1)
    stft_denoised = np.where(stft < noise_thresh[:, None], 0, stft)
    y_denoised = librosa.istft(stft_denoised)
    return y_denoised

In [14]:
def resample_audio(y, sr, target_sr=16000):

    return librosa.resample(y, orig_sr=sr, target_sr=target_sr), target_sr


In [15]:
def segment_audio(input_file, min_silence_len=2000, silence_thresh=-30,keep_silence=200, output_dir='segments'):
    
    
    y, sr = librosa.load(input_file, sr=None)
    
    
    y_denoised = denoise_audio(y, sr)
    
   
    y_resampled, target_sr = resample_audio(y_denoised, sr)
    
    
    audio = AudioSegment(
        y_resampled.tobytes(), 
        frame_rate=target_sr, 
        sample_width=y_resampled.dtype.itemsize, 
        channels=1
    )

    
    
    # Split audio based on silence
    segments = split_on_silence(
        audio,
        min_silence_len=min_silence_len,  # Minimum silence length in ms
        silence_thresh=silence_thresh     # Silence threshold in dBFS (decibels relative to full scale)
    )
    
    # Create output directory for the file if not exists
    file_name = os.path.splitext(os.path.basename(input_file))[0]
    file_output_dir = os.path.join(output_dir, file_name)
    if not os.path.exists(file_output_dir):
        os.makedirs(file_output_dir)
    
    # Export each segment as a separate file
    for i, segment in enumerate(segments):
        segment.export(os.path.join(file_output_dir, f'segment_{i+1}.wav'), format="wav")
    
    print(f"Processed {input_file}: Total Segments = {len(segments)}")
    
    return len(segments), [seg.duration_seconds for seg in segments]

In [16]:
## # Function to segment all audio files in a folder with different parameters for English and CodeMixed
def segment_all_files_in_folder(folder_path, output_dir='segments'):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    # Get audio files
    audio_files = read_dataset_folder(folder_path)
    
    # Process each category with custom parameters
    for category in ['English', 'CodeMixed']:
        print(f"\nProcessing {category} Audio Files...")
        if category == 'English':
            min_silence_len = 500
            silence_thresh = -10
        elif category == 'CodeMixed':
            min_silence_len = 650
            silence_thresh = -10
            
        for file_path in audio_files[category]:
            ans = segment_audio(file_path, min_silence_len, silence_thresh, output_dir)
            print(ans)
    
    print(f"All files in {folder_path} processed and saved to {output_dir}")

# Define output directory and call the function to process files
output_dir = 'segmented_files_output'
folder_path = "C:/Programming/LanguageIdentification/Dataset"  
segment_all_files_in_folder(folder_path, output_dir=output_dir)



Processing English Audio Files...
Processed C:/Programming/LanguageIdentification/Dataset\English\education_0001.wav: Total Segments = 9
(9, [6.912, 1.023, 21.503, 30.045, 708.607, 301.343, 117.661, 36.927, 62.368])
Processed C:/Programming/LanguageIdentification/Dataset\English\education_0002.wav: Total Segments = 117
(117, [1.177, 3.29, 0.22, 6.61, 4.817, 2.916, 3.365, 1.047, 29.562, 1.342, 17.638, 9.346, 5.11, 3.486, 6.429, 6.561, 16.402, 6.071, 9.373, 2.545, 6.327, 1.088, 4.019, 3.206, 1.738, 8.991, 4.927, 15.823, 2.213, 0.652, 0.221, 1.911, 0.669, 1.854, 9.656, 18.912, 9.476, 1.59, 0.664, 5.982, 12.475, 4.987, 2.311, 9.511, 13.465, 13.029, 9.806, 0.536, 20.893, 5.294, 13.477, 4.512, 2.407, 3.193, 3.079, 1.706, 5.565, 4.902, 12.171, 18.979, 15.032, 9.503, 0.274, 7.686, 24.402, 14.57, 5.852, 2.635, 2.393, 3.645, 1.5, 6.759, 5.532, 0.279, 2.365, 10.406, 3.333, 6.552, 7.116, 6.583, 2.749, 5.462, 0.545, 4.182, 3.896, 2.095, 10.297, 16.128, 0.769, 5.224, 6.365, 2.547, 5.555, 0.861, 33.

In [17]:
!zip -r L1Segmentation-Final-8Oct.zip /kaggle/working/segments

'zip' is not recognized as an internal or external command,
operable program or batch file.
