In [None]:
from pydub import AudioSegment
import os

def convert_mp3_to_wav(mp3_path, output_folder):
    """Convert a single MP3 file into a WAV file stored in ``output_folder``.

    The WAV file keeps the MP3's base name with its extension replaced by
    ``.wav``. ``output_folder`` is created when it does not exist yet. A path
    whose extension is not ``.mp3`` (compared case-insensitively) is left
    untouched and a notice is printed instead.

    Args:
        mp3_path: Path of the MP3 file to convert.
        output_folder: Directory that receives the generated WAV file.

    Returns:
        None. Progress is reported through ``print``.
    """
    # exist_ok=True replaces the exists()/makedirs() pair, which could fail
    # if the folder appeared between the check and the creation.
    os.makedirs(output_folder, exist_ok=True)

    # Case-insensitive so that "CALL.MP3" is accepted like "call.mp3",
    # matching the extension checks used by the other converters here.
    if not mp3_path.lower().endswith(".mp3"):
        print("The file provided is not an MP3.")
        return

    filename = os.path.basename(mp3_path)
    wav_filename = os.path.splitext(filename)[0] + ".wav"
    wav_path = os.path.join(output_folder, wav_filename)

    audio = AudioSegment.from_mp3(mp3_path)
    audio.export(wav_path, format="wav")
    print(f"Converted: {filename} -> {wav_filename}")

# Convert one recording. The paths are absolute locations under
# /content/drive (presumably a mounted Google Drive -- confirm before reuse).
output_dir = "/content/drive/MyDrive/ASR/Output_Wav_files"
mp3_file = "/content/drive/MyDrive/ASR/Language_Files/Coll_HDB_MUM_CBD1_4200_09870580804_06-Jun-25-10-48-25.mp3"
convert_mp3_to_wav(mp3_path=mp3_file, output_folder=output_dir)

In [None]:
from pydub import AudioSegment
import os

def convert_mp3_to_wav_folder(input_folder, output_folder):
    """Convert every MP3 file directly inside ``input_folder`` to WAV.

    Each WAV file is written to ``output_folder`` under the MP3's base name
    with the extension replaced by ``.wav``. Entries whose name does not end
    in ``.mp3`` (compared case-insensitively) are ignored. Sub-folders are
    not traversed.

    Args:
        input_folder: Directory that is scanned for MP3 files.
        output_folder: Directory that receives the WAV files; created when
            it does not exist yet.

    Returns:
        None. One ``Converted: ...`` line is printed per converted file.
    """
    # exist_ok=True replaces the exists()/makedirs() pair, which could fail
    # if the folder appeared between the check and the creation.
    os.makedirs(output_folder, exist_ok=True)

    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # processing order (and the printed log) reproducible between runs.
    for filename in sorted(os.listdir(input_folder)):
        # Case-insensitive so that "CALL.MP3" is not silently skipped.
        if not filename.lower().endswith(".mp3"):
            continue

        mp3_path = os.path.join(input_folder, filename)
        wav_filename = os.path.splitext(filename)[0] + ".wav"
        wav_path = os.path.join(output_folder, wav_filename)

        audio = AudioSegment.from_mp3(mp3_path)
        audio.export(wav_path, format="wav")
        print(f"Converted: {filename} -> {wav_filename}")

# Batch-convert the MP3 recordings of one folder. The paths are absolute
# locations under /content/drive (presumably a mounted Google Drive -- confirm).
output_dir = "/content/drive/MyDrive/ASR/Output_Wav_files"
input_dir = "/content/drive/MyDrive/ASR/Language_Files"
convert_mp3_to_wav_folder(input_folder=input_dir, output_folder=output_dir)

Converted: Coll_HDB_MUM_CBD1_4849_09975303796_04-Jun-25-11-24-08.mp3 -> Coll_HDB_MUM_CBD1_4849_09975303796_04-Jun-25-11-24-08.wav
Converted: Coll_HDB_MUM_CBD1_4343_09594383389_06-Jun-25-15-07-18.mp3 -> Coll_HDB_MUM_CBD1_4343_09594383389_06-Jun-25-15-07-18.wav
Converted: Coll_HDB_MUM_CBD1_4287_09284000457_06-Jun-25-15-13-42.mp3 -> Coll_HDB_MUM_CBD1_4287_09284000457_06-Jun-25-15-13-42.wav
Converted: Coll_HDB_MUM_CBD1_4345_09657652469_06-Jun-25-10-44-24.mp3 -> Coll_HDB_MUM_CBD1_4345_09657652469_06-Jun-25-10-44-24.wav
Converted: Coll_HDB_MUM_CBD1_4348_09167031447_06-Jun-25-11-48-37.mp3 -> Coll_HDB_MUM_CBD1_4348_09167031447_06-Jun-25-11-48-37.wav
Converted: Coll_HDB_MUM_CBD1_4329_09903342410_06-Jun-25-10-24-36.mp3 -> Coll_HDB_MUM_CBD1_4329_09903342410_06-Jun-25-10-24-36.wav
Converted: Coll_HDB_MUM_CBD1_4288_09730516093_06-Jun-25-09-48-41.mp3 -> Coll_HDB_MUM_CBD1_4288_09730516093_06-Jun-25-09-48-41.wav
Converted: Coll_HDB_MUM_CBD1_4200_09870580804_06-Jun-25-10-48-25.mp3 -> Coll_HDB_MUM_CBD1_

In [None]:
import os
from pydub import AudioSegment

def convert_stereo_to_mono_folder(input_folder, output_folder):
    """Write mono versions of the two-channel WAV files found in a folder.

    Entries of ``input_folder`` whose name ends in ``.wav`` (any letter case)
    are loaded one by one. A file with exactly two channels is reduced to one
    channel and exported to ``output_folder`` under the same name. A file
    with any other channel count is reported as skipped and nothing is
    written for it. A failure on one file is printed and does not stop the
    remaining files from being processed.

    Args:
        input_folder: Directory scanned for WAV files. If it does not exist,
            an error is printed and the function returns immediately.
        output_folder: Destination directory; created (with a notice) when
            it does not exist yet.

    Returns:
        None. Progress is reported through ``print``.
    """
    if not os.path.exists(input_folder):
        print(f"Error: Input folder '{input_folder}' does not exist.")
        return

    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
        print(f"Created output folder: '{output_folder}'")

    for wav_name in os.listdir(input_folder):
        if not wav_name.lower().endswith(".wav"):
            continue

        source_path = os.path.join(input_folder, wav_name)
        target_path = os.path.join(output_folder, wav_name)

        try:
            segment = AudioSegment.from_wav(source_path)

            # Only exactly-two-channel audio is converted; everything else
            # is reported and left out of the output folder.
            if segment.channels != 2:
                print(f"Skipped '{wav_name}': Already a mono file or not a stereo file.")
                continue

            segment.set_channels(1).export(target_path, format="wav")
            print(f"Converted '{wav_name}' to mono and saved to '{target_path}'")

        except Exception as err:
            # Best effort: report the failing file and move on to the next.
            print(f"Error processing '{wav_name}': {err}")

if __name__ == "__main__":
    # Reduce the WAV files in the source folder to mono, then print a final
    # confirmation line once the whole folder has been walked.
    output_folder_path = "/content/drive/MyDrive/ASR/MonoAudioFiles"
    input_folder_path = "/content/drive/MyDrive/ASR/Output_Wav_files"

    convert_stereo_to_mono_folder(
        input_folder=input_folder_path,
        output_folder=output_folder_path,
    )
    print("\nConversion process completed!")

Converted 'Coll_HDB_MUM_CBD1_4849_09975303796_04-Jun-25-11-24-08.wav' to mono and saved to '/content/drive/MyDrive/ASR/MonoAudioFiles/Coll_HDB_MUM_CBD1_4849_09975303796_04-Jun-25-11-24-08.wav'
Converted 'Coll_HDB_MUM_CBD1_4343_09594383389_06-Jun-25-15-07-18.wav' to mono and saved to '/content/drive/MyDrive/ASR/MonoAudioFiles/Coll_HDB_MUM_CBD1_4343_09594383389_06-Jun-25-15-07-18.wav'
Converted 'Coll_HDB_MUM_CBD1_4287_09284000457_06-Jun-25-15-13-42.wav' to mono and saved to '/content/drive/MyDrive/ASR/MonoAudioFiles/Coll_HDB_MUM_CBD1_4287_09284000457_06-Jun-25-15-13-42.wav'
Converted 'Coll_HDB_MUM_CBD1_4345_09657652469_06-Jun-25-10-44-24.wav' to mono and saved to '/content/drive/MyDrive/ASR/MonoAudioFiles/Coll_HDB_MUM_CBD1_4345_09657652469_06-Jun-25-10-44-24.wav'
Converted 'Coll_HDB_MUM_CBD1_4348_09167031447_06-Jun-25-11-48-37.wav' to mono and saved to '/content/drive/MyDrive/ASR/MonoAudioFiles/Coll_HDB_MUM_CBD1_4348_09167031447_06-Jun-25-11-48-37.wav'
Converted 'Coll_HDB_MUM_CBD1_4329_0

In [None]:
import os

def count_files_in_folder(folder_path):
    """Count the regular files located directly inside ``folder_path``.

    Sub-directories are not counted and are not descended into.

    Args:
        folder_path: Path of the directory to inspect.

    Returns:
        The number of entries for which ``os.path.isfile`` is true, or ``-1``
        (after printing an error) when the path does not exist or is not a
        directory.
    """
    if not os.path.exists(folder_path):
        print(f"Error: Folder '{folder_path}' does not exist.")
        return -1

    if not os.path.isdir(folder_path):
        print(f"Error: '{folder_path}' is not a directory.")
        return -1

    # Summing booleans counts the entries that are regular files.
    return sum(
        os.path.isfile(os.path.join(folder_path, entry))
        for entry in os.listdir(folder_path)
    )

folder = "/content/drive/MyDrive/ASR/MonoAudioFiles"

# count_files_in_folder returns -1 after printing its own error message, so
# the summary line is printed only for a real count.
count = count_files_in_folder(folder_path=folder)
if not count == -1:
    print(f"Number of files in '{folder}': {count}")

Number of files in '/content/drive/MyDrive/ASR/MonoAudioFiles': 100


In [None]:
import json
import csv

def json_to_csv_custom(json_file_path, csv_file_path):
    """Flatten a transcription JSON file into a CSV of speaker segments.

    The JSON document is read from ``results.transcripts`` (a list of
    objects with a ``transcript`` key) and ``results.speaker_labels.segments``
    (a list of objects with ``start_time``, ``end_time`` and
    ``speaker_label``). One CSV row is written per (transcript, segment)
    pair. The transcript text is NOT split per segment: every row carries
    the full transcript text.

    When the document has no speaker segments, one row per transcript is
    still written with the speaker and time columns left empty, so the
    transcript text is not silently lost. Keys that are missing or ``null``
    are treated as empty.

    Args:
        json_file_path: Path of the UTF-8 JSON file to read.
        csv_file_path: Path of the CSV file to write (overwritten if present).

    Returns:
        None. A confirmation line is printed once the CSV is written.
    """
    with open(json_file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # `or` fallbacks also cover keys that are present but null, which a
    # plain .get(key, default) would pass through as None.
    results = data.get('results') or {}
    transcripts = results.get('transcripts') or []
    speaker_segments = (results.get('speaker_labels') or {}).get('segments') or []

    rows = []
    for transcript_obj in transcripts:
        transcript_text = transcript_obj.get('transcript', '')

        if not speaker_segments:
            # No speaker segments: keep the transcript with blank speaker fields.
            rows.append({
                'transcript': transcript_text,
                'speaker': None,
                'start_time': None,
                'end_time': None
            })
            continue

        # The whole transcript is associated with each segment; no attempt
        # is made to match segment times to parts of the text.
        for segment in speaker_segments:
            rows.append({
                'transcript': transcript_text,
                'speaker': segment.get('speaker_label'),
                'start_time': segment.get('start_time'),
                'end_time': segment.get('end_time')
            })

    # newline='' lets the csv module control line endings itself.
    with open(csv_file_path, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['transcript', 'speaker', 'start_time', 'end_time']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

    print(f"CSV saved to {csv_file_path}")

# Example run: flatten one transcription JSON file into /content/output.csv.
csv_output_path = "/content/output.csv"
json_input_path = "/content/telugu_Coll_HDB_CHEN3_5207_09666565150_27-Jun-25-10-59-54.json"

json_to_csv_custom(json_file_path=json_input_path, csv_file_path=csv_output_path)

CSV saved to /content/output.csv


In [None]:
import os
from pydub import AudioSegment

def convert_stereo_to_mono_mp3(input_folder, output_folder):
    """Write mono versions of the two-channel MP3 files found in a folder.

    Entries of ``input_folder`` whose name ends in ``.mp3`` (any letter case)
    are loaded one by one and their channel count is printed. A file with
    exactly two channels is reduced to one channel and exported as MP3 to
    ``output_folder`` under the same base name with a lowercase ``.mp3``
    extension. A file with any other channel count is reported as skipped
    and nothing is written for it. A failure on one file is printed and does
    not stop the remaining files from being processed.

    Args:
        input_folder: Directory scanned for MP3 files. If it does not exist,
            an error is printed and the function returns immediately.
        output_folder: Destination directory; created (with a notice) when
            it does not exist yet.

    Returns:
        None. Progress is reported through ``print``.
    """
    if not os.path.exists(input_folder):
        print(f"Error: Input folder '{input_folder}' does not exist.")
        return

    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
        print(f"Created output folder: '{output_folder}'")

    for mp3_name in os.listdir(input_folder):
        if not mp3_name.lower().endswith(".mp3"):
            continue

        stem, _ext = os.path.splitext(mp3_name)
        source_path = os.path.join(input_folder, mp3_name)
        # The output always gets a lowercase ".mp3" extension.
        target_path = os.path.join(output_folder, stem + ".mp3")

        try:
            segment = AudioSegment.from_mp3(source_path)
            print(f"{mp3_name} has {segment.channels} channels")

            # Only exactly-two-channel audio is converted.
            if segment.channels != 2:
                print(f"Skipped '{mp3_name}': Not a stereo file.")
                continue

            segment.set_channels(1).export(target_path, format="mp3")
            print(f"Converted '{mp3_name}' to mono mp3 and saved to '{target_path}'")
        except Exception as err:
            # Best effort: report the failing file and move on to the next.
            print(f"Error processing '{mp3_name}': {err}")

if __name__ == "__main__":
    # Reduce the MP3 files in the source folder to mono, then print a final
    # confirmation line once the whole folder has been walked.
    output_folder_path = "/content/drive/MyDrive/ASR/MAHindi"
    input_folder_path = "/content/drive/MyDrive/ASR/Hindi"
    convert_stereo_to_mono_mp3(
        input_folder=input_folder_path,
        output_folder=output_folder_path,
    )
    print("\nConversion process completed!")

BFS_CustomerCare_Hindi.mp3 has 1 channels
Skipped 'BFS_CustomerCare_Hindi.mp3': Not a stereo file.
BFS_LeadGeneration_Hindi.mp3 has 1 channels
Skipped 'BFS_LeadGeneration_Hindi.mp3': Not a stereo file.
Coll_HDB_MUM_CBD1_4323_09029826711_27-Jun-25-15-30-17.mp3 has 2 channels
Converted 'Coll_HDB_MUM_CBD1_4323_09029826711_27-Jun-25-15-30-17.mp3' to mono mp3 and saved to '/content/drive/MyDrive/ASR/MAHindi/Coll_HDB_MUM_CBD1_4323_09029826711_27-Jun-25-15-30-17.mp3'
Coll_HDB_MUM_CBD1_4330_09967186126_27-Jun-25-12-59-00.mp3 has 2 channels
Converted 'Coll_HDB_MUM_CBD1_4330_09967186126_27-Jun-25-12-59-00.mp3' to mono mp3 and saved to '/content/drive/MyDrive/ASR/MAHindi/Coll_HDB_MUM_CBD1_4330_09967186126_27-Jun-25-12-59-00.mp3'
Coll_HDB_MUM_CBD1_4330_07045796281_27-Jun-25-08-59-52.mp3 has 2 channels
Converted 'Coll_HDB_MUM_CBD1_4330_07045796281_27-Jun-25-08-59-52.mp3' to mono mp3 and saved to '/content/drive/MyDrive/ASR/MAHindi/Coll_HDB_MUM_CBD1_4330_07045796281_27-Jun-25-08-59-52.mp3'
Coll_HDB_