In [1]:
import os

In [2]:
import speech_recognition as sr

In [3]:
def transcribe_audio(audio_file):
    """Transcribe an audio file to text with ``recognize_google``.

    The whole file is read through ``sr.AudioFile`` and handed to the
    recognizer with the language fixed to ``"en-US"``.

    Args:
        audio_file: Audio source accepted by ``sr.AudioFile``.

    Returns:
        The recognized text, or ``None`` when the audio could not be
        understood or the recognition request failed. In both failure cases
        a message is printed.
    """
    engine = sr.Recognizer()

    # Read the complete recording into memory before recognition starts.
    with sr.AudioFile(audio_file) as wav:
        captured = engine.record(wav)

    # Stays None unless recognition succeeds.
    transcript = None
    try:
        transcript = engine.recognize_google(captured, language="en-US")
    except sr.UnknownValueError:
        print("Speech Recognition could not understand the audio.")
    except sr.RequestError as err:
        print(f"Could not request results from Speech Recognition service; {err}")
    return transcript

In [4]:
def segment_conversation(text):
    """Split a transcript into ``(speaker, text)`` segments by speaker turn.

    A line that starts with ``"Person"`` opens a new turn and is stored whole
    as that turn's speaker label. Every other line belongs to the current
    turn; a turn's lines are joined with single spaces and the result is
    stripped of surrounding whitespace.

    Args:
        text: Transcript string, or ``None`` (``transcribe_audio`` returns
            ``None`` when recognition fails).

    Returns:
        A list of ``(speaker, segment_text)`` tuples in transcript order.
        ``speaker`` is ``None`` for text that appears before the first
        speaker line. Turns that contain no non-whitespace text are omitted,
        so ``None`` or an empty transcript yields an empty list.
    """
    # A failed transcription hands us None; treat it like an empty transcript
    # instead of raising AttributeError on .split/.splitlines.
    if not text:
        return []

    segments = []
    current_speaker = None
    pending = []  # lines collected for the turn currently being built

    # splitlines() also handles "\r\n", so speaker labels and segment text do
    # not end up carrying stray carriage returns.
    for line in text.splitlines():
        if line.startswith("Person"):
            body = " ".join(pending).strip()
            # Test the stripped text: a turn made only of blank lines must not
            # produce an empty segment.
            if body:
                segments.append((current_speaker, body))
            current_speaker = line
            pending = []
        else:
            pending.append(line)

    # Flush the final turn, which has no following speaker line to trigger it.
    body = " ".join(pending).strip()
    if body:
        segments.append((current_speaker, body))
    return segments


In [5]:
def write_conversation(segments, output_folder):
    """Write each conversation segment to its own text file.

    Files are named ``person<N>.txt`` where ``N`` is the 1-based position of
    the segment in ``segments`` (not derived from the speaker label), so an
    existing file with the same name is overwritten. Each file holds a header
    line ``"<speaker>:"`` followed by the segment text.

    Args:
        segments: Iterable of ``(speaker, text)`` pairs. ``speaker`` may be
            ``None`` for text that has no speaker label.
        output_folder: Directory to write into; created (with parents) if it
            does not exist.
    """
    os.makedirs(output_folder, exist_ok=True)
    for index, (speaker, text) in enumerate(segments, start=1):
        # Avoid writing the literal header "None:" for unlabelled text.
        label = speaker if speaker is not None else "Unknown"
        path = os.path.join(output_folder, f"person{index}.txt")
        # Explicit UTF-8: the platform default encoding (e.g. cp1252 on
        # Windows) can fail on or mangle non-ASCII transcript text.
        with open(path, "w", encoding="utf-8") as f:
            f.write(f"{label}:\n")
            f.write(text)

In [7]:
if __name__ == "__main__":
    # Raw string: "\s" is not a valid escape sequence, so the non-raw literal
    # triggers an invalid-escape warning on recent Python versions. The path
    # value itself is unchanged.
    audio_file = r"D:\separateaudiocheck2.wav"
    output_folder = "segregated_conversation"

    # Step 1: Transcribe audio
    text = transcribe_audio(audio_file)

    if text is None:
        # transcribe_audio returns None (after printing the reason) when
        # recognition fails; there is nothing to segment or write.
        print("No transcript available; skipping segmentation and file output.")
    else:
        # Step 2: Segment conversation
        segments = segment_conversation(text)

        # Step 3: Write segmented conversation to files
        write_conversation(segments, output_folder)