Testing the OpenAI API for gpt-4o audio transcription (batch and streaming)

In [1]:
import os
from pydub import AudioSegment
from openai import OpenAI
from difflib import SequenceMatcher
import config



# Single OpenAI client reused by the transcription cells in this notebook;
# the API key is read from the local `config` module rather than hard-coded.
client = OpenAI(api_key=config.api_key)

In [4]:
base_folder = r"C:\Users\welin\Documents\Thesis\Data\combined_21"
chunk_length_ms = 11 * 60 * 1000    # 11 minutes
overlap_ms = 5 * 1000               # 5 seconds shared by consecutive chunks

# Each new chunk starts `overlap_ms` before the previous one ended; if the
# overlap were not smaller than the chunk, `start` would never advance.
if overlap_ms >= chunk_length_ms:
    raise ValueError("overlap_ms must be smaller than chunk_length_ms")


def strip_overlap(prev_text, text, max_overlap=20, threshold=0.9):
    """Drop leading words of `text` that repeat the end of `prev_text`.

    For every candidate length i (from the largest possible down to 2 words),
    the last i words of `prev_text` are compared with the first i words of
    `text` using `SequenceMatcher.ratio()` on the space-joined strings. The
    length with the highest ratio wins (the larger length on ties), and its
    words are removed only when that ratio is strictly above `threshold`.

    Args:
        prev_text: Text written for the previous chunk ("" for the first chunk).
        text: Raw transcription of the current chunk.
        max_overlap: Maximum number of words considered on either side.
        threshold: Minimum similarity ratio required to treat the words as a
            duplicate.

    Returns:
        A `(cleaned_text, removed_words)` tuple. `cleaned_text` is `text` with
        whitespace normalised to single spaces and the duplicate prefix
        removed; `removed_words` is the list of dropped words (empty when
        nothing was removed).
    """
    curr_words = text.split()
    prev_words = prev_text.split()
    # Never compare more words than either side actually has; this also
    # skips the check entirely for the first chunk or an empty transcription
    # (two empty strings would otherwise score a perfect 1.0).
    longest = min(max_overlap, len(prev_words), len(curr_words))

    best_match = 0
    best_ratio = 0.0
    for i in range(longest, 1, -1):
        prev_slice = " ".join(prev_words[-i:])
        curr_slice = " ".join(curr_words[:i])
        ratio = SequenceMatcher(None, prev_slice, curr_slice).ratio()
        if ratio > best_ratio:
            best_ratio = ratio
            best_match = i

    if best_ratio > threshold:
        return " ".join(curr_words[best_match:]), curr_words[:best_match]
    return " ".join(curr_words), []


def transcribe_chunk(chunk, chunk_path, model="gpt-4o-mini-transcribe"):
    """Export `chunk` to `chunk_path` as MP3, transcribe it and return the text.

    The temporary file is deleted even when the export or the API call
    raises, so failed runs do not leave `chunk_*.mp3` files behind.

    Args:
        chunk: Audio slice exposing `.export(path, format=...)`.
        chunk_path: Where the temporary MP3 is written.
        model: Transcription model name passed to the API.

    Returns:
        The `.text` attribute of the transcription response.
    """
    try:
        chunk.export(chunk_path, format="mp3")
        with open(chunk_path, "rb") as f:
            response = client.audio.transcriptions.create(
                model=model,
                file=f,
                response_format="json",
                language="en",
            )
        return response.text
    finally:
        if os.path.exists(chunk_path):
            os.remove(chunk_path)


# Loop through all subfolders
for root, dirs, files in os.walk(base_folder):
    for file in files:
        if not file.lower().endswith(".mp3"):
            continue

        input_path = os.path.join(root, file)
        # splitext swaps only the real extension, whatever its case. A
        # case-sensitive str.replace(".mp3", ...) would leave "X.MP3"
        # unchanged, so the transcript would be written over the audio file.
        output_txt = os.path.splitext(input_path)[0] + "_openai_transcript.txt"

        print(f"\n Processing: {input_path}")

        # Load audio
        audio = AudioSegment.from_file(input_path, format="mp3")
        # len() of the loaded audio is compared with the millisecond chunk
        # bounds below.
        duration = len(audio)

        with open(output_txt, "w", encoding="utf-8") as out_file:
            start = 0
            chunk_id = 0
            old_text = ""
            while start < duration:
                end = min(start + chunk_length_ms, duration)

                print(f"🔍 Transcribing chunk {chunk_id + 1}...")
                raw_text = transcribe_chunk(audio[start:end], f"chunk_{chunk_id}.mp3")

                # Remove words repeated because consecutive chunks overlap.
                text, removed = strip_overlap(old_text, raw_text)
                if removed:
                    print("Removed duplicate words:", removed)
                old_text = text

                out_file.write(text + "\n" + "\n")

                if end == duration:
                    break
                # Step back so the next chunk re-covers the last overlap_ms.
                start = end - overlap_ms
                chunk_id += 1

        print(f"✅ Transcript saved to: {output_txt}")



 Processing: C:\Users\welin\Documents\Thesis\Data\combined_21\4320211\4320211.mp3
🔍 Transcribing chunk 1...
🔍 Transcribing chunk 2...
🔍 Transcribing chunk 3...
Removed duplicate words: ['As', 'Brett', 'mentioned,', 'acquisitions', 'announced', 'and', 'completed', 'in', 'fiscal', '2020']
🔍 Transcribing chunk 4...
Removed duplicate words: ["We've", 'shared', 'with', 'investors', 'the', 'fact', 'that', 'our', 'service']
🔍 Transcribing chunk 5...
Removed duplicate words: ['in', 'FY21', 'to', 'include', 'all', 'stores.', 'We', 'talked', 'about', 'last', 'quarter,']
🔍 Transcribing chunk 6...
Removed duplicate words: ['Ladies', 'and', 'gentlemen,', 'thank', 'you', 'for', 'your', 'participation.', 'This', 'concludes', "today's"]
✅ Transcript saved to: C:\Users\welin\Documents\Thesis\Data\combined_21\4320211\4320211_openai_transcript.txt

 Processing: C:\Users\welin\Documents\Thesis\Data\combined_21\4330115\4330115.mp3
🔍 Transcribing chunk 1...
🔍 Transcribing chunk 2...
🔍 Transcribing chunk 3.

In [None]:
import os
from pydub import AudioSegment
from openai import OpenAI

base_folder = r"C:\Users\mark.welin\Documents\Thesis\Data\combined_22"
chunk_length_ms = 20 * 60 * 1000    # 20 minutes
overlap_ms = 10 * 1000              # 10 seconds

# Each new chunk starts `overlap_ms` before the previous one ended; if the
# overlap were not smaller than the chunk, `start` would never advance.
if overlap_ms >= chunk_length_ms:
    raise ValueError("overlap_ms must be smaller than chunk_length_ms")


def transcript_path_for(audio_path):
    """Return the transcript path that belongs to `audio_path`.

    Only the file extension is replaced (by `_openai_transcript.txt`),
    whatever its letter case. A case-sensitive `str.replace(".mp3", ...)`
    would leave a name such as `X.MP3` unchanged, so the transcript would be
    written over the audio file itself.
    """
    stem, _ext = os.path.splitext(audio_path)
    return stem + "_openai_transcript.txt"


# Loop through all subfolders
for root, dirs, files in os.walk(base_folder):
    for file in files:
        if not file.lower().endswith(".mp3"):
            continue

        input_path = os.path.join(root, file)
        output_txt = transcript_path_for(input_path)

        print(f"\n Processing: {input_path}")

        # Load audio
        audio = AudioSegment.from_file(input_path, format="mp3")
        duration = len(audio)

        with open(output_txt, "w", encoding="utf-8") as out_file:
            start = 0
            chunk_id = 0

            while start < duration:
                end = min(start + chunk_length_ms, duration)
                chunk = audio[start:end]
                chunk_path = f"chunk_{chunk_id}.mp3"

                try:
                    chunk.export(chunk_path, format="mp3")

                    print(f"🔍 Transcribing chunk {chunk_id + 1}...")

                    with open(chunk_path, "rb") as f:
                        stream = client.audio.transcriptions.create(
                            model="gpt-4o-transcribe",
                            file=f,
                            response_format="text",
                            stream=True,
                            language="en",
                        )

                        # Every event is echoed; only the final "done" event
                        # has its text written to the transcript.
                        for part in stream:
                            if part.type == "transcript.text.done":
                                print("done")
                                out_file.write(part.text + "\n")
                            print(part)
                finally:
                    # Remove the temporary chunk even if export or the API
                    # call failed.
                    if os.path.exists(chunk_path):
                        os.remove(chunk_path)

                if end == duration:
                    break
                # NOTE(review): consecutive chunks share overlap_ms of audio
                # and this cell does not de-duplicate, so words spoken in the
                # overlap may appear twice in the transcript.
                start = end - overlap_ms
                chunk_id += 1

        print(f"✅ Transcript saved to: {output_txt}")


🔍 Transcribing chunk 1...
TranscriptionTextDeltaEvent(delta='Ladies', type='transcript.text.delta', logprobs=None)
TranscriptionTextDeltaEvent(delta=' and', type='transcript.text.delta', logprobs=None)
TranscriptionTextDeltaEvent(delta=' gentlemen', type='transcript.text.delta', logprobs=None)
TranscriptionTextDeltaEvent(delta=',', type='transcript.text.delta', logprobs=None)
TranscriptionTextDeltaEvent(delta=' thank', type='transcript.text.delta', logprobs=None)
TranscriptionTextDeltaEvent(delta=' you', type='transcript.text.delta', logprobs=None)
TranscriptionTextDeltaEvent(delta=' for', type='transcript.text.delta', logprobs=None)
TranscriptionTextDeltaEvent(delta=' standing', type='transcript.text.delta', logprobs=None)
TranscriptionTextDeltaEvent(delta=' by', type='transcript.text.delta', logprobs=None)
TranscriptionTextDeltaEvent(delta=' and', type='transcript.text.delta', logprobs=None)
TranscriptionTextDeltaEvent(delta=' welcome', type='transcript.text.delta', logprobs=None)
Tr

In [22]:
from difflib import SequenceMatcher

# Diagnostic: print the similarity ratio for every candidate overlap length
# between the first two paragraphs of an existing transcript file.
with open(r"C:\Users\welin\Documents\Thesis\Data\combined_22\2020-03-0230487MTN-Ghana-2019-Annual-Results-Call\2020-03-0230487MTN-Ghana-2019-Annual-Results-Call_openai_transcript.txt", "r", encoding="utf-8") as f:
    content = f.read()

# Split the file into two parts using the first blank line as the split point.
# When there is no blank line, everything is `before` and `after` is empty.
before, _, after = content.partition("\n\n")
before = before.strip()
after = after.strip()

max_overlap = 20
threshold = 0.9  # same cut-off the chunked transcription loop applies

prev_words = before.split()[-max_overlap:]
curr_words = after.split()

# 0 (not None) so the slices below stay meaningful when nothing matches.
best_match = 0
best_ratio = 0
for i in range(max_overlap, 1, -1):
    prev_slice = " ".join(prev_words[-i:])
    curr_slice = " ".join(curr_words[:i])
    ratio = SequenceMatcher(None, prev_slice, curr_slice).ratio()
    print(f"Ratio for {i} words: {ratio:.2f}")
    if ratio > best_ratio:
        best_ratio = ratio
        best_match = i

# Only strip words when the best candidate is a convincing duplicate;
# removing the best match unconditionally would also cut weak matches.
if best_ratio > threshold:
    print("Removed duplicate words:", curr_words[:best_match])
    curr_words = curr_words[best_match:]
else:
    print(f"No overlap above {threshold}; nothing removed (best ratio {best_ratio:.2f})")

text = " ".join(curr_words)
old_text = text


Ratio for 20 words: 0.63
Ratio for 19 words: 0.66
Ratio for 18 words: 0.69
Ratio for 17 words: 0.75
Ratio for 16 words: 0.81
Ratio for 15 words: 0.90
Ratio for 14 words: 0.97
Ratio for 13 words: 0.92
Ratio for 12 words: 0.88
Ratio for 11 words: 0.77
Ratio for 10 words: 0.60
Ratio for 9 words: 0.39
Ratio for 8 words: 0.24
Ratio for 7 words: 0.52
Ratio for 6 words: 0.39
Ratio for 5 words: 0.30
Ratio for 4 words: 0.25
Ratio for 3 words: 0.08
Ratio for 2 words: 0.14
Removed duplicate words: ['But', 'in', 'the', 'second', 'thing', 'was', 'also', 'after', 'the', 'banking', 'sector', 'cleanup', 'was', 'also']


In [16]:
# Quick check of the non-streaming endpoint on a single local file.
loc = "C:\\Users\\welin\\Documents\\Thesis\\Test.mp3"
# The context manager closes the audio file once the request has finished,
# even if the API call raises (the bare open() never closed it).
with open(loc, "rb") as audio_file:
    # Despite its name, `stream` is not iterated here: no stream=True is
    # passed and the value is printed directly.
    stream = client.audio.transcriptions.create(
        model="gpt-4o-mini-transcribe",
        file=audio_file,
        response_format="text",
    )
print(stream)



Thanks, Ajita. Sales for the second quarter of $738 million were 11 percent higher than the previous year. Net earnings in the second quarter of $92 million increased 6 percent from 2016. Second quarter earnings per share of 53 cents increased 8 percent compared with 2016. Sales in our North America segment of $471 million increased 9 percent compared with the second quarter of 2016. The increase in sales was primarily due

