In [28]:
import re
from datetime import timedelta

def convert_seconds_to_time(seconds):
    # Convert seconds to a string in the format 'HH:MM:SS'
    return str(timedelta(seconds=seconds))

def generate_srt_content(word_segments, max_chars=1):
    # Initialize some stuff
    srt_content = ""
    current_block = ""
    srt_segment_n = 1  # This is the subtitle number in the SRT file
    previous_end = 0  # Keep track of where the last word ended

    # Loop through each word to make sure it has a start and end time
    for i, word in enumerate(word_segments):
        if 'start' not in word or 'end' not in word:
            word['start'] = previous_end
            word['end'] = previous_end + 4  # If we don't know when it ends, just add 4 seconds

        # Update the end time for the next loop
        previous_end = word['end']

    # If the first word doesn't start at 0, add an empty subtitle at the beginning
    if word_segments[0]['start'] != 0:
        srt_content += f"1\n00:00:00,000 --> {convert_seconds_to_time(word_segments[0]['start'])}\n\n"
        srt_segment_n = 2  # Start the next subtitle at 2

    # Initialize the start and end times for the first block
    block_start_time = convert_seconds_to_time(word_segments[0]['start'])
    block_end_time = block_start_time

    # Loop through each word to build the SRT content
    for word in word_segments:
        next_word = word["word"] + " "

        # Check if we should start a new subtitle block
        if re.search(r'[.!?]\s*$', current_block) or len(current_block + next_word) > max_chars:
            srt_content += f"{srt_segment_n}\n{block_start_time} --> {block_end_time}\n{current_block.strip()}\n\n"
            srt_segment_n += 1
            block_start_time = block_end_time
            current_block = next_word
        else:
            current_block += next_word
            block_end_time = convert_seconds_to_time(word['end'])

    # Don't forget the last block!
    if current_block.strip():
        srt_content += f"{srt_segment_n}\n{block_start_time} --> {block_end_time}\n{current_block.strip()}\n\n"

    return srt_content

def save_srt_to_file(save_to, srt_content):
    # Create the full path for the new SRT file
    file_path = f"saved_srt_files/{save_to}_subtitles.srt"

    # If the file already exists, delete it
    if os.path.exists(file_path):
        os.remove(file_path)

    # Write the new SRT content to the file
    with open(file_path, 'a', encoding='utf-8') as srtFile:
        srtFile.write(srt_content)

    return os.path.basename(file_path)

# Example usage
word_segments = [
    {"word": "Hello", "start": 0.5, "end": 1},
    {"word": "world", "start": 4, "end": 5},
    {"word": ".", "start": 5, "end": 20}
]

srt_content = generate_srt_content(word_segments)
print(srt_content)  # This will print the SRT content
# save_srt_to_file("example", srt_content)  # Uncomment this to save the SRT content to a file


1
00:00:00,000 --> 0:00:00.500000

2
0:00:00.500000 --> 0:00:00.500000


3
0:00:00.500000 --> 0:00:00.500000
Hello

4
0:00:00.500000 --> 0:00:00.500000
world

5
0:00:00.500000 --> 0:00:00.500000
.




In [45]:
import replicate
def get_word_segments (audio_file_binary):
    output = replicate.run("daanelson/whisperx:9aa6ecadd30610b81119fc1b6807302fd18ca6cbb39b3216f430dcf23618cedd",input={"audio": audio_file_binary, "align_output": True})
 
    list_of_word_dicts = []
    for item_1 in output:
        # print(item_1)
        item_words = item_1['words']
        # print(item_words)
        for innter_words in item_words:
            # print(innter_words)
            list_of_word_dicts.append(innter_words)
    return list_of_word_dicts



audio_filepath = "transcriber/input_audio/test1.mp3"
audio_file_binary = open(audio_filepath, "rb")
word_segments = get_word_segments(audio_file_binary)
for word_segment in word_segments:
    print(word_segment)

{'end': 0.348, 'score': 0.789, 'start': 0.248, 'word': 'Are'}
{'end': 0.529, 'score': 0.8, 'start': 0.388, 'word': 'you'}
{'end': 0.909, 'score': 0.891, 'start': 0.629, 'word': 'ready'}
{'end': 1.049, 'score': 0.918, 'start': 0.989, 'word': 'to'}
{'end': 1.35, 'score': 0.836, 'start': 1.089, 'word': 'become'}
{'end': 1.43, 'score': 0.748, 'start': 1.37, 'word': 'an'}
{'end': 2.111, 'score': 0.887, 'start': 1.65, 'word': 'online'}
{'end': 2.671, 'score': 0.825, 'start': 2.151, 'word': 'millionaire?'}
{'end': 3.072, 'score': 0.838, 'start': 2.731, 'word': 'Discover'}
{'end': 3.172, 'score': 0.836, 'start': 3.092, 'word': 'the'}
{'end': 3.472, 'score': 0.824, 'start': 3.212, 'word': 'secret'}
{'end': 3.572, 'score': 0.749, 'start': 3.512, 'word': 'to'}
{'end': 3.893, 'score': 0.905, 'start': 3.613, 'word': 'making'}
{'end': 4.534, 'score': 0.847, 'start': 4.033, 'word': 'thousands'}
{'end': 4.674, 'score': 0.972, 'start': 4.574, 'word': 'from'}
{'end': 4.794, 'score': 0.833, 'start': 4.71

In [46]:
srt_contents = generate_srt_content(word_segments)
print(srt_contents)

1
00:00:00,000 --> 0:00:00.248000

2
0:00:00.248000 --> 0:00:00.248000


3
0:00:00.248000 --> 0:00:00.248000
Are

4
0:00:00.248000 --> 0:00:00.248000
you

5
0:00:00.248000 --> 0:00:00.248000
ready

6
0:00:00.248000 --> 0:00:00.248000
to

7
0:00:00.248000 --> 0:00:00.248000
become

8
0:00:00.248000 --> 0:00:00.248000
an

9
0:00:00.248000 --> 0:00:00.248000
online

10
0:00:00.248000 --> 0:00:00.248000
millionaire?

11
0:00:00.248000 --> 0:00:00.248000
Discover

12
0:00:00.248000 --> 0:00:00.248000
the

13
0:00:00.248000 --> 0:00:00.248000
secret

14
0:00:00.248000 --> 0:00:00.248000
to

15
0:00:00.248000 --> 0:00:00.248000
making

16
0:00:00.248000 --> 0:00:00.248000
thousands

17
0:00:00.248000 --> 0:00:00.248000
from

18
0:00:00.248000 --> 0:00:00.248000
the

19
0:00:00.248000 --> 0:00:00.248000
comfort

20
0:00:00.248000 --> 0:00:00.248000
of

21
0:00:00.248000 --> 0:00:00.248000
your

22
0:00:00.248000 --> 0:00:00.248000
own

23
0:00:00.248000 --> 0:00:00.248000
home

24
0:00:00.2480