In [4]:
import speech_recognition as sr 
import os 
from pydub import AudioSegment
from pydub.silence import split_on_silence
import tkinter as tk

r = sr.Recognizer()

# a function to recognize speech in the audio file
# so that we don't repeat ourselves in other functions
def transcribe_audio(path):
    # use the audio file as the audio source
    with sr.AudioFile(path) as source:
        audio_listened = r.record(source)
        # try converting it to text
        text = r.recognize_google(audio_listened)
    return text

# a function that splits the audio file into chunks on silence
# and applies speech recognition
def get_large_audio_transcription_on_silence(path):
    """Splitting the large audio file into chunks
    and apply speech recognition on each of these chunks"""
    # open the audio file using pydub
    sound = AudioSegment.from_file(path)  
    # split audio sound where silence is 500 milliseconds or more and get chunks
    chunks = split_on_silence(sound,
        # experiment with this value for your target audio file
        min_silence_len = 500,
        # adjust this per requirement
        silence_thresh = sound.dBFS-14,
        # keep the silence for 1 second, adjustable as well
        keep_silence=500,
    )
    folder_name = "audio-chunks"
    # create a directory to store the audio chunks
    if not os.path.isdir(folder_name):
        os.mkdir(folder_name)
    whole_text = ""
    # process each chunk 
    for i, audio_chunk in enumerate(chunks, start=1):
        # export audio chunk and save it in
        # the `folder_name` directory as WAV format
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")
        # recognize the chunk
        try:
            text = transcribe_audio(chunk_filename)
        except sr.UnknownValueError as e:
            print("Error:", str(e))
        else:
            text = f"{text.capitalize()}. "
            print(chunk_filename, ":", text)
            whole_text += text
    # return the text for all chunks detected
    return whole_text



In [5]:
from fpdf import FPDF
import fpdf

def export():
    class FPDF_Class(FPDF):
        pass
    
    pdf = FPDF_Class()
    
        
    file_name = os.path.basename(path)

    file_no_ext = file_name[:-4]
    
    pdf.set_margins(left=20, top=20, right=20)
    pdf.add_page()
    pdf.set_font('Helvetica', '', size=12)

    pdf.set_font('Helvetica', 'B', size=14)
    pdf.cell(0, 10, "Transcription of " + file_no_ext, ln=True, align='C')
    
    pdf.multi_cell(0, 10, transcription)

    pdf.ln(5)
    
    pdf.cell(w = 1, h = 10, txt="The audio can be found here.", link="https://www.youtube.com/results?search_query="+file_no_ext)
    
    pdf.output(file_no_ext + '.pdf')

    
def export_thread():
    threading.Thread(target=export).start()

In [None]:
import tkinter as tk
from tkinter import filedialog as fd
from tkinter import Button, Message
import pygame
from pygame import mixer
import threading

global transcription
transcription = None
global path
path = None

def open_file_selection():
    global transcription, path, open_file, choose_file
    if path:
        if path.endswith('.mp3'):
            print(path)
            transcription = get_large_audio_transcription_on_silence(path)
            print("\nFull text:", transcription)
        else:
            open_file_selection()

def choose_files():
    global path
    choose_files = False
    path = fd.askopenfilename()        
    transcription = None
    choose_files = True


def play_music():
    global path, play_music
    if path:
        try:
            mixer.music.load(path)
            mixer.music.play()
        except pygame.error as e:
            print(f"Error loading or playing music: {e}")
    else:
        print("No file selected or invalid file path")

def open_file_selection_thread():
    threading.Thread(target=open_file_selection).start()

def play_music_thread():
    threading.Thread(target=play_music).start()

def check_transcription():
    msg = Message(root, text="")
    if transcription:
        msg = Message(root, text=transcription)
        msg.pack()
    else:
        root.after(0, msg.destroy)
        msg = Message(root, text="Versurile nu exista sau operatia inca nu a fost efectuata")
        msg.pack()
        root.after(5000, msg.destroy)

def check_transcription_thread():
    threading.Thread(target=check_transcription).start()
    

root = tk.Tk()
root.geometry("500x500")
root.title("Transcription Generator")

mixer.init()
Button(root, text="Choose the Audio File", command=choose_files, width=52, height=1, font=("calibri", 12, "bold")).pack()
Button(root, text="Generate Transcription", command=lambda: open_file_selection_thread(), width=52, height=1, font=("calibri", 12, "bold")).pack()
Button(root, text="Show Transcription", command=check_transcription_thread, width=52, height=1, font=("calibri", 12, "bold")).pack()
Button(root, text="Play Audio", command=play_music_thread, width=52, height=1, font=("calibri", 12, "bold")).pack()
Button(root, text="Export Transcription", command=export_thread, width=52, height=1, font=("calibri", 12, "bold")).pack()

root.mainloop()


/home/gabriel/Desktop/tema-3/Harry Styles - Falling (Acapella) - Official.mp3
Error: 
audio-chunks/chunk2.wav : I'm in my bed. 
audio-chunks/chunk3.wav : You're not here. 
audio-chunks/chunk4.wav : In the snow want to blame but the drink and not wandering hands. 
audio-chunks/chunk5.wav : Get what i said. 
Error: 
audio-chunks/chunk7.wav : And i can't take it back i can't unpack the baggage left. 
audio-chunks/chunk8.wav : What am i now. 
Error: 
Error: 
audio-chunks/chunk11.wav : I'm falling again i'm falling again i'm falling. 
Error: 
Error: 
Error: 
audio-chunks/chunk15.wav : I'm falling again i'm falling again i'm falling. 
audio-chunks/chunk16.wav : You said you can. 
audio-chunks/chunk17.wav : Can you missed me to. 
audio-chunks/chunk18.wav : And i'm willing alright too many songs about you. 
audio-chunks/chunk19.wav : Coffees are. 
audio-chunks/chunk20.wav : At the beachwood cafe. 
audio-chunks/chunk21.wav : Who kills me kiss i know we've run out of things we can say. 
Error: 


In [None]:
print(transcription)