<a href="https://colab.research.google.com/github/Manika2219/Audio-to-Text-Legal-Document-Generator/blob/main/Audio_to_Text_Legal_Document_Generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install necessary libraries
!pip install openai-whisper python-docx pydub ffmpeg gradio

# Import libraries
import whisper
from docx import Document
from docx.shared import Inches, Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.section import WD_ORIENTATION
from pydub import AudioSegment
import os
import gradio as gr

# Transcription function
# Loaded Whisper models keyed by model name, so repeated transcriptions
# reuse the model instead of reloading it on every call.
_WHISPER_MODELS = {}


def transcribe_audio(audio_file_path, model_name="base"):
    """Transcribe an audio file to text with Whisper.

    Args:
        audio_file_path: Path of the audio file passed to ``model.transcribe``.
        model_name: Name passed to ``whisper.load_model``; defaults to
            ``"base"``, the model the original code always used.

    Returns:
        The value stored under the ``'text'`` key of the transcription result.
    """
    model = _WHISPER_MODELS.get(model_name)
    if model is None:
        # First request for this model name: load once and remember it.
        model = whisper.load_model(model_name)
        _WHISPER_MODELS[model_name] = model

    result = model.transcribe(audio_file_path)
    return result['text']

def _append_page_number_field(paragraph):
    """Append a Word PAGE field to *paragraph* so each page shows its own number."""
    # Imported locally so this block stays self-contained.
    from docx.oxml import OxmlElement
    from docx.oxml.ns import qn

    run = paragraph.add_run()

    field_begin = OxmlElement("w:fldChar")
    field_begin.set(qn("w:fldCharType"), "begin")

    field_code = OxmlElement("w:instrText")
    field_code.set(qn("xml:space"), "preserve")
    field_code.text = "PAGE"

    field_end = OxmlElement("w:fldChar")
    field_end.set(qn("w:fldCharType"), "end")

    # python-docx has no public API for fields, so the field elements are
    # appended directly to the run's underlying XML element.
    for element in (field_begin, field_code, field_end):
        run._r.append(element)


# Create a beautified legal format Word document
def create_beautified_doc(text, output_file="transcription.docx"):
    """Write *text* into a formatted legal-size Word document.

    The document gets a centered bold title, a centered italic subtitle, the
    justified body text in 12 pt Times New Roman, and a centered footer
    showing "Page <n>".

    Args:
        text: Body text to place in the document.
        output_file: Path the ``.docx`` file is saved to.
    """
    doc = Document()
    section = doc.sections[0]

    # Set legal paper size and orientation
    section.page_width = Inches(8.5)
    section.page_height = Inches(14)
    section.orientation = WD_ORIENTATION.PORTRAIT

    # Set margins
    section.top_margin = Inches(1)
    section.bottom_margin = Inches(1)
    section.left_margin = Inches(1)
    section.right_margin = Inches(1)

    # Add a title
    title = doc.add_paragraph()
    title_run = title.add_run("Audio Transcription")
    title_run.font.name = "Times New Roman"
    title_run.font.size = Pt(16)
    title_run.bold = True
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Add a subtitle
    subtitle = doc.add_paragraph()
    subtitle_run = subtitle.add_run("Generated with High Accuracy and Proper Formatting")
    subtitle_run.font.name = "Times New Roman"
    subtitle_run.font.size = Pt(12)
    subtitle_run.italic = True
    subtitle.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Add spacing between title and body
    doc.add_paragraph("\n")

    # Add the transcribed text with proper formatting
    paragraph = doc.add_paragraph()
    paragraph_run = paragraph.add_run(text)
    paragraph_run.font.name = "Times New Roman"
    paragraph_run.font.size = Pt(12)
    paragraph.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY  # Justify the text

    # Add page number: a literal "Page 1" would repeat on every page, so the
    # number is a PAGE field appended after the "Page " label.
    footer = section.footer
    footer_paragraph = footer.paragraphs[0]
    footer_paragraph.text = "Page "
    _append_page_number_field(footer_paragraph)
    footer_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Save the document
    doc.save(output_file)

# Main processing function
def process_audio(audio_file):
    """Convert an uploaded audio file to WAV, transcribe it and build the document.

    Args:
        audio_file: Path of the uploaded audio file (the Gradio audio input
            is configured with ``type="filepath"``).

    Returns:
        A ``(transcription_text, output_doc)`` tuple: the transcribed text and
        the path of the generated ``transcription.docx``.

    Raises:
        gr.Error: If no audio file was provided.
    """
    # Imported locally so this block does not depend on a file-level import.
    import tempfile

    if not audio_file:
        # Without this guard a missing upload fails inside pydub with an
        # error that means nothing to the user.
        raise gr.Error("Please upload an audio file before processing.")

    # Use a unique temporary WAV path so concurrent requests cannot overwrite
    # each other's intermediate file.
    wav_fd, converted_audio_path = tempfile.mkstemp(suffix=".wav")
    os.close(wav_fd)
    try:
        # Convert audio file to WAV format
        audio = AudioSegment.from_file(audio_file)
        audio.export(converted_audio_path, format="wav")

        # Transcribe the audio
        transcription_text = transcribe_audio(converted_audio_path)
    finally:
        # Remove the temporary WAV even when conversion or transcription fails.
        if os.path.exists(converted_audio_path):
            os.remove(converted_audio_path)

    # Generate the beautified Word document in its own directory: the download
    # keeps the name "transcription.docx" without clashing with other requests.
    output_dir = tempfile.mkdtemp(prefix="transcription_")
    output_doc = os.path.join(output_dir, "transcription.docx")
    create_beautified_doc(transcription_text, output_doc)

    return transcription_text, output_doc

# Gradio interface: heading, upload widget, transcript box and download slot,
# each placed in its own row, plus the button that runs the pipeline.
with gr.Blocks() as gui:
    gr.Markdown("# Enhanced Audio-to-Text Transcription")
    gr.Markdown("Upload an audio file, transcribe it to text, and download a beautifully formatted legal-size document.")

    with gr.Row():
        # type="filepath" hands process_audio a path on disk.
        audio_input = gr.Audio(type="filepath", label="Upload your audio file")

    with gr.Row():
        transcription_output = gr.Textbox(
            label="Transcribed Text",
            lines=15,
            interactive=False,
        )

    with gr.Row():
        download_button = gr.File(label="Download Beautified Document")

    # Clicking the button sends the uploaded file through process_audio and
    # fills the transcript box and the download slot with its two results.
    submit_btn = gr.Button("Process Audio")
    submit_btn.click(
        process_audio,
        inputs=audio_input,
        outputs=[transcription_output, download_button],
    )

# Start serving the interface
gui.launch()


Collecting openai-whisper
  Downloading openai-whisper-20240930.tar.gz (800 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 800.5/800.5 kB 2.4 MB/s eta 0:00:00
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting ffmpeg
  Downloading ffmpeg-1.4.tar.gz (5.1 kB)
  Preparing metadata (setup.py) ... done
Collecting gradio
  Downloading gradio-5.10.0-py3-none-any.whl.metadata (16 kB)
Collecting tiktoken (from openai-whisper)
  Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting triton>=2.0.0 (from openai-whisper)
  Downloading triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.

