In [1]:
import os
from happytransformer import HappyTextToText, TTSettings
import whisper
import subprocess

# Grammar-correction model (T5) and the beam-search settings used when generating
happy_tt = HappyTextToText("T5", "vennify/t5-base-grammar-correction")
args = TTSettings(min_length=1, num_beams=5)

# Directory that is scanned for audio files
folder_path = "/content/audios"

# File extensions that are treated as audio input
supported_formats = [
    ".mp3",
    ".mp4",
    ".wav",
    ".m4a",
    ".flac",
    ".aac",
]

# Whisper models already loaded in this session, keyed by model name, so the
# checkpoint is loaded once instead of once per audio file.
_WHISPER_MODELS = {}


# Function to transcribe audio using Whisper
def transcribe_audio(file_path, model_name="large"):
    """Transcribe one audio file with Whisper and return the recognised text.

    Args:
        file_path: Path of the audio file handed to ``model.transcribe``.
        model_name: Name passed to ``whisper.load_model``; defaults to
            ``"large"``. Each distinct name is loaded at most once and reused
            on later calls.

    Returns:
        The ``"text"`` entry of Whisper's result, or ``""`` if loading the
        model or transcribing raised an exception (the error is printed).
    """
    try:
        model = _WHISPER_MODELS.get(model_name)
        if model is None:
            # Only cache after a successful load so a failed attempt is retried.
            model = whisper.load_model(model_name)
            _WHISPER_MODELS[model_name] = model
        transcription = model.transcribe(file_path)
        return transcription["text"]
    except Exception as e:
        # Best-effort: one bad file must not abort the whole batch.
        print(f"Failed to transcribe audio: {e}")
        return ""

# Function to split text into manageable segments for grammar correction
def split_text(text, max_length=512):
    """Split text into sentence-aligned segments of at most ``max_length`` characters.

    The text is cut on ``'. '`` boundaries and consecutive sentences are packed
    into a segment, joined by a single space, for as long as the segment stays
    within ``max_length`` characters. Sentences are never cut in the middle, so
    a single sentence longer than ``max_length`` becomes its own (over-long)
    segment.

    Args:
        text: The text to split.
        max_length: Maximum number of characters per segment.

    Returns:
        A list of non-empty segment strings; ``[]`` for empty or
        whitespace-only input. The final sentence keeps whatever terminator it
        had in ``text`` (no extra period is appended).
    """
    pieces = text.strip().split('. ')
    last_index = len(pieces) - 1

    sentences = []
    for index, piece in enumerate(pieces):
        piece = piece.strip()
        if not piece:
            continue
        # split() removed the period of every piece except the last one,
        # which still carries its own terminator (if it had any).
        if index < last_index:
            piece += '.'
        sentences.append(piece)

    segments = []
    current_segment = ""
    for sentence in sentences:
        candidate = f"{current_segment} {sentence}" if current_segment else sentence
        if current_segment and len(candidate) > max_length:
            segments.append(current_segment)
            current_segment = sentence
        else:
            # Also taken for an over-long first sentence, so no empty segment
            # is ever emitted.
            current_segment = candidate
    if current_segment:
        segments.append(current_segment)
    return segments

# Function to correct grammar in text
def correct_grammar(text):
    """Run grammar correction over ``text`` and return the corrected string.

    The text is divided with ``split_text``; each piece is sent to the
    module-level ``happy_tt`` model with the ``"grammar: "`` prefix and the
    module-level generation ``args``. The corrected pieces are joined with a
    single space.
    """
    corrected_chunks = [
        happy_tt.generate_text("grammar: " + chunk, args=args).text
        for chunk in split_text(text)
    ]
    return ' '.join(corrected_chunks)

# Iterate through each file in the folder
# Lower-cased suffixes so that names such as "talk.MP3" are matched as well.
audio_suffixes = tuple(ext.lower() for ext in supported_formats)

# os.listdir returns entries in arbitrary order; sort for reproducible output.
for file_name in sorted(os.listdir(folder_path)):
    if not file_name.lower().endswith(audio_suffixes):
        continue

    # Construct full path to the audio file
    audio_path = os.path.join(folder_path, file_name)

    # Transcribe the audio file; an empty string signals a failed transcription
    text = transcribe_audio(audio_path)
    if not text:
        continue

    # Correct grammar
    corrected_text = correct_grammar(text)

    # Print the original and the corrected text
    print("Original Transcription:")
    print(text)
    print("Corrected Transcription:")
    print(corrected_text)
    print("------")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Original Transcription:
 Each of the employees are required to attend the meeting.
Corrected Transcription:
Each of the employees are required to attend the meeting.
------
Original Transcription:
 Each of the employees are required to attend the meeting.
Corrected Transcription:
Each of the employees are required to attend the meeting.
------
Original Transcription:
 She is one of them people who likes to read.
Corrected Transcription:
She is one of those people who likes to read.
------
Original Transcription:
 We was planning to visit the museum but it was closed.
Corrected Transcription:
We were planning to visit the museum but it was closed.
------


In [5]:
!pip install happytransformer
!pip install -U openai-whisper
!pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive
