In [6]:
import os
from happytransformer import HappyTextToText, TTSettings
import whisper

# Transcribe every .mp3 file in a Google Drive folder with Whisper, run each
# transcript through a T5 grammar-correction model, and print both versions.
#
# NOTE: folder_path lives under /content/drive, so the Drive-mount cell and the
# two package-install cells of this notebook must have been run before this one.

# Load T5 model for grammar correction
happy_tt = HappyTextToText("T5", "vennify/t5-base-grammar-correction")
args = TTSettings(num_beams=5, min_length=1)

# Load the Whisper model once, up front. It does not depend on the file being
# processed, so reloading it on every loop iteration only wastes time.
model = whisper.load_model("medium")

# Path to the folder containing audio files
folder_path = "/content/drive/MyDrive/Colab Notebooks/audios"

# os.listdir returns entries in arbitrary order; sort them so the printed
# results come out in the same order on every run.
for file_name in sorted(os.listdir(folder_path)):
    # Only .mp3 files are processed; everything else in the folder is skipped.
    if not file_name.endswith(".mp3"):
        continue

    # Transcribe the audio file
    audio_path = os.path.join(folder_path, file_name)
    transcription = model.transcribe(audio_path)
    text = transcription["text"]

    # Correct grammar ("grammar: " is the task prefix passed to the T5 model)
    result = happy_tt.generate_text("grammar: " + text, args=args)
    corrected_text = result.text

    # Print the original and corrected text side by side for comparison
    print("Original Transcription:")
    print(text)
    print("Corrected Transcription:")
    print(corrected_text)
    print("------")




Original Transcription:
 I am not hungry, so I don't want anything to eat.
Corrected Transcription:
I am not hungry, so I don't want anything to eat.
------
Original Transcription:
 He have two dogs and a cat.
Corrected Transcription:
He has two dogs and a cat.
------
Original Transcription:
 The cat lays on the couch all day.
Corrected Transcription:
The cat lays on the couch all day.
------
Original Transcription:
 I seen that movie last week.
Corrected Transcription:
I saw that movie last week.
------
Original Transcription:
 You were supposed to call me yesterday.
Corrected Transcription:
You were supposed to call me yesterday.
------
Original Transcription:
 She have been working all day.
Corrected Transcription:
She has been working all day.
------
Original Transcription:
 Me and my friend is going to the movies.
Corrected Transcription:
Me and my friend are going to the movies.
------


In [2]:
# Install happytransformer into the running kernel's environment. The version
# is pinned to the one this notebook was originally run with so that re-runs
# are reproducible.
%pip install happytransformer==3.0.0

Collecting happytransformer
  Downloading happytransformer-3.0.0-py3-none-any.whl (24 kB)
Collecting datasets<3.0.0,>=2.13.1 (from happytransformer)
  Downloading datasets-2.19.1-py3-none-any.whl (542 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/542.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.0/542.0 kB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
Collecting accelerate<1.0.0,>=0.20.1 (from happytransformer)
  Downloading accelerate-0.30.1-py3-none-any.whl (302 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.6/302.6 kB[0m [31m36.3 MB/s[0m eta [36m0:00:00[0m
Collecting wandb (from happytransformer)
  Downloading wandb-0.17.0-py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.7/6.7 MB[0m [31m105.0 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>

In [3]:
# Install openai-whisper into the running kernel's environment. The version is
# pinned to the one this notebook was originally run with so that re-runs are
# reproducible.
%pip install -U openai-whisper==20231117

Collecting openai-whisper
  Downloading openai-whisper-20231117.tar.gz (798 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/798.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m778.2/798.6 kB[0m [31m23.7 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m798.6/798.6 kB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tiktoken (from openai-whisper)
  Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m37.9 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: openai-whisper
  Building wheel for openai-whisper (pyproject.toml

In [1]:
# Mount Google Drive into the Colab filesystem so that files stored on Drive
# can be read through ordinary paths under /content/drive.
from google.colab import drive

drive.mount(mountpoint="/content/drive")

Mounted at /content/drive
