In [None]:
!pip install openai-whisper #Whisper models
!pip install jiwer #WER computation
!pip install codecarbon #carbon footprint of my code

In [None]:
import whisper
from jiwer import wer
import os
import jiwer
import json
import re

In [25]:
#transcriptions
def transcribeAllFiles(model_v1, model_v2, path_to_folder: str = '.', max_files=5):
    """
    Transcribe the .wav files of a folder with two different Whisper models.

    :param whisper.model.Whisper model_v1: first model, e.g. Whisper-tiny
    :param whisper.model.Whisper model_v2: second model, e.g. Whisper-small
    :param str path_to_folder: folder which contains the .wav files, defaults
        to '.' (current folder); None is also treated as '.'
    :param max_files: maximum number of .wav files to transcribe. Defaults to
        5 because transcribing every file can take a long time; pass None to
        transcribe all of them
    :return: dictionary with the file name (not the full path) as key and a
        tuple (model_v1 transcription, model_v2 transcription) as value
    """

    if path_to_folder is None:
        path_to_folder = '.'

    results = {}

    # os.listdir returns entries in arbitrary order; sorting makes the subset
    # selected under max_files reproducible from one run to the next.
    for f in sorted(os.listdir(path_to_folder)):
        if max_files is not None and len(results) >= max_files:
            break
        if not f.endswith('.wav'):
            continue

        # os.listdir yields bare names: build the real path so the function
        # also works when the current directory is not path_to_folder.
        audio_path = os.path.join(path_to_folder, f)

        # Transcription with the first model, then with the other model
        tiny_text = model_v1.transcribe(audio_path)["text"]
        small_text = model_v2.transcribe(audio_path)["text"]

        # Keyed by file name so it can be matched with the gold data keys
        results[f] = (tiny_text, small_text)

    return results


# Display of results for test
def display_transcriptions(results : dict):
    """
    Print every transcription pair, one block per audio file.

    Each block shows the file name, the first ("Tiny") transcription, the
    second ("Small") transcription and a dashed separator line.

    :param results dict: name of the file as a key, tuple with tiny and small transcription as value
    """
    separator = "-" * 30
    for name, (first_text, second_text) in results.items():
        block_lines = (
            f"File: {name}",
            f"Tiny Model: {first_text}",
            f"Small Model: {second_text}",
            separator,
        )
        print(*block_lines, sep="\n")

#evaluation visualisation
def visualisation_evaluation(gold_data, transcriptions_dict):
    """
    Print, for every transcribed file, the jiwer alignment report of each of
    its two transcriptions against the gold reference.

    :param gold_data: mapping from file name to its reference transcription;
        it must contain every key of transcriptions_dict
    :param transcriptions_dict: mapping from file name to a tuple
        (tiny transcription, small transcription)
    """

    for fichier, (tiny_text, small_text) in transcriptions_dict.items():
        # Same treatment for both hypotheses, first model's output first
        for hypothesis in (tiny_text, small_text):
            alignment = jiwer.process_words(gold_data[fichier], [hypothesis])
            print(jiwer.visualize_alignment(alignment))


#gold data as a dict with only audio name and transcription
def process_json_gold_data(path):
    """
    Load the gold transcriptions and index them by audio file name.

    The JSON file is expected to hold a list of objects. For every object
    whose "video_path" contains a name of the form <digits>_<digits>.MP4, the
    result maps "<digits>_<digits>.wav" to the object's
    "transcription_timestamp" value ("" when that key is missing). Objects
    whose "video_path" does not match are skipped; if two objects yield the
    same key, the later one wins.

    :param path: path to the JSON file with the gold data
    :return: dictionary with the .wav file name as key and the gold
        transcription as value
    """

    # Explicit encoding: without it the platform default is used, which can
    # fail or garble non-ASCII characters in the transcriptions.
    with open(path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Keep only the name of the video; the dot is escaped so that only a real
    # ".MP4" extension matches (an unescaped "." would accept any character).
    pattern = re.compile(r'(\d+_\d+)\.MP4')

    result = {}
    for item in data:
        match = pattern.search(item.get("video_path", ""))
        if match:
            # Key uses .wav to correspond to the audio files
            result[match.group(1) + ".wav"] = item.get("transcription_timestamp", "")

    return result


In [None]:
# Google Colab only: mount Google Drive and change the working directory
# into it, so that the data files can be accessed.
from google.colab import drive
drive.mount('/content/drive/')
%cd /content/drive/My Drive

In [None]:
# Load the two Whisper models that are compared: "tiny" and "small".
tiny_model = whisper.load_model("tiny")
small_model = whisper.load_model("small")

In [26]:
# NOTE: the transcription call below is commented out, so `transcriptions`
# must already be defined in the session (e.g. from an earlier run of that
# line); otherwise the following statements raise a NameError.
#transcriptions = transcribeAllFiles(tiny_model, small_model, path_to_folder= '/content/drive/My Drive/wav_data')
# Print the transcriptions of both models for each file.
display_transcriptions(transcriptions)

# Gold transcriptions, keyed by .wav file name.
gold_data = process_json_gold_data('/content/drive/My Drive/gold/cleaned_transcriptions.json')

# Print the alignment of each transcription against its gold reference.
visualisation_evaluation(gold_data, transcriptions)

File: 1140_00.wav
Tiny Model:  That's technology making our attention span shorter. Technology is definitely making our attention span shorter with social media, with apps. You get your one minute video and you know these days if it's anything longer than that. We can't be bothered. We only want to read the comments. We only want to, you know, go through and get the just the things. I know that if you know you see a long paragraph on internet, you will look and most people say, I'm not reading that. Can someone tell you what it says? It's too long. I'm not reading it. So I think that different apps particularly Instagram where everything's got to be quick. And you know, you got to get the short version of something in order to care. I think that's definitely making our attention span shorter. Social media, Facebook, you know, you make your short post at the post too long. No one wants to read it. Even YouTube videos, you know, once we see all this video 15 minutes long, I'm not watchin