In [1]:
import whisper
import json
import moviepy.editor as mp
import os
import statistics

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the ground-truth annotations; the top-level keys are later used as video paths.
with open("jsons/ground_truth.json") as truth_file:
    ground_truth = json.loads(truth_file.read())

video_paths = [path for path in ground_truth]

In [None]:
# Load the Whisper model once; the same instance is reused for every video.
model = whisper.load_model('medium')

for video in video_paths:
    # Ground-truth record for this video; its 'language' field is the expected label.
    info = ground_truth.get(video)
    truth = info.get('language')
    print(truth)

    # The audio track is written to a temporary WAV file named after the video.
    audio_file = f'{os.path.basename(video)}.wav'

    try:
        # Extract the audio, always closing the clip so its reader resources
        # are released even when extraction fails.
        clip = mp.VideoFileClip(video)
        try:
            if clip.audio is None:
                raise ValueError(f'{video} has no audio track')
            clip.audio.write_audiofile(audio_file)
        finally:
            clip.close()

        # Load the audio, fit it to the length Whisper expects, and detect the language.
        audio = whisper.load_audio(audio_file)
        audio = whisper.pad_or_trim(audio)
        mel = whisper.log_mel_spectrogram(audio).to(model.device)
        _, probs = model.detect_language(mel)
    finally:
        # Remove the temporary audio file on success and on failure alike.
        if os.path.exists(audio_file):
            os.remove(audio_file)

    # The detected language is the candidate with the highest probability.
    language = max(probs, key=probs.get)
    confidence = probs.get(language)

    print(language)
    print(confidence)

    # Record the detection outcome on the ground-truth entry.
    ground_truth[video]['detected_language'] = language
    ground_truth[video]['confidence'] = confidence
    # None when the ground-truth label is not among the detected candidates.
    ground_truth[video]['confidence_on_correct_language'] = probs.get(truth)
    ground_truth[video]['correct'] = truth == language

# Summarise detection confidence separately for failed and correct detections.
# Only per-video records (dicts carrying a 'correct' flag) are considered, so
# summary values stored in ground_truth by an earlier run of this cell are not
# mistaken for video entries.
failed_confidence_scores = []
correct_confidence_scores = []

for data in ground_truth.values():
    if not isinstance(data, dict) or 'correct' not in data:
        continue
    if data['correct']:
        correct_confidence_scores.append(data['confidence_on_correct_language'])
    else:
        failed_confidence_scores.append(data['confidence'])

# statistics.fmean, min and max all raise on empty input; when a group has no
# entries (e.g. every detection was correct) its statistics are stored as None.
ground_truth['average_failure_confidence'] = (
    statistics.fmean(failed_confidence_scores) if failed_confidence_scores else None
)
ground_truth['average_correct_confidence'] = (
    statistics.fmean(correct_confidence_scores) if correct_confidence_scores else None
)
ground_truth['lowest_correct_confidence'] = min(correct_confidence_scores, default=None)
ground_truth['highest_failed_confidence'] = max(failed_confidence_scores, default=None)

In [4]:
# Write the annotated ground-truth dictionary out as JSON.
with open("results.json", "w+") as results_file:
    json.dump(ground_truth, fp=results_file)