In [3]:
import jiwer
import pandas as pd

In [4]:
results = pd.read_csv('../my_tests/whisper_output.csv', index_col=0)
results

Unnamed: 0_level_0,prompt,output
number,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,The Hubble Space Telescope has revolutionized ...,The Hubble Space Telescope has revolutionized...
2.0,Photosynthesis is the process by which plants ...,Photosynthesis is the process by which plants...
3.0,The Industrial Revolution began in Britain in ...,The Industrial Revolution began in Britain in...
4.0,Chess is a strategic board game that originate...,Chess is a strategic board game that originat...
5.0,The human brain contains approximately 86 bill...,The human brain contains approximately 86 bil...
...,...,...
106.0,Happy New Year\n,Happy New Year!\n
107.0,I'm fine\n,I'm fine.\n
108.0,Let's go\n,Let's go.\n
109.0,Stop\n,Stop! OI!\n


In [7]:
metric_funcs = {
    'cer': jiwer.cer,
    'mer': jiwer.mer,
    'wer': jiwer.wer,
    'wil': jiwer.wil,
    'wip': jiwer.wip,
}

In [18]:
transforms = jiwer.Compose(
    [
        jiwer.ExpandCommonEnglishContractions(),
        jiwer.RemoveEmptyStrings(),
        jiwer.ToLowerCase(),
        jiwer.RemoveMultipleSpaces(),
        jiwer.Strip(),
        jiwer.RemovePunctuation(),
        jiwer.ReduceToListOfListOfWords(),
    ]
)

In [19]:
metrics = {}

for metric, func in metric_funcs.items():
    _metrics = []
    for _, row in results.iterrows():
        _metrics.append(func(
            row['prompt'], 
            row['output'],
            truth_transform=transforms,
            hypothesis_transform=transforms,
        ))
    metrics[metric] = _metrics

In [12]:
metrics = pd.DataFrame(metrics, index=results.index)
metrics

Unnamed: 0_level_0,cer,mer,wer,wil,wip
number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1.0,0.000000,0.000000,0.000000,0.000000,1.000000
2.0,0.000000,0.000000,0.000000,0.000000,1.000000
3.0,0.005556,0.035714,0.035714,0.070153,0.929847
4.0,0.000000,0.000000,0.000000,0.000000,1.000000
5.0,0.000000,0.000000,0.000000,0.000000,1.000000
...,...,...,...,...,...
106.0,0.071429,0.333333,0.333333,0.555556,0.444444
107.0,0.125000,0.500000,0.500000,0.750000,0.250000
108.0,0.125000,0.500000,0.500000,0.750000,0.250000
109.0,1.250000,1.000000,2.000000,1.000000,0.000000


In [21]:
results.merge(metrics, left_index=True, right_index=True).to_csv('../my_tests/metrics.csv')

In [20]:
metrics = pd.DataFrame(metrics, index=results.index)
metrics

Unnamed: 0_level_0,cer,mer,wer,wil,wip
number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1.0,0.0,0.0,0.0,0.0,1.0
2.0,0.0,0.0,0.0,0.0,1.0
3.0,0.0,0.0,0.0,0.0,1.0
4.0,0.0,0.0,0.0,0.0,1.0
5.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...
106.0,0.0,0.0,0.0,0.0,1.0
107.0,0.0,0.0,0.0,0.0,1.0
108.0,0.0,0.0,0.0,0.0,1.0
109.0,1.0,0.5,1.0,0.5,0.5
