In [40]:
# WER / CER calculation code (implementation)
import locale
# Monkeypatch the locale lookup so that it always reports UTF-8.
# NOTE(review): presumably a workaround for hosted notebook environments where
# shell/pip commands fail when the preferred encoding is not UTF-8 -- confirm.
locale.getpreferredencoding = lambda: "UTF-8"
# locale.getpreferredencoding()
# Third-party dependency for the import below:
# !pip install Levenshtein
import Levenshtein as Lev
import pandas as pd

def calculate_wer(ref, hyp, debug=False):
    """Compute word-level alignment statistics and the word error rate (WER).

    The reference and hypothesis are split on whitespace and aligned with a
    Levenshtein-style dynamic program in which substitution, insertion and
    deletion each cost 1. The best alignment is then traced back to count
    each kind of edit.

    Args:
        ref: Reference (ground-truth) transcript. Missing values (``None``,
            ``NaN``, ``pd.NA``) are treated as an empty transcript; other
            non-string values are converted with ``str()``.
        hyp: Hypothesis (model output) transcript, handled like ``ref``.
        debug: When true, print the aligned operations and the edit counts.

    Returns:
        A tuple ``(numCor, numSub, numDel, numIns, wer)`` where
        ``wer = (numSub + numDel + numIns) / len(reference words)``.
        If the reference has no words the denominator is taken as 1, so the
        result is ``0.0`` for an empty hypothesis and the number of inserted
        words otherwise (instead of raising ``ZeroDivisionError``).
    """
    def _tokenize(text):
        # Missing cells (NaN/None) count as "no words"; anything else that is
        # not already a string (e.g. a numeric cell) is stringified first.
        if not isinstance(text, str):
            if pd.isna(text):
                return []
            text = str(text)
        return text.split()

    r = _tokenize(ref)
    h = _tokenize(hyp)
    n_ref = len(r)
    n_hyp = len(h)

    OP_OK = 0
    OP_SUB = 1
    OP_INS = 2
    OP_DEL = 3

    DEL_PENALTY = 1
    INS_PENALTY = 1
    SUB_PENALTY = 1

    # costs[i][j]: minimum edit cost to turn the first i reference words into
    # the first j hypothesis words.
    costs = [[0] * (n_hyp + 1) for _ in range(n_ref + 1)]
    # backtrace[i][j]: the operation chosen to reach cell (i, j); needed to
    # recover the per-operation counts afterwards.
    backtrace = [[OP_OK] * (n_hyp + 1) for _ in range(n_ref + 1)]

    # First column: reach an empty hypothesis by deleting every reference word.
    for i in range(1, n_ref + 1):
        costs[i][0] = DEL_PENALTY * i
        backtrace[i][0] = OP_DEL

    # First row: reach the hypothesis from an empty reference by inserting
    # every hypothesis word.
    for j in range(1, n_hyp + 1):
        costs[0][j] = INS_PENALTY * j
        backtrace[0][j] = OP_INS

    # Fill the table.
    for i in range(1, n_ref + 1):
        for j in range(1, n_hyp + 1):
            if r[i - 1] == h[j - 1]:
                costs[i][j] = costs[i - 1][j - 1]
                backtrace[i][j] = OP_OK
            else:
                substitutionCost = costs[i - 1][j - 1] + SUB_PENALTY
                insertionCost = costs[i][j - 1] + INS_PENALTY
                deletionCost = costs[i - 1][j] + DEL_PENALTY

                best = min(substitutionCost, insertionCost, deletionCost)
                costs[i][j] = best
                # Tie-break order: substitution, then insertion, then deletion.
                if best == substitutionCost:
                    backtrace[i][j] = OP_SUB
                elif best == insertionCost:
                    backtrace[i][j] = OP_INS
                else:
                    backtrace[i][j] = OP_DEL

    # Trace back through the best route, counting each operation.
    i = n_ref
    j = n_hyp
    numSub = 0
    numDel = 0
    numIns = 0
    numCor = 0
    lines = []
    if debug:
        print("OP\tREF\tHYP")
    while i > 0 or j > 0:
        op = backtrace[i][j]
        if op == OP_OK:
            numCor += 1
            i -= 1
            j -= 1
            if debug:
                lines.append("OK\t" + r[i] + "\t" + h[j])
        elif op == OP_SUB:
            numSub += 1
            i -= 1
            j -= 1
            if debug:
                lines.append("SUB\t" + r[i] + "\t" + h[j])
        elif op == OP_INS:
            numIns += 1
            j -= 1
            if debug:
                lines.append("INS\t" + "****" + "\t" + h[j])
        else:  # OP_DEL
            numDel += 1
            i -= 1
            if debug:
                lines.append("DEL\t" + r[i] + "\t" + "****")
    if debug:
        # Operations were collected end-to-start; print them in reading order.
        for line in reversed(lines):
            print(line)
        print("Ncor " + str(numCor))
        print("Nsub " + str(numSub))
        print("Ndel " + str(numDel))
        print("Nins " + str(numIns))

    # Guard the denominator: an empty reference would otherwise divide by zero.
    wer = (numSub + numDel + numIns) / float(max(n_ref, 1))
    return numCor, numSub, numDel, numIns, wer


In [47]:

def _mean_wer_for_column(df, hyp_col, ref_col='correct_text'):
    """Score one hypothesis column against the reference column.

    Calls ``calculate_wer`` for every row, stores the per-row WER (element 4
    of its result) in ``df['wer']`` -- overwriting any existing ``wer``
    column in place -- and returns the mean of that column.
    """
    # A plain list keeps row order and, unlike ``df.apply(axis=1)``, still
    # yields a one-dimensional result when the frame has no rows.
    df['wer'] = [
        calculate_wer(ref, hyp)[4]
        for ref, hyp in zip(df[ref_col], df[hyp_col])
    ]
    return df['wer'].mean()

def get_mean_wer_whisper(df):
    """Return the mean WER of ``whisper_text`` vs ``correct_text``; sets ``df['wer']``."""
    return _mean_wer_for_column(df, 'whisper_text')

def get_mean_wer_llama(df):
    """Return the mean WER of ``llm_text`` vs ``correct_text``; sets ``df['wer']``."""
    return _mean_wer_for_column(df, 'llm_text')

def get_mean_wer_llama_ft(df):
    """Return the mean WER of ``llm_output_text`` vs ``correct_text``; sets ``df['wer']``."""
    return _mean_wer_for_column(df, 'llm_output_text')

In [50]:

# Load the transcription results (per the file name, presumably produced by the
# base Whisper small model) and compute the mean WER of the `whisper_text`
# column against `correct_text`.
df = pd.read_csv('atco2_test_dictation_by_whisper_small.csv', encoding='UTF-8')
mean_wer_1 = get_mean_wer_whisper(df)
# Trailing expression: displayed as the cell output.
mean_wer_1

1.0405270031874403

In [51]:
# Load the transcription results (per the file name, presumably produced by the
# fine-tuned Whisper model) and compute the mean WER of the `whisper_text`
# column against `correct_text`. Note that `df` is rebound to this new file.
df = pd.read_csv('atco2_test_dictation_by_whisper_finetuned.csv', encoding='UTF-8')
mean_wer_2 = get_mean_wer_whisper(df)
# Trailing expression: displayed as the cell output.
mean_wer_2

1.0495432499523087

In [52]:
# Load the results (per the file name, presumably base Whisper small output
# post-processed by the original Llama 2) and compute the mean WER of the
# `llm_text` column against `correct_text`.
df = pd.read_csv('atco2_test_dictation_by_whisper_small_and_llama2_original.csv', encoding='UTF-8')
mean_wer_3 = get_mean_wer_llama(df)
# Trailing expression: displayed as the cell output.
mean_wer_3

1.068548207134803

In [53]:
# Load the results (per the file name, presumably fine-tuned Whisper output
# post-processed by the original Llama 2) and compute the mean WER of the
# `llm_text` column against `correct_text`.
df = pd.read_csv('atco2_test_dictation_by_whisper_finetuned_and_llama2_original.csv', encoding='UTF-8')
mean_wer_4 = get_mean_wer_llama(df)
# Trailing expression: displayed as the cell output.
mean_wer_4

0.9536716822147444

In [54]:
# Load the results (per the file name, presumably base Whisper output
# post-processed by the fine-tuned Llama 2) and compute the mean WER of the
# `llm_output_text` column against `correct_text`.
df = pd.read_csv('Whisper에 llama2 finetuned 붙여서 ATCO2 WER 뽑기_결과.csv', encoding='UTF-8')
mean_wer_5 = get_mean_wer_llama_ft(df)
# Trailing expression: displayed as the cell output.
mean_wer_5

0.8557662425136696

In [55]:
# Load the results (per the file name, presumably fine-tuned Whisper output
# post-processed by the fine-tuned Llama 2) and compute the mean WER of the
# `llm_output_text` column against `correct_text`.
df = pd.read_csv('Whisper 파인튜닝한 모델에 llama2 finetuned 붙여서 ATCO2 WER 뽑기_결과.csv', encoding='UTF-8')
mean_wer_6 = get_mean_wer_llama_ft(df)
# Trailing expression: displayed as the cell output.
mean_wer_6

0.7405766128636935

In [58]:
# Summary of the mean WER for each configuration evaluated in the cells above.
# Label key: 위스퍼 = Whisper, 라마 = Llama, 기본모델 = base model,
# 파인튜닝 = fine-tuned.
print("위스퍼 기본모델              : ", mean_wer_1)
print("위스퍼 파인튜닝              : ", mean_wer_2)
print("위스퍼 기본모델 + 라마 기본모델 : ", mean_wer_3)
print("위스퍼 파인튜닝 + 라마 기본모델 : ", mean_wer_4)
print("위스퍼 기본모델 + 라마 파인튜닝 : ", mean_wer_5)
# mean_wer_6 comes from the fine-tuned-Whisper + fine-tuned-Llama results file,
# so its label must say "fine-tuned Whisper" rather than repeat the line above.
print("위스퍼 파인튜닝 + 라마 파인튜닝 : ", mean_wer_6)

위스퍼 기본모델              :  1.0405270031874403
위스퍼 파인튜닝              :  1.0495432499523087
위스퍼 기본모델 + 라마 기본모델 :  1.068548207134803
위스퍼 파인튜닝 + 라마 기본모델 :  0.9536716822147444
위스퍼 기본모델 + 라마 파인튜닝 :  0.8557662425136696
위스퍼 기본모델 + 라마 파인튜닝 :  0.7405766128636935
