In [12]:
import pandas as pd
import math
from collections import Counter

def levenshtein_distance(s1: str, s2: str) -> int:
    """Return the Levenshtein edit distance between ``s1`` and ``s2``.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions needed to turn one string into the other.
    It is computed with a row-by-row dynamic programme that only keeps the
    previous row in memory.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        The edit distance as a non-negative integer (``0`` for equal inputs).
    """
    # NOTE(review): this function and the other helpers of this cell are
    # defined again, verbatim, in a later cell (In [16]); after running all
    # cells the later definitions are the ones in effect.

    # Put the longer string on the outer loop so each row is as short as possible.
    longer, shorter = (s2, s1) if len(s1) < len(s2) else (s1, s2)

    # Row 0: turning the empty prefix into shorter[:k] costs k insertions.
    prev = list(range(len(shorter) + 1))
    for row_idx, ch_long in enumerate(longer, start=1):
        # Column 0: turning longer[:row_idx] into the empty prefix costs row_idx.
        curr = [row_idx]
        for col_idx, ch_short in enumerate(shorter, start=1):
            mismatch = 0 if ch_long == ch_short else 1
            curr.append(min(
                prev[col_idx] + 1,             # cell above
                curr[col_idx - 1] + 1,         # cell to the left
                prev[col_idx - 1] + mismatch,  # diagonal: match / substitute
            ))
        prev = curr
    return prev[-1]

def ngrams(seq, n):
    """Return the contiguous ``n``-grams of ``seq`` as a list of tuples.

    The n-grams are listed in order of their start position. An empty list
    is returned when ``seq`` has fewer than ``n`` items.
    """
    if len(seq) < n:
        return []
    last_start = len(seq) - n
    return [tuple(seq[start:start + n]) for start in range(last_start + 1)]

def count_clip_matches(reference, hypothesis, n):
    """Count hypothesis ``n``-grams that also occur in the reference, clipped.

    Each distinct n-gram of ``hypothesis`` contributes the smaller of its
    count in the hypothesis and its count in ``reference``, so an n-gram
    repeated more often in the hypothesis than in the reference is not
    over-credited.

    Returns:
        The total clipped match count as an integer.
    """
    ref_counts = Counter(ngrams(reference, n))
    hyp_counts = Counter(ngrams(hypothesis, n))
    # Counter intersection keeps min(count) for every shared n-gram, which is
    # exactly the clipped count; n-grams missing from either side drop out.
    return sum((hyp_counts & ref_counts).values())

def sentence_bleu(reference, hypothesis, max_n=4, smoothing=True):
    """Score one hypothesis against one reference with character n-gram BLEU.

    For every order ``1..max_n`` the clipped n-gram precision is computed,
    the precisions are combined with a uniformly weighted geometric mean,
    and the result is multiplied by the brevity penalty and by 100.

    Args:
        reference: The reference sequence (a string in this notebook).
        hypothesis: The predicted sequence.
        max_n: Highest n-gram order included in the score.
        smoothing: When true, add one to both the match count and the
            n-gram total of every order, so no precision is ever zero.

    Returns:
        The BLEU score as a float on a 0-100 scale; ``0.0`` for an empty
        hypothesis.

    NOTE(review): an order for which the hypothesis has no n-grams still
    takes part in the mean (total is forced to 1, matches are 0). With
    smoothing its precision is 1/2, so an exact match shorter than
    ``max_n`` items scores below 100; without smoothing such a pair scores 0.
    """
    def order_precision(order):
        hits = count_clip_matches(reference, hypothesis, order)
        # Never let the denominator reach zero for short hypotheses.
        denom = max(1, len(hypothesis) - order + 1)
        if smoothing:
            return (hits + 1) / (denom + 1)
        return hits / denom

    precisions = [order_precision(order) for order in range(1, max_n + 1)]

    # log(0) is undefined, so a zero precision short-circuits the mean to 0.
    if min(precisions) == 0:
        geo_mean = 0.0
    else:
        geo_mean = math.exp(sum((1 / max_n) * math.log(p) for p in precisions))

    ref_len = len(reference)
    hyp_len = len(hypothesis)
    if hyp_len == 0:
        brevity = 0.0
    else:
        # Only hypotheses that are not longer than the reference are penalised.
        brevity = 1.0 if hyp_len > ref_len else math.exp(1 - ref_len / hyp_len)
    return brevity * geo_mean * 100.0

In [16]:
import pandas as pd
import math
from collections import Counter

def levenshtein_distance(s1: str, s2: str) -> int:
    """Return the Levenshtein edit distance between ``s1`` and ``s2``.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions needed to turn one string into the other.
    It is computed with a row-by-row dynamic programme that only keeps the
    previous row in memory.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        The edit distance as a non-negative integer (``0`` for equal inputs).
    """
    # Put the longer string on the outer loop so each row is as short as possible.
    longer, shorter = (s2, s1) if len(s1) < len(s2) else (s1, s2)

    # Row 0: turning the empty prefix into shorter[:k] costs k insertions.
    prev = list(range(len(shorter) + 1))
    for row_idx, ch_long in enumerate(longer, start=1):
        # Column 0: turning longer[:row_idx] into the empty prefix costs row_idx.
        curr = [row_idx]
        for col_idx, ch_short in enumerate(shorter, start=1):
            mismatch = 0 if ch_long == ch_short else 1
            curr.append(min(
                prev[col_idx] + 1,             # cell above
                curr[col_idx - 1] + 1,         # cell to the left
                prev[col_idx - 1] + mismatch,  # diagonal: match / substitute
            ))
        prev = curr
    return prev[-1]

def ngrams(seq, n):
    """Return the contiguous ``n``-grams of ``seq`` as a list of tuples.

    The n-grams are listed in order of their start position. An empty list
    is returned when ``seq`` has fewer than ``n`` items.
    """
    if len(seq) < n:
        return []
    last_start = len(seq) - n
    return [tuple(seq[start:start + n]) for start in range(last_start + 1)]

def count_clip_matches(reference, hypothesis, n):
    """Count hypothesis ``n``-grams that also occur in the reference, clipped.

    Each distinct n-gram of ``hypothesis`` contributes the smaller of its
    count in the hypothesis and its count in ``reference``, so an n-gram
    repeated more often in the hypothesis than in the reference is not
    over-credited.

    Returns:
        The total clipped match count as an integer.
    """
    ref_counts = Counter(ngrams(reference, n))
    hyp_counts = Counter(ngrams(hypothesis, n))
    # Counter intersection keeps min(count) for every shared n-gram, which is
    # exactly the clipped count; n-grams missing from either side drop out.
    return sum((hyp_counts & ref_counts).values())

def sentence_bleu(reference, hypothesis, max_n=4, smoothing=True):
    """Score one hypothesis against one reference with character n-gram BLEU.

    For every order ``1..max_n`` the clipped n-gram precision is computed,
    the precisions are combined with a uniformly weighted geometric mean,
    and the result is multiplied by the brevity penalty and by 100.

    Args:
        reference: The reference sequence (a string in this notebook).
        hypothesis: The predicted sequence.
        max_n: Highest n-gram order included in the score.
        smoothing: When true, add one to both the match count and the
            n-gram total of every order, so no precision is ever zero.

    Returns:
        The BLEU score as a float on a 0-100 scale; ``0.0`` for an empty
        hypothesis.

    NOTE(review): an order for which the hypothesis has no n-grams still
    takes part in the mean (total is forced to 1, matches are 0). With
    smoothing its precision is 1/2, so an exact match shorter than
    ``max_n`` items scores below 100; without smoothing such a pair scores 0.
    """
    def order_precision(order):
        hits = count_clip_matches(reference, hypothesis, order)
        # Never let the denominator reach zero for short hypotheses.
        denom = max(1, len(hypothesis) - order + 1)
        if smoothing:
            return (hits + 1) / (denom + 1)
        return hits / denom

    precisions = [order_precision(order) for order in range(1, max_n + 1)]

    # log(0) is undefined, so a zero precision short-circuits the mean to 0.
    if min(precisions) == 0:
        geo_mean = 0.0
    else:
        geo_mean = math.exp(sum((1 / max_n) * math.log(p) for p in precisions))

    ref_len = len(reference)
    hyp_len = len(hypothesis)
    if hyp_len == 0:
        brevity = 0.0
    else:
        # Only hypotheses that are not longer than the reference are penalised.
        brevity = 1.0 if hyp_len > ref_len else math.exp(1 - ref_len / hyp_len)
    return brevity * geo_mean * 100.0

def calculate_accuracies(csv_path):
    """Evaluate a predictions CSV and report word, character, CER and BLEU scores.

    Reads ``csv_path``, scores every row, prints the corpus-level summary
    followed by the ten highest and ten lowest rows for each metric, and
    returns the per-row scores.

    Args:
        csv_path: Location of the CSV (anything ``pandas.read_csv`` accepts).
            It must contain the columns ``eng``, ``actual`` and ``predicted``.

    Returns:
        pandas.DataFrame with one row per CSV row and the columns
        ``english``, ``actual``, ``predicted``, ``word_match`` (0 or 100),
        ``char_acc``, ``cer`` and ``bleu`` (all on a 0-100 scale; ``cer`` can
        exceed 100). The frame is empty, but still has these columns, when
        the CSV has no data rows.

    Raises:
        ValueError: If any of the required columns is missing.
    """
    # Keep every cell as the literal text found in the file. With pandas'
    # default NA inference an empty prediction, or a word such as "null",
    # "NA" or "nan", is parsed as NaN, and str(NaN) would then be scored as
    # the three-character string "nan".
    df = pd.read_csv(csv_path, dtype=str, keep_default_na=False)

    expected_cols = {'eng', 'actual', 'predicted'}
    if not expected_cols.issubset(df.columns):
        raise ValueError(f"CSV must have columns {expected_cols}")

    # Anything that is still missing is treated as an empty string.
    df = df.fillna('')

    total_words = len(df)
    correct_words = 0
    total_chars_for_acc = 0
    correct_chars = 0
    total_edit_distance = 0
    total_ref_chars = 0

    row_metrics = []  # one dict of per-row metrics per CSV row

    for raw_english, raw_actual, raw_predicted in zip(df['eng'], df['actual'], df['predicted']):
        english = str(raw_english).strip()
        actual = str(raw_actual).strip()
        predicted = str(raw_predicted).strip()

        # --- Word-level accuracy: exact match of the whole string ---
        word_correct = 1 if actual == predicted else 0
        correct_words += word_correct

        # --- Character accuracy: position-wise matches over the longer length ---
        max_len = max(len(actual), len(predicted))
        total_chars_for_acc += max_len
        char_matches = sum(a == b for a, b in zip(actual, predicted))
        correct_chars += char_matches
        row_char_acc = (char_matches / max_len * 100) if max_len > 0 else 0

        # --- CER: edit distance relative to the reference length ---
        ed = levenshtein_distance(actual, predicted)
        total_edit_distance += ed
        total_ref_chars += len(actual)
        row_cer = (ed / len(actual) * 100) if len(actual) > 0 else 0

        # --- Sentence BLEU ---
        bleu = sentence_bleu(actual, predicted, max_n=4, smoothing=True)

        row_metrics.append({
            'english': english,
            'actual': actual,
            'predicted': predicted,
            'word_match': word_correct * 100,
            'char_acc': row_char_acc,
            'cer': row_cer,
            'bleu': bleu
        })

    # --- Corpus-level metrics (every ratio is guarded against an empty CSV) ---
    word_acc = correct_words / total_words * 100 if total_words > 0 else 0
    char_acc = correct_chars / total_chars_for_acc * 100 if total_chars_for_acc > 0 else 0
    cer = total_edit_distance / total_ref_chars * 100 if total_ref_chars > 0 else 0
    corpus_bleu = sum(m['bleu'] for m in row_metrics) / len(row_metrics) if row_metrics else 0

    print(f"тЬЕ Word-level accuracy:      {word_acc:.2f}%")
    print(f"тЬЕ Character-level accuracy: {char_acc:.2f}%")
    print(f"тЬЕ Character Error Rate (CER): {cer:.2f}%")
    print(f"тЬЕ Avg Sentence BLEU-4 (chars, smoothed): {corpus_bleu:.2f}%")

    # Explicit columns keep the schema stable even when there are no rows.
    metrics_df = pd.DataFrame(
        row_metrics,
        columns=['english', 'actual', 'predicted', 'word_match', 'char_acc', 'cer', 'bleu'],
    )

    def show_top_and_bottom(metric):
        """Print the ten highest and the ten lowest rows for ``metric``."""
        shown = ['english', 'actual', 'predicted', metric]
        print(f"\nЁЯФ╣ Top 10 by {metric}:")
        print(metrics_df.sort_values(metric, ascending=False)[shown].head(10).to_string(index=False))
        print(f"\nЁЯФ╕ Bottom 10 by {metric}:")
        print(metrics_df.sort_values(metric, ascending=True)[shown].head(10).to_string(index=False))

    # Nothing to rank for an empty CSV. Note that "Top" means highest value,
    # which for 'cer' is the rows with the largest error rate.
    if not metrics_df.empty:
        for metric in ['word_match', 'char_acc', 'cer', 'bleu']:
            show_top_and_bottom(metric)

    return metrics_df

In [17]:
# Score predictions/bi_no_attn_lstm_predictions.csv; the per-row metrics are kept in metrics_df.
metrics_df = calculate_accuracies('predictions/bi_no_attn_lstm_predictions.csv')

тЬЕ Word-level accuracy:      35.30%
тЬЕ Character-level accuracy: 67.41%
тЬЕ Character Error Rate (CER): 18.51%
тЬЕ Avg Sentence BLEU-4 (chars, smoothed): 69.33%

ЁЯФ╣ Top 10 by word_match:
           english           actual        predicted  word_match
            beemon            рдмреАрдореЛрдВ            рдмреАрдореЛрдВ         100
           durniti         рджреБрд░реНрдиреАрддрд┐         рджреБрд░реНрдиреАрддрд┐         100
upamukhyamantriyon рдЙрдкрдореБрдЦреНрдпрдордВрддреНрд░рд┐рдпреЛрдВ рдЙрдкрдореБрдЦреНрдпрдордВрддреНрд░рд┐рдпреЛрдВ         100
        mukeshvari        рдореБрдХреЗрд╢реНрд╡рд░реА        рдореБрдХреЗрд╢реНрд╡рд░реА         100
          tadipaar          рддрдбрд╝реАрдкрд╛рд░          рддрдбрд╝реАрдкрд╛рд░         100
            jeenon            рдЬреАрдиреЛрдВ            рдЬреАрдиреЛрдВ         100
    vishwasaniyata      рд╡рд┐рд╢реНрд╡рд╕рдиреАрдпрддрд╛      рд╡рд┐рд╢реНрд╡рд╕рдиреАрдпрддрд╛         100
         exkleshan       рдПрдХреНрд╕рдХреНрд

In [18]:
# Score predictions/uni_no_attn_lstm_predictions.csv; the per-row metrics are kept in metrics_df1.
metrics_df1 = calculate_accuracies('predictions/uni_no_attn_lstm_predictions.csv')

тЬЕ Word-level accuracy:      17.19%
тЬЕ Character-level accuracy: 52.23%
тЬЕ Character Error Rate (CER): 26.78%
тЬЕ Avg Sentence BLEU-4 (chars, smoothed): 56.97%

ЁЯФ╣ Top 10 by word_match:
     english     actual  predicted  word_match
  baharampur   рдмрд╣рд░рд╛рдордкреБрд░   рдмрд╣рд░рд╛рдордкреБрд░         100
    varshiya     рд╡рд░реНрд╖рд┐рдп     рд╡рд░реНрд╖рд┐рдп         100
  vaahikayen   рд╡рд╛рд╣рд┐рдХрд╛рдПрдВ   рд╡рд╛рд╣рд┐рдХрд╛рдПрдВ         100
paathyaatmak рдкрд╛рдареНрдпрд╛рддреНрдордХ рдкрд╛рдареНрдпрд╛рддреНрдордХ         100
     gopalak     рдЧреЛрдкрд╛рд▓рдХ     рдЧреЛрдкрд╛рд▓рдХ         100
 vishnupuraa рд╡рд┐рд╖реНрдгреБрдкреБрд░рд╛ рд╡рд┐рд╖реНрдгреБрдкреБрд░рд╛         100
    bandhuon     рдмрдВрдзреБрдУрдВ     рдмрдВрдзреБрдУрдВ         100
niveshkartaa рдирд┐рд╡реЗрд╢рдХрд░реНрддрд╛ рдирд┐рд╡реЗрд╢рдХрд░реНрддрд╛         100
  updraviyon рдЙрдкрджреНрд░рд╡рд┐рдпреЛрдВ рдЙрдкрджреНрд░рд╡рд┐рдпреЛрдВ         100
      antrit    рдЖрдВрддреНрд░рд┐рдд    рд

In [None]:
# Score predictions/uni_attn_lstm_predictions.csv; the per-row metrics are kept in metrics_df2.
metrics_df2 = calculate_accuracies('predictions/uni_attn_lstm_predictions.csv')