In [1]:
import pandas as pd

def calculate_accuracies(csv_path):
    """Print and return word- and character-level accuracy for `processed_mistral`.

    Every CSV row is scored as one pair: the `hin` cell is the reference and
    the `processed_mistral` cell is the prediction. Both cells are converted
    with `str()` and stripped of surrounding whitespace before comparison.

    Args:
        csv_path: Passed straight to `pandas.read_csv`.

    Returns:
        Tuple `(word_acc, char_acc)`, both percentages.
        - word_acc: share of rows whose prediction equals the reference exactly.
        - char_acc: number of positions at which the two strings hold the same
          character, divided by the summed length of the longer string of
          each pair (so length mismatches count as errors).
        Each value is 0 when its denominator is 0.

    Raises:
        ValueError: If the CSV lacks the `hin` or `processed_mistral` column.
    """
    df = pd.read_csv(csv_path)

    # The message must name the columns this function actually requires;
    # it previously mentioned 'original_response_mistral', which is never read here.
    required_cols = {'hin', 'processed_mistral'}
    if not required_cols.issubset(df.columns):
        raise ValueError("CSV must have columns 'hin' and 'processed_mistral'")

    total_words = len(df)
    correct_words = 0
    total_chars = 0
    correct_chars = 0

    for _, row in df.iterrows():
        # NOTE: str() is applied to whatever pandas parsed, so a missing cell
        # (NaN) is compared as the literal text 'nan'.
        actual = str(row['hin']).strip()
        predicted = str(row['processed_mistral']).strip()

        # --- Word-level accuracy: exact string equality ---
        if actual == predicted:
            correct_words += 1

        # --- Character-level accuracy ---
        # Denominator uses the longer string so extra/missing characters are penalised.
        total_chars += max(len(actual), len(predicted))

        # zip() stops at the shorter string, so only aligned positions are compared.
        correct_chars += sum(1 for a, b in zip(actual, predicted) if a == b)

    word_acc = correct_words / total_words * 100 if total_words > 0 else 0
    char_acc = correct_chars / total_chars * 100 if total_chars > 0 else 0

    print(f"‚úÖ Word-level accuracy: {word_acc:.2f}%")
    print(f"‚úÖ Character-level accuracy: {char_acc:.2f}%")

    return word_acc, char_acc

In [7]:
import pandas as pd

def calculate_accuracies1(csv_path):
    """Print and return word- and character-level accuracy for `original_response_mistral`.

    Each row pairs the reference in `hin` with the prediction in
    `original_response_mistral`; both are passed through `str()` and stripped.

    Args:
        csv_path: Passed straight to `pandas.read_csv`.

    Returns:
        Tuple `(word_acc, char_acc)` in percent. Word accuracy is the share of
        exactly matching rows; character accuracy is the count of aligned
        equal characters over the summed length of the longer string per row.
        Each value is 0 when its denominator is 0.

    Raises:
        ValueError: If either required column is absent.
    """
    frame = pd.read_csv(csv_path)

    # Both columns must be present before any row is touched
    needed = {'hin', 'original_response_mistral'}
    if not needed <= set(frame.columns):
        raise ValueError("CSV must have columns 'hin' and 'original_response_mistral'")

    n_rows = len(frame)
    exact_hits = 0
    char_slots = 0
    char_hits = 0

    for _, record in frame.iterrows():
        gold = str(record['hin']).strip()
        guess = str(record['original_response_mistral']).strip()

        # Word level: one hit per identical pair
        exact_hits += int(gold == guess)

        # Character level: the longer string sets the number of slots,
        # while matches are only possible over the aligned prefix
        char_slots += max(len(gold), len(guess))
        char_hits += sum(1 for g, p in zip(gold, guess) if g == p)

    if n_rows > 0:
        word_acc = exact_hits / n_rows * 100
    else:
        word_acc = 0

    if char_slots > 0:
        char_acc = char_hits / char_slots * 100
    else:
        char_acc = 0

    print(f"‚úÖ Word-level accuracy: {word_acc:.2f}%")
    print(f"‚úÖ Character-level accuracy: {char_acc:.2f}%")

    return word_acc, char_acc

In [8]:
# Score the `processed_mistral` column of the CSV against the `hin` references.
calculate_accuracies('processed_mistral.csv')

‚úÖ Word-level accuracy: 29.05%
‚úÖ Character-level accuracy: 60.04%


(29.05458860759494, 60.04292830433198)

In [9]:
# Score the `original_response_mistral` column of the same CSV against the `hin` references.
calculate_accuracies1('processed_mistral.csv')

‚úÖ Word-level accuracy: 28.64%
‚úÖ Character-level accuracy: 40.53%


(28.639240506329116, 40.52810452752139)

In [18]:
import pandas as pd
import math
from collections import Counter

def levenshtein_distance(s1: str, s2: str) -> int:
    """Return the Levenshtein edit distance between s1 and s2.

    Uses the two-row dynamic-programming formulation: only the previous and
    the current row of the cost table are kept. Insertions, deletions and
    substitutions each cost 1.
    """
    # The longer string drives the outer loop, so each row has len(shorter) + 1 cells.
    if len(s1) >= len(s2):
        longer, shorter = s1, s2
    else:
        longer, shorter = s2, s1

    prev_costs = list(range(len(shorter) + 1))
    for row_idx, ch_long in enumerate(longer, start=1):
        curr_costs = [row_idx]
        for col_idx, ch_short in enumerate(shorter, start=1):
            via_insert = prev_costs[col_idx] + 1
            via_delete = curr_costs[col_idx - 1] + 1
            via_substitute = prev_costs[col_idx - 1] + (1 if ch_long != ch_short else 0)
            curr_costs.append(min(via_insert, via_delete, via_substitute))
        prev_costs = curr_costs
    return prev_costs[-1]

def ngrams(seq, n):
    """Return every contiguous length-n window of seq as a tuple, in order.

    Yields an empty list when seq holds fewer than n items.
    """
    if len(seq) < n:
        return []
    windows = []
    for start in range(len(seq) - n + 1):
        windows.append(tuple(seq[start:start + n]))
    return windows

def count_clip_matches(reference, hypothesis, n):
    """Count hypothesis n-grams that also occur in the reference, with clipping.

    Each distinct hypothesis n-gram contributes at most as many matches as the
    number of times it appears in the reference.
    """
    ref_counts = Counter(ngrams(reference, n))
    hyp_counts = Counter(ngrams(hypothesis, n))

    clipped_total = 0
    for gram, hyp_count in hyp_counts.items():
        # Clip: a repeated hypothesis n-gram cannot score more than the reference allows.
        clipped_total += min(hyp_count, ref_counts.get(gram, 0))
    return clipped_total

def sentence_bleu(reference, hypothesis, max_n=4, smoothing=True):
    """Compute a BLEU score (0-100) for one reference/hypothesis pair.

    N-grams are taken over the elements of the given sequences; when strings
    are passed, that means character n-grams.

    Args:
        reference: Reference sequence.
        hypothesis: Candidate sequence.
        max_n: Highest n-gram order; orders 1..max_n are weighted equally.
        smoothing: When truthy, each precision is computed as
            (matches + 1) / (total + 1) instead of matches / total.

    Returns:
        brevity_penalty * geometric_mean(precisions) * 100.0.
    """
    def precision_at(order):
        # Clipped matches over the number of hypothesis n-grams (never below 1).
        hits = count_clip_matches(reference, hypothesis, order)
        candidates = max(1, len(hypothesis) - order + 1)
        if smoothing:
            return (hits + 1) / (candidates + 1)
        return hits / candidates

    precisions = [precision_at(order) for order in range(1, max_n + 1)]

    # A zero precision would make log() undefined; the geometric mean is 0 in that case.
    if min(precisions) == 0:
        geo_mean = 0.0
    else:
        geo_mean = math.exp(sum((1 / max_n) * math.log(p) for p in precisions))

    # Brevity penalty: 0 for an empty hypothesis, 1 when the hypothesis is
    # longer than the reference, exp(1 - ref/hyp) otherwise.
    ref_len, hyp_len = len(reference), len(hypothesis)
    if hyp_len == 0:
        brevity = 0.0
    elif hyp_len > ref_len:
        brevity = 1.0
    else:
        brevity = math.exp(1 - ref_len / hyp_len)

    return brevity * geo_mean * 100.0

def calculate_accuracies(csv_path):
    """Compute per-row and corpus-level metrics for `processed_mistral` and print a report.

    NOTE: this redefines the `calculate_accuracies` from the notebook's first
    cell; this version returns a DataFrame of per-row metrics instead of a
    `(word_acc, char_acc)` tuple.

    For every CSV row, `hin` is the reference, `processed_mistral` the
    prediction and `eng` the source text carried along for display. All three
    are converted with `str()` and stripped.

    Per-row metrics (all in percent):
        - word_match: 100 if prediction equals reference exactly, else 0.
        - char_acc: aligned equal characters / length of the longer string.
        - cer: Levenshtein distance / reference length (0 for an empty
          reference); may exceed 100.
        - bleu: `sentence_bleu(..., max_n=4, smoothing=True)`.

    Printed corpus metrics: word accuracy, character accuracy, CER
    (total edit distance / total reference characters) and the mean of the
    per-row BLEU scores, followed by the 10 highest and 10 lowest rows for
    each per-row metric.

    Args:
        csv_path: Passed straight to `pandas.read_csv`.

    Returns:
        DataFrame with columns `english`, `actual`, `predicted`, `word_match`,
        `char_acc`, `cer`, `bleu`; one row per CSV row.

    Raises:
        ValueError: If the CSV lacks `eng`, `hin` or `processed_mistral`.
    """
    df = pd.read_csv(csv_path)

    # 'eng' is read for every row below, so it is validated together with the
    # reference and prediction columns instead of surfacing later as a KeyError.
    expected_cols = {'eng', 'hin', 'processed_mistral'}
    missing_cols = expected_cols - set(df.columns)
    if missing_cols:
        raise ValueError(
            f"CSV must have columns {sorted(expected_cols)}; missing {sorted(missing_cols)}"
        )

    total_words = len(df)
    correct_words = 0
    total_chars_for_acc = 0
    correct_chars = 0
    total_edit_distance = 0
    total_ref_chars = 0

    row_metrics = []  # one dict of per-row metrics per CSV row

    for _, row in df.iterrows():
        english = str(row['eng']).strip()
        actual = str(row['hin']).strip()
        predicted = str(row['processed_mistral']).strip()

        # --- Word-level accuracy ---
        word_correct = 1 if actual == predicted else 0
        correct_words += word_correct

        # --- Character accuracy ---
        # The longer string sets the denominator; zip() compares aligned positions only.
        max_len = max(len(actual), len(predicted))
        total_chars_for_acc += max_len
        char_matches = sum(a == b for a, b in zip(actual, predicted))
        correct_chars += char_matches
        row_char_acc = (char_matches / max_len * 100) if max_len > 0 else 0

        # --- CER ---
        ed = levenshtein_distance(actual, predicted)
        total_edit_distance += ed
        total_ref_chars += len(actual)
        row_cer = (ed / len(actual) * 100) if len(actual) > 0 else 0

        # --- Sentence BLEU ---
        bleu = sentence_bleu(actual, predicted, max_n=4, smoothing=True)

        row_metrics.append({
            'english': english,
            'actual': actual,
            'predicted': predicted,
            'word_match': word_correct * 100,
            'char_acc': row_char_acc,
            'cer': row_cer,
            'bleu': bleu
        })

    # --- Corpus-level metrics ---
    word_acc = correct_words / total_words * 100 if total_words > 0 else 0
    char_acc = correct_chars / total_chars_for_acc * 100 if total_chars_for_acc > 0 else 0
    cer = total_edit_distance / total_ref_chars * 100 if total_ref_chars > 0 else 0
    # Guarded like the other metrics so an empty CSV does not divide by zero.
    corpus_bleu = sum(m['bleu'] for m in row_metrics) / len(row_metrics) if row_metrics else 0

    print(f"‚úÖ Word-level accuracy:      {word_acc:.2f}%")
    print(f"‚úÖ Character-level accuracy: {char_acc:.2f}%")
    print(f"‚úÖ Character Error Rate (CER): {cer:.2f}%")
    print(f"‚úÖ Avg Sentence BLEU-4 (chars, smoothed): {corpus_bleu:.2f}%")

    # Explicit columns keep the frame's schema stable even when there are no rows,
    # so the sorting below still finds every metric column.
    metrics_df = pd.DataFrame(
        row_metrics,
        columns=['english', 'actual', 'predicted', 'word_match', 'char_acc', 'cer', 'bleu'],
    )

    def show_top_and_bottom(metric):
        # "Top" is the 10 largest values of the metric, "Bottom" the 10 smallest.
        print(f"\nüîπ Top 10 by {metric}:")
        print(metrics_df.sort_values(metric, ascending=False)[['english', 'actual', 'predicted', metric]].head(10).to_string(index=False))
        print(f"\nüî∏ Bottom 10 by {metric}:")
        print(metrics_df.sort_values(metric, ascending=True)[['english','actual', 'predicted', metric]].head(10).to_string(index=False))

    for metric in ['word_match', 'char_acc', 'cer', 'bleu']:
        show_top_and_bottom(metric)

    return metrics_df

In [19]:
# Run the full metric report and keep the returned per-row metrics frame.
metrics_df = calculate_accuracies('processed_mistral.csv')

‚úÖ Word-level accuracy:      29.05%
‚úÖ Character-level accuracy: 60.04%
‚úÖ Character Error Rate (CER): 22.39%
‚úÖ Avg Sentence BLEU-4 (chars, smoothed): 65.25%

üîπ Top 10 by word_match:
       english         actual      predicted  word_match
  maitrologist  ‡§Æ‡•à‡§ü‡•ç‡§∞‡•ã‡§≤‡•â‡§ú‡§ø‡§∏‡•ç‡§ü  ‡§Æ‡•à‡§ü‡•ç‡§∞‡•ã‡§≤‡•â‡§ú‡§ø‡§∏‡•ç‡§ü         100
khullamkhullaa   ‡§ñ‡•Å‡§≤‡•ç‡§≤‡§Æ‡§ñ‡•Å‡§≤‡•ç‡§≤‡§æ   ‡§ñ‡•Å‡§≤‡•ç‡§≤‡§Æ‡§ñ‡•Å‡§≤‡•ç‡§≤‡§æ         100
      sthulata        ‡§∏‡•ç‡§•‡•Ç‡§≤‡§§‡§æ        ‡§∏‡•ç‡§•‡•Ç‡§≤‡§§‡§æ         100
   pratirakshi     ‡§™‡•ç‡§∞‡§§‡§ø‡§∞‡§ï‡•ç‡§∑‡•Ä     ‡§™‡•ç‡§∞‡§§‡§ø‡§∞‡§ï‡•ç‡§∑‡•Ä         100
      dwimasik      ‡§¶‡•ç‡§µ‡§ø‡§Æ‡§æ‡§∏‡§ø‡§ï      ‡§¶‡•ç‡§µ‡§ø‡§Æ‡§æ‡§∏‡§ø‡§ï         100
         datri         ‡§¶‡§æ‡§§‡•ç‡§∞‡•Ä         ‡§¶‡§æ‡§§‡•ç‡§∞‡•Ä         100
  elctromaijik ‡§á‡§≤‡•á‡§ï‡•ç‡§ü‡•ç‡§∞‡•ã‡§Æ‡•à‡§ú‡§ø‡§ï ‡§á‡§≤‡•á‡§ï‡•ç‡§ü‡•ç‡§∞‡•ã‡§Æ‡•à‡§ú‡§ø‡§ï         100
         barsa           ‡§¨‡§∞‡§∏‡§æ           ‡§¨‡§∞‡§∏‡§æ         

In [22]:
import pandas as pd

def show_top_and_bottom(metric, metrics_df):
    """Display styled tables of the 10 best and 10 worst rows for one metric.

    The metric column is rounded to 2 decimals on a copy of `metrics_df`.
    For `'cer'` the smallest values are shown as "Top"; for every other
    metric the largest values are.

    Args:
        metric: Name of a numeric column in `metrics_df`.
        metrics_df: Frame that also has `english`, `actual` and `predicted` columns.

    NOTE(review): relies on a global `display` that this cell does not import —
    presumably the one IPython provides in notebooks; confirm before reuse elsewhere.
    """
    shown_columns = ['english', 'actual', 'predicted', metric]
    header_style = [{
        'selector': 'th',
        'props': [('text-align', 'left'), ('font-weight', 'bold')]
    }]

    def styled(frame, cmap):
        # Colour only the metric column; left-align cells and bold, left-aligned headers.
        styler = frame.style.background_gradient(cmap=cmap, subset=[metric])
        styler = styler.set_properties(**{'text-align': 'left'})
        return styler.set_table_styles(header_style)

    # Work on a copy so the caller's frame keeps its unrounded values
    rounded = metrics_df.copy()
    rounded[metric] = rounded[metric].round(2)

    # For 'cer' the best rows come first when sorting ascending
    best_first_ascending = (metric == 'cer')

    best_rows = rounded.sort_values(metric, ascending=best_first_ascending).head(10)[shown_columns]
    worst_rows = rounded.sort_values(metric, ascending=not best_first_ascending).head(10)[shown_columns]

    print(f"\n{'='*100}")
    print(f"üîπ Top 10 examples by {metric.upper()}")
    display(styled(best_rows, 'Greens'))

    print(f"\nüî∏ Bottom 10 examples by {metric.upper()}")
    display(styled(worst_rows, 'Reds_r'))
    print(f"{'='*100}\n")

# Example usage: render the top/bottom tables for each per-row metric column.
# Requires `metrics_df` to already exist in the kernel (it is assigned by an earlier cell).
for metric in ['word_match', 'char_acc', 'cer', 'bleu']:
    show_top_and_bottom(metric, metrics_df)


üîπ Top 10 examples by WORD_MATCH


Unnamed: 0,english,actual,predicted,word_match
0,maitrologist,‡§Æ‡•à‡§ü‡•ç‡§∞‡•ã‡§≤‡•â‡§ú‡§ø‡§∏‡•ç‡§ü,‡§Æ‡•à‡§ü‡•ç‡§∞‡•ã‡§≤‡•â‡§ú‡§ø‡§∏‡•ç‡§ü,100
3321,khullamkhullaa,‡§ñ‡•Å‡§≤‡•ç‡§≤‡§Æ‡§ñ‡•Å‡§≤‡•ç‡§≤‡§æ,‡§ñ‡•Å‡§≤‡•ç‡§≤‡§Æ‡§ñ‡•Å‡§≤‡•ç‡§≤‡§æ,100
7665,sthulata,‡§∏‡•ç‡§•‡•Ç‡§≤‡§§‡§æ,‡§∏‡•ç‡§•‡•Ç‡§≤‡§§‡§æ,100
3327,pratirakshi,‡§™‡•ç‡§∞‡§§‡§ø‡§∞‡§ï‡•ç‡§∑‡•Ä,‡§™‡•ç‡§∞‡§§‡§ø‡§∞‡§ï‡•ç‡§∑‡•Ä,100
3331,dwimasik,‡§¶‡•ç‡§µ‡§ø‡§Æ‡§æ‡§∏‡§ø‡§ï,‡§¶‡•ç‡§µ‡§ø‡§Æ‡§æ‡§∏‡§ø‡§ï,100
3333,datri,‡§¶‡§æ‡§§‡•ç‡§∞‡•Ä,‡§¶‡§æ‡§§‡•ç‡§∞‡•Ä,100
3334,elctromaijik,‡§á‡§≤‡•á‡§ï‡•ç‡§ü‡•ç‡§∞‡•ã‡§Æ‡•à‡§ú‡§ø‡§ï,‡§á‡§≤‡•á‡§ï‡•ç‡§ü‡•ç‡§∞‡•ã‡§Æ‡•à‡§ú‡§ø‡§ï,100
7659,barsa,‡§¨‡§∞‡§∏‡§æ,‡§¨‡§∞‡§∏‡§æ,100
7657,gayitri,‡§ó‡§æ‡§Ø‡§§‡•ç‡§∞‡•Ä,‡§ó‡§æ‡§Ø‡§§‡•ç‡§∞‡•Ä,100
7653,jeenon,‡§ú‡•Ä‡§®‡•ã‡§Ç,‡§ú‡•Ä‡§®‡•ã‡§Ç,100



üî∏ Bottom 10 examples by WORD_MATCH


Unnamed: 0,english,actual,predicted,word_match
5055,sankary,‡§∏‡§Ç‡§ï‡§∞‡•Ä,‡§∏‡§Ç‡§ï‡§∞‡•ç‡§Ø,0
6135,suta,‡§∏‡•Å‡§§‡§æ,‡§∏‡•Ç‡§§‡§æ,0
6134,sacchidanand,‡§∏‡§ö‡•ç‡§ö‡§ø‡§¶‡§æ‡§®‡§Ç‡§¶,‡§∏‡§ö‡•ç‡§ö‡§ø‡§¶‡§æ‡§®‡§®‡•ç‡§¶,0
6133,sahakalakar,‡§∏‡§π‡§ï‡§≤‡§æ‡§ï‡§æ‡§∞,‡§∏‡§π‡§ï‡§æ‡§∞‡§ï,0
6131,wireless,‡§µ‡§æ‡§Ø‡§∞‡§≤‡•á‡§∏,‡§µ‡§æ‡§á‡§∞‡§≤‡•á‡§∏,0
6130,jalbhrav,‡§ú‡§≤‡§≠‡§∞‡§æ‡§µ,‡§ú‡§æ‡§≤‡§≠‡•ç‡§∞‡§µ,0
6129,jadd,‡§ú‡§°‡§º,‡§ú‡§æ‡§¶,0
6128,haan,‡§π‡§æ‡§Ç,‡§π‡§æ‡§Å,0
6125,shahdara,‡§∂‡§π‡§æ‡§¶‡§∞‡§æ,‡§∂‡§π‡§¶‡§∞‡§æ,0
6123,jhuthlane,‡§ù‡•Å‡§†‡§≤‡§æ‡§®‡•á,‡§ù‡•Ç‡§†‡§≤‡•á‡§®‡•á,0




üîπ Top 10 examples by CHAR_ACC


Unnamed: 0,english,actual,predicted,char_acc
0,maitrologist,‡§Æ‡•à‡§ü‡•ç‡§∞‡•ã‡§≤‡•â‡§ú‡§ø‡§∏‡•ç‡§ü,‡§Æ‡•à‡§ü‡•ç‡§∞‡•ã‡§≤‡•â‡§ú‡§ø‡§∏‡•ç‡§ü,100.0
7443,sarkaa,‡§∏‡§∞‡§ï‡§æ,‡§∏‡§∞‡§ï‡§æ,100.0
7391,jeen,‡§ú‡•Ä‡§®,‡§ú‡•Ä‡§®,100.0
7389,chipakane,‡§ö‡§ø‡§™‡§ï‡§®‡•á,‡§ö‡§ø‡§™‡§ï‡§®‡•á,100.0
7387,lanset,‡§≤‡•à‡§Ç‡§∏‡•á‡§ü,‡§≤‡•à‡§Ç‡§∏‡•á‡§ü,100.0
7385,mol,‡§Æ‡•ã‡§≤,‡§Æ‡•ã‡§≤,100.0
7384,jaagriti,‡§ú‡§æ‡§ó‡•É‡§§‡§ø,‡§ú‡§æ‡§ó‡•É‡§§‡§ø,100.0
3143,protects,‡§™‡•ç‡§∞‡•ã‡§ü‡•á‡§ï‡•ç‡§ü‡•ç‡§∏,‡§™‡•ç‡§∞‡•ã‡§ü‡•á‡§ï‡•ç‡§ü‡•ç‡§∏,100.0
3144,laapattaa,‡§≤‡§æ‡§™‡§§‡•ç‡§§‡§æ,‡§≤‡§æ‡§™‡§§‡•ç‡§§‡§æ,100.0
3145,kharakon,‡§ñ‡§æ‡§∞‡§ï‡•ã‡§Ç,‡§ñ‡§æ‡§∞‡§ï‡•ã‡§Ç,100.0



üî∏ Bottom 10 examples by CHAR_ACC


Unnamed: 0,english,actual,predicted,char_acc
5886,rushikul,‡§ã‡§∑‡§ø‡§ï‡•Å‡§≤,‡§∞‡•Å‡§∂‡§ø‡§ï‡•Å‡§≤,0.0
7497,tankaar,‡§ü‡§Ç‡§ï‡§æ‡§∞,‡§§‡§æ‡§Ç‡§ï‡§æ‡§∞,0.0
768,nn,‡§è‡§®‡§è‡§®,‡§®‡§æ‡§®‡•Ä,0.0
2702,chhakaate,‡§õ‡§ï‡§æ‡§§‡•á,‡§ö‡•ç‡§õ‡§ï‡§æ‡§ü‡•á,0.0
5778,chhetthri,‡§õ‡•á‡§§‡•ç‡§∞‡•Ä,‡§ö‡•ç‡§õ‡•á‡§§‡•ç‡§§‡•ç‡§∞‡•Ä,0.0
9379,saturday,‡§∏‡•á‡§ü‡§∞‡§°‡•á,‡§∂‡§®‡§ø‡§µ‡§æ‡§∞,0.0
5567,tandon,‡§ü‡§Ç‡§°‡§®,‡§§‡§æ‡§Ç‡§°‡•ã‡§®,0.0
3790,lam,‡§Ü‡§à‡§è‡§è‡§Æ,‡§≤‡§æ‡§Æ,0.0
7538,dam,‡§°‡§Æ,‡§¶‡§æ‡§Æ,0.0
7547,thirty,‡§•‡§∞‡•ç‡§ü‡•Ä,‡§§‡•ç‡§∞‡§ø‡§Ç‡§ü‡•Ä,0.0




üîπ Top 10 examples by CER


Unnamed: 0,english,actual,predicted,cer
0,maitrologist,‡§Æ‡•à‡§ü‡•ç‡§∞‡•ã‡§≤‡•â‡§ú‡§ø‡§∏‡•ç‡§ü,‡§Æ‡•à‡§ü‡•ç‡§∞‡•ã‡§≤‡•â‡§ú‡§ø‡§∏‡•ç‡§ü,0.0
8957,brahmleen,‡§¨‡•ç‡§∞‡§π‡•ç‡§Æ‡§≤‡•Ä‡§®,‡§¨‡•ç‡§∞‡§π‡•ç‡§Æ‡§≤‡•Ä‡§®,0.0
4142,moines,‡§Æ‡•ã‡§á‡§®‡•á‡§∏,‡§Æ‡•ã‡§á‡§®‡•á‡§∏,0.0
4150,olympia,‡§ì‡§≤‡§Æ‡•ç‡§™‡§ø‡§Ø‡§æ,‡§ì‡§≤‡§Æ‡•ç‡§™‡§ø‡§Ø‡§æ,0.0
8955,jalvaayu,‡§ú‡§≤‡§µ‡§æ‡§Ø‡•Å,‡§ú‡§≤‡§µ‡§æ‡§Ø‡•Å,0.0
4159,james,‡§ú‡•á‡§Æ‡•ç‡§∏,‡§ú‡•á‡§Æ‡•ç‡§∏,0.0
4162,erickson,‡§è‡§∞‡§ø‡§ï‡•ç‡§∏‡§®,‡§è‡§∞‡§ø‡§ï‡•ç‡§∏‡§®,0.0
8950,masaaledaar,‡§Æ‡§∏‡§æ‡§≤‡•á‡§¶‡§æ‡§∞,‡§Æ‡§∏‡§æ‡§≤‡•á‡§¶‡§æ‡§∞,0.0
8947,vibhram,‡§µ‡§ø‡§≠‡•ç‡§∞‡§Æ,‡§µ‡§ø‡§≠‡•ç‡§∞‡§Æ,0.0
4168,state,‡§∏‡•ç‡§ü‡•á‡§ü,‡§∏‡•ç‡§ü‡•á‡§ü,0.0



üî∏ Bottom 10 examples by CER


Unnamed: 0,english,actual,predicted,cer
2773,peeechdablyooseees,‡§™‡•Ä‡§è‡§ö‡§°‡§¨‡•ç‡§≤‡•ç‡§Ø‡•Ç‡§∏‡•Ä‡§è‡§∏,‡§™‡•Ä‡§ö‡•ç‡§õ‡§¶‡§¨‡§≤‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏,800.0
1099,evav,‡§è‡§µ‡§µ,‡§è‡§ï‡•ç‡§∏‡•Ä‡§µ‡•Ä‡§è‡§µ,200.0
8421,jh,‡§ù,‡§ù‡•ç‡§π,200.0
455,overall,‡§ì‡§µ‡§∞‡§ë‡§≤,‡§ì‡§µ‡§∞‡§µ‡•ç‡§Ø‡§æ‡§µ‡§∏‡•ç‡§•‡§æ,180.0
6995,enthe,‡§ê‡§Ç‡§†‡•á,‡§è‡§®‡§ü‡•Ä‡§è‡§•‡•Ä,175.0
7762,ear,‡§á‡§Ö‡§∞,‡§Ü‡§Å‡§ó‡•Å‡§≤,166.67
9979,error,‡§è‡§∞‡§∞,‡§è‡§ï‡•ç‡§∏‡§ü‡•ç‡§∞,166.67
6866,dit,‡§°‡§ø‡§ü,‡§¶‡§ø‡§§‡•ç‡§§‡•ç,166.67
10035,mtec,‡§è‡§Æ‡§ü‡•á‡§ï,‡§Æ‡•ç‡§ü‡•Ä‡§∏‡•ç‡§ï‡•ç‡§Ø‡•Ç,160.0
5791,very,‡§µ‡•à‡§∞‡•Ä,‡§µ‡§∞‡•ç‡§§‡§Æ‡§æ‡§®,150.0




üîπ Top 10 examples by BLEU


Unnamed: 0,english,actual,predicted,bleu
0,maitrologist,‡§Æ‡•à‡§ü‡•ç‡§∞‡•ã‡§≤‡•â‡§ú‡§ø‡§∏‡•ç‡§ü,‡§Æ‡•à‡§ü‡•ç‡§∞‡•ã‡§≤‡•â‡§ú‡§ø‡§∏‡•ç‡§ü,100.0
6927,fisla,‡§´‡§ø‡§∏‡§≤‡§æ,‡§´‡§ø‡§∏‡§≤‡§æ,100.0
6979,andrabi,‡§Ö‡§Ç‡§¶‡•ç‡§∞‡§æ‡§¨‡•Ä,‡§Ö‡§Ç‡§¶‡•ç‡§∞‡§æ‡§¨‡•Ä,100.0
2219,sscl,‡§è‡§∏‡§è‡§∏‡§∏‡•Ä‡§è‡§≤,‡§è‡§∏‡§è‡§∏‡§∏‡•Ä‡§è‡§≤,100.0
6971,phurti,‡§´‡•Ç‡§∞‡•ç‡§§‡§ø,‡§´‡•Ç‡§∞‡•ç‡§§‡§ø,100.0
2221,jeevvigyaaniyon,‡§ú‡•Ä‡§µ‡§µ‡§ø‡§ú‡•ç‡§û‡§æ‡§®‡§ø‡§Ø‡•ã‡§Ç,‡§ú‡•Ä‡§µ‡§µ‡§ø‡§ú‡•ç‡§û‡§æ‡§®‡§ø‡§Ø‡•ã‡§Ç,100.0
6969,naamdhari,‡§®‡§æ‡§Æ‡§ß‡§æ‡§∞‡•Ä,‡§®‡§æ‡§Æ‡§ß‡§æ‡§∞‡•Ä,100.0
2223,tugalugi,‡§§‡•Å‡§ó‡§≤‡•Å‡§ó‡•Ä,‡§§‡•Å‡§ó‡§≤‡•Å‡§ó‡•Ä,100.0
6961,khajarana,‡§ñ‡§ú‡§∞‡§æ‡§®‡§æ,‡§ñ‡§ú‡§∞‡§æ‡§®‡§æ,100.0
3724,marsh,‡§Æ‡§æ‡§∞‡•ç‡§∂,‡§Æ‡§æ‡§∞‡•ç‡§∂,100.0



üî∏ Bottom 10 examples by BLEU


Unnamed: 0,english,actual,predicted,bleu
1793,dow,‡§°‡•Ä‡§ì‡§°‡§¨‡•ç‡§≤‡•ç‡§Ø‡•Ç,‡§¶‡•ã,0.83
2773,peeechdablyooseees,‡§™‡•Ä‡§è‡§ö‡§°‡§¨‡•ç‡§≤‡•ç‡§Ø‡•Ç‡§∏‡•Ä‡§è‡§∏,‡§™‡•Ä‡§ö‡•ç‡§õ‡§¶‡§¨‡§≤‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏‡•Ä‡§∏,1.86
586,win,‡§°‡§¨‡•ç‡§≤‡•ç‡§Ø‡•Ç‡§Ü‡§à‡§è‡§®,‡§µ‡§ø‡§®,3.14
4176,forest,‡§´‡•â‡§∞‡•á‡§∏‡•ç‡§ü,‡§µ‡§®,3.71
777,qiwi,‡§ï‡•ç‡§Ø‡•Ç‡§Ü‡§à‡§°‡§¨‡•ç‡§≤‡•ç‡§Ø‡•Ç‡§Ü‡§à,‡§ï‡•ç‡§µ‡•Ä‡§µ‡•Ä,6.49
1614,pwi,‡§™‡•Ä‡§°‡§¨‡•ç‡§≤‡•ç‡§Ø‡•Ç‡§Ü‡§à,‡§™‡•ç‡§µ‡•Ä,7.43
3232,whl,‡§°‡§¨‡•ç‡§≤‡•ç‡§Ø‡•Ç‡§è‡§ö‡§è‡§≤,‡§µ‡§π‡§≤‡•ç,8.22
2162,immachyority,‡§á‡§Æ‡•ç‡§Æ‡•à‡§ö‡•ç‡§Ø‡•ã‡§∞‡§ø‡§ü‡•Ä,‡§á‡§Æ‡§æ‡§ö‡•Ä,8.24
2931,dwn,‡§°‡•Ä‡§°‡§¨‡•ç‡§≤‡•ç‡§Ø‡•Ç‡§è‡§®,‡§¶‡§¨‡•ç‡§®,8.83
3393,dwt,‡§°‡•Ä‡§°‡§¨‡•ç‡§≤‡•ç‡§Ø‡•Ç‡§ü‡•Ä,‡§¶‡•ç‡§µ‡•Ä‡§§,9.1



