In [1]:
!pip install -U transformers datasets nltk rouge-score sacrebleu sentence-transformers sentencepiece fsspec==2025.3.2 bert-score --quiet
!pip install indic-nlp-library camel-tools
!pip install unbabel-comet
!pip install evaluate

!pip uninstall -y transformers evaluate unbabel-comet

# Reinstall known compatible versions
!pip install --upgrade transformers evaluate unbabel-comet --no-cache-dir


Collecting transformers<4.44.0,>=4.0 (from camel-tools)
  Using cached transformers-4.43.4-py3-none-any.whl.metadata (43 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0->camel-tools)
  Using cached nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting tokenizers<0.20,>=0.19 (from transformers<4.44.0,>=4.0->camel-tools)
  Using cached tokenizers-0.19.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Using cached nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)
Using cached transformers-4.43.4-py3-none-any.whl (9.4 MB)
Using cached tokenizers-0.19.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)
[0mInstalling collected packages: nvidia-cudnn-cu12, tokenizers, transformers
  Attempting uninstall: tokenizers
[0m    Found existing installation: tokenizers 0.21.4
    Uninstalling tokenizers-0.21.4:
      Successfully uninstalled tokenizers-0.21.4
  Attempting uninstall: 

In [2]:
!pip install unbabel-comet
!pip install evaluate

Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.6.0->unbabel-comet)
  Using cached nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Using cached nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)
[0mInstalling collected packages: nvidia-cudnn-cu12
[0mSuccessfully installed nvidia-cudnn-cu12
[0m

In [3]:
# ─── IMPORTS ─────────────────────────────────────────────
import os
import json
import hashlib
import numpy as np
import torch
import pandas as pd
import os
from rouge_score import rouge_scorer
from nltk.translate.meteor_score import meteor_score
import sacrebleu
from sentence_transformers import util
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModel
from bert_score import score as bert_score

from nltk.tokenize import word_tokenize
from indicnlp.tokenize.indic_tokenize import trivial_tokenize
from camel_tools.tokenizers.word import simple_word_tokenize
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
from evaluate import load as evaluate_load

In [4]:
# Install required packages
import nltk
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('punkt_tab')


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [5]:
!pip install -U gdown
!gdown --folder 1QdxrYnelt9poi45eLT5xgObihDRb_OtV -O /content/103080

[0mRetrieving folder contents
Retrieving folder 1rzaJxn-bvoSXzs4Zhyac-YWjH-HxFUEM 1_Pooling
Processing file 1GwEAz43APaFICfzVB_VRreAEvfr1Ra20 config.json
Processing file 1p7CoqRUhOIqXYcTuY_d-qYvZLTjcpxki config_sentence_transformers.json
Processing file 1oUflzLJYyHewOzW_X_iDjsLxd8ppvYzx config.json
Processing file 1IcFz-ITRtZkY6AC-x9ieb5CAdnNrzmsT modules.json
Processing file 115xWQSGruGaZie1V8KVg4_-Xw2DjF8ah pytorch_model.bin
Processing file 1Q7s6JhhsWmoWP09_n6zR9jJM4fU0lTz- README.md
Processing file 1rn6Sue3wcp44qm3whgJtrVfBDkEd9BWH sentence_bert_config.json
Processing file 1BudqNXrBJMzy7tYPxVUezizSpAQs2TMi special_tokens_map.json
Processing file 1nWX9rbnv1V18rJN2tKrWNCI_Ulpp2-ds tokenizer_config.json
Processing file 1Cev_MvvPrZvQ87Z0WcjH96kTVgXY9xkc tokenizer.json
Processing file 1bkHtwyjLYLLwSHaSzvstD_dQWIKchF0B vocab.txt
Retrieving folder contents completed
Building directory structure
Building directory structure completed
Downloading...
From: https://drive.google.com/uc?id=1GwE

In [6]:
# Clone updated repo
!git clone https://github.com/DrishtiShrrrma/multilingual-code-summarization-eval.git

# Adjust base_dir to new path for prompt-based summaries
base_dir = "/content/multilingual-code-summarization-eval/prompt_analysis"



Cloning into 'multilingual-code-summarization-eval'...
remote: Enumerating objects: 824, done.[K
remote: Counting objects: 100% (247/247), done.[K
remote: Compressing objects: 100% (193/193), done.[K
remote: Total 824 (delta 175), reused 54 (delta 54), pack-reused 577 (from 1)[K
Receiving objects: 100% (824/824), 4.67 MiB | 6.38 MiB/s, done.
Resolving deltas: 100% (395/395), done.


In [7]:
# ─── CONFIGURATION ──────────────────────────────────────────────
backtranslation_dir = "backtranslations_cache"
os.makedirs(backtranslation_dir, exist_ok=True)


# Mapping for summary field name → Display name
json_field_to_lang = {
    "chinese":     "Chinese",
    "french":      "French",
    "spanish":     "Spanish",
    "portuguese":  "Portuguese",
    "arabic":      "Arabic",
    "hindi":       "Hindi"
}

# Mapping for Display name → M2M-100 language code (used for backtranslation)
bt_lang_code_map = {
    "Chinese":     "zh",
    "French":      "fr",
    "Spanish":     "es",
    "Portuguese":  "pt",
    "Arabic":      "ar",
    "Hindi":       "hi"
}


# Load llamax model
from transformers import AutoModelForCausalLM, AutoTokenizer

bt_model_name = "LLaMAX/LLaMAX3-8B-Alpaca"
bt_model_tag  = "llamax3-8b"
bt_tokenizer  = AutoTokenizer.from_pretrained(bt_model_name)
device = "cuda" if torch.cuda.is_available() else "cpu"
bt_model      = AutoModelForCausalLM.from_pretrained(bt_model_name).to(device)
bt_model.eval()



# Caches
embedding_model     = None
bertscore_model     = None
bertscore_tokenizer = None
side_tokenizer      = None
side_model          = None

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/449 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/913 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 7 files:   0%|          | 0/7 [00:00<?, ?it/s]

model-00005-of-00007.safetensors:   0%|          | 0.00/4.83G [00:00<?, ?B/s]

model-00006-of-00007.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00004-of-00007.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00007.safetensors:   0%|          | 0.00/4.83G [00:00<?, ?B/s]

model-00003-of-00007.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00007-of-00007.safetensors:   0%|          | 0.00/2.57G [00:00<?, ?B/s]

model-00001-of-00007.safetensors:   0%|          | 0.00/4.89G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/121 [00:00<?, ?B/s]

In [8]:
def sanitize_text(text: str) -> str:
    return (
        text.replace("<|end_of_text|>", "")
            .replace("</s>", "")
            .replace("<|eot_id|>", "")
            .strip()
    )


In [9]:
# ─── BACK-TRANSLATION FUNCTION ─────────────────────────────────────────────
def clean_translation_output(decoded: str):
    decoded = decoded.replace("<|end_of_text|>", "").strip()
    if "### Response:" in decoded:
        return decoded.split("### Response:")[-1].strip()
    if "<|CHATBOT_TOKEN|>" in decoded:
        return decoded.split("<|CHATBOT_TOKEN|>")[-1].split("<|END_OF_TURN_TOKEN|>")[0].strip()
    if "<|im_start|>assistant" in decoded:
        return decoded.split("<|im_start|>assistant")[-1].split("<|im_end|>")[0].strip()
    if "English:" in decoded:
        return decoded.split("English:")[-1].strip()
    return decoded.strip()



def prompt_template(text, src_lang, tgt_lang):
    return f"Translate the following text from {src_lang} to {tgt_lang}:\n{text.strip()}\nTranslation:"

def bt_function(text, src_lang_name):
    key = f"{src_lang_name}_{bt_model_tag}_{hashlib.md5(text.encode()).hexdigest()}"
    cache_file = os.path.join(backtranslation_dir, key + ".txt")
    if os.path.exists(cache_file):
        return open(cache_file, 'r', encoding='utf-8').read()

    src_lang = json_field_to_lang.get(src_lang_name.lower(), src_lang_name)
    tgt_lang = "English"

    is_encoder_decoder = isinstance(bt_model, AutoModelForSeq2SeqLM)

    if is_encoder_decoder:
        tgt_lang_code = "en"
        src_lang_code = bt_lang_code_map.get(src_lang_name)
        if not src_lang_code:
            return text
        bt_tokenizer.src_lang = src_lang_code
        inputs = bt_tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
        inputs = {k: v.to(device) for k, v in inputs.items()}
        output_ids = bt_model.generate(**inputs, forced_bos_token_id=bt_tokenizer.get_lang_id(tgt_lang_code))
        output = bt_tokenizer.decode(output_ids[0], skip_special_tokens=True)
    else:
        prompt = prompt_template(text, src_lang, tgt_lang)
        if hasattr(bt_tokenizer, "apply_chat_template"):
            messages = [{"role": "user", "content": prompt}]
            prompt = bt_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        inputs = bt_tokenizer(prompt, return_tensors="pt").to(device)
        with torch.no_grad():
            output_ids = bt_model.generate(
                inputs.input_ids,
                max_new_tokens=512,
                pad_token_id=bt_tokenizer.eos_token_id,
                do_sample=False
            )
        decoded = bt_tokenizer.decode(output_ids[0], skip_special_tokens=False, clean_up_tokenization_spaces=False)
        output = clean_translation_output(decoded)

    # Clean up unwanted special token artifacts
    output = output.replace("<|end_of_text|>", "").strip()

    with open(cache_file, 'w', encoding='utf-8') as f:
        f.write(output)

    return sanitize_text(output)




# ─── METRIC FUNCTIONS ──────────────────────────────────────────────────────
def compute_bertscore(refs, hyps):
    P, R, F1 = bert_score(
        hyps,
        refs,
        model_type="xlm-roberta-large",
        lang="en",
        rescale_with_baseline=False
    )
    return {
        "precision": round(P.mean().item(), 4),
        "recall":    round(R.mean().item(), 4),
        "f1":        round(F1.mean().item(), 4)
    }

In [10]:
# ─── MEAN POOLING (for SIDE) ───────────────────────────────────────────────
def mean_pooling(model_output, attention_mask):
    token_embeddings = model_output[0]
    mask = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * mask, 1) / torch.clamp(mask.sum(1), min=1e-9)


def compute_side_score(codes, hyps):
    global side_tokenizer, side_model
    if side_model is None:
        checkpoint = "/content/103080"
        side_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        side_model     = AutoModel.from_pretrained(checkpoint)
        if torch.cuda.is_available(): side_model = side_model.cuda()
        side_model.eval()
    scores = []
    for code, summ in zip(codes, hyps):
        enc = side_tokenizer([code, summ], padding=True, truncation=True, return_tensors="pt")
        if torch.cuda.is_available(): enc = {k:v.cuda() for k,v in enc.items()}
        with torch.no_grad(): out = side_model(**enc)
        pooled = mean_pooling(out, enc['attention_mask'])
        normed = torch.nn.functional.normalize(pooled, p=2, dim=1)
        scores.append(util.pytorch_cos_sim(normed[0], normed[1]).item())
    return round(float(np.mean(scores)),4)

def compute_meteor_score(refs, hyps):
    sc = []
    for r,h in zip(refs, hyps):
        rt = word_tokenize(r.lower()); ht = word_tokenize(h.lower())
        sc.append(meteor_score([rt], ht))
    return round(float(np.mean(sc)),4)

def compute_chrf_score(refs, hyps):
  refs = [r.lower() for r in refs]
  hyps = [h.lower() for h in hyps]

  res = sacrebleu.corpus_chrf(hyps, [refs], word_order=2)
  return round(res.score / 100, 4)

## ----BLEU METRIC-----------
def compute_bleu_sacre(refs, hyps, lang_name):
    lang_name = lang_name.lower()

    # Define tokenizer per language
    tokenizer_map = {
        "chinese": "zh",
        "french": "13a",
        "portuguese": "13a",
        "arabic": "intl",
        "hindi": "intl",
        "spanish": "13a"
    }

    # Default tokenizer if language not found
    tokenizer = tokenizer_map.get(lang_name, "13a")

    # Compute BLEU-4
    score = sacrebleu.corpus_bleu(hyps, [refs], tokenize=tokenizer)
    return round(score.score / 100, 4) # Normalize to 0–1 like nltk



def tokenize(text, lang):
    lang = lang.lower()
    if lang == "chinese":
        return list(text.strip())
    elif lang == "arabic":
        return simple_word_tokenize(text)
    elif lang == "hindi":
        return trivial_tokenize(text, lang='hi')
    elif lang in ["french", "portuguese"]:
        return word_tokenize(text, language=lang)
    else:
        return text.strip().split()


def compute_bleu_nltk(refs_tokenized, hyps_tokenized):
    smoothie = SmoothingFunction().method1
    score = corpus_bleu(
        refs_tokenized,
        hyps_tokenized,
        weights=(0.25, 0.25, 0.25, 0.25),
        smoothing_function=smoothie
    )
    return round(score, 4)

## ----COMET METRIC-----------

comet = evaluate_load("comet", config_name="Unbabel/wmt22-comet-da")

def compute_comet_score(sources, references, hypotheses, batch_size=8, gpus=0):
    result = comet.compute(
        sources=sources,
        predictions=hypotheses,
        references=references,
    )
    per_example = result.get("scores", [])
    mean_score = float(np.mean(per_example)) if per_example else 0.0
    return round(mean_score, 4), per_example


# COMPUTE ALL METRICS
def compute_all_metrics(codes, refs, hyps, lang_name, code_lang):
    print(f"  Computing backtranslation-based metrics for {lang_name}...")
    bt = [bt_function(h, lang_name) for h in hyps]
    smoothie = SmoothingFunction().method4
    refs_tokenized = [[tokenize(r, lang_name)] for r in refs]
    hyps_tokenized = [tokenize(b, lang_name) for b in bt]

    # Compute BLEU using tokenized inputs
    bleu_nltk = compute_bleu_nltk(refs_tokenized, hyps_tokenized)

    bleu_sacre = compute_bleu_sacre(refs, bt, lang_name)
    bleu_diff = round(abs(bleu_nltk - bleu_sacre), 4)

    scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
    rl = [scorer.score(r, b)['rougeL'].fmeasure for r, b in zip(refs, bt)]
    comet_mean, comet_per_example = compute_comet_score(
        sources=hyps,
        references=refs,
        hypotheses=bt
    )

    return {
        "bleu4_nltk": round(bleu_nltk, 4),
        "bleu4_sacrebleu": bleu_sacre,
        "bleu4_diff": bleu_diff,
        "rougeL": round(np.mean(rl), 4),
        "meteor": compute_meteor_score(refs, bt),
        "chrf++": compute_chrf_score(refs, bt),
        "side_bt": compute_side_score(codes, bt),
        "comet_mean": comet_mean,
        "comet_per_example": comet_per_example
    }



Downloading builder script: 0.00B [00:00, ?B/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

.gitattributes: 0.00B [00:00, ?B/s]

README.md: 0.00B [00:00, ?B/s]

LICENSE: 0.00B [00:00, ?B/s]

hparams.yaml:   0%|          | 0.00/567 [00:00<?, ?B/s]

checkpoints/model.ckpt:   0%|          | 0.00/2.32G [00:00<?, ?B/s]

INFO:pytorch_lightning.utilities.migration.utils:Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.5.2. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../root/.cache/huggingface/hub/models--Unbabel--wmt22-comet-da/snapshots/2760a223ac957f30acfb18c8aa649b01cf1d75f2/checkpoints/model.ckpt`


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/616 [00:00<?, ?B/s]

/usr/local/lib/python3.11/dist-packages/pytorch_lightning/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']


In [11]:
# ─── BACKTRANSLATION SANITY TEST ─────────────────────────────────────────────
print("\n🔍 Running backtranslation test...")

sample_inputs = {
    "Chinese": "我喜欢自然语言处理。",
    "French": "J'aime le traitement automatique des langues.",
    "Arabic": "أنا أحب معالجة اللغة الطبيعية.",
}

for lang_name, input_text in sample_inputs.items():
    print(f"\n🌐 {lang_name} Input: {input_text}")
    output = bt_function(input_text, lang_name)
    print(f"📝 Backtranslated Output: {output}")


The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.



🔍 Running backtranslation test...

🌐 Chinese Input: 我喜欢自然语言处理。
📝 Backtranslated Output: My interest lies in Natural Language Processing.

🌐 French Input: J'aime le traitement automatique des langues.
📝 Backtranslated Output: I love automatic language processing.

🌐 Arabic Input: أنا أحب معالجة اللغة الطبيعية.
📝 Backtranslated Output: I love natural language processing.


In [12]:
from collections import OrderedDict

def insert_backtranslations(data):
    for entry in data:
        new_entry = OrderedDict()
        for key, value in entry.items():
            new_entry[key] = value
            if key.startswith("summary_"):
                lang_code = key.replace("summary_", "")
                if lang_code in json_field_to_lang:
                    lang_name = json_field_to_lang[lang_code]
                    gen = entry.get(key, "").strip()
                    if gen:
                        bt_key = f"bt_{lang_code}"
                        new_entry[bt_key] = sanitize_text(bt_function(gen, lang_name))
        entry.clear()
        entry.update(new_entry)


In [13]:
from collections import Counter


In [14]:
# ─── MAIN EVALUATION ─────────────────────────────────────
def run_evaluation():
    all_results = []
    base_dir = "/content/multilingual-code-summarization-eval/prompt_analysis"
    model_folders = ["codegemma", "gemma-2-9b-it", "qwen2.5coder", "deepseekcoder"]
    prompt_subdirs = ["prompt0"]

    bt_json_dir = "backtranslated_jsons"
    os.makedirs(bt_json_dir, exist_ok=True)

    for model_folder in model_folders:
        for prompt in prompt_subdirs:
            prompt_path = os.path.join(base_dir, model_folder, prompt)
            if not os.path.isdir(prompt_path):
                continue

            for fname in os.listdir(prompt_path):
                if not fname.endswith(".json") or not fname.startswith("all_languages_prompt"):
                    continue

                summary_path = os.path.join(prompt_path, fname)
                print(f"\nProcessing file: {summary_path}")

                with open(summary_path, encoding='utf-8') as f:
                    data = json.load(f)

                if not data:
                    print("  Skipped: empty file")
                    continue

                codes = [d.get("code", "") for d in data]
                refs = [sanitize_text(d.get("summary_english", d.get("docstring", ""))) for d in data]

                model_name = data[0].get("model_name", model_folder)
                prompt_used = data[0].get("prompt_used", prompt)

                for field, lang_name in json_field_to_lang.items():
                    hyp_key = f"summary_{field}"
                    if hyp_key not in data[0]:
                        print(f"  Skipping {lang_name} — {hyp_key} not found.")
                        continue

                    hyps = [sanitize_text(d.get(hyp_key, "")) for d in data]
                    if not any(hyps):
                        print(f"  Skipping {lang_name} — all summaries empty.")
                        continue

                    print(f"  → Evaluating summaries in {lang_name}...")
                    bert = compute_bertscore(refs, hyps)
                    side_original = compute_side_score(codes, hyps)
                    # Estimate dominant programming language for this batch
                    batch_langs = [entry.get("language", "unknown") for entry in data]
                    code_lang = Counter(batch_langs).most_common(1)[0][0]
                    metrics = compute_all_metrics(codes, refs, hyps, lang_name, code_lang)
                    side_drop = round(side_original - metrics["side_bt"], 4)

                    for i, entry in enumerate(data):
                        code_lang = entry.get("language", "unknown")
                        print(f"[DEBUG] Sample ID: {entry.get('id')} → programming_language = {code_lang}")

                        code = entry.get("code", "")
                        sample_id = entry.get("id", f"{code_lang}_{i}")
                        full_func = entry.get("whole_func_string", code)
                        word_len = len(full_func.strip().split())

                        generated_summary = sanitize_text(entry.get(hyp_key, ""))
                        backtranslated_summary = sanitize_text(bt_function(generated_summary, lang_name))
                        reference_summary = sanitize_text(entry.get("summary_english", entry.get("docstring", "")))

                        result = {
                            "sample_id": sample_id,
                            "model_folder_name": model_folder,
                            "model_name": model_name,
                            "programming_language": code_lang,
                            "language": entry.get("language", "unknown"),
                            "prompt_used": prompt_used,
                            "bt_model": bt_model_tag,
                            "word_len": word_len,
                            "length_bucket": entry.get("length_bucket", "unknown"),
                            "reference_summary": reference_summary,
                            "generated_summary": generated_summary,
                            "backtranslated_summary": backtranslated_summary,
                            "bertscore_f1": bert["f1"],
                            "bertscore_precision": bert["precision"],
                            "bertscore_recall": bert["recall"],
                            "side_original": side_original,
                            "side_bt": metrics["side_bt"],
                            "side_drop": side_drop,
                            "bleu4_nltk": metrics["bleu4_nltk"],
                            "bleu4_sacrebleu": metrics["bleu4_sacrebleu"],
                            "bleu4_diff": metrics["bleu4_diff"],
                            "rougeL": metrics["rougeL"],
                            "meteor": metrics["meteor"],
                            "chrf++": metrics["chrf++"],
                            "comet_mean": metrics["comet_mean"],
                            "comet_example_score": metrics["comet_per_example"][i] if i < len(metrics["comet_per_example"]) else None
                        }

                        all_results.append(result)

                # Insert backtranslations into data and save
                insert_backtranslations(data)
                enhanced_fname = os.path.basename(summary_path).replace(".json", f"_with_bt_{bt_model_tag}.json")
                enhanced_fpath = os.path.join(bt_json_dir, enhanced_fname)
                with open(enhanced_fpath, "w", encoding='utf-8') as f:
                    json.dump(data, f, indent=2, ensure_ascii=False)

    # Save metric results
    os.makedirs(backtranslation_dir, exist_ok=True)
    json_out = os.path.join(backtranslation_dir, f"new_all_scores_bt_{bt_model_tag}.json")
    csv_out = os.path.join(backtranslation_dir, f"new_all_scores_bt_{bt_model_tag}.csv")

    with open(json_out, "w", encoding='utf-8') as f:
        json.dump(all_results, f, indent=2, ensure_ascii=False)

    pd.DataFrame(all_results).to_csv(csv_out, index=False)

    print(f"\nSaved results to:\n  JSON: {json_out}\n  CSV:  {csv_out}")



run_evaluation()



Processing file: /content/multilingual-code-summarization-eval/prompt_analysis/codegemma/prompt0/all_languages_prompt0_combined_codegemma-7b-it.json
  → Evaluating summaries in Chinese...


model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

  return forward_call(*args, **kwargs)


  Computing backtranslation-based metrics for Chinese...


INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.utilities.rank_zero:You are using a CUDA device ('NVIDIA A100-SXM4-40GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  return forward_ca

[DEBUG] Sample ID: sample_68262 → programming_language = go
[DEBUG] Sample ID: sample_59144 → programming_language = go
[DEBUG] Sample ID: sample_56854 → programming_language = go
[DEBUG] Sample ID: sample_60736 → programming_language = go
[DEBUG] Sample ID: sample_60808 → programming_language = go
[DEBUG] Sample ID: sample_65047 → programming_language = go
[DEBUG] Sample ID: sample_68705 → programming_language = go
[DEBUG] Sample ID: sample_56245 → programming_language = go
[DEBUG] Sample ID: sample_69232 → programming_language = go
[DEBUG] Sample ID: sample_43225 → programming_language = java
[DEBUG] Sample ID: sample_43881 → programming_language = java
[DEBUG] Sample ID: sample_24461 → programming_language = java
[DEBUG] Sample ID: sample_34565 → programming_language = java
[DEBUG] Sample ID: sample_26644 → programming_language = java
[DEBUG] Sample ID: sample_26982 → programming_language = java
[DEBUG] Sample ID: sample_41842 → programming_language = java
[DEBUG] Sample ID: sample_

INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  return forward_call(*args, **kwargs)


[DEBUG] Sample ID: sample_68262 → programming_language = go
[DEBUG] Sample ID: sample_59144 → programming_language = go
[DEBUG] Sample ID: sample_56854 → programming_language = go
[DEBUG] Sample ID: sample_60736 → programming_language = go
[DEBUG] Sample ID: sample_60808 → programming_language = go
[DEBUG] Sample ID: sample_65047 → programming_language = go
[DEBUG] Sample ID: sample_68705 → programming_language = go
[DEBUG] Sample ID: sample_56245 → programming_language = go
[DEBUG] Sample ID: sample_69232 → programming_language = go
[DEBUG] Sample ID: sample_43225 → programming_language = java
[DEBUG] Sample ID: sample_43881 → programming_language = java
[DEBUG] Sample ID: sample_24461 → programming_language = java
[DEBUG] Sample ID: sample_34565 → programming_language = java
[DEBUG] Sample ID: sample_26644 → programming_language = java
[DEBUG] Sample ID: sample_26982 → programming_language = java
[DEBUG] Sample ID: sample_41842 → programming_language = java
[DEBUG] Sample ID: sample_

INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  return forward_call(*args, **kwargs)


[DEBUG] Sample ID: sample_68262 → programming_language = go
[DEBUG] Sample ID: sample_59144 → programming_language = go
[DEBUG] Sample ID: sample_56854 → programming_language = go
[DEBUG] Sample ID: sample_60736 → programming_language = go
[DEBUG] Sample ID: sample_60808 → programming_language = go
[DEBUG] Sample ID: sample_65047 → programming_language = go
[DEBUG] Sample ID: sample_68705 → programming_language = go
[DEBUG] Sample ID: sample_56245 → programming_language = go
[DEBUG] Sample ID: sample_69232 → programming_language = go
[DEBUG] Sample ID: sample_43225 → programming_language = java
[DEBUG] Sample ID: sample_43881 → programming_language = java
[DEBUG] Sample ID: sample_24461 → programming_language = java
[DEBUG] Sample ID: sample_34565 → programming_language = java
[DEBUG] Sample ID: sample_26644 → programming_language = java
[DEBUG] Sample ID: sample_26982 → programming_language = java
[DEBUG] Sample ID: sample_41842 → programming_language = java
[DEBUG] Sample ID: sample_

INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  return forward_call(*args, **kwargs)


[DEBUG] Sample ID: sample_68262 → programming_language = go
[DEBUG] Sample ID: sample_59144 → programming_language = go
[DEBUG] Sample ID: sample_56854 → programming_language = go
[DEBUG] Sample ID: sample_60736 → programming_language = go
[DEBUG] Sample ID: sample_60808 → programming_language = go
[DEBUG] Sample ID: sample_65047 → programming_language = go
[DEBUG] Sample ID: sample_68705 → programming_language = go
[DEBUG] Sample ID: sample_56245 → programming_language = go
[DEBUG] Sample ID: sample_69232 → programming_language = go
[DEBUG] Sample ID: sample_43225 → programming_language = java
[DEBUG] Sample ID: sample_43881 → programming_language = java
[DEBUG] Sample ID: sample_24461 → programming_language = java
[DEBUG] Sample ID: sample_34565 → programming_language = java
[DEBUG] Sample ID: sample_26644 → programming_language = java
[DEBUG] Sample ID: sample_26982 → programming_language = java
[DEBUG] Sample ID: sample_41842 → programming_language = java
[DEBUG] Sample ID: sample_

INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  return forward_call(*args, **kwargs)


[DEBUG] Sample ID: sample_68262 → programming_language = go
[DEBUG] Sample ID: sample_59144 → programming_language = go
[DEBUG] Sample ID: sample_56854 → programming_language = go
[DEBUG] Sample ID: sample_60736 → programming_language = go
[DEBUG] Sample ID: sample_60808 → programming_language = go
[DEBUG] Sample ID: sample_65047 → programming_language = go
[DEBUG] Sample ID: sample_68705 → programming_language = go
[DEBUG] Sample ID: sample_56245 → programming_language = go
[DEBUG] Sample ID: sample_69232 → programming_language = go
[DEBUG] Sample ID: sample_43225 → programming_language = java
[DEBUG] Sample ID: sample_43881 → programming_language = java
[DEBUG] Sample ID: sample_24461 → programming_language = java
[DEBUG] Sample ID: sample_34565 → programming_language = java
[DEBUG] Sample ID: sample_26644 → programming_language = java
[DEBUG] Sample ID: sample_26982 → programming_language = java
[DEBUG] Sample ID: sample_41842 → programming_language = java
[DEBUG] Sample ID: sample_

INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  return forward_call(*args, **kwargs)


[DEBUG] Sample ID: sample_68262 → programming_language = go
[DEBUG] Sample ID: sample_59144 → programming_language = go
[DEBUG] Sample ID: sample_56854 → programming_language = go
[DEBUG] Sample ID: sample_60736 → programming_language = go
[DEBUG] Sample ID: sample_60808 → programming_language = go
[DEBUG] Sample ID: sample_65047 → programming_language = go
[DEBUG] Sample ID: sample_68705 → programming_language = go
[DEBUG] Sample ID: sample_56245 → programming_language = go
[DEBUG] Sample ID: sample_69232 → programming_language = go
[DEBUG] Sample ID: sample_43225 → programming_language = java
[DEBUG] Sample ID: sample_43881 → programming_language = java
[DEBUG] Sample ID: sample_24461 → programming_language = java
[DEBUG] Sample ID: sample_34565 → programming_language = java
[DEBUG] Sample ID: sample_26644 → programming_language = java
[DEBUG] Sample ID: sample_26982 → programming_language = java
[DEBUG] Sample ID: sample_41842 → programming_language = java
[DEBUG] Sample ID: sample_

INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  return forward_call(*args, **kwargs)


[DEBUG] Sample ID: sample_68262 → programming_language = go
[DEBUG] Sample ID: sample_59144 → programming_language = go
[DEBUG] Sample ID: sample_56854 → programming_language = go
[DEBUG] Sample ID: sample_60736 → programming_language = go
[DEBUG] Sample ID: sample_60808 → programming_language = go
[DEBUG] Sample ID: sample_65047 → programming_language = go
[DEBUG] Sample ID: sample_68705 → programming_language = go
[DEBUG] Sample ID: sample_56245 → programming_language = go
[DEBUG] Sample ID: sample_69232 → programming_language = go
[DEBUG] Sample ID: sample_43225 → programming_language = java
[DEBUG] Sample ID: sample_43881 → programming_language = java
[DEBUG] Sample ID: sample_24461 → programming_language = java
[DEBUG] Sample ID: sample_34565 → programming_language = java
[DEBUG] Sample ID: sample_26644 → programming_language = java
[DEBUG] Sample ID: sample_26982 → programming_language = java
[DEBUG] Sample ID: sample_41842 → programming_language = java
[DEBUG] Sample ID: sample_

INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  return forward_call(*args, **kwargs)


[DEBUG] Sample ID: sample_68262 → programming_language = go
[DEBUG] Sample ID: sample_59144 → programming_language = go
[DEBUG] Sample ID: sample_56854 → programming_language = go
[DEBUG] Sample ID: sample_60736 → programming_language = go
[DEBUG] Sample ID: sample_60808 → programming_language = go
[DEBUG] Sample ID: sample_65047 → programming_language = go
[DEBUG] Sample ID: sample_68705 → programming_language = go
[DEBUG] Sample ID: sample_56245 → programming_language = go
[DEBUG] Sample ID: sample_69232 → programming_language = go
[DEBUG] Sample ID: sample_43225 → programming_language = java
[DEBUG] Sample ID: sample_43881 → programming_language = java
[DEBUG] Sample ID: sample_24461 → programming_language = java
[DEBUG] Sample ID: sample_34565 → programming_language = java
[DEBUG] Sample ID: sample_26644 → programming_language = java
[DEBUG] Sample ID: sample_26982 → programming_language = java
[DEBUG] Sample ID: sample_41842 → programming_language = java
[DEBUG] Sample ID: sample_

INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  return forward_call(*args, **kwargs)


[DEBUG] Sample ID: sample_68262 → programming_language = go
[DEBUG] Sample ID: sample_59144 → programming_language = go
[DEBUG] Sample ID: sample_56854 → programming_language = go
[DEBUG] Sample ID: sample_60736 → programming_language = go
[DEBUG] Sample ID: sample_60808 → programming_language = go
[DEBUG] Sample ID: sample_65047 → programming_language = go
[DEBUG] Sample ID: sample_68705 → programming_language = go
[DEBUG] Sample ID: sample_56245 → programming_language = go
[DEBUG] Sample ID: sample_69232 → programming_language = go
[DEBUG] Sample ID: sample_43225 → programming_language = java
[DEBUG] Sample ID: sample_43881 → programming_language = java
[DEBUG] Sample ID: sample_24461 → programming_language = java
[DEBUG] Sample ID: sample_34565 → programming_language = java
[DEBUG] Sample ID: sample_26644 → programming_language = java
[DEBUG] Sample ID: sample_26982 → programming_language = java
[DEBUG] Sample ID: sample_41842 → programming_language = java
[DEBUG] Sample ID: sample_

INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  return forward_call(*args, **kwargs)


[DEBUG] Sample ID: sample_68262 → programming_language = go
[DEBUG] Sample ID: sample_59144 → programming_language = go
[DEBUG] Sample ID: sample_56854 → programming_language = go
[DEBUG] Sample ID: sample_60736 → programming_language = go
[DEBUG] Sample ID: sample_60808 → programming_language = go
[DEBUG] Sample ID: sample_65047 → programming_language = go
[DEBUG] Sample ID: sample_68705 → programming_language = go
[DEBUG] Sample ID: sample_56245 → programming_language = go
[DEBUG] Sample ID: sample_69232 → programming_language = go
[DEBUG] Sample ID: sample_43225 → programming_language = java
[DEBUG] Sample ID: sample_43881 → programming_language = java
[DEBUG] Sample ID: sample_24461 → programming_language = java
[DEBUG] Sample ID: sample_34565 → programming_language = java
[DEBUG] Sample ID: sample_26644 → programming_language = java
[DEBUG] Sample ID: sample_26982 → programming_language = java
[DEBUG] Sample ID: sample_41842 → programming_language = java
[DEBUG] Sample ID: sample_

INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  return forward_call(*args, **kwargs)


[DEBUG] Sample ID: sample_68262 → programming_language = go
[DEBUG] Sample ID: sample_59144 → programming_language = go
[DEBUG] Sample ID: sample_56854 → programming_language = go
[DEBUG] Sample ID: sample_60736 → programming_language = go
[DEBUG] Sample ID: sample_60808 → programming_language = go
[DEBUG] Sample ID: sample_65047 → programming_language = go
[DEBUG] Sample ID: sample_68705 → programming_language = go
[DEBUG] Sample ID: sample_56245 → programming_language = go
[DEBUG] Sample ID: sample_69232 → programming_language = go
[DEBUG] Sample ID: sample_43225 → programming_language = java
[DEBUG] Sample ID: sample_43881 → programming_language = java
[DEBUG] Sample ID: sample_24461 → programming_language = java
[DEBUG] Sample ID: sample_34565 → programming_language = java
[DEBUG] Sample ID: sample_26644 → programming_language = java
[DEBUG] Sample ID: sample_26982 → programming_language = java
[DEBUG] Sample ID: sample_41842 → programming_language = java
[DEBUG] Sample ID: sample_

INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  return forward_call(*args, **kwargs)


[DEBUG] Sample ID: sample_68262 → programming_language = go
[DEBUG] Sample ID: sample_59144 → programming_language = go
[DEBUG] Sample ID: sample_56854 → programming_language = go
[DEBUG] Sample ID: sample_60736 → programming_language = go
[DEBUG] Sample ID: sample_60808 → programming_language = go
[DEBUG] Sample ID: sample_65047 → programming_language = go
[DEBUG] Sample ID: sample_68705 → programming_language = go
[DEBUG] Sample ID: sample_56245 → programming_language = go
[DEBUG] Sample ID: sample_69232 → programming_language = go
[DEBUG] Sample ID: sample_43225 → programming_language = java
[DEBUG] Sample ID: sample_43881 → programming_language = java
[DEBUG] Sample ID: sample_24461 → programming_language = java
[DEBUG] Sample ID: sample_34565 → programming_language = java
[DEBUG] Sample ID: sample_26644 → programming_language = java
[DEBUG] Sample ID: sample_26982 → programming_language = java
[DEBUG] Sample ID: sample_41842 → programming_language = java
[DEBUG] Sample ID: sample_

INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  return forward_call(*args, **kwargs)


[DEBUG] Sample ID: sample_68262 → programming_language = go
[DEBUG] Sample ID: sample_59144 → programming_language = go
[DEBUG] Sample ID: sample_56854 → programming_language = go
[DEBUG] Sample ID: sample_60736 → programming_language = go
[DEBUG] Sample ID: sample_60808 → programming_language = go
[DEBUG] Sample ID: sample_65047 → programming_language = go
[DEBUG] Sample ID: sample_68705 → programming_language = go
[DEBUG] Sample ID: sample_56245 → programming_language = go
[DEBUG] Sample ID: sample_69232 → programming_language = go
[DEBUG] Sample ID: sample_43225 → programming_language = java
[DEBUG] Sample ID: sample_43881 → programming_language = java
[DEBUG] Sample ID: sample_24461 → programming_language = java
[DEBUG] Sample ID: sample_34565 → programming_language = java
[DEBUG] Sample ID: sample_26644 → programming_language = java
[DEBUG] Sample ID: sample_26982 → programming_language = java
[DEBUG] Sample ID: sample_41842 → programming_language = java
[DEBUG] Sample ID: sample_

INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  return forward_call(*args, **kwargs)


[DEBUG] Sample ID: sample_68262 → programming_language = go
[DEBUG] Sample ID: sample_59144 → programming_language = go
[DEBUG] Sample ID: sample_56854 → programming_language = go
[DEBUG] Sample ID: sample_60736 → programming_language = go
[DEBUG] Sample ID: sample_60808 → programming_language = go
[DEBUG] Sample ID: sample_65047 → programming_language = go
[DEBUG] Sample ID: sample_68705 → programming_language = go
[DEBUG] Sample ID: sample_56245 → programming_language = go
[DEBUG] Sample ID: sample_69232 → programming_language = go
[DEBUG] Sample ID: sample_43225 → programming_language = java
[DEBUG] Sample ID: sample_43881 → programming_language = java
[DEBUG] Sample ID: sample_24461 → programming_language = java
[DEBUG] Sample ID: sample_34565 → programming_language = java
[DEBUG] Sample ID: sample_26644 → programming_language = java
[DEBUG] Sample ID: sample_26982 → programming_language = java
[DEBUG] Sample ID: sample_41842 → programming_language = java
[DEBUG] Sample ID: sample_

INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  return forward_call(*args, **kwargs)


[DEBUG] Sample ID: sample_68262 → programming_language = go
[DEBUG] Sample ID: sample_59144 → programming_language = go
[DEBUG] Sample ID: sample_56854 → programming_language = go
[DEBUG] Sample ID: sample_60736 → programming_language = go
[DEBUG] Sample ID: sample_60808 → programming_language = go
[DEBUG] Sample ID: sample_65047 → programming_language = go
[DEBUG] Sample ID: sample_68705 → programming_language = go
[DEBUG] Sample ID: sample_56245 → programming_language = go
[DEBUG] Sample ID: sample_69232 → programming_language = go
[DEBUG] Sample ID: sample_43225 → programming_language = java
[DEBUG] Sample ID: sample_43881 → programming_language = java
[DEBUG] Sample ID: sample_24461 → programming_language = java
[DEBUG] Sample ID: sample_34565 → programming_language = java
[DEBUG] Sample ID: sample_26644 → programming_language = java
[DEBUG] Sample ID: sample_26982 → programming_language = java
[DEBUG] Sample ID: sample_41842 → programming_language = java
[DEBUG] Sample ID: sample_

INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  return forward_call(*args, **kwargs)


[DEBUG] Sample ID: sample_68262 → programming_language = go
[DEBUG] Sample ID: sample_59144 → programming_language = go
[DEBUG] Sample ID: sample_56854 → programming_language = go
[DEBUG] Sample ID: sample_60736 → programming_language = go
[DEBUG] Sample ID: sample_60808 → programming_language = go
[DEBUG] Sample ID: sample_65047 → programming_language = go
[DEBUG] Sample ID: sample_68705 → programming_language = go
[DEBUG] Sample ID: sample_56245 → programming_language = go
[DEBUG] Sample ID: sample_69232 → programming_language = go
[DEBUG] Sample ID: sample_43225 → programming_language = java
[DEBUG] Sample ID: sample_43881 → programming_language = java
[DEBUG] Sample ID: sample_24461 → programming_language = java
[DEBUG] Sample ID: sample_34565 → programming_language = java
[DEBUG] Sample ID: sample_26644 → programming_language = java
[DEBUG] Sample ID: sample_26982 → programming_language = java
[DEBUG] Sample ID: sample_41842 → programming_language = java
[DEBUG] Sample ID: sample_

INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  return forward_call(*args, **kwargs)


[DEBUG] Sample ID: sample_68262 → programming_language = go
[DEBUG] Sample ID: sample_59144 → programming_language = go
[DEBUG] Sample ID: sample_56854 → programming_language = go
[DEBUG] Sample ID: sample_60736 → programming_language = go
[DEBUG] Sample ID: sample_60808 → programming_language = go
[DEBUG] Sample ID: sample_65047 → programming_language = go
[DEBUG] Sample ID: sample_68705 → programming_language = go
[DEBUG] Sample ID: sample_56245 → programming_language = go
[DEBUG] Sample ID: sample_69232 → programming_language = go
[DEBUG] Sample ID: sample_43225 → programming_language = java
[DEBUG] Sample ID: sample_43881 → programming_language = java
[DEBUG] Sample ID: sample_24461 → programming_language = java
[DEBUG] Sample ID: sample_34565 → programming_language = java
[DEBUG] Sample ID: sample_26644 → programming_language = java
[DEBUG] Sample ID: sample_26982 → programming_language = java
[DEBUG] Sample ID: sample_41842 → programming_language = java
[DEBUG] Sample ID: sample_

INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  return forward_call(*args, **kwargs)


[DEBUG] Sample ID: sample_68262 → programming_language = go
[DEBUG] Sample ID: sample_59144 → programming_language = go
[DEBUG] Sample ID: sample_56854 → programming_language = go
[DEBUG] Sample ID: sample_60736 → programming_language = go
[DEBUG] Sample ID: sample_60808 → programming_language = go
[DEBUG] Sample ID: sample_65047 → programming_language = go
[DEBUG] Sample ID: sample_68705 → programming_language = go
[DEBUG] Sample ID: sample_56245 → programming_language = go
[DEBUG] Sample ID: sample_69232 → programming_language = go
[DEBUG] Sample ID: sample_43225 → programming_language = java
[DEBUG] Sample ID: sample_43881 → programming_language = java
[DEBUG] Sample ID: sample_24461 → programming_language = java
[DEBUG] Sample ID: sample_34565 → programming_language = java
[DEBUG] Sample ID: sample_26644 → programming_language = java
[DEBUG] Sample ID: sample_26982 → programming_language = java
[DEBUG] Sample ID: sample_41842 → programming_language = java
[DEBUG] Sample ID: sample_

INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  return forward_call(*args, **kwargs)


[DEBUG] Sample ID: sample_68262 → programming_language = go
[DEBUG] Sample ID: sample_59144 → programming_language = go
[DEBUG] Sample ID: sample_56854 → programming_language = go
[DEBUG] Sample ID: sample_60736 → programming_language = go
[DEBUG] Sample ID: sample_60808 → programming_language = go
[DEBUG] Sample ID: sample_65047 → programming_language = go
[DEBUG] Sample ID: sample_68705 → programming_language = go
[DEBUG] Sample ID: sample_56245 → programming_language = go
[DEBUG] Sample ID: sample_69232 → programming_language = go
[DEBUG] Sample ID: sample_43225 → programming_language = java
[DEBUG] Sample ID: sample_43881 → programming_language = java
[DEBUG] Sample ID: sample_24461 → programming_language = java
[DEBUG] Sample ID: sample_34565 → programming_language = java
[DEBUG] Sample ID: sample_26644 → programming_language = java
[DEBUG] Sample ID: sample_26982 → programming_language = java
[DEBUG] Sample ID: sample_41842 → programming_language = java
[DEBUG] Sample ID: sample_

INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  return forward_call(*args, **kwargs)


[DEBUG] Sample ID: sample_68262 → programming_language = go
[DEBUG] Sample ID: sample_59144 → programming_language = go
[DEBUG] Sample ID: sample_56854 → programming_language = go
[DEBUG] Sample ID: sample_60736 → programming_language = go
[DEBUG] Sample ID: sample_60808 → programming_language = go
[DEBUG] Sample ID: sample_65047 → programming_language = go
[DEBUG] Sample ID: sample_68705 → programming_language = go
[DEBUG] Sample ID: sample_56245 → programming_language = go
[DEBUG] Sample ID: sample_69232 → programming_language = go
[DEBUG] Sample ID: sample_43225 → programming_language = java
[DEBUG] Sample ID: sample_43881 → programming_language = java
[DEBUG] Sample ID: sample_24461 → programming_language = java
[DEBUG] Sample ID: sample_34565 → programming_language = java
[DEBUG] Sample ID: sample_26644 → programming_language = java
[DEBUG] Sample ID: sample_26982 → programming_language = java
[DEBUG] Sample ID: sample_41842 → programming_language = java
[DEBUG] Sample ID: sample_

INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  return forward_call(*args, **kwargs)


[DEBUG] Sample ID: sample_68262 → programming_language = go
[DEBUG] Sample ID: sample_59144 → programming_language = go
[DEBUG] Sample ID: sample_56854 → programming_language = go
[DEBUG] Sample ID: sample_60736 → programming_language = go
[DEBUG] Sample ID: sample_60808 → programming_language = go
[DEBUG] Sample ID: sample_65047 → programming_language = go
[DEBUG] Sample ID: sample_68705 → programming_language = go
[DEBUG] Sample ID: sample_56245 → programming_language = go
[DEBUG] Sample ID: sample_69232 → programming_language = go
[DEBUG] Sample ID: sample_43225 → programming_language = java
[DEBUG] Sample ID: sample_43881 → programming_language = java
[DEBUG] Sample ID: sample_24461 → programming_language = java
[DEBUG] Sample ID: sample_34565 → programming_language = java
[DEBUG] Sample ID: sample_26644 → programming_language = java
[DEBUG] Sample ID: sample_26982 → programming_language = java
[DEBUG] Sample ID: sample_41842 → programming_language = java
[DEBUG] Sample ID: sample_

INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  return forward_call(*args, **kwargs)


[DEBUG] Sample ID: sample_68262 → programming_language = go
[DEBUG] Sample ID: sample_59144 → programming_language = go
[DEBUG] Sample ID: sample_56854 → programming_language = go
[DEBUG] Sample ID: sample_60736 → programming_language = go
[DEBUG] Sample ID: sample_60808 → programming_language = go
[DEBUG] Sample ID: sample_65047 → programming_language = go
[DEBUG] Sample ID: sample_68705 → programming_language = go
[DEBUG] Sample ID: sample_56245 → programming_language = go
[DEBUG] Sample ID: sample_69232 → programming_language = go
[DEBUG] Sample ID: sample_43225 → programming_language = java
[DEBUG] Sample ID: sample_43881 → programming_language = java
[DEBUG] Sample ID: sample_24461 → programming_language = java
[DEBUG] Sample ID: sample_34565 → programming_language = java
[DEBUG] Sample ID: sample_26644 → programming_language = java
[DEBUG] Sample ID: sample_26982 → programming_language = java
[DEBUG] Sample ID: sample_41842 → programming_language = java
[DEBUG] Sample ID: sample_

INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  return forward_call(*args, **kwargs)


[DEBUG] Sample ID: sample_68262 → programming_language = go
[DEBUG] Sample ID: sample_59144 → programming_language = go
[DEBUG] Sample ID: sample_56854 → programming_language = go
[DEBUG] Sample ID: sample_60736 → programming_language = go
[DEBUG] Sample ID: sample_60808 → programming_language = go
[DEBUG] Sample ID: sample_65047 → programming_language = go
[DEBUG] Sample ID: sample_68705 → programming_language = go
[DEBUG] Sample ID: sample_56245 → programming_language = go
[DEBUG] Sample ID: sample_69232 → programming_language = go
[DEBUG] Sample ID: sample_43225 → programming_language = java
[DEBUG] Sample ID: sample_43881 → programming_language = java
[DEBUG] Sample ID: sample_24461 → programming_language = java
[DEBUG] Sample ID: sample_34565 → programming_language = java
[DEBUG] Sample ID: sample_26644 → programming_language = java
[DEBUG] Sample ID: sample_26982 → programming_language = java
[DEBUG] Sample ID: sample_41842 → programming_language = java
[DEBUG] Sample ID: sample_

INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  return forward_call(*args, **kwargs)


[DEBUG] Sample ID: sample_68262 → programming_language = go
[DEBUG] Sample ID: sample_59144 → programming_language = go
[DEBUG] Sample ID: sample_56854 → programming_language = go
[DEBUG] Sample ID: sample_60736 → programming_language = go
[DEBUG] Sample ID: sample_60808 → programming_language = go
[DEBUG] Sample ID: sample_65047 → programming_language = go
[DEBUG] Sample ID: sample_68705 → programming_language = go
[DEBUG] Sample ID: sample_56245 → programming_language = go
[DEBUG] Sample ID: sample_69232 → programming_language = go
[DEBUG] Sample ID: sample_43225 → programming_language = java
[DEBUG] Sample ID: sample_43881 → programming_language = java
[DEBUG] Sample ID: sample_24461 → programming_language = java
[DEBUG] Sample ID: sample_34565 → programming_language = java
[DEBUG] Sample ID: sample_26644 → programming_language = java
[DEBUG] Sample ID: sample_26982 → programming_language = java
[DEBUG] Sample ID: sample_41842 → programming_language = java
[DEBUG] Sample ID: sample_

In [16]:
from google.colab import files

# Paths to the generated files
json_out = os.path.join(backtranslation_dir, f"new_all_scores_bt_{bt_model_tag}.json")
csv_out = os.path.join(backtranslation_dir, f"new_all_scores_bt_{bt_model_tag}.csv")

# Download the files
files.download(json_out)
files.download(csv_out)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>