In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input directory and print the full path of every file found.
for root_dir, _subdirs, names_in_dir in os.walk('/kaggle/input'):
    full_paths = [os.path.join(root_dir, name) for name in names_in_dir]
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/project-multi/test_random_400-2.csv
/kaggle/input/project-multi/test_top_cosine_200-2.csv
/kaggle/input/project-multi/test_top_rougeL_200-2.csv
/kaggle/input/project-multi/train_data.csv
/kaggle/input/project-multi/test_random_600-2.csv


In [3]:
# %pip (rather than !pip) installs into the environment of the running kernel.
# Every package is pinned; the last three use the versions resolved in the recorded install log.
%pip install -U "transformers==4.44.2" "accelerate==0.34.2" "peft==0.11.1" "optuna==4.5.0" "evaluate==0.4.6" "rouge_score==0.1.2"

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting transformers==4.44.2
  Downloading transformers-4.44.2-py3-none-any.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.7/43.7 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate==0.34.2
  Downloading accelerate-0.34.2-py3-none-any.whl.metadata (19 kB)
Collecting peft==0.11.1
  Downloading peft-0.11.1-py3-none-any.whl.metadata (13 kB)
Collecting optuna
  Downloading optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting evaluate
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting tokenizers<0.20,>=0.19 (from transformers==4.44.2)
  Downloading tokenizers-0.19.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting fsspec>=2021.05.0 (from fsspec[http]>=2021.05.0->evaluate)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Col

In [6]:
from huggingface_hub import login

# Never hardcode the access token in the notebook: read it from the HF_TOKEN
# environment variable and fall back to a hidden interactive prompt.
import os
from getpass import getpass

hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")
login(hf_token)

# T5

In [4]:


import os, time, json
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# ========= CONFIG =========
# Checkpoint evaluated zero-shot in this cell.
MODEL_NAME = "google-t5/t5-3b"

# Evaluation CSVs, keyed by the short tag that also appears in output file names.
INPUT_FILES = dict(
    test400="/kaggle/input/project-multi/test_random_400-2.csv",
    test600="/kaggle/input/project-multi/test_random_600-2.csv",
)

INPUT_COL = "description_html_clean"  # CSV column fed to the model
OUTPUT_DIR = "./t5_infer"             # predictions and per-row logs are written here
MAX_SOURCE_LEN = 512                  # tokenizer truncation length for the source text

# Keyword arguments forwarded to model.generate(): beam search without sampling.
GEN_KWARGS = {
    "max_new_tokens": 64,
    "num_beams": 4,
    "do_sample": False,
    "no_repeat_ngram_size": 3,
    "length_penalty": 1.0,
    "early_stopping": True,
}

# When True, each input is wrapped with build_prompt_for_app() before tokenization.
USE_APP_PROMPT = False
def build_prompt_for_app(html_text: str) -> str:
    """Wrap an app description in the "summarize:" instruction prompt.

    Args:
        html_text: The app description (HTML) to embed in the prompt.

    Returns:
        The full prompt: editor instructions, the description itself, and the
        expected "Short Description:" response format.
    """
    instructions = "".join([
        "summarize: You are an expert app store editor. ",
        "Given the following app description in HTML format, summarize it in 2-3 sentences, ",
        "with a concise, engaging short description (max 80 characters) suitable for an app store listing. ",
    ])
    description_part = f"App Description HTML:\n{html_text}\n"
    response_format = "Format your response as:\nShort Description: <your short description>\n\n"
    return instructions + description_part + response_format


# ========= LOAD MODEL =========
print("⏳ Loading model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
# Reuse EOS for padding if the tokenizer ships without a pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# fp16 when a GPU is available, fp32 otherwise; weight placement is delegated
# to device_map="auto".
model = AutoModelForSeq2SeqLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
)
model.eval()
device = "cuda" if torch.cuda.is_available() else "cpu"
print("✅ Model ready.")

os.makedirs(OUTPUT_DIR, exist_ok=True)

# ========= RUN SCHEDULE =========
# The two datasets are interleaved and each is run twice; run_counters numbers
# the repeated rounds per dataset so every round gets its own output files.
RUN_SCHEDULE = ["test400", "test600", "test400", "test600"]
run_counters = {k: 0 for k in INPUT_FILES.keys()}

for tag in RUN_SCHEDULE:
    run_counters[tag] += 1
    run_id = run_counters[tag]

    file_path = INPUT_FILES[tag]
    assert os.path.exists(file_path), f"Missing: {file_path}"
    df = pd.read_csv(file_path)
    assert INPUT_COL in df.columns, f"Missing column: {INPUT_COL}"

    print(f"▶️ Running {tag}, round {run_id} ...")
    pred_rows = []
    infer_log_path = os.path.join(OUTPUT_DIR, f"log_{tag}_run{run_id}.jsonl")
    pred_path      = os.path.join(OUTPUT_DIR, f"pred_{tag}_run{run_id}.csv")

    with open(infer_log_path, "w", encoding="utf-8") as f_log:
        for i, row in df.iterrows():
            raw_src = str(row[INPUT_COL])
            # NOTE(review): with USE_APP_PROMPT disabled the raw description is sent
            # without a task prefix such as "summarize: " — confirm this is intended.
            src_text = build_prompt_for_app(raw_src) if USE_APP_PROMPT else raw_src

            # Tokenize one example at a time, truncated to MAX_SOURCE_LEN tokens.
            inputs = tokenizer(
                src_text,
                return_tensors="pt",
                truncation=True,
                max_length=MAX_SOURCE_LEN,
            )
            inputs = {k: v.to(device) for k, v in inputs.items()}

            # Number of source tokens actually attended to (after truncation).
            input_tokens = int(inputs["attention_mask"].sum().item())

            # perf_counter() is monotonic and high-resolution, so the measured
            # interval cannot be distorted by system clock adjustments.
            t0 = time.perf_counter()
            with torch.inference_mode():
                outputs = model.generate(**inputs, **GEN_KWARGS)
            latency = time.perf_counter() - t0

            decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)

            # Count the generated tokens that are not padding.
            gen_tokens = int((outputs[0] != tokenizer.pad_token_id).sum().item())

            # One JSON line per example: token counts and generation latency.
            f_log.write(json.dumps({
                "index": i,
                "input_tokens": input_tokens,
                "gen_tokens": gen_tokens,
                "latency_sec": round(latency, 4),
            }, ensure_ascii=False) + "\n")

            pred_rows.append({"index": i, "prediction": decoded})

    pd.DataFrame(pred_rows).to_csv(pred_path, index=False, encoding="utf-8")
    print(f"Done {tag} run {run_id}: wrote {len(pred_rows)} rows → {pred_path}")


⏳ Loading model...


config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]



model.safetensors:   0%|          | 0.00/11.4G [00:00<?, ?B/s]

✅ Model ready.
▶️ Running test400, round 1 ...


2025-10-02 13:04:30.493682: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1759410270.885328      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1759410270.995466      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


✅ Done test400 run 1: wrote 400 rows → ./t5_infer/pred_test400_run1.csv
▶️ Running test600, round 1 ...
✅ Done test600 run 1: wrote 600 rows → ./t5_infer/pred_test600_run1.csv
▶️ Running test400, round 2 ...
✅ Done test400 run 2: wrote 400 rows → ./t5_infer/pred_test400_run2.csv
▶️ Running test600, round 2 ...
✅ Done test600 run 2: wrote 600 rows → ./t5_infer/pred_test600_run2.csv


# Pegasus xsum

In [3]:

import os, time, json
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# ========= CONFIG =========
# NOTE(review): the checkpoint is pegasus-large while the output names below say
# "xsum" — confirm which one is intended.
MODEL_NAME = "google/pegasus-large"

# Input: evaluation CSV and the column holding the text to summarize.
INPUT_CSV = "/kaggle/input/project-multi/test_random_400-2.csv"
INPUT_COL = "description_html_clean"

# Output: directory plus the file names for predictions and the per-row log.
OUTPUT_DIR = "./pegasus_xsum_infer"
PRED_CSV = "pegasus_xsum_predictions.csv"
INFER_LOG = "inference_token_time.jsonl"

USE_APP_PROMPT = False  # when True, inputs are wrapped with build_prompt_for_app()
MAX_SOURCE_LEN = 512    # tokenizer truncation length for the source text

# Keyword arguments forwarded to model.generate(): beam search without sampling.
GEN_KWARGS = {
    "max_new_tokens": 64,
    "num_beams": 4,
    "do_sample": False,
    "no_repeat_ngram_size": 3,
    "length_penalty": 1.0,
    "early_stopping": True,
}


def build_prompt_for_app(html_text: str) -> str:
    """Build the app-store-editor instruction prompt around one description.

    Args:
        html_text: The app description (HTML) to embed in the prompt.

    Returns:
        The instruction text, followed by the description and the expected
        "Short Description:" response format.
    """
    template = (
        "You are an expert app store editor. "
        "Given the following app description in HTML format, summarize it in 2-3 sentences, "
        "with a concise, engaging short description (max 80 characters) suitable for an app store listing. "
        "App Description HTML:\n{description}\n"
        "Format your response as:\n"
        "Short Description: <your short description>\n\n"
    )
    # Only the template is parsed for placeholders, so braces inside the
    # description are inserted verbatim.
    return template.format(description=html_text)


# ========= LOAD DATA =========
assert os.path.exists(INPUT_CSV), f"Missing: {INPUT_CSV}"
df = pd.read_csv(INPUT_CSV)
assert INPUT_COL in df.columns, f"Missing column: {INPUT_COL}"

os.makedirs(OUTPUT_DIR, exist_ok=True)
infer_log_path = os.path.join(OUTPUT_DIR, INFER_LOG)
pred_path      = os.path.join(OUTPUT_DIR, PRED_CSV)

# ========= LOAD MODEL =========
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
model.eval()
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# ========= INFERENCE =========
# Looked up once; may be None for tokenizers that define no padding token.
pad_token_id = tokenizer.pad_token_id

pred_rows = []
with open(infer_log_path, "w", encoding="utf-8") as f_log:
    for i, row in df.iterrows():
        raw_src = str(row[INPUT_COL])

        src_text = build_prompt_for_app(raw_src) if USE_APP_PROMPT else raw_src

        # Tokenize one example at a time, truncated to MAX_SOURCE_LEN tokens.
        inputs = tokenizer(
            src_text,
            return_tensors="pt",
            truncation=True,
            max_length=MAX_SOURCE_LEN,
        )
        inputs = {k: v.to(device) for k, v in inputs.items()}

        # Prefer the attention mask; otherwise count non-padding input ids.
        input_tokens = int(inputs["attention_mask"].sum().item()) if "attention_mask" in inputs else int((inputs["input_ids"] != tokenizer.pad_token_id).sum().item())

        # perf_counter() is monotonic and high-resolution, so the measured
        # interval cannot be distorted by system clock adjustments.
        t0 = time.perf_counter()
        with torch.no_grad():
            out = model.generate(**inputs, **GEN_KWARGS)
        latency = time.perf_counter() - t0

        text = tokenizer.decode(out[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)

        # Count generated tokens, excluding padding. The missing-pad-token case is
        # handled explicitly instead of relying on a broad `except Exception`.
        gen_ids = out[0]
        if pad_token_id is None:
            output_tokens = int(len(gen_ids))
        else:
            output_tokens = int((gen_ids != pad_token_id).sum().item())
        total_tokens = input_tokens + output_tokens
        tps = total_tokens / latency if latency > 0 else None

        pred_rows.append({"index": i, "prediction": text})

        # One JSON line per example: token counts, latency and throughput.
        f_log.write(json.dumps({
            "event": "inference",
            "row_index": int(i),
            "input_tokens": int(input_tokens),
            "output_tokens": int(output_tokens),
            "total_tokens": int(total_tokens),
            "latency_sec": float(latency),
            "tokens_per_sec": float(tps) if tps is not None else None,
            "gen_kwargs": GEN_KWARGS,
            "timestamp": time.time(),  # wall-clock time of the log entry
        }) + "\n")


pd.DataFrame(pred_rows).to_csv(pred_path, index=False, encoding="utf-8")
print("Saved predictions to:", pred_path)
print("Inference token/time logs:", infer_log_path)

tokenizer_config.json:   0%|          | 0.00/88.0 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]



pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-large and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


generation_config.json:   0%|          | 0.00/260 [00:00<?, ?B/s]

Saved predictions to: ./pegasus_xsum_infer/pegasus_xsum_predictions.csv
Inference token/time logs: ./pegasus_xsum_infer/inference_token_time.jsonl


In [7]:


import os, time, json, gc
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# ========= CONFIG =========
# Checkpoints to evaluate, keyed by the tag used for the output sub-directory.
# "google/pegasus-large" (tag "pegasus-large") is currently disabled.
MODELS = {"pegasus-xsum": "google/pegasus-xsum"}

# Evaluation CSVs, keyed by the short tag that also appears in output file names.
INPUT_FILES = dict(
    test400="/kaggle/input/project-multi/test_random_400-2.csv",
    test600="/kaggle/input/project-multi/test_random_600-2.csv",
)

# Order in which the datasets are run; a repeated tag means a repeated round.
RUN_SCHEDULE = ["test400"] * 2

INPUT_COL = "description_html_clean"        # CSV column fed to the model
OUTPUT_ROOT = "./pegasus_infer_schedule"    # one sub-directory per model tag goes here
MAX_SOURCE_LEN = 512                        # tokenizer truncation length

# Keyword arguments forwarded to model.generate(): beam search without sampling.
GEN_KWARGS = {
    "max_new_tokens": 64,
    "num_beams": 4,
    "do_sample": False,
    "no_repeat_ngram_size": 3,
    "length_penalty": 1.0,
    "early_stopping": True,
}

# When True, each input is wrapped with build_prompt_for_app() before tokenization.
USE_APP_PROMPT = False
def build_prompt_for_app(html_text: str) -> str:
    """Compose the app-store-editor prompt for a single app description.

    Args:
        html_text: The app description (HTML) to embed in the prompt.

    Returns:
        The instruction text, the embedded description, and the expected
        "Short Description:" response format, concatenated in that order.
    """
    segments = [
        "You are an expert app store editor. ",
        "Given the following app description in HTML format, summarize it in 2-3 sentences, ",
        "with a concise, engaging short description (max 80 characters) suitable for an app store listing. ",
        f"App Description HTML:\n{html_text}\n",
        "Format your response as:\n",
        "Short Description: <your short description>\n\n",
    ]
    return "".join(segments)


os.makedirs(OUTPUT_ROOT, exist_ok=True)
device = "cuda" if torch.cuda.is_available() else "cpu"

# Each model is loaded once, run over the whole schedule, then released.
for model_tag, model_name in MODELS.items():
    print(f"\n==============================")
    print(f" Loading model: {model_name} ({model_tag})")
    print(f"==============================")

    OUT_DIR = os.path.join(OUTPUT_ROOT, model_tag)
    os.makedirs(OUT_DIR, exist_ok=True)

    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    model.eval().to(device)
    print("Model ready.")

    # Looked up once per model; may be None for tokenizers without a pad token.
    pad_token_id = tokenizer.pad_token_id

    # One round counter per dataset tag, derived from INPUT_FILES so a dataset
    # added to the config does not raise KeyError here.
    run_counters = {k: 0 for k in INPUT_FILES}

    for tag in RUN_SCHEDULE:
        run_counters[tag] += 1
        run_id = run_counters[tag]

        file_path = INPUT_FILES[tag]
        assert os.path.exists(file_path), f"Missing: {file_path}"
        df = pd.read_csv(file_path)
        assert INPUT_COL in df.columns, f"Missing column: {INPUT_COL}"

        print(f"▶️ [{model_tag}] Running {tag}, round {run_id} ...")
        pred_rows = []
        infer_log_path = os.path.join(OUT_DIR, f"log_{tag}_run{run_id}.jsonl")
        pred_path      = os.path.join(OUT_DIR, f"pred_{tag}_run{run_id}.csv")

        with open(infer_log_path, "w", encoding="utf-8") as f_log:
            for i, row in df.iterrows():
                raw_src = str(row[INPUT_COL])
                src_text = build_prompt_for_app(raw_src) if USE_APP_PROMPT else raw_src

                # Tokenize one example at a time, truncated to MAX_SOURCE_LEN tokens.
                inputs = tokenizer(
                    src_text,
                    return_tensors="pt",
                    truncation=True,
                    max_length=MAX_SOURCE_LEN,
                )
                inputs = {k: v.to(device) for k, v in inputs.items()}
                input_tokens = int(inputs["attention_mask"].sum().item())

                # perf_counter() is monotonic and high-resolution, so the measured
                # interval cannot be distorted by system clock adjustments.
                t0 = time.perf_counter()
                with torch.no_grad():
                    out = model.generate(**inputs, **GEN_KWARGS)
                latency = time.perf_counter() - t0

                text = tokenizer.decode(out[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)

                # Count generated tokens, excluding padding. The missing-pad-token
                # case is handled explicitly instead of via a broad `except Exception`.
                gen_ids = out[0]
                if pad_token_id is None:
                    output_tokens = int(len(gen_ids))
                else:
                    output_tokens = int((gen_ids != pad_token_id).sum().item())
                total_tokens = input_tokens + output_tokens
                tps = total_tokens / latency if latency > 0 else None

                pred_rows.append({"index": i, "prediction": text})

                # One JSON line per example: token counts, latency and throughput.
                f_log.write(json.dumps({
                    "event": "inference",
                    "row_index": int(i),
                    "input_tokens": input_tokens,
                    "output_tokens": output_tokens,
                    "total_tokens": total_tokens,
                    "latency_sec": float(latency),
                    "tokens_per_sec": float(tps) if tps is not None else None,
                    "gen_kwargs": GEN_KWARGS,
                    "timestamp": time.time(),  # wall-clock time of the log entry
                }) + "\n")

        pd.DataFrame(pred_rows).to_csv(pred_path, index=False, encoding="utf-8")
        print(f"[{model_tag}] Done {tag} run {run_id}: {len(pred_rows)} rows → {pred_path}")

    # Drop the references and clear the CUDA cache before loading the next model.
    del model
    del tokenizer
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()


 Loading model: google/pegasus-xsum (pegasus-xsum)


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


✅ Model ready.
▶️ [pegasus-xsum] Running test400, round 1 ...
✅ [pegasus-xsum] Done test400 run 1: 400 rows → ./pegasus_infer_schedule/pegasus-xsum/pred_test400_run1.csv
▶️ [pegasus-xsum] Running test400, round 2 ...
✅ [pegasus-xsum] Done test400 run 2: 400 rows → ./pegasus_infer_schedule/pegasus-xsum/pred_test400_run2.csv


In [11]:
# Recursively (-r) and quietly (-q) archive /kaggle/working/ into pegasus_xsum_infer.zip.
# NOTE(review): the archive is named after the Pegasus run, but the source path is the
# whole working directory — confirm whether only ./pegasus_xsum_infer was intended.
!zip -r -q /kaggle/working//pegasus_xsum_infer.zip /kaggle/working/

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


# BART

In [5]:

import os, time, csv
import torch
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# ----------- CONFIG -----------
TEST_CSV = "/kaggle/input/project-multi/test_random_400-2.csv"  # evaluation set
MODEL_NAME = "facebook/bart-large"                              # base checkpoint, used zero-shot
PRED_CSV = "bart_predictions_zero_shot.csv"                     # input frame plus predictions
INFER_LOG_CSV = "inference_logs_zero_shot.csv"                  # per-row token counts and latency
MAX_SOURCE_LEN = 1000                                           # tokenizer truncation length

# Keyword arguments forwarded to model.generate(): beam search without sampling.
GEN_KW = {
    "max_new_tokens": 64,
    "num_beams": 4,
    "no_repeat_ngram_size": 3,
    "length_penalty": 2.0,
    "early_stopping": True,
    "do_sample": False,
}


# Fail fast if the evaluation CSV is missing, then load it.
assert os.path.exists(TEST_CSV), f"Missing: {TEST_CSV}"
df = pd.read_csv(TEST_CSV)

# Use the first description column that exists, preferring the cleaned one.
TEST_INPUT_COL = next(
    (name for name in ("description_html_clean", "description_html") if name in df.columns),
    None,
)
assert TEST_INPUT_COL is not None, "Test CSV cần 'description_html_clean' hoặc 'description_html'"

print("Loading base model...")
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
model.to(device)
model.eval()
print("Model ready on", device)


# Start a fresh per-sample log containing only the header row.
log_header = ["index","input_tokens","gen_tokens","latency_sec"]
with open(INFER_LOG_CSV, "w", newline="", encoding="utf-8") as log_file:
    csv.writer(log_file).writerow(log_header)

@torch.inference_mode()
def infer_one(html: str, idx: int) -> str:
    """Generate a summary for one description and append its stats to the log CSV.

    Args:
        html: Source text to summarize; it is coerced to ``str`` and truncated
            to ``MAX_SOURCE_LEN`` tokens by the tokenizer.
        idx: Row index written to the first column of the log line.

    Returns:
        The decoded, stripped generation with special tokens removed.

    Side effects:
        Appends one row ``[idx, input_tokens, gen_tokens, latency_sec]`` to
        ``INFER_LOG_CSV``.
    """
    src = str(html)
    enc = tokenizer(
        src,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_SOURCE_LEN,
    )
    enc = {k: v.to(device) for k, v in enc.items()}

    # perf_counter() is monotonic and high-resolution, so the measured
    # interval cannot be distorted by system clock adjustments.
    t0 = time.perf_counter()
    out = model.generate(**enc, **GEN_KW)
    dur = time.perf_counter() - t0

    # log
    input_tokens = int(enc["attention_mask"].sum().item())
    # For an encoder-decoder model generate() returns only the decoder tokens,
    # so the source length must not be subtracted (that produced meaningless,
    # often negative counts). Count the non-padding output tokens instead.
    gen_tokens = int((out[0] != tokenizer.pad_token_id).sum().item())
    with open(INFER_LOG_CSV, "a", newline="", encoding="utf-8") as f:
        csv.writer(f).writerow([idx, input_tokens, gen_tokens, f"{dur:.4f}"])

    text = tokenizer.decode(out[0], skip_special_tokens=True, clean_up_tokenization_spaces=True).strip()
    return text


# Summarize every row in order; the row position doubles as the log index.
source_texts = df[TEST_INPUT_COL].astype(str).tolist()
preds = []
preds.extend(infer_one(text, position) for position, text in enumerate(source_texts))

# Store the predictions next to the inputs and write the combined frame out.
df["bart_pred"] = preds
df.to_csv(PRED_CSV, index=False, encoding="utf-8")
print(f"Done. Saved predictions to: {PRED_CSV}")
print(f"Logs at: {INFER_LOG_CSV}")

⏳ Loading base model...




✅ Model ready on cuda
🎯 Done. Saved predictions to: bart_predictions_zero_shot.csv
🗒  Logs at: inference_logs_zero_shot.csv


In [3]:

import os, time, csv
import torch
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# ===== CONFIG =====
# Evaluation CSVs, keyed by the short tag that also appears in output file names.
INPUT_FILES = dict(
    test400="/kaggle/input/project-multi/test_random_400-2.csv",
    test600="/kaggle/input/project-multi/test_random_600-2.csv",
)

# Interleaved schedule: each dataset is run twice, alternating between them.
RUN_SCHEDULE = ["test400", "test600"] * 2

# Checkpoints to evaluate, keyed by the tag used as the output file prefix.
# NOTE(review): the tag "bart-base" labels the facebook/bart-large checkpoint —
# confirm the tag before relying on the output file names.
# "facebook/bart-large-xsum" (tag "bart-xsum") is currently disabled.
MODELS = {"bart-base": "facebook/bart-large"}

MAX_SOURCE_LEN = 1000  # tokenizer truncation length for the source text

# Keyword arguments forwarded to model.generate(): beam search without sampling.
GEN_KW = {
    "max_new_tokens": 64,
    "num_beams": 4,
    "no_repeat_ngram_size": 3,
    "length_penalty": 2.0,
    "early_stopping": True,
    "do_sample": False,
}

# Input columns to look for, in order of preference.
INPUT_COL_CANDIDATES = ["description_html_clean", "description_html"]

def run_one_round(model_tag, model_name, test_tag, run_id):
    """Run one zero-shot inference round of a model over one test file.

    Loads the CSV registered under ``test_tag`` in ``INPUT_FILES``, loads the
    tokenizer and model for ``model_name``, generates one summary per row and
    writes two files to the current working directory:

    * ``{model_tag}_{test_tag}_run{run_id}_pred.csv`` — the input frame plus a
      ``{model_tag}_pred`` column with the generated text;
    * ``{model_tag}_{test_tag}_run{run_id}_log.csv`` — per-row input tokens,
      generated tokens and latency.

    Args:
        model_tag: Short label used in the output file names and the
            prediction column name.
        model_name: Checkpoint identifier passed to ``from_pretrained``.
        test_tag: Key into ``INPUT_FILES`` selecting the test CSV.
        run_id: Round number for this dataset, used in the output file names.

    Raises:
        AssertionError: If the test file does not exist or contains none of
            ``INPUT_COL_CANDIDATES``.

    Note:
        The tokenizer and model are loaded again on every call.
    """
    test_file = INPUT_FILES[test_tag]
    assert os.path.exists(test_file), f"Missing: {test_file}"
    df = pd.read_csv(test_file)

    # Use the first candidate column that is present in this CSV.
    test_input_col = None
    for c in INPUT_COL_CANDIDATES:
        if c in df.columns:
            test_input_col = c
            break
    assert test_input_col is not None, f"CSV {test_file} cần 1 trong các cột {INPUT_COL_CANDIDATES}"

    pred_csv = f"{model_tag}_{test_tag}_run{run_id}_pred.csv"
    log_csv  = f"{model_tag}_{test_tag}_run{run_id}_log.csv"

    print(f"\n Loading {model_name} | {test_tag} (run {run_id})")
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    model.eval()
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    print(f"Model ready on {device}")

    # Start a fresh log with only the header row; rows are appended per example.
    with open(log_csv, "w", newline="", encoding="utf-8") as f:
        csv.writer(f).writerow(["index", "input_tokens", "gen_tokens", "latency_sec"])

    @torch.inference_mode()
    def infer_one(text: str, idx: int) -> str:
        """Summarize one text, append its stats to the log, return the summary."""
        enc = tokenizer(
            str(text),
            return_tensors="pt",
            truncation=True,
            max_length=MAX_SOURCE_LEN,
        )
        enc = {k: v.to(device) for k, v in enc.items()}

        # perf_counter() is monotonic and high-resolution, so the measured
        # interval cannot be distorted by system clock adjustments.
        t0 = time.perf_counter()
        out = model.generate(**enc, **GEN_KW)
        dur = time.perf_counter() - t0

        # input tokens
        input_tokens = int(enc["attention_mask"].sum().item())

        # output tokens (for seq2seq models generate() returns only the decoder output)
        gen_tokens = int((out[0] != tokenizer.pad_token_id).sum().item())

        with open(log_csv, "a", newline="", encoding="utf-8") as f:
            csv.writer(f).writerow([idx, input_tokens, gen_tokens, f"{dur:.4f}"])

        return tokenizer.decode(out[0], skip_special_tokens=True, clean_up_tokenization_spaces=True).strip()

    preds = []
    for i, src in enumerate(df[test_input_col].astype(str).tolist()):
        preds.append(infer_one(src, i))

    df[f"{model_tag}_pred"] = preds
    df.to_csv(pred_csv, index=False, encoding="utf-8")
    print(f"Saved predictions → {pred_csv}")
    print(f"Logs → {log_csv}")

# ===== MAIN (interleaved schedule) =====
if __name__ == "__main__":
    for model_tag, model_name in MODELS.items():
        # One round counter per dataset tag, derived from INPUT_FILES so a
        # dataset added to the config does not raise KeyError here.
        counters = {tag: 0 for tag in INPUT_FILES}
        for test_tag in RUN_SCHEDULE:
            counters[test_tag] += 1
            run_one_round(model_tag, model_name, test_tag, counters[test_tag])


⏳ Loading facebook/bart-large | test400 (run 1)


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]



pytorch_model.bin:   0%|          | 0.00/1.02G [00:00<?, ?B/s]

✅ Model ready on cuda
🎯 Saved predictions → bart-base_test400_run1_pred.csv
🗒  Logs → bart-base_test400_run1_log.csv

⏳ Loading facebook/bart-large | test600 (run 1)




✅ Model ready on cuda
🎯 Saved predictions → bart-base_test600_run1_pred.csv
🗒  Logs → bart-base_test600_run1_log.csv

⏳ Loading facebook/bart-large | test400 (run 2)




✅ Model ready on cuda
🎯 Saved predictions → bart-base_test400_run2_pred.csv
🗒  Logs → bart-base_test400_run2_log.csv

⏳ Loading facebook/bart-large | test600 (run 2)




✅ Model ready on cuda
🎯 Saved predictions → bart-base_test600_run2_pred.csv
🗒  Logs → bart-base_test600_run2_log.csv
