In [None]:
!pip install -q transformers datasets peft accelerate

from google.colab import files
uploaded = files.upload()

import json
with open("commandline_qa.json") as f:
    data = json.load(f)

texts = [f"Q: {qa['question']}\nA: {qa['answer']}" for qa in data]
print(f"Loaded {len(texts)} Q&A pairs")

from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig, get_peft_model, TaskType

model_name = "EleutherAI/gpt-neo-125M"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

from datasets import Dataset
dataset = Dataset.from_dict({"text": texts})

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)

tokenized_dataset = dataset.map(tokenize_function, batched=True)

def add_labels(examples):
    examples["labels"] = examples["input_ids"].copy()
    return examples

tokenized_dataset = tokenized_dataset.map(add_labels, batched=True)

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="./lora-gptneo",
    per_device_train_batch_size=2,
    num_train_epochs=1,
    logging_steps=10,
    save_steps=100,
    save_total_limit=1,
    learning_rate=3e-4,
    report_to=[],
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset
)

trainer.train()

trainer.save_model("./lora-gptneo")
print("✅ LoRA adapter saved to ./lora-gptneo")



Saving commandline_qa.json to commandline_qa (6).json
Loaded 180 Q&A pairs


Map:   0%|          | 0/180 [00:00<?, ? examples/s]

Map:   0%|          | 0/180 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


trainable params: 589,824 || all params: 125,788,416 || trainable%: 0.4689


Step,Training Loss
10,5.7228
20,4.2632
30,3.1692
40,1.6862
50,1.5039
60,1.9746
70,1.3211
80,1.7022
90,1.391


✅ LoRA adapter saved to ./lora-gptneo


In [None]:
!ls -l ./lora-gptneo



total 2336
-rw-r--r-- 1 root root     779 Jun 17 12:58 adapter_config.json
-rw-r--r-- 1 root root 2365872 Jun 17 12:58 adapter_model.safetensors
drwxr-xr-x 2 root root    4096 Jun 17 12:58 checkpoint-90
-rw-r--r-- 1 root root    5097 Jun 17 12:58 README.md
-rw-r--r-- 1 root root    5240 Jun 17 12:58 training_args.bin


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
base_model_name = "EleutherAI/gpt-neo-125M"
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
base_model = AutoModelForCausalLM.from_pretrained(base_model_name)
model = PeftModel.from_pretrained(base_model, "./lora-gptneo")
model.eval()


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): GPTNeoForCausalLM(
      (transformer): GPTNeoModel(
        (wte): Embedding(50257, 768)
        (wpe): Embedding(2048, 768)
        (drop): Dropout(p=0.0, inplace=False)
        (h): ModuleList(
          (0-11): 12 x GPTNeoBlock(
            (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (attn): GPTNeoAttention(
              (attention): GPTNeoSelfAttention(
                (attn_dropout): Dropout(p=0.0, inplace=False)
                (resid_dropout): Dropout(p=0.0, inplace=False)
                (k_proj): Linear(in_features=768, out_features=768, bias=False)
                (v_proj): lora.Linear(
                  (base_layer): Linear(in_features=768, out_features=768, bias=False)
                  (lora_dropout): ModuleDict(
                    (default): Dropout(p=0.05, inplace=False)
                  )
                  (lora_A): ModuleDict(
                    (default): Linear(in_feature

In [None]:
import json
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

def is_shell_command(line):
    shell_cmd_starts = (
        "cd ", "ls", "git ", "mkdir", "rm", "echo", "touch",
        "python", "./", "sudo", "cat ", "cp ", "mv ", "pwd"
    )
    return line.strip().startswith(shell_cmd_starts)

def main():
    instruction = "Create a new Git branch and switch to it"
    base_model_name = "EleutherAI/gpt-neo-125M"
    print(f"Loading base model '{base_model_name}' and tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    base_model = AutoModelForCausalLM.from_pretrained(base_model_name)
    print("Loading LoRA adapter...")
    model = PeftModel.from_pretrained(base_model, "./lora-gptneo")
    model.eval()
    prompt = f"Instruction: {instruction}\nSteps:"

    inputs = tokenizer(prompt, return_tensors="pt")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id,
        )

    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    steps_text = generated_text.split("Steps:")[-1].strip()
    steps = [line.strip() for line in steps_text.split("\n") if line.strip()]

    os.makedirs("logs", exist_ok=True)
    log_path = "logs/trace.jsonl"

    log_entries = []

    print("\nGenerated steps:")
    for idx, step in enumerate(steps, start=1):
        dry_run = False
        if idx == 1 and is_shell_command(step):
            dry_run = True
            print(f"(Dry-run) Shell command: {step}")
        else:
            print(f"Step {idx}: {step}")

        log_entries.append({"step": idx, "text": step, "dry_run": dry_run})
    with open(log_path, "a") as f:
        for entry in log_entries:
            f.write(json.dumps(entry) + "\n")

if __name__ == "__main__":
    main()



Loading base model 'EleutherAI/gpt-neo-125M' and tokenizer...
Loading LoRA adapter...

Generated steps:
Step 1: 1. Create a new Git branch
Step 2: 2. Select the Git branch you want to create
Step 3: 3. Select the Git branch you want to create


In [None]:
import os

if os.path.exists("./lora-gptneo"):
    print("LoRA adapter folder './lora-gptneo' found!")
    print("Files:", os.listdir("./lora-gptneo"))
else:
    print("LoRA adapter folder './lora-gptneo' NOT found. Please train and save the adapter first.")


LoRA adapter folder './lora-gptneo' found!
Files: ['README.md', 'checkpoint-90', 'adapter_model.safetensors', 'training_args.bin', 'adapter_config.json']


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
base_model_name = "EleutherAI/gpt-neo-125M"
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
base_model = AutoModelForCausalLM.from_pretrained(base_model_name)
model = PeftModel.from_pretrained(base_model, "./lora-gptneo")
model.eval()

instruction = "Create a new Git branch and switch to it"
prompt = f"Instruction: {instruction}\nSteps:"

inputs = tokenizer(prompt, return_tensors="pt")

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id,
    )

generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print("Generated output:\n")
print(generated_text)


Generated output:

Instruction: Create a new Git branch and switch to it
Steps:

1. Create a new Git branch
2. Select the Git branch you want to create
3. Select the Git branch you want to create



In [None]:
!python agent.py "Create a new Git branch and switch to it"


2025-06-17 13:26:40.435169: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1750166800.878780   17062 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1750166801.040408   17062 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
✅ Loading fine-tuned LoRA adapter...

📋 Generated Steps:
Step 1: 1. Create a new Git branch
Step 2: 2. Select the Git branch you want to create
Step 3: 3. Select the Git branch you want to create


In [None]:
eval_prompts = [
    "Create a new Git branch and switch to it.",
    "Compress the folder reports into reports.tar.gz.",
    "List all Python files in the current directory recursively.",
    "Set up a virtual environment and install requests.",
    "Fetch only the first ten lines of a file named output.log.",

    "Delete all .log files older than 7 days in /var/log.",
    "Rename all .jpeg files in the current directory to .jpg."
]


In [None]:
pip install rouge_score

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=f0c692c4d68db249344ec8a89cb93b0555053f78ca1d50a8aed06c26c1349834
  Stored in directory: /root/.cache/pip/wheels/1e/19/43/8a442dc83660ca25e163e1bd1f89919284ab0d0c1475475148
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch
from rouge_score import rouge_scorer
import os

base_model_name = "EleutherAI/gpt-neo-125M"
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
base_model = AutoModelForCausalLM.from_pretrained(base_model_name)

fine_tuned = PeftModel.from_pretrained(base_model, "./lora-gptneo")
fine_tuned.eval()

def format_prompt(instr):
    return f"Instruction: {instr}\nSteps:"

# Generate output
def generate(model, prompt):
    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=100,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id
        )
    return tokenizer.decode(output[0], skip_special_tokens=True).split("Steps:")[-1].strip()
scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
results = []

for i, prompt in enumerate(eval_prompts, 1):
    formatted = format_prompt(prompt)
    base_output = generate(base_model, formatted)
    tuned_output = generate(fine_tuned, formatted)
    score = scorer.score(base_output, tuned_output)['rougeL'].fmeasure
    results.append((prompt, base_output, tuned_output, score))
os.makedirs("logs", exist_ok=True)
with open("eval_static.md", "w") as f:
    f.write("# 📊 Static Evaluation: Base vs Fine-Tuned Outputs\n\n")
    for idx, (prompt, base_out, tuned_out, rougeL) in enumerate(results, 1):
        f.write(f"## Prompt {idx}: {prompt}\n\n")
        f.write(f"**Base Model Output:**\n```\n{base_out}\n```\n")
        f.write(f"**Fine-Tuned Output:**\n```\n{tuned_out}\n```\n")
        f.write(f"**ROUGE-L Score:** `{rougeL:.4f}`\n\n")
        f.write("---\n\n")

print("✅ eval_static.md saved.")


✅ eval_static.md saved.


In [None]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
import json
with open("commandline_qa.json") as f:
    data = json.load(f)

texts = [f"Q: {qa['question']}\nA: {qa['answer']}" for qa in data]
print(f"Loaded {len(texts)} Q&A pairs")

from datasets import Dataset
dataset = Dataset.from_dict({"text": texts})

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)

tokenized_dataset = dataset.map(tokenize_function, batched=True)

def add_labels(examples):
    examples["labels"] = examples["input_ids"].copy()
    return examples

tokenized_dataset = tokenized_dataset.map(add_labels, batched=True)




Loaded 180 Q&A pairs


Map:   0%|          | 0/180 [00:00<?, ? examples/s]

Map:   0%|          | 0/180 [00:00<?, ? examples/s]

In [None]:
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="./lora-gptneo",
    per_device_train_batch_size=2,
    num_train_epochs=1,
    logging_steps=10,
    save_steps=100,
    save_total_limit=1,
    learning_rate=3e-4,
    report_to=[],
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

trainer.train()
trainer.save_model("./lora-gptneo")
print("✅ LoRA adapter saved to ./lora-gptneo")


Step,Training Loss
10,3.7058
20,2.7554
30,2.2307
40,2.4304
50,1.7092
60,2.0978
70,1.2555
80,1.6868
90,1.2772


✅ LoRA adapter saved to ./lora-gptneo


In [None]:
prompts = [
    "Create a new Git branch and switch to it.",
    "Compress the folder reports into reports.tar.gz.",
    "List all Python files in the current directory recursively.",
    "Set up a virtual environment and install requests.",
    "Fetch only the first ten lines of a file named output.log.",

    "Your edge case 1",
    "Your edge case 2",
]

def generate_steps(model, tokenizer, prompt):
    input_text = f"Instruction: {prompt}\nSteps:\n"
    inputs = tokenizer(input_text, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=150,
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

print("Base Model outputs:")
for p in prompts:
    print(f"\nPrompt: {p}")
    print(generate_steps(base_model, tokenizer, p))

print("\nFine-tuned Model outputs:")
for p in prompts:
    print(f"\nPrompt: {p}")
    print(generate_steps(model, tokenizer, p))


Base Model outputs:

Prompt: Create a new Git branch and switch to it.
Instruction: Create a new Git branch and switch to it.
Steps:
1. Create a new Git branch.
2. Select the Git branch you want to create.
3. Select the Git branch you want to switch to.

Prompt: Compress the folder reports into reports.tar.gz.
Instruction: Compress the folder reports into reports.tar.gz.
Steps:


Prompt: List all Python files in the current directory recursively.
Instruction: List all Python files in the current directory recursively.
Steps:


Prompt: Set up a virtual environment and install requests.
Instruction: Set up a virtual environment and install requests.
Steps:

1. Install the virtual environment
2. Install the virtual environment
3. Install the virtual environment


Prompt: Fetch only the first ten lines of a file named output.log.
Instruction: Fetch only the first ten lines of a file named output.log.
Steps:


Prompt: Your edge case 1
Instruction: Your edge case 1
Steps:
1. Create a new edg

In [None]:
%%writefile agent.py
import json
import os
import sys
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

def is_shell_command(line):
    shell_cmd_starts = (
        "cd ", "ls", "git ", "mkdir", "rm", "echo", "touch",
        "python", "./", "sudo", "cat ", "cp ", "mv ", "pwd"
    )
    return line.strip().startswith(shell_cmd_starts)

def main():
    if len(sys.argv) < 2:
        print("❌ Usage: python agent.py \"<your instruction>\"")
        return

    instruction = sys.argv[1]

    base_model_name = "EleutherAI/gpt-neo-125M"
    print("⏳ Loading tokenizer and base model...")
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    base_model = AutoModelForCausalLM.from_pretrained(base_model_name)

    print("✅ Loading fine-tuned LoRA adapter...")
    model = PeftModel.from_pretrained(base_model, "./lora-gptneo")
    model.eval()

    # Few-shot style prompt with example to guide output format
    prompt = f"""
Instruction: Initialize a new Git repo and push code to GitHub
Steps:
1. git init
2. git add .
3. git commit -m "Initial commit"
4. git branch -M main
5. git remote add origin <repo_url>
6. git push -u origin main

Instruction: {instruction}
Steps:
"""

    inputs = tokenizer(prompt, return_tensors="pt")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            do_sample=True,
            top_p=0.9,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            early_stopping=True,
        )

    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Extract the steps after last "Steps:" occurrence
    steps_text = generated_text.split("Steps:")[-1].strip()
    # Filter and clean lines that look like numbered steps
    steps = []
    for line in steps_text.split("\n"):
        line = line.strip()
        # Accept lines that start with a number and dot like "1. git init"
        if line and (line[0].isdigit() and line[1] == "."):
            steps.append(line)

    if not steps:
        print("⚠️ No steps were generated.")
        return

    os.makedirs("logs", exist_ok=True)
    log_path = "logs/trace.jsonl"
    log_entries = []

    print("\n📋 Generated Steps:")
    for idx, step in enumerate(steps, start=1):
        dry_run = False
        # If first step looks like a shell command, echo it as dry-run
        # Remove numbering for checking shell command
        step_text = step.partition(" ")[2].strip() if " " in step else step
        if idx == 1 and is_shell_command(step_text):
            dry_run = True
            print(f"(Dry-run) {step_text}")
        else:
            print(f"Step {idx}: {step_text}")

        log_entries.append({
            "step": idx,
            "text": step_text,
            "dry_run": dry_run
        })

    with open(log_path, "a") as f:
        for entry in log_entries:
            f.write(json.dumps(entry) + "\n")

if __name__ == "__main__":
    main()


Overwriting agent.py


In [None]:
from google.colab import files
files.download('agent.py')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
model.save_pretrained("./lora-gptneo")


In [None]:
!zip -r lora-gptneo.zip lora-gptneo


  adding: lora-gptneo/ (stored 0%)
  adding: lora-gptneo/README.md (deflated 66%)
  adding: lora-gptneo/generation_config.json (deflated 24%)
  adding: lora-gptneo/checkpoint-90/ (stored 0%)
  adding: lora-gptneo/checkpoint-90/README.md (deflated 66%)
  adding: lora-gptneo/checkpoint-90/optimizer.pt (deflated 8%)
  adding: lora-gptneo/checkpoint-90/scheduler.pt (deflated 56%)
  adding: lora-gptneo/checkpoint-90/generation_config.json (deflated 24%)
  adding: lora-gptneo/checkpoint-90/trainer_state.json (deflated 71%)
  adding: lora-gptneo/checkpoint-90/model.safetensors (deflated 8%)
  adding: lora-gptneo/checkpoint-90/adapter_model.safetensors (deflated 7%)
  adding: lora-gptneo/checkpoint-90/config.json (deflated 59%)
  adding: lora-gptneo/checkpoint-90/training_args.bin (deflated 52%)
  adding: lora-gptneo/checkpoint-90/adapter_config.json (deflated 54%)
  adding: lora-gptneo/checkpoint-90/rng_state.pth (deflated 24%)
  adding: lora-gptneo/model.safetensors (deflated 8%)
  adding: l

In [None]:
from google.colab import files
files.download("lora-gptneo.zip")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
%%writefile evaluate_script.py
import json
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch
import evaluate
from tqdm import tqdm

# Your 7 Q&A pairs for testing
test_data = [
  {
    "question": "Create a new Git branch and switch to it.",
    "answer": "1. git branch <branch_name>\n2. git checkout <branch_name>"
  },
  {
    "question": "Compress the folder reports into reports.tar.gz.",
    "answer": "1. tar -czvf reports.tar.gz reports"
  },
  {
    "question": "List all Python files in the current directory recursively.",
    "answer": "1. find . -name '*.py'"
  },
  {
    "question": "Set up a virtual environment and install requests.",
    "answer": "1. python3 -m venv env\n2. source env/bin/activate\n3. pip install requests"
  },
  {
    "question": "Fetch only the first ten lines of a file named output.log.",
    "answer": "1. head -n 10 output.log"
  },
  {
    "question": "How to check the size of a directory including all its contents?",
    "answer": "1. du -sh <directory_name>"
  },
  {
    "question": "How to find and delete all .tmp files in the current directory and its subdirectories?",
    "answer": "1. find . -name '*.tmp' -type f -delete"
  }
]

# Load tokenizer and base model
print("⏳ Loading tokenizer and base model...")
base_model_name = "EleutherAI/gpt-neo-125M"
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
base_model = AutoModelForCausalLM.from_pretrained(base_model_name)

# Load fine-tuned LoRA adapter
print("✅ Loading fine-tuned LoRA adapter...")
lora_model = PeftModel.from_pretrained(base_model, "./lora-gptneo")
lora_model.eval()

# Load metrics
bleu = evaluate.load("bleu")
rouge = evaluate.load("rouge")

def generate_steps(model, prompt):
    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id,
        )
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    steps_text = text.split("Steps:")[-1].strip()
    return steps_text

print("\n🔍 Evaluating models on test data...\n")

results = []

for entry in tqdm(test_data):
    question = entry["question"]
    reference = entry["answer"]

    prompt = f"Instruction: {question}\nYou are an assistant that provides detailed step-by-step instructions.\nPlease provide numbered steps to complete the instruction.\nSteps:"

    # Generate outputs
    base_gen = generate_steps(base_model, prompt)
    lora_gen = generate_steps(lora_model, prompt)

    # Avoid BLEU crash on empty strings
    if not base_gen.strip():
        print(f"⚠️ Base model returned empty output for: {question}")
        base_gen = "No output generated."

    if not lora_gen.strip():
        print(f"⚠️ LoRA model returned empty output for: {question}")
        lora_gen = "No output generated."

    # Compute BLEU
    base_bleu = bleu.compute(predictions=[base_gen], references=[[reference]])["bleu"]
    lora_bleu = bleu.compute(predictions=[lora_gen], references=[[reference]])["bleu"]

    # Compute ROUGE-L (returns float directly)
    base_rouge = rouge.compute(predictions=[base_gen], references=[reference])["rougeL"]
    lora_rouge = rouge.compute(predictions=[lora_gen], references=[reference])["rougeL"]

    results.append({
        "question": question,
        "reference": reference,
        "base_gen": base_gen,
        "lora_gen": lora_gen,
        "base_bleu": base_bleu,
        "lora_bleu": lora_bleu,
        "base_rouge": base_rouge,
        "lora_rouge": lora_rouge,
    })

# Display results
for res in results:
    print(f"\nQuestion: {res['question']}\n")
    print(f"Reference:\n{res['reference']}\n")
    print(f"Base model output:\n{res['base_gen']}\nBLEU: {res['base_bleu']:.4f}  ROUGE-L: {res['base_rouge']:.4f}\n")
    print(f"LoRA model output:\n{res['lora_gen']}\nBLEU: {res['lora_bleu']:.4f}  ROUGE-L: {res['lora_rouge']:.4f}\n")

# Save to JSON for record keeping
with open("evaluation_results.json", "w") as f:
    json.dump(results, f, indent=2)

print("\n✅ Evaluation complete. Results saved to evaluation_results.json")


Overwriting evaluate_script.py


In [None]:
from google.colab import files
files.download('evaluate_script.py')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import rouge_score
import nltk
import absl
print("All packages imported successfully!")


All packages imported successfully!
