In [1]:
!pip install -q --no-cache-dir bitsandbytes==0.41.1
!pip install -q --no-cache-dir transformers peft datasets accelerate sacrebleu rouge-score rich

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m156.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import os
import json
import torch
import logging
import pandas as pd
from pathlib import Path
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model
from rouge_score import rouge_scorer
from sacrebleu import corpus_bleu
from rich.console import Console
from rich.table import Table
from datetime import datetime
from datasets import load_dataset, Dataset
import numpy as np

console = Console()

In [3]:
# Mount Google Drive (for saving model)
from google.colab import drive
drive.mount('/content/drive')

#Change dir
os.chdir("/content/drive/MyDrive/BerlinTask")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
DATA_FILE = "data/processed/qa_dataset.json"
OUTPUT_DIR = "/content/llama-ev-lora"
MAX_STEPS = 500                                      # demo training steps (can increase)
BATCH_SIZE = 4
LR = 2e-4
SEED = 42

In [5]:
logging.basicConfig(
    filename="pipeline.log",
    filemode="w",
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO
)
console.log(f"[bold green]Pipeline started at {datetime.now()}[/bold green]")

In [6]:
# Ensure dataset is uploaded
if not os.path.exists(DATA_FILE):
    console.log("[bold red]Please upload qa_dataset.json to Colab root![/bold red]")
else:
    console.log(f"[bold cyan]Dataset found: {DATA_FILE}[/bold cyan]")


In [7]:
with open(DATA_FILE, "r") as f:
    qa_data = json.load(f)
console.log(f"[bold yellow]Loaded {len(qa_data)} QA pairs[/bold yellow]")

df = pd.DataFrame(qa_data)
df.head()

Unnamed: 0,context,question,answer
0,"a charging station, also known as a charge poi...","What are the benefits of a charging station,?","a charging station, also known as a charge poi..."
1,"a charging station, also known as a charge poi...","What problem does a charging station, solve?","a charging station, also known as a charge poi..."
2,"a charging station, also known as a charge poi...","Who uses a charging station,?","a charging station, also known as a charge poi..."
3,"a charging station, also known as a charge poi...","Where can a charging station, be found?","a charging station, also known as a charge poi..."
4,"a charging station, also known as a charge poi...","When is a charging station, used?","a charging station, also known as a charge poi..."


In [8]:
assert os.path.exists(DATA_FILE), "Upload qa_dataset.json to Colab root"
console.log(f"[bold cyan]Dataset found: {DATA_FILE}[/bold cyan]")

with open(DATA_FILE, "r") as f:
    raw_dataset = json.load(f)
console.log(f"[bold yellow]Loaded {len(raw_dataset)} QA pairs[/bold yellow]")

In [9]:
import random
random.seed(SEED)

random.shuffle(raw_dataset)
split_idx = int(0.9 * len(raw_dataset))
train_data = raw_dataset[:split_idx]
val_data = raw_dataset[split_idx:]

train_dataset = Dataset.from_list(train_data)
val_dataset = Dataset.from_list(val_data)
console.log(f"[bold cyan]Train size: {len(train_data)} | Val size: {len(val_data)}[/bold cyan]")


In [10]:
!pip install -U bitsandbytes

Collecting bitsandbytes
  Using cached bitsandbytes-0.46.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Using cached bitsandbytes-0.46.1-py3-none-manylinux_2_24_x86_64.whl (72.9 MB)
Installing collected packages: bitsandbytes
  Attempting uninstall: bitsandbytes
    Found existing installation: bitsandbytes 0.41.1
    Uninstalling bitsandbytes-0.41.1:
      Successfully uninstalled bitsandbytes-0.41.1
Successfully installed bitsandbytes-0.46.1


In [11]:
!pip install -U bitsandbytes
!pip install -U transformers accelerate



In [12]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

In [13]:
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=quantization_config,
    device_map="auto"
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [14]:
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)
console.log("[green]LoRA adapter added to model[/green]")


In [15]:
from sklearn.metrics import precision_recall_fscore_support

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    # Convert to text
    preds_text = [tokenizer.decode(pred, skip_special_tokens=True) for pred in predictions]
    labels_text = [tokenizer.decode(label, skip_special_tokens=True) for label in labels]

    # Simple exact match metric
    exact_matches = sum(p.strip() == l.strip() for p, l in zip(preds_text, labels_text))
    accuracy = exact_matches / len(preds_text)

    # BLEU & ROUGE
    bleu = corpus_bleu(preds_text, [labels_text]).score
    scorer = rouge_scorer.RougeScorer(["rouge1", "rougeL"], use_stemmer=True)
    rouge1 = np.mean([scorer.score(r, p)["rouge1"].fmeasure for r, p in zip(labels_text, preds_text)])
    rougel = np.mean([scorer.score(r, p)["rougeL"].fmeasure for r, p in zip(labels_text, preds_text)])

    # Ensure equal length for precision/recall/f1
    min_len = min(len("".join(labels_text)), len("".join(preds_text)))
    labels_text_char = list("".join(labels_text))[:min_len]
    preds_text_char = list("".join(preds_text))[:min_len]


    # Precision/Recall/F1 at character-level (simplified)
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels_text_char,
        preds_text_char,
        average="macro",
        zero_division=0
    )
    return {
        "accuracy": accuracy,
        "bleu": bleu,
        "rouge1": rouge1,
        "rougeL": rougel,
        "precision": precision,
        "recall": recall,
        "f1": f1
    }

In [16]:
# Convert QA dataset into Hugging Face Dataset
with open(DATA_FILE, "r") as f:
    raw_dataset = json.load(f)

import datasets
dataset = datasets.Dataset.from_list(raw_dataset)

def preprocess_function(example):
    text = f"Question: {example['question']}\nAnswer: {example['answer']}"
    tokenized = tokenizer(text, truncation=True, padding="max_length", max_length=256)
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

tokenized_dataset = dataset.map(preprocess_function)
console.log("[green]Dataset tokenized successfully[/green]")

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [17]:
tokenized_dataset = tokenized_dataset.train_test_split(test_size=0.1, seed=SEED)
train_dataset = tokenized_dataset["train"]
val_dataset = tokenized_dataset["test"]

training_args = TrainingArguments(
    per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=4,
    num_train_epochs=2,
    learning_rate=LR,
    fp16=True,
    eval_strategy="epoch",
    save_strategy="no",
    logging_steps=10,
    output_dir=OUTPUT_DIR,
    report_to="none",
    remove_unused_columns=False
)

def collate_fn(batch):
    return {
        "input_ids": torch.tensor([x["input_ids"] for x in batch]),
        "attention_mask": torch.tensor([x["attention_mask"] for x in batch]),
        "labels": torch.tensor([x["labels"] for x in batch]),
    }

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    data_collator=collate_fn,
    compute_metrics=compute_metrics
)

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [18]:
console.log("[yellow]Starting training with evaluation at each epoch...[/yellow]")
trainer.train()
console.log("[green]Training complete[/green]")


Epoch,Training Loss,Validation Loss,Accuracy,Bleu,Rouge1,Rougel,Precision,Recall,F1
1,0.1757,0.13392,0.0,95.506656,0.9702,0.969668,0.022173,0.022076,0.02212
2,0.0497,0.048989,0.0,97.494965,0.985414,0.985414,0.023014,0.023014,0.023013


In [None]:
# def collate_fn(batch):
#     return {
#         "input_ids": torch.tensor([x["input_ids"] for x in batch]),
#         "attention_mask": torch.tensor([x["attention_mask"] for x in batch]),
#         "labels": torch.tensor([x["labels"] for x in batch]),
#     }

# trainer = Trainer(
#     model=model,
#     args=training_args,
#     train_dataset=tokenized_dataset,
#     tokenizer=tokenizer,
#     data_collator=collate_fn
# )

# console.log("[yellow]Starting training...[/yellow]")
# trainer.train()
# console.log("[green]Training complete[/green]")


  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
10,0.398
20,0.1734
30,0.1143
40,0.0637
50,0.0387
60,0.0229
70,0.0151
80,0.014
90,0.0137
100,0.0136


In [19]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

ValueError: You have set `args.eval_strategy` to IntervalStrategy.EPOCH but you didn't pass an `eval_dataset` to `Trainer`. Either set `args.eval_strategy` to `no` or pass an `eval_dataset`. 

In [20]:
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
console.log(f"[green]LoRA adapter saved to {OUTPUT_DIR}[/green]")

# Copy to Drive
!cp -r {OUTPUT_DIR} /content/drive/MyDrive/llama-ev-lora
console.log("[bold cyan]Model copied to Google Drive[/bold cyan]")


In [21]:
# Take a few samples for evaluation
sample_questions = [x["question"] for x in raw_dataset[:10]]
true_answers = [x["answer"] for x in raw_dataset[:10]]

# Generate predictions
predictions = []
for q in sample_questions:
    inputs = tokenizer(f"Question: {q}", return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=50)
    pred = tokenizer.decode(output[0], skip_special_tokens=True)
    predictions.append(pred)

# ROUGE
scorer = rouge_scorer.RougeScorer(["rouge1", "rougeL"], use_stemmer=True)
rouge1 = [scorer.score(ref, pred)["rouge1"].fmeasure for ref, pred in zip(true_answers, predictions)]
rougel = [scorer.score(ref, pred)["rougeL"].fmeasure for ref, pred in zip(true_answers, predictions)]
bleu = corpus_bleu(predictions, [true_answers]).score

# Display metrics
table = Table(title="Evaluation Metrics")
table.add_column("Metric", justify="center")
table.add_column("Score", justify="center")
table.add_row("ROUGE-1", f"{sum(rouge1)/len(rouge1):.4f}")
table.add_row("ROUGE-L", f"{sum(rougel)/len(rougel):.4f}")
table.add_row("BLEU", f"{bleu:.2f}")
console.print(table)

In [22]:
predictions = trainer.predict(val_dataset)
metrics = predictions.metrics

table = Table(title="Final Evaluation Metrics")
for k, v in metrics.items():
    table.add_row(k, f"{v:.4f}" if isinstance(v, float) else str(v))
console.print(table)

In [23]:
import string
import re

def normalize_text(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)
    def white_space_fix(text):
        return " ".join(text.split())
    def remove_punc(text):
        return "".join(ch for ch in text if ch not in string.punctuation)
    return white_space_fix(remove_articles(remove_punc(s.lower())))

def squad_metrics(predictions, references):
    """Compute Exact Match and F1 like SQuAD."""
    exact_matches, f1s = [], []
    for pred, ref in zip(predictions, references):
        pred_tokens = normalize_text(pred).split()
        ref_tokens = normalize_text(ref).split()

        # Exact Match
        exact_matches.append(int(pred_tokens == ref_tokens))

        # F1
        common = set(pred_tokens) & set(ref_tokens)
        num_same = len(common)
        if len(pred_tokens) == 0 or len(ref_tokens) == 0:
            f1 = int(pred_tokens == ref_tokens)
        elif num_same == 0:
            f1 = 0
        else:
            precision = num_same / len(pred_tokens)
            recall = num_same / len(ref_tokens)
            f1 = (2 * precision * recall) / (precision + recall)
        f1s.append(f1)
    return 100 * sum(exact_matches) / len(exact_matches), 100 * sum(f1s) / len(f1s)

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    preds_text = [tokenizer.decode(p, skip_special_tokens=True) for p in predictions]
    labels_text = [tokenizer.decode(l, skip_special_tokens=True) for l in labels]

    # Compute metrics
    exact_match, f1 = squad_metrics(preds_text, labels_text)
    bleu = corpus_bleu(preds_text, [labels_text]).score
    scorer = rouge_scorer.RougeScorer(["rouge1", "rougeL"], use_stemmer=True)
    rouge1 = np.mean([scorer.score(r, p)["rouge1"].fmeasure for r, p in zip(labels_text, preds_text)])
    rougel = np.mean([scorer.score(r, p)["rougeL"].fmeasure for r, p in zip(labels_text, preds_text)])

    return {
        "exact_match": exact_match,
        "f1": f1,
        "bleu": bleu,
        "rouge1": rouge1,
        "rougeL": rougel
    }


In [24]:
predictions = trainer.predict(val_dataset)
metrics = predictions.metrics

table = Table(title="Final Evaluation Metrics (SQuAD-style)")
for k, v in metrics.items():
    table.add_row(k, f"{v:.2f}")
console.print(table)


In [25]:
def ask_question(question: str):
    inputs = tokenizer(f"Question: {question}", return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=50)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    console.print(f"[bold blue]Question:[/bold blue] {question}")
    console.print(f"[bold green]Answer:[/bold green] {answer}")

# Example usage
ask_question("Where are EV charging stations usually located?")


# Connect to google drive and change directory

In [2]:
# Mount Google Drive (for saving model)
from google.colab import drive
import os
drive.mount('/content/drive')

#Change dir
os.chdir("/content/drive/MyDrive/BerlinTask")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Install & Imports

In [3]:
!pip install -q -U bitsandbytes
!pip install -q transformers peft datasets accelerate bitsandbytes \
sacrebleu rouge-score scikit-learn rich

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/51.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.8/51.8 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 MB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.1/104.1 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m57.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m50.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m28.5 MB/s[0m eta [36m0

# Imports & Config

In [4]:
import os, json, re, string, torch, numpy as np
from datetime import datetime
from rich.console import Console
from rich.table import Table
from datasets import Dataset
from sklearn.metrics import precision_recall_fscore_support
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from peft import LoraConfig, get_peft_model
from rouge_score import rouge_scorer
from sacrebleu import corpus_bleu

console = Console()

# Config
BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
DATA_FILE = "data/processed/qa_dataset.json"
OUTPUT_DIR = "/content/llama-ev-lora"
MAX_STEPS = 500                                      # demo training steps (can increase)
BATCH_SIZE = 4
LR = 2e-4
SEED = 42
EPOCHS = 15

console.log(f"Pipeline started at {datetime.now()}")

# Dataset Upload & Split

In [5]:
assert os.path.exists(DATA_FILE), "Upload qa_dataset.json to Colab root"
with open(DATA_FILE) as f: qa_data = json.load(f)

console.log(f"Loaded {len(qa_data)} QA pairs")

# Split 90% train, 10% val
import random
random.seed(SEED)
random.shuffle(qa_data)
split = int(0.9 * len(qa_data))
train_data, val_data = qa_data[:split], qa_data[split:]
console.log(f"Train size: {len(train_data)}, Val size: {len(val_data)}")

train_dataset = Dataset.from_list(train_data)
val_dataset = Dataset.from_list(val_data)

# Tokenizer & Preprocessing

In [6]:
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token

def preprocess(example):
    text = f"Question: {example['question']}\nAnswer: {example['answer']}"
    enc = tokenizer(text, truncation=True, padding="max_length", max_length=256)
    enc["labels"] = enc["input_ids"].copy()
    return enc

train_dataset = train_dataset.map(preprocess)
val_dataset = val_dataset.map(preprocess)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

Map:   0%|          | 0/945 [00:00<?, ? examples/s]

Map:   0%|          | 0/105 [00:00<?, ? examples/s]

# Model + LoRA

In [9]:
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL,
                                             load_in_4bit=True,
                                             bnb_4bit_quant_type="nf4",
                                             device_map="auto")
lora_config = LoraConfig(r=16, lora_alpha=32, target_modules=["q_proj","v_proj"],
                         lora_dropout=0.05, bias="none", task_type="CAUSAL_LM")
model = get_peft_model(model, lora_config)
console.log("[green]LoRA adapter attached[/green]")

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

# SQuAD Metrics

In [10]:
def normalize_text(s):
    def remove_articles(text): return re.sub(r'\b(a|an|the)\b', ' ', text)
    def white_space_fix(text): return " ".join(text.split())
    def remove_punc(text): return "".join(ch for ch in text if ch not in string.punctuation)
    return white_space_fix(remove_articles(remove_punc(s.lower())))

In [11]:
def squad_metrics(predictions, references):
    exact_matches, f1s = [], []
    for pred, ref in zip(predictions, references):
        pred_tokens, ref_tokens = normalize_text(pred).split(), normalize_text(ref).split()
        exact_matches.append(int(pred_tokens == ref_tokens))
        common = set(pred_tokens) & set(ref_tokens)
        num_same = len(common)
        if len(pred_tokens) == 0 or len(ref_tokens) == 0:
            f1 = int(pred_tokens == ref_tokens)
        elif num_same == 0:
            f1 = 0
        else:
            precision, recall = num_same / len(pred_tokens), num_same / len(ref_tokens)
            f1 = (2 * precision * recall) / (precision + recall)
        f1s.append(f1)
    return 100 * sum(exact_matches) / len(exact_matches), 100 * sum(f1s) / len(f1s)

In [12]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    preds_text = [tokenizer.decode(p, skip_special_tokens=True) for p in predictions]
    labels_text = [tokenizer.decode(l, skip_special_tokens=True) for l in labels]
    exact_match, f1 = squad_metrics(preds_text, labels_text)
    bleu = corpus_bleu(preds_text, [labels_text]).score
    scorer = rouge_scorer.RougeScorer(["rouge1","rougeL"], use_stemmer=True)
    rouge1 = np.mean([scorer.score(r,p)["rouge1"].fmeasure for r,p in zip(labels_text,preds_text)])
    rougel = np.mean([scorer.score(r,p)["rougeL"].fmeasure for r,p in zip(labels_text,preds_text)])
    return {"exact_match":exact_match,"f1":f1,"bleu":bleu,"rouge1":rouge1,"rougeL":rougel}

# Training

In [None]:
from transformers import Trainer, TrainingArguments

tokenized_dataset = train_dataset.train_test_split(test_size=0.1, seed=SEED)
train_dataset = tokenized_dataset["train"]
val_dataset = tokenized_dataset["test"]

args = TrainingArguments(
    per_device_train_batch_size=BATCH_SIZE,
    num_train_epochs=EPOCHS,
    learning_rate=LR,
    eval_strategy="epoch",
    logging_steps=10,
    save_strategy="no",
    output_dir=OUTPUT_DIR,
    fp16=True,
    report_to="none"
)

def collate_fn(batch):
    return {"input_ids":torch.tensor([x["input_ids"] for x in batch]),
            "attention_mask":torch.tensor([x["attention_mask"] for x in batch]),
            "labels":torch.tensor([x["labels"] for x in batch])}

trainer = Trainer(model=model, args=args, train_dataset=train_dataset,
                  eval_dataset=val_dataset, tokenizer=tokenizer,
                  data_collator=collate_fn, compute_metrics=compute_metrics)

console.log("[yellow]Starting training...[/yellow]")
trainer.train()
console.log("[green]Training complete[/green]")

  trainer = Trainer(model=model, args=args, train_dataset=train_dataset,
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.




# Save Model

In [None]:
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
!cp -r {OUTPUT_DIR} /content/drive/MyDrive/llama-ev-lora
console.log("[cyan]Model saved to Google Drive[/cyan]")

# Final Evaluation

In [None]:
predictions = trainer.predict(val_dataset)
table = Table(title="Final Evaluation Metrics (SQuAD-style)")
for k, v in predictions.metrics.items():
    table.add_row(k, f"{v:.2f}")
console.print(table)


# Interactive Q&A

In [None]:
def ask_question(question: str):
    inputs = tokenizer(f"Question: {question}", return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=50)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    console.print(f"[bold blue]Q:[/bold blue] {question}")
    console.print(f"[bold green]A:[/bold green] {answer}")

ask_question("Where are public charging stations usually located?")