# Connect to google drive and change directory

In [4]:
# Mount Google Drive (for saving model)
from google.colab import drive
import os
drive.mount('/content/drive')

#Change dir
os.chdir("/content/drive/MyDrive/BerlinTask")

Mounted at /content/drive


# Install & Imports

In [5]:
!pip install -q -U bitsandbytes
!pip install -q transformers peft datasets accelerate bitsandbytes \
sacrebleu rouge-score scikit-learn rich

# Imports & Config

In [6]:
import os, json, re, string, torch, numpy as np
from datetime import datetime
from rich.console import Console
from rich.table import Table
from datasets import Dataset
from sklearn.metrics import precision_recall_fscore_support
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from peft import LoraConfig, get_peft_model
from rouge_score import rouge_scorer
from sacrebleu import corpus_bleu

console = Console()

# Config
BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
DATA_FILE = "data/processed/qa_dataset.json"
OUTPUT_DIR = "/content/llama-ev-lora"
MAX_STEPS = 500                                      # demo training steps (can increase)
BATCH_SIZE = 4
LR = 2e-4
SEED = 42
EPOCHS = 15

console.log(f"Pipeline started at {datetime.now()}")

# Dataset Upload & Split

In [7]:
assert os.path.exists(DATA_FILE), "Upload qa_dataset.json to Colab root"
with open(DATA_FILE) as f: qa_data = json.load(f)

console.log(f"Loaded {len(qa_data)} QA pairs")

# Split 90% train, 10% val
import random
random.seed(SEED)
random.shuffle(qa_data)
split = int(0.9 * len(qa_data))
train_data, val_data = qa_data[:split], qa_data[split:]
console.log(f"Train size: {len(train_data)}, Val size: {len(val_data)}")

train_dataset = Dataset.from_list(train_data)
val_dataset = Dataset.from_list(val_data)

# Tokenizer & Preprocessing

In [8]:
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token

def preprocess(example):
    text = f"Question: {example['question']}\nAnswer: {example['answer']}"
    enc = tokenizer(text, truncation=True, padding="max_length", max_length=256)
    enc["labels"] = enc["input_ids"].copy()
    return enc

train_dataset = train_dataset.map(preprocess)
val_dataset = val_dataset.map(preprocess)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

Map:   0%|          | 0/945 [00:00<?, ? examples/s]

Map:   0%|          | 0/105 [00:00<?, ? examples/s]

# Model + LoRA

In [9]:
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL,
                                             load_in_4bit=True,
                                             bnb_4bit_quant_type="nf4",
                                             device_map="auto")
lora_config = LoraConfig(r=16, lora_alpha=32, target_modules=["q_proj","v_proj"],
                         lora_dropout=0.05, bias="none", task_type="CAUSAL_LM")
model = get_peft_model(model, lora_config)
console.log("[green]LoRA adapter attached[/green]")

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

# SQuAD Metrics

In [10]:
def normalize_text(s):
    def remove_articles(text): return re.sub(r'\b(a|an|the)\b', ' ', text)
    def white_space_fix(text): return " ".join(text.split())
    def remove_punc(text): return "".join(ch for ch in text if ch not in string.punctuation)
    return white_space_fix(remove_articles(remove_punc(s.lower())))

In [11]:
def squad_metrics(predictions, references):
    exact_matches, f1s = [], []
    for pred, ref in zip(predictions, references):
        pred_tokens, ref_tokens = normalize_text(pred).split(), normalize_text(ref).split()
        exact_matches.append(int(pred_tokens == ref_tokens))
        common = set(pred_tokens) & set(ref_tokens)
        num_same = len(common)
        if len(pred_tokens) == 0 or len(ref_tokens) == 0:
            f1 = int(pred_tokens == ref_tokens)
        elif num_same == 0:
            f1 = 0
        else:
            precision, recall = num_same / len(pred_tokens), num_same / len(ref_tokens)
            f1 = (2 * precision * recall) / (precision + recall)
        f1s.append(f1)
    return 100 * sum(exact_matches) / len(exact_matches), 100 * sum(f1s) / len(f1s)

In [12]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    preds_text = [tokenizer.decode(p, skip_special_tokens=True) for p in predictions]
    labels_text = [tokenizer.decode(l, skip_special_tokens=True) for l in labels]
    exact_match, f1 = squad_metrics(preds_text, labels_text)
    bleu = corpus_bleu(preds_text, [labels_text]).score
    scorer = rouge_scorer.RougeScorer(["rouge1","rougeL"], use_stemmer=True)
    rouge1 = np.mean([scorer.score(r,p)["rouge1"].fmeasure for r,p in zip(labels_text,preds_text)])
    rougel = np.mean([scorer.score(r,p)["rougeL"].fmeasure for r,p in zip(labels_text,preds_text)])
    return {"exact_match":exact_match,"f1":f1,"bleu":bleu,"rouge1":rouge1,"rougeL":rougel}

# Training

In [13]:
from transformers import Trainer, TrainingArguments

tokenized_dataset = train_dataset.train_test_split(test_size=0.1, seed=SEED)
train_dataset = tokenized_dataset["train"]
val_dataset = tokenized_dataset["test"]

args = TrainingArguments(
    per_device_train_batch_size=BATCH_SIZE,
    num_train_epochs=EPOCHS,
    learning_rate=LR,
    eval_strategy="epoch",
    logging_steps=10,
    save_strategy="no",
    output_dir=OUTPUT_DIR,
    fp16=True,
    report_to="none"
)

def collate_fn(batch):
    return {"input_ids":torch.tensor([x["input_ids"] for x in batch]),
            "attention_mask":torch.tensor([x["attention_mask"] for x in batch]),
            "labels":torch.tensor([x["labels"] for x in batch])}

trainer = Trainer(model=model, args=args, train_dataset=train_dataset,
                  eval_dataset=val_dataset, tokenizer=tokenizer,
                  data_collator=collate_fn, compute_metrics=compute_metrics)

console.log("[yellow]Starting training...[/yellow]")
trainer.train()
console.log("[green]Training complete[/green]")

  trainer = Trainer(model=model, args=args, train_dataset=train_dataset,
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.




Epoch,Training Loss,Validation Loss,Exact Match,F1,Bleu,Rouge1,Rougel
1,0.0331,0.025842,0.0,62.646656,97.306437,0.968934,0.966554
2,0.018,0.021728,6.315789,62.304587,97.630318,0.970161,0.968801
3,0.0283,0.022478,0.0,62.516663,97.436348,0.969593,0.966858
4,0.0212,0.023776,0.0,62.787282,97.35273,0.969386,0.96846
5,0.0175,0.024863,6.315789,62.462847,97.48136,0.967761,0.965462
6,0.0134,0.028621,0.0,62.495597,97.482638,0.96874,0.966524
7,0.0127,0.025326,0.0,62.332827,97.499909,0.967596,0.963831
8,0.0141,0.027092,0.0,62.403649,97.528618,0.968514,0.96532
9,0.0136,0.02648,0.0,62.600986,97.450645,0.969614,0.967398
10,0.0138,0.026658,6.315789,62.547587,97.60629,0.969619,0.967834


# Save Model

In [14]:
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
!cp -r {OUTPUT_DIR} /content/drive/MyDrive/llama-ev-lora
console.log("[cyan]Model saved to Google Drive[/cyan]")

# Final Evaluation

In [15]:
predictions = trainer.predict(val_dataset)
table = Table(title="Final Evaluation Metrics (SQuAD-style)")
for k, v in predictions.metrics.items():
    table.add_row(k, f"{v:.2f}")
console.print(table)


# Interactive Q&A

In [16]:
def ask_question(question: str):
    inputs = tokenizer(f"Question: {question}", return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=50)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    console.print(f"[bold blue]Q:[/bold blue] {question}")
    console.print(f"[bold green]A:[/bold green] {answer}")

ask_question("Where are public charging stations usually located?")

In [17]:
from rich.console import Console
from rich.prompt import Prompt
from rich.spinner import Spinner
import time

In [18]:
def interactive_chat():
    console.print("[bold yellow]Welcome to EV QA Chat![/bold yellow]")
    console.print("[bold cyan]Type 'exit' to quit.[/bold cyan]\n")

    while True:
        question = Prompt.ask("[bold blue]You[/bold blue]")
        if question.lower() in ["exit", "quit"]:
            console.print("[bold green]Goodbye![/bold green]")
            break

        with console.status("[bold green]Thinking...[/bold green]", spinner="dots"):
            inputs = tokenizer(f"Question: {question}", return_tensors="pt").to(model.device)
            outputs = model.generate(**inputs, max_new_tokens=50)
            time.sleep(0.5)  # Simulate thinking delay
            answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

        console.print(f"[bold blue]Q:[/bold blue] {question}")
        console.print(f"[bold green]A:[/bold green] {answer}\n")

In [19]:
interactive_chat()

How long does it take to fully charge an electric vehicle?


Output()

What is the difference between Level 1, Level 2, and DC fast charging?


Output()

What are the benefits of installing a home charging station?


Output()

exit


In [20]:
interactive_chat()

Why do some EV chargers stop charging unexpectedly?


Output()

exit
