### NLPmaxxing traffic report generation demo

To run the demo, you need a test **dataset**.
- You may get the `.csv` dataset [here](https://unilj-my.sharepoint.com/personal/ms88481_student_uni-lj_si/_layouts/15/onedrive.aspx?id=%2Fpersonal%2Fms88481_student_uni-lj_si%2FDocuments%2FNLPmaxxing%2Fdata&CT=1745326758119&OR=OWA-NT-Mail&CID=0bd972ad-52fb-91b0-bff4-9b91bc82bd3e&e=5%3Adc36404f4a774518910dc9426d12c60d&sharingv2=true&fromShare=true&at=9&FolderCTID=0x012000FC22EB94776AE7409A90B15B2792ED95&noAuthRedirect=1).
- You should store that file in the `./data/` folder.
- You can use the `data-hf-single-in-out.ipynb` notebook to convert the CSV file to a Hugging Face dataset and save it in the `./data/hf/` folder.

---

Likewise, you need a **model**.
- You can get a model from [here](https://unilj-my.sharepoint.com/personal/ms88481_student_uni-lj_si/_layouts/15/onedrive.aspx?id=%2Fpersonal%2Fms88481_student_uni-lj_si%2FDocuments%2FNLPmaxxing%2Fmodels&CT=1745326758119&OR=OWA-NT-Mail&CID=0bd972ad-52fb-91b0-bff4-9b91bc82bd3e&e=5%3Adc36404f4a774518910dc9426d12c60d&sharingv2=true&fromShare=true&at=9&FolderCTID=0x012000FC22EB94776AE7409A90B15B2792ED95&noAuthRedirect=1).
- Store the model files in the `./models/` folder.


In [1]:
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

from scripts_split.common_s import prepare_dataset, convert_to_gemma_chat_inference, convert_to_gemma_chat_inference_improve

from pathlib import Path
import random

In [2]:
# Load dataset
dataset = prepare_dataset("test", rm_columns=[])
data_list = dataset.to_list()
# De-duplicate the report ids while keeping their first-seen order in the
# dataset. Building the list from a set gave an arbitrary order (one that can
# differ between kernel restarts for string ids), so the same "Row #" did not
# reliably map to the same report.
unique_ids = list(dict.fromkeys(entry['output']['id'] for entry in data_list))

In [3]:
# Location of the merged fine-tuned model. Resolved relative to the working
# directory so it matches the setup notes at the top of this notebook (model
# files are stored in ./models/) rather than one developer's home directory.
ft_model_path = Path("models") / "nlpmaxxing-rtvslo-trfc-split-GaMS-2B-Instruct-v6-4096-MERGED"

# Device used for the tokenized inputs: CUDA first, then Apple MPS, then CPU.
device = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'

# 8-bit quantization. The 4-bit-only options (quant type / compute dtype) that
# used to be passed here do not apply in 8-bit mode and were dropped to avoid
# suggesting the model is loaded as NF4.
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
)

model = AutoModelForCausalLM.from_pretrained(
    ft_model_path,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    # Same policy as the tokenizer below: never fall back to the Hub.
    local_files_only=True,
)

# Inference only: switch off dropout and other training-time behavior.
model.eval()

tokenizer = AutoTokenizer.from_pretrained(ft_model_path, local_files_only=True)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
# HTML renderer
def format_display(input_text):
    """Build the styled "Input" panel for the demo.

    The returned HTML carries the shared stylesheet (input box, the
    target/generated boxes and the highlighted <b> labels) followed by a
    single container holding ``input_text``.

    Args:
        input_text: Text shown inside the input box. It is interpolated
            into the markup as-is (no HTML escaping is applied).

    Returns:
        An ``IPython.display.HTML`` object ready to be passed to ``display``.
    """
    # NOTE: the universal-selector rule used to read "overlofw-x", which is
    # not a CSS property, so browsers silently dropped it. It is spelled
    # correctly now; <style> in notebook output is not scoped to this cell.
    html = f"""
        <style>
            * {{overflow-x:hidden;}}
            .container {{
                display: flex;
                flex-direction: row;
                gap: 20px;
                margin-top: 10px;
            }}
            .input-box {{
                width: 100%;
                max-height: 300px;
                overflow-y: auto;
                overflow-x: hidden;
                white-space: pre-wrap;
                padding: 10px;
                background-color: #1e1e1e;
                color: #f8f8f2;
            }}
            .output-container {{
                display: flex;
                flex-direction: row;
                gap: 20px;
                margin-top: 10px;
            }}
            .target-box, .generated-box {{
                width: 50%;
                max-height: 400px;
                overflow: auto;
                white-space: pre-wrap;
                padding: 10px;
                background-color: #1e1e1e;
                color: #f8f8f2;
            }}
            b {{
                color: #ffd700;
            }}
        </style>
        <div class="container">
            <div class="input-box"><b>Input</b><hr>{input_text}</div>
        </div>
    """
    return HTML(html)

# Output renderer (reference text on the left, model output on the right)
def build_output_boxes(target, generated):
    """Return an HTML row with the target and the generated text side by side.

    Both values are interpolated into the markup unchanged. The CSS classes
    used here are defined by the stylesheet emitted from ``format_display``.
    """
    return HTML(f"""
        <div class="output-container">
            <div class="target-box"><b>Target</b><hr>{target}</div>
            <div class="generated-box"><b>Generated Output</b><hr>{generated}</div>
        </div>
    """)

In [5]:
# Interactive controls

# Numeric picker for the 1-based row number; upper bound is the number of ids
row_selector = widgets.BoundedIntText(
    description="Row #:",
    value=1,
    min=1,
    max=len(unique_ids),
    step=1,
    layout=widgets.Layout(width="200px", height="30px"),
)

# Button that starts text generation for the selected row
generate_button = widgets.Button(
    description="Generate",
    button_style="success",
    layout=widgets.Layout(width="200px", height="30px"),
)

# Button that jumps to a randomly chosen row
random_button = widgets.Button(
    description="Random",
    button_style="info",
    icon="redo",
    layout=widgets.Layout(width="120px", height="30px"),
)

# Controls laid out on a single line: selector, random, generate
inputs_inline = widgets.HBox(children=[row_selector, random_button, generate_button])

# Area the rendered input / target / generated panels are drawn into
output_area = widgets.Output()

In [6]:
# Row-selection callback
def show_row(change):
    """Load the inputs and target of the chosen row into ``selected_row``.

    Args:
        change: Mapping whose ``"new"`` entry is the 1-based row number
            (the shape of an ipywidgets change notification). The number is
            turned into an index into ``unique_ids`` by subtracting one.

    Side effects:
        Overwrites ``selected_row["input"]`` with every dataset entry that
        shares the selected id (ordered by ``input_index``), sets
        ``selected_row["target"]`` from the first of them, clears
        ``selected_row["generated"]`` and redraws the output area.
    """
    selected_id = unique_ids[change["new"] - 1]

    # Collect all entries belonging to this id, then order them in place
    matching_inputs = [
        item for item in data_list if item["output"]["id"] == selected_id
    ]
    matching_inputs.sort(key=lambda entry: entry["input_index"])

    selected_row["input"] = matching_inputs
    # The first entry supplies the reference text
    if matching_inputs:
        selected_row["target"] = matching_inputs[0]["target"].strip()
    else:
        selected_row["target"] = ""
    selected_row["generated"] = ""

    update_output("")
    
# "Generate" button handler
def on_generate_clicked(_):
    """Run generation for the current row and show the result.

    The click argument is ignored. A placeholder is rendered first, then the
    text returned by ``generate_output`` is stored in
    ``selected_row["generated"]`` and drawn in the output area.
    """
    # Placeholder while the model is working
    update_output("Loading ...")

    selected_row["generated"] = generate_output()

    # Redraw everything with the final text
    update_output(selected_row["generated"])

# Handle random selection
def on_random_clicked(_):
    """Jump to a uniformly random row and render it.

    The click argument is ignored. Row numbers are 1-based to match
    ``row_selector`` (min=1, max=len(unique_ids)); drawing from
    ``0..len-1`` let 0 through, which the widget clamps to 1 while
    ``show_row`` turned it into index -1 and displayed the last row.
    """
    if not unique_ids:
        # Nothing to pick from; randint(1, 0) would raise ValueError.
        return

    value = random.randint(1, len(unique_ids))
    row_selector.value = value
    # Called explicitly as well so the panel is refreshed even when the
    # random pick equals the value already shown in the selector.
    show_row({"new": value})

# Shared UI state, updated in place by the callbacks above
selected_row = dict(input="", target="", generated="")

# Attach the callbacks to their widgets
random_button.on_click(on_random_clicked)
generate_button.on_click(on_generate_clicked)
row_selector.observe(show_row, names='value')

In [7]:
def update_output(text):
    """Redraw the output area for the current row.

    Clears ``output_area`` and renders the input panel (all input texts of
    ``selected_row["input"]`` joined by newlines and stripped) followed by
    the target next to ``text``.

    Args:
        text: Content for the "Generated Output" box (a status message or
            the generated report).
    """
    with output_area:
        clear_output()

        input_texts = (entry["input"] for entry in selected_row["input"])
        merged_input = "\n".join(input_texts).strip()

        input_panel = format_display(merged_input)
        result_panel = build_output_boxes(selected_row["target"], text)
        display(input_panel)
        display(result_panel)

def clean_output(text):
    """Extract the model's reply from a decoded chat transcript.

    The text must contain the marker ``<start_of_turn>model`` exactly once;
    otherwise the string ``"BAD OUTPUT"`` is returned. Everything after the
    marker is kept, the report heading and the end-of-turn / end-of-sequence
    tokens are removed, and surrounding whitespace is stripped.
    """
    marker = "<start_of_turn>model"

    # Exactly one marker means the text splits into exactly two pieces
    if text.count(marker) != 1:
        return "BAD OUTPUT"

    _, _, reply = text.partition(marker)
    reply = reply.strip()

    for unwanted in ("## Traffic Report:", "<end_of_turn>", "<eos>"):
        reply = reply.replace(unwanted, "")

    return reply.strip()

def generate_output():
    """Generate a report for the selected row, refining it input by input.

    The inputs stored in ``selected_row["input"]`` are processed in order.
    The first one is converted with ``convert_to_gemma_chat_inference``;
    every later one goes through ``convert_to_gemma_chat_inference_improve``
    together with the cleaned output of the previous step. Progress is shown
    in the output area while the model runs.

    Returns:
        The cleaned text produced for the last input (``"BAD OUTPUT"`` if
        ``clean_output`` could not parse it), or ``""`` when the selected row
        has no inputs.
    """
    row_inputs = selected_row["input"]
    total = len(row_inputs)

    previous_output = ""
    # Pre-bound so an empty row returns "" instead of raising UnboundLocalError.
    output = ""

    for input_index, item in enumerate(row_inputs, start=1):
        # `total` is computed outside the f-string: reusing double quotes
        # inside an f-string expression is a SyntaxError before Python 3.12.
        update_output(f"Processing input {input_index}/{total} ...")

        sample = {
            "input": item['input'],
            "target": item['target'],
        }

        # Counting starts at 1, so the first input is index 1 (the previous
        # `== 0` check never matched and the plain prompt was never used).
        if input_index == 1:
            s = convert_to_gemma_chat_inference(sample)
        else:
            s = convert_to_gemma_chat_inference_improve(sample, previous_output)

        # NOTE(review): add_generation_prompt is left at False as before;
        # presumably the convert_* helpers already end the chat with the
        # model turn - confirm against scripts_split.common_s.
        prompt = tokenizer.apply_chat_template(
            s,
            tokenize=False,
            add_generation_prompt=False
        )

        inputs = tokenizer(prompt, return_tensors='pt').to(device)

        with torch.no_grad():
            generated_tokens = model.generate(
                **inputs,
                max_new_tokens=512
            )

        # Special tokens are kept because clean_output relies on the
        # "<start_of_turn>model" marker to locate the reply.
        output = tokenizer.decode(
            generated_tokens[0],
            skip_special_tokens=False
        )

        output = clean_output(output)

        # Feed this step's result into the next "improve" prompt.
        previous_output = output

    return output

In [8]:
# Show UI
display(widgets.VBox([inputs_inline, output_area]))
# Rows are 1-based (row_selector has min=1). Starting from 0 left the selector
# showing 1 while show_row indexed unique_ids[-1] and rendered the last row.
row_selector.value = 1
show_row({"new": row_selector.value})

VBox(children=(HBox(children=(BoundedIntText(value=1, description='Row #:', layout=Layout(height='30px', width…