# Install dependencies

In [1]:
# Use the %pip magic (instead of !pip) so packages are installed into the
# environment of the running kernel; -q keeps the cell output short.
%pip install -q trl
%pip install -q transformers datasets num2words peft
%pip install -q hf_xet
%pip install -q wandb
%pip install -q --upgrade openai
%pip install -q vllm

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To updat

In [2]:
# %pip installs into the running kernel's environment; -q keeps the output short.
%pip install -q rich

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


# Load dataset (Path-VQA subset)

In [3]:
from datasets import load_dataset
# Stream the Path-VQA train split and limit it to the first 1000 records.
ds_subset = load_dataset(
    "flaviagiammarino/path-vqa",
    split="train",
    streaming=True,
).take(1000)

# Data preprocessing and sample generation

In [None]:
from PIL import Image
import torch
import numpy as np
from io import BytesIO
import base64


def pil_to_data_uri(image, format='PNG'):
    """Serialize an image into a base64 ``data:`` URI string.

    The image is written to an in-memory buffer via ``image.save(buffer,
    format=format)``; the MIME subtype of the URI is ``format`` lower-cased.
    Kept for reference in case a data URI is needed elsewhere.
    """
    stream = BytesIO()
    image.save(stream, format=format)
    payload = base64.b64encode(stream.getvalue()).decode('utf-8')
    return f"data:image/{format.lower()};base64,{payload}"


def get_grpo_sample(example, image_size=(128, 128)):
    '''
    Build a single GRPO training sample from one dataset record.

    Input:
        example: dict providing the keys 'question', 'answer' and 'image'.
                 'image' must be a PIL image, or a non-empty list whose first
                 element is a PIL image (only that first image is used).
        image_size: (width, height) passed to the image's resize(); the
                    default of 128x128 keeps the vRAM burden low.
    Output: dict with
        'prompt':   chat messages (system turn + user turn holding the
                    resized image and the question with a format reminder),
        'image':    the resized RGB image,
        'solution': the reference answer taken from example['answer'].
    Raises: ValueError when example['image'] is neither a PIL image nor a
            list starting with one (this also covers a missing/None image).
    '''
    question_sample = example['question']
    question_sample += "\n\nPlease follow the exact format: <think>...</think> <answer>...</answer> \n\nOutput in the given format:"
    chosen_sample = example['answer']

    raw_image = example['image']
    if isinstance(raw_image, Image.Image):
        img_pil_original = raw_image
    elif isinstance(raw_image, list) and len(raw_image) > 0 and isinstance(raw_image[0], Image.Image):
        print(f"Warning: example['image'] was a list for idx {example.get('idx', 'Unknown ID')}. Using the first image.")
        img_pil_original = raw_image[0]
    else:
        # Anything else (including None) is rejected here, so no separate
        # None check is needed afterwards.
        raise ValueError(f"Unexpected image format for example idx: {example.get('idx', 'Unknown ID')}. Expected PIL Image, got {type(example['image'])}")

    # Normalise to 3-channel RGB before resizing.
    if img_pil_original.mode != 'RGB':
        img_pil_original = img_pil_original.convert('RGB')

    img_pil_resized = img_pil_original.resize(image_size)  # reduce vRAM burden

    # System Prompt for General Reasoning with Image
    SYSTEM_PROMPT_GRPO = r'''You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning.

Only answer in the following format:
<think>
Reasoning and thinking goes here.
</think>
<answer>
Actual answer goes here
</answer>

Please answer the following medical question based on the input image. Output the thinking process in <think> </think> and final answer in <answer> </answer> tags.The output format should be as follows: <think> ... </think> <answer>...</answer>
'''

    formatted_prompt = [
        {'role': 'system', 'content': [{"type": "text", "text": SYSTEM_PROMPT_GRPO}]},
        {'role': 'user', 'content': [
            {"type": "image", "image": img_pil_resized},
            {"type": "text", "text": question_sample},
        ]}
    ]

    return {
        'prompt': formatted_prompt,
        'image': img_pil_resized,
        'solution': chosen_sample,
    }


def dataset_gen_grpo(dataset_source):
    '''
    Lazily convert dataset records into GRPO samples.

    Input: dataset_source, an iterable of record dicts (e.g. 'ds_subset');
           None is reported and produces no samples.
    Output: yields one dict per record that get_grpo_sample() converts
            successfully. Records that raise are reported on stdout and
            skipped; a summary line is printed once the source is exhausted.
    '''
    if dataset_source is None:
        print("Error: dataset_source is None. Cannot generate GRPO samples.")
        return

    def _record_id(record):
        # Prefer 'idx', then 'id', else a fixed placeholder for the messages.
        return record.get('idx', record.get('id', 'Unknown ID (no idx/id)'))

    yielded = 0
    for record in dataset_source:
        try:
            yield get_grpo_sample(record)
            yielded += 1
        except ValueError as err:
            print(f"Skipping example {_record_id(record)} due to ValueError: {err}")
        except Exception as err:
            # Best-effort: any other failure is reported and the record skipped.
            print(f"An unexpected error occurred while processing item {_record_id(record)}: {err}")
    print(f"Finished generating GRPO samples. Total processed and yielded (if no errors): {yielded}")


# Create the sample generator and drain it into a list so that samples can be
# indexed and reused later in the notebook.
my_grpo_gen = dataset_gen_grpo(ds_subset)
all_grpo_samples = list(my_grpo_gen)

print("\n--- Finished collecting all GRPO samples ---")
print(f"Total GRPO samples generated: {len(all_grpo_samples)}")

Finished generating GRPO samples. Total processed and yielded (if no errors): 1000

--- Finished collecting all GRPO samples ---
Total GRPO samples generated: 1000


In [5]:
# Print the first generated sample to sanity-check its 'prompt', 'image' and 'solution' entries.
print(all_grpo_samples[0])

{'prompt': [{'role': 'system', 'content': [{'type': 'text', 'text': 'You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning.\n\nOnly answer in the following format:\n<think>\nReasoning and thinking goes here.\n</think>\n<answer>\nActual answer goes here\n</answer>\n\nPlease answer the following medical question based on the input image. Output the thinking process in <think> </think> and final answer in <answer> </answer> tags.The output format should be as follows: <think> ... </think> <answer>...</answer>\n'}]}, {'role': 'user', 'content': [{'type': 'image', 'image': <PIL.Image.Image image mode=RGB size=128x128 at 0x7014C9B6B050>}, {'type': 'text', 'text': 'where are liver stem cells (oval cells) located?\n\nPlease follow the exact format: <think>...</think> <answer>...</answer> \n\nOutput in the given format:'}]}], 'image': <PIL.Image.Image image mode=RGB size=128x128 at 0x7014C9B6B050>, 'solution': 'in the canals of hering'}


# Scoring and reward function (LLM as a judge)

In [None]:
import re
import os
from openai import OpenAI

# Client for the OpenRouter endpoint, used by the judge model calls below.
# The API key is read from the OPENROUTER_API_KEY environment variable so the
# real secret never has to be written into the notebook; the placeholder is
# kept as the fallback when the variable is not set.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.environ.get("OPENROUTER_API_KEY", "YOUR-API-KEY"),
)

# General call_model function
def call_model(messages: list[dict], model: str, temperature: float = 0.0, max_tokens: int = 4000) -> str:
    """Send a chat-completion request through the module-level ``client``.

    Args:
        messages: chat messages (dicts with 'role' and 'content').
        model: identifier of the model to query.
        temperature: sampling temperature forwarded to the API.
        max_tokens: completion token limit forwarded to the API
            (defaults to the previously hard-coded 4000).

    Returns:
        The text content of the first returned choice. An empty string is
        returned when the response has no choices or the message content is
        None, so callers can always treat the result as a string.

    Exceptions raised by the client (network, authentication, ...) propagate.
    """
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    choices = response.choices
    if not choices:
        return ""
    content = choices[0].message.content
    return content if content is not None else ""


def judge_answer(completion: str, solution: str, model: str = "meta-llama/llama-3-8b-instruct:nitro") -> float:
    """Score a predicted answer against the ground truth using an LLM judge.

    Args:
        completion: the model output to grade (expected to follow the
            <think>...</think> <answer>...</answer> format).
        solution: the ground-truth answer.
        model: judge model identifier passed to call_model().

    Returns:
        The first number of the form 0, 0.x, 1 or 1.0 found in the judge's
        reply, as a float. 0.0 is returned when the reply is empty/None or
        contains no such number.
    """
    prompt = [
        {
            "role": "system",
            "content": "You are a strict medical exam grader. Given a predicted answer and the ground truth, rate the answer on a scale from 0 to 1 based on correctness of answer inside <answer>...</answer> and completeness of the format. The expected format is <think>...</think> <answer>...</answer>. The output should be a single float number. Do not include any other text in your response."
        },
        {
            "role": "user",
            "content": f"""Predicted: {completion}
Ground Truth: {solution}

Rate this answer using the following rubric:
0.0: answer incorrect
0.3: answer incorrect but format correct
0.6: answer correct but missing minor detail
1.0: Both format and answer correct

Score:"""
        }
    ]

    raw_score = call_model(prompt, model=model)
    # The judge may return no text at all; treat that as "no score" instead of
    # failing on None.strip().
    if not raw_score:
        return 0.0

    match = re.search(r"\b(0(?:\.\d+)?|1(?:\.0)?)\b", raw_score.strip())
    return float(match.group(1)) if match else 0.0

def _completion_to_text(completion) -> str:
    """Return the plain text of one completion.

    A completion may be a plain string, or (for conversational prompts) a list
    of chat messages such as [{"role": "assistant", "content": "..."}]. For
    the latter the message contents are joined, so the judge sees the
    generated text rather than the Python repr of the list.
    """
    if isinstance(completion, str):
        return completion
    if isinstance(completion, list):
        pieces = []
        for message in completion:
            content = message.get("content", "") if isinstance(message, dict) else message
            if isinstance(content, list):
                # Content given as typed parts: keep only the text parts.
                content = "".join(
                    part.get("text", "") for part in content if isinstance(part, dict)
                )
            pieces.append(content if isinstance(content, str) else str(content))
        return "\n".join(pieces)
    return str(completion)


def reward_function(completions: list, solution: list[str], **kwargs) -> list[float]:
    """Reward function: grade each completion against its reference answer.

    Args:
        completions: generated completions, each either a string or a list of
            chat-message dicts.
        solution: ground-truth answers, paired with completions by position.
        **kwargs: additional columns passed by the trainer (ignored).

    Returns:
        One judge score per (completion, solution) pair.
    """
    return [
        judge_answer(_completion_to_text(comp), sol)
        for comp, sol in zip(completions, solution)
    ]



# Load SmolVLM2 model and apply LoRA; training hyperparameters

In [None]:
from transformers import AutoProcessor, AutoModelForImageTextToText
import torch
import peft
from peft import LoraConfig, get_peft_model

# Hub checkpoint used for both the processor and the model.
model_path = "HuggingFaceTB/SmolVLM2-2.2B-Instruct"
# NOTE(review): trust_remote_code=True permits code shipped with the checkpoint
# repository to run at load time — confirm it is actually required here.
processor = AutoProcessor.from_pretrained(
    model_path,
    trust_remote_code=True,
)

# Pad batched prompts on the left.
# NOTE(review): presumably so generated tokens directly follow the prompt
# during batched generation — confirm against the trainer's requirements.
processor.tokenizer.padding_side = "left"

# torch_dtype is left unset (the explicit float32 line is commented out), so
# the loader's default dtype applies; device_map="auto" lets the library
# decide device placement.
model = AutoModelForImageTextToText.from_pretrained(
    model_path,
    #torch_dtype=torch.float32,
    trust_remote_code=True,
    device_map="auto"
)

# LoRA adapter settings: rank 8, alpha 32, dropout 0.1, applied to modules
# named q_proj / v_proj / k_proj.
lora_config = LoraConfig(
        r=8,
        lora_alpha=32,
        lora_dropout=0.1,
        target_modules=["q_proj", "v_proj", "k_proj"],
    )

# Wrap the base model with the adapters and print how many parameters train.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# Training hyperparameters consumed by the GRPOConfig cell further down.
# NOTE(review): the directory name says "bf16" while that GRPOConfig sets
# bf16=False — confirm which one is intended.
OUTPUT_DIR = "./grpo_smolvlm_finetuned_bf16"
LEARNING_RATE = 1e-5
NUM_TRAIN_EPOCHS = 1


You have video processor config saved in `preprocessor.json` file which is deprecated. Video processor configs should be saved in their own `video_preprocessor.json` file. You can rename the file or load and save the processor back which renames it automatically. Loading from `preprocessor.json` will be removed in v5.0.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 3,852,288 || all params: 2,250,637,168 || trainable%: 0.1712


# GPU memory cleanup utilities

In [8]:
import torch
def free_memory():
    """Collect garbage, then release PyTorch's cached CUDA memory.

    The garbage collector runs first so that memory owned by objects it frees
    is already back in PyTorch's cache when empty_cache() releases the cache;
    calling them in the opposite order leaves that memory cached. The CUDA
    step is skipped when no CUDA device is available.
    """
    import gc

    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

# Run periodically
free_memory()

In [9]:
# training_args = GRPOConfig(
#         output_dir= OUTPUT_DIR,
#         learning_rate=LEARNING_RATE,
#         remove_unused_columns=False,
#         per_device_train_batch_size=4,
#         num_train_epochs=NUM_TRAIN_EPOCHS,
#         bf16=True,
#         logging_steps=1,
#         save_strategy="steps",
#         gradient_checkpointing=True,
#         save_steps=100,
#         num_generations=4,
#         log_completions=True
#         )


# trainer = GRPOTrainer(
#     model=model,
#     processing_class=processor,
#     reward_funcs= reward_function,
#     args=training_args,
#     train_dataset= all_grpo_samples
# )

# # 5. Train
# trainer.train()
# # trainer.save_model(args.output_dir)

# Rich callback to render live completions and metrics

In [10]:
from transformers import TrainerCallback
from rich.console import Console
from rich.panel import Panel
from rich import box
from rich.text import Text
import time
from IPython.display import display, HTML, clear_output

class RichCompletionCallback(TrainerCallback):
    """Trainer callback that renders logged completions, rewards and metrics with rich.

    Nothing is rendered unless the ``logs`` dict passed to ``on_log`` holds a
    non-empty list under the ``"completions"`` key.
    NOTE(review): confirm that the trainer in use actually places completions
    (and ``"rewards"``) into ``logs``; otherwise this callback stays silent.

    Args:
        min_interval_seconds: minimum time between two rendered updates
            (the first update is always rendered).
        max_completion_chars: completions longer than this (after stripping
            surrounding whitespace) are cut and marked as truncated.
    """

    def __init__(self, min_interval_seconds: float = 5.0, max_completion_chars: int = 500):
        self.console = Console(width=100)
        # Counts rendered updates, not trainer steps.
        self.step_count = 0
        self.min_interval_seconds = min_interval_seconds
        self.max_completion_chars = max_completion_chars
        # Monotonic clock: interval measurement must not be affected by
        # wall-clock adjustments.
        self.last_log_time = time.monotonic()

    def on_log(self, args, state, control, logs=None, **kwargs):
        """Render completions, rewards and the remaining metrics found in ``logs``."""
        if not logs:
            return

        # Check if we have completions in the logs
        completions = logs.get("completions", None)
        if not (completions and isinstance(completions, list)):
            return

        # Only log every few seconds to avoid spamming
        now = time.monotonic()
        if now - self.last_log_time < self.min_interval_seconds and self.step_count > 0:
            return

        self.last_log_time = now
        self.step_count += 1

        # Clear previous output for cleaner display in Colab
        clear_output(wait=True)

        self.console.print(f"[bold cyan]Generation Step: {self.step_count}[/bold cyan]")
        self._print_completions(completions)

        rewards = logs.get("rewards")
        if isinstance(rewards, list):
            self._print_rewards(rewards)

        self._print_metrics(logs)

    def _print_completions(self, completions):
        """Show each string completion in its own panel, truncating long ones."""
        limit = self.max_completion_chars
        for idx, completion in enumerate(completions):
            if not isinstance(completion, str):
                continue

            # Decide on truncation using the same (stripped) text that is
            # displayed, so the marker only appears when text was really cut.
            stripped = completion.strip()
            text = Text(stripped[:limit])
            if len(stripped) > limit:
                text.append("... [truncated]")

            self.console.print(Panel(
                text,
                title=f"[bold green]Completion {idx+1}[/bold green]",
                border_style="bright_blue",
                box=box.ROUNDED,
                expand=False,
                padding=(1, 2)
            ))

    @staticmethod
    def _format_reward(index, reward):
        """Format one reward line; fall back to plain str() for non-numeric values."""
        try:
            return f"Sample {index+1}: {reward:.4f}"
        except (TypeError, ValueError):
            return f"Sample {index+1}: {reward}"

    def _print_rewards(self, rewards):
        """Show the per-sample rewards in a single panel."""
        reward_text = "\n".join(self._format_reward(i, r) for i, r in enumerate(rewards))
        self.console.print(Panel(
            reward_text,
            title="[bold yellow]Rewards[/bold yellow]",
            border_style="yellow",
            box=box.ROUNDED,
            expand=False,
        ))

    def _print_metrics(self, logs):
        """Show every remaining log entry (not completions/rewards, not '_'-prefixed)."""
        metrics = {k: v for k, v in logs.items()
                   if k not in ['completions', 'rewards'] and not k.startswith('_')}
        if not metrics:
            return
        metrics_text = "\n".join(
            f"{k}: {v:.4f}" if isinstance(v, float) else f"{k}: {v}"
            for k, v in metrics.items()
        )
        self.console.print(Panel(
            metrics_text,
            title="[bold magenta]Training Metrics[/bold magenta]",
            border_style="magenta",
            box=box.ROUNDED,
            expand=False,
        ))

# Configure GRPO Trainer and run training

In [None]:
from trl import GRPOTrainer, GRPOConfig

# GRPO training configuration. OUTPUT_DIR, LEARNING_RATE and NUM_TRAIN_EPOCHS
# come from the model-setup cell above.
training_args = GRPOConfig(
    output_dir=OUTPUT_DIR,
    learning_rate=LEARNING_RATE,
    # NOTE(review): presumably keeps extra sample fields such as 'solution'
    # so they reach the reward function — verify.
    remove_unused_columns=False,
    per_device_train_batch_size=4,
    num_train_epochs=NUM_TRAIN_EPOCHS,
    # NOTE(review): bf16 is disabled here although OUTPUT_DIR is named
    # "..._bf16" — confirm the intended precision.
    bf16=False,
    logging_steps=1,
    # Checkpoint every 100 steps.
    save_strategy="steps",
    gradient_checkpointing=True,
    save_steps=100,
    # Number of completions sampled per prompt.
    num_generations=4,
    log_completions=True,
    max_prompt_length=1400,
    # Generate completions with vLLM.
    # NOTE(review): "colocate" presumably runs vLLM inside the training
    # process on the same GPU(s) — confirm memory headroom.
    use_vllm=True,
    vllm_mode="colocate"
)

# Live display of completions/metrics during training.
rich_callback = RichCompletionCallback()

# Trainer wiring: LoRA-wrapped model, the processor as processing class, the
# LLM-judge reward, and the pre-built list of GRPO samples.
trainer = GRPOTrainer(
    model=model,
    processing_class=processor,
    reward_funcs=reward_function,
    args=training_args,
    train_dataset=all_grpo_samples,
    callbacks=[rich_callback]
)

# NOTE(review): no explicit save call follows training; only the periodic
# checkpoints configured above are written — confirm that is sufficient.
trainer.train()

INFO 08-01 18:45:54 [__init__.py:235] Automatically detected platform cuda.


No label_names provided for model class `PeftModel`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


INFO 08-01 18:46:01 [config.py:3440] Downcasting torch.float32 to torch.bfloat16.
INFO 08-01 18:46:01 [config.py:1604] Using max model len 1656
INFO 08-01 18:46:01 [config.py:2119] Disabling V1 multiprocessing for external launcher.
INFO 08-01 18:46:01 [config.py:2434] Chunked prefill is enabled with max_num_batched_tokens=4096.
INFO 08-01 18:46:02 [core.py:71] Initializing a V1 LLM engine (v0.10.0) with config: model='HuggingFaceTB/SmolVLM2-2.2B-Instruct', speculative_config=None, tokenizer='HuggingFaceTB/SmolVLM2-2.2B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=1656, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False,

Loading safetensors checkpoint shards:   0% Completed | 0/2 [00:00<?, ?it/s]


INFO 08-01 18:46:14 [default_loader.py:262] Loading weights took 6.27 seconds
INFO 08-01 18:46:14 [gpu_model_runner.py:1892] Model loading took 4.1967 GiB and 7.038194 seconds
INFO 08-01 18:46:14 [gpu_model_runner.py:2380] Encoder cache will be initialized with a budget of 4096 tokens, and profiled with 2 image items of the maximum feature size.
INFO 08-01 18:46:20 [backends.py:530] Using cache directory: /root/.cache/vllm/torch_compile_cache/ae156aaf11/rank_0_0/backbone for vLLM's torch.compile
INFO 08-01 18:46:20 [backends.py:541] Dynamo bytecode transform time: 4.72 s
INFO 08-01 18:46:24 [backends.py:161] Directly load the compiled graph(s) for dynamic shape from the cache, took 3.390 s
INFO 08-01 18:46:24 [monitor.py:34] torch.compile takes 4.72 s in total
INFO 08-01 18:46:25 [gpu_worker.py:255] Available KV cache memory: 9.26 GiB
INFO 08-01 18:46:25 [kv_cache_utils.py:833] GPU KV cache size: 50,576 tokens
INFO 08-01 18:46:25 [kv_cache_utils.py:837] Maximum concurrency for 1,656 to

Capturing CUDA graph shapes: 100%|██████████| 4/4 [00:00<00:00, 49.76it/s]

INFO 08-01 18:46:26 [gpu_model_runner.py:2485] Graph capturing finished in 0 secs, took 0.08 GiB
INFO 08-01 18:46:26 [core.py:193] init engine (profile, create kv cache, warmup model) took 11.60 seconds



[34m[1mwandb[0m: Currently logged in as: [33madityaharshit012345[0m ([33madityaharshit012345-indian-statistical-institute[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


INFO 08-01 18:46:29 [block_pool.py:321] Successfully reset prefix cache


Step,Training Loss
1,-0.8923
2,0.0
3,0.4655
4,0.0
5,0.0
6,0.0
7,-0.0384
8,0.0
9,0.0
10,0.7685


INFO 08-01 18:46:51 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:47:10 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:47:30 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:47:48 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:48:07 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:48:26 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:48:44 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:49:02 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:49:22 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:49:45 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:50:03 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:50:23 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:50:41 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:51:00 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:51:21 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:51:40 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:51:58 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:52:17 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:52:36 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:52:54 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:53:14 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:53:33 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:53:52 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:54:11 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:54:30 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:54:48 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:55:07 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:55:26 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:55:44 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:56:02 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:56:21 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:56:40 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:57:00 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:57:20 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:57:40 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:57:58 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:58:16 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:58:35 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:58:54 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:59:12 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:59:33 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 18:59:53 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:00:17 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:00:44 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:01:06 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:01:26 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:01:49 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:02:08 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:02:35 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:03:03 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:03:31 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:03:56 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:04:15 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:04:37 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:05:04 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:05:25 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:05:52 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:06:10 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:06:31 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:06:56 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:07:22 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:07:45 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:08:07 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:08:26 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:08:48 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:09:10 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:09:29 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:09:52 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:10:11 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:10:35 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:10:54 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:11:17 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:11:38 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:11:58 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:12:17 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:12:44 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:13:04 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:13:22 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:13:41 [block_pool.py:321] Successfully reset prefix cache


INFO 08-01 19:14:00 [block_pool.py:321] Successfully reset prefix cache


# GPU diagnostics

In [None]:
import torch

# Report basic CUDA information. Device-specific queries are only made when a
# CUDA device is present, because they raise on machines without one.
print("CUDA available:", torch.cuda.is_available())
print("CUDA version:", torch.version.cuda)
if torch.cuda.is_available():
    # Query the properties of device 0 once and reuse them.
    device_props = torch.cuda.get_device_properties(0)
    print("GPU name:", torch.cuda.get_device_name(0))
    print("Compute capability:", device_props.major,
                                  device_props.minor)
else:
    print("No CUDA device detected; skipping GPU name and compute capability.")
