In [2]:
import os
# Point the CC/CXX environment variables at the conda-provided GCC toolchain
# before torch is imported below.
# NOTE(review): os.environ['CONDA_PREFIX'] raises KeyError when the variable is
# unset, and the resulting paths are not checked for existence here. A later cell
# overrides CONDA_PREFIX and re-derives both paths with an existence check.
os.environ['CC'] = os.path.join(os.environ['CONDA_PREFIX'], 'bin', 'x86_64-conda-linux-gnu-gcc')
os.environ['CXX'] = os.path.join(os.environ['CONDA_PREFIX'], 'bin', 'x86_64-conda-linux-gnu-g++')
import torch
print(torch.version.cuda)
from PIL import Image
import requests
from PIL import Image
from transformers import MllamaForConditionalGeneration, AutoProcessor
from datasets import load_dataset
from evaluate import load
import pandas as pd
import os
from peft import LoraConfig, get_peft_model
from transformers import TrainerCallback
from datasets import Dataset, Features, Array2D
from torch.utils.data import DataLoader
import logging
from transformers import TextStreamer
import os
# Turn off Weights & Biases logging for this session.
# NOTE(review): the recorded output below warns that this variable is deprecated;
# the trainer config in this notebook already passes report_to="none".
os.environ["WANDB_DISABLED"] = "true"


# Report the CUDA devices visible to torch and start from clean allocator stats.
if torch.cuda.is_available():
    print("CUDA is available!")
    print(f"Number of GPUs: {torch.cuda.device_count()}")

    # List details for each GPU
    for i in range(torch.cuda.device_count()):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
    torch.cuda.empty_cache()  # Clears unused memory
    # reset_max_memory_allocated() is deprecated; reset_peak_memory_stats() is its
    # documented replacement and resets every peak counter, so the
    # max_memory_reserved() readings taken later start from this point.
    torch.cuda.reset_peak_memory_stats()
import sys
print(f"Python version: {sys.version}")
    

    


12.1
CUDA is available!
Number of GPUs: 1
GPU 0: NVIDIA A100-SXM4-80GB
Python version: 3.10.15 (main, Oct  3 2024, 07:27:34) [GCC 11.2.0]




In [3]:
# import shutil
# import os

# # Delete specific directories
# if os.path.exists("outputs"):
#     shutil.rmtree("outputs")
# if os.path.exists("runs"):
#     shutil.rmtree("runs")
# del model
# import gc
# gc.collect()


In [4]:
# Check current GPU memory usage
import torch
# memory_summary() returns its report as a string; it has to be printed to be
# seen because it is not the last expression of the cell.
print(torch.cuda.memory_summary())

# For a simpler view of allocated memory
print(f"Allocated: {torch.cuda.memory_allocated(0)/1024**2:.2f} MB")
print(f"Cached: {torch.cuda.memory_reserved(0)/1024**2:.2f} MB")

Allocated: 0.00 MB
Cached: 0.00 MB


In [5]:
import pandas as pd
import numpy as np
from tqdm import tqdm

def generate_single_summary(
    image,
    model,
    tokenizer,
    instruction="You are an expert radiographer. Describe accurately what you see in this image in detail.",
    max_new_tokens=120,
    temperature=1.5,
    min_p=0.1,
    device="cuda"
):
    """
    Generate a summary for a single image.

    The instruction is wrapped in a one-turn chat (image + text), run through
    ``model.generate`` while streaming tokens to stdout, and the newly
    generated tokens are decoded and returned.

    Args:
        image: The input image, passed as the first argument to ``tokenizer``.
        model: Object exposing ``generate(**inputs, ...)``.
        tokenizer: Callable processor exposing ``apply_chat_template`` and
            ``decode``; its call result must support ``.to(device)`` and
            contain ``"input_ids"``.
        instruction: The prompt instruction.
        max_new_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature forwarded to ``generate``.
        min_p: Minimum probability threshold forwarded to ``generate``.
        device: Device the encoded inputs are moved to.

    Returns:
        str: Decoded text of the generated continuation only (the prompt
        tokens are stripped before decoding).
    """
    messages = [
        {"role": "user", "content": [
            {"type": "image"},
            {"type": "text", "text": instruction}
        ]}
    ]

    input_text = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True
    )

    # Honour the caller's device instead of a hard-coded "cuda".
    inputs = tokenizer(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors="pt"
    ).to(device)

    text_streamer = TextStreamer(tokenizer, skip_prompt=True)
    outputs = model.generate(
        **inputs,
        streamer=text_streamer,
        max_new_tokens=max_new_tokens,
        use_cache=True,
        temperature=temperature,
        min_p=min_p
    )

    # generate() returns the prompt followed by the continuation; drop the
    # prompt tokens so the instruction text does not leak into the summary.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_ids = outputs[0][prompt_length:]

    return tokenizer.decode(generated_ids, skip_special_tokens=True)


def generate_dataset_summaries(
    val_dataset,
    model,
    tokenizer,
    instruction="You are an expert radiographer. Describe accurately what you see in this image.",
    device="cuda",
    image_column="image",
    summary_column="Content"
):
    """
    Generate a summary for every item of a dataset and pair it with the reference text.

    Args:
        val_dataset: Indexable, sized collection whose items are mappings
            holding an image and its reference summary.
        model: The model; switched to inference mode via
            ``FastVisionModel.for_inference`` before generation.
        tokenizer: The tokenizer/processor forwarded to ``generate_single_summary``.
        instruction: The prompt instruction.
            NOTE(review): this default lacks the trailing "in detail" present
            in ``generate_single_summary``'s default — confirm which prompt is intended.
        device: Device forwarded to ``generate_single_summary``.
        image_column: Key of the image in each item (default ``"image"``).
        summary_column: Key of the reference summary in each item (default ``"Content"``).

    Returns:
        pandas.DataFrame: One row per item, with ``Generated_Summary`` and
        ``Real_Summary`` columns.
    """
    FastVisionModel.for_inference(model)
    generated_summaries = []
    real_summaries = []

    for idx in tqdm(range(len(val_dataset)), desc="Generating Summaries"):
        # Fetch the row once; both the image and the reference come from it.
        item = val_dataset[idx]

        generated_summaries.append(
            generate_single_summary(
                image=item[image_column],
                model=model,
                tokenizer=tokenizer,
                instruction=instruction,
                device=device
            )
        )
        real_summaries.append(item[summary_column])

    return pd.DataFrame({
        'Generated_Summary': generated_summaries,
        'Real_Summary': real_summaries
    })



In [6]:
import os

# Show the prefix inherited from the environment before it is touched.
print(f"Current CONDA_PREFIX: {os.environ.get('CONDA_PREFIX', 'Not set')}")

# Environment whose bin/ directory is expected to hold the conda compilers
# (per the original author's note, verified in earlier manual tests).
expected_prefix = "/home/koehler.ale/.conda/envs/change"

# Force CONDA_PREFIX to the expected environment; assigning is a no-op when
# the value already matches.
os.environ['CONDA_PREFIX'] = expected_prefix

# Build both compiler paths from the (now corrected) prefix.
conda_gcc, conda_gxx = (
    os.path.join(os.environ['CONDA_PREFIX'], 'bin', compiler)
    for compiler in ('x86_64-conda-linux-gnu-gcc', 'x86_64-conda-linux-gnu-g++')
)

# Fail early if either compiler is missing.
if not (os.path.exists(conda_gcc) and os.path.exists(conda_gxx)):
    raise FileNotFoundError(f"GCC/G++ not found at expected paths: {conda_gcc}, {conda_gxx}")

# Export the toolchain locations through the environment.
os.environ.update({
    'CC': conda_gcc,
    'CXX': conda_gxx,
    'GCC_PATH': os.path.dirname(conda_gcc),
})

# Echo the final values for verification.
print(f"Using GCC at: {os.environ['CC']}")
print(f"Using G++ at: {os.environ['CXX']}")

Current CONDA_PREFIX: /shared/centos7/anaconda3/2021.05
Using GCC at: /home/koehler.ale/.conda/envs/change/bin/x86_64-conda-linux-gnu-gcc
Using G++ at: /home/koehler.ale/.conda/envs/change/bin/x86_64-conda-linux-gnu-g++


In [7]:
from unsloth import FastVisionModel # FastLanguageModel for LLMs
import torch

# Reference list of pre-quantized 4-bit checkpoints published under
# https://huggingface.co/unsloth. It is informational only: the load call
# below does not read it.
fourbit_models = [
    # Llama 3.2 Vision
    "unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit",
    "unsloth/Llama-3.2-11B-Vision-bnb-4bit",
    "unsloth/Llama-3.2-90B-Vision-Instruct-bnb-4bit",
    "unsloth/Llama-3.2-90B-Vision-bnb-4bit",
    # Pixtral (instruct and base)
    "unsloth/Pixtral-12B-2409-bnb-4bit",
    "unsloth/Pixtral-12B-Base-2409-bnb-4bit",
    # Qwen2-VL
    "unsloth/Qwen2-VL-2B-Instruct-bnb-4bit",
    "unsloth/Qwen2-VL-7B-Instruct-bnb-4bit",
    "unsloth/Qwen2-VL-72B-Instruct-bnb-4bit",
    # LLaVA variants
    "unsloth/llava-v1.6-mistral-7b-hf-bnb-4bit",
    "unsloth/llava-1.5-7b-hf-bnb-4bit",
]

# Load the vision model together with its tokenizer/processor.
#   load_in_4bit=True                     -> 4-bit weights to reduce memory use
#   use_gradient_checkpointing="unsloth" -> Unsloth's checkpointing mode
model, tokenizer = FastVisionModel.from_pretrained(
    "unsloth/Llama-3.2-11B-Vision-Instruct",
    use_gradient_checkpointing = "unsloth",
    load_in_4bit = True,
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using

==((====))==  Unsloth 2024.11.10: Fast Mllama vision patching. Transformers: 4.46.3.
   \\   /|    GPU: NVIDIA A100-SXM4-80GB. Max memory: 79.15 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 8.0. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [8]:
# Wrap the base model with LoRA adapters. Note that this rebinds `model`.
model = FastVisionModel.get_peft_model(
    model,

    # --- LoRA hyper-parameters ---
    r = 20,              # adapter rank
    lora_alpha = 20,     # scaling factor, kept equal to r
    lora_dropout = 0,
    bias = "none",
    use_rslora = False,  # rank-stabilized LoRA disabled
    loftq_config = None, # LoftQ disabled
    random_state = 3407,

    # --- which parts of the network receive adapters ---
    finetune_vision_layers     = True,
    finetune_language_layers   = True,
    finetune_attention_modules = True,
    finetune_mlp_modules       = True,
    # target_modules is left unset; a list of module names can be given instead.
)

In [9]:
from datasets import load_from_disk

def load_scratch_dataset(load_path="/scratch/koehler.ale/xray_dataset"):
    """
    Load a dataset previously saved to disk and print a short preview.

    Args:
        load_path: Directory passed to ``load_from_disk``. Defaults to the
            scratch location used by this notebook.

    Returns:
        Dataset: The object returned by ``load_from_disk``.

    Raises:
        FileNotFoundError: If ``load_path`` does not exist.
    """
    if not os.path.exists(load_path):
        raise FileNotFoundError(f"No dataset found at {load_path}")

    print(f"Loading dataset from {load_path}")
    dataset = load_from_disk(load_path)
    print(f"Loaded dataset with {len(dataset)} examples")

    # Preview the first row only when there is one; indexing an empty dataset
    # would raise. The row is fetched once and reused for both prints.
    if len(dataset) > 0:
        first_row = dataset[0]
        print(f"First image type: {type(first_row['image'])}")
        print(first_row['Content'])
    print(dataset.column_names)
    return dataset

# Load everything, then hold out 10% for validation. The seed is fixed so the
# split is the same on every run.
dataset = load_scratch_dataset()
split_dataset = dataset.train_test_split(seed=42, test_size=0.1)

# Pull the two partitions out of the split result.
train1_dataset, val1_dataset = (split_dataset[part] for part in ("train", "test"))

print(f"Training set size: {len(train1_dataset)}")
print(f"Validation set size: {len(val1_dataset)}")
print(train1_dataset.column_names)

Loading dataset from /scratch/koehler.ale/xray_dataset
Loaded dataset with 2847 examples
First image type: <class 'PIL.PngImagePlugin.PngImageFile'>
Bilateral hazy opacities interstitial are visualized and likely representative of fibrotic changes. Otherwise the lungs are without a focal consolidation, effusion, or pneumothorax. Cardiomediastinal silhouette is within normal limits. No acute fractures are identified. No evidence of acute injury. Bilateral hazy interstitial opacities are likely representative of fibrotic changes.
['Content', 'image']
Training set size: 2562
Validation set size: 285
['Content', 'image']


In [10]:
instruction = "You are an expert radiographer. Describe accurately what you see in this image in detail."

def convert_to_conversation(sample):
    """
    Turn one dataset row into a two-turn chat example.

    The user turn carries the module-level ``instruction`` text followed by
    the row's image; the assistant turn carries the row's ``"Content"`` text.

    Args:
        sample: Mapping with ``"image"`` and ``"Content"`` keys.

    Returns:
        dict: ``{"messages": [user_turn, assistant_turn]}``.
    """
    user_turn = {
        "role": "user",
        "content": [
            {"type": "text", "text": instruction},
            {"type": "image", "image": sample["image"]},
        ],
    }
    assistant_turn = {
        "role": "assistant",
        "content": [
            {"type": "text", "text": sample["Content"]},
        ],
    }
    return {"messages": [user_turn, assistant_turn]}

In [11]:
# Materialise both splits as plain lists of chat-formatted examples.
train_dataset = list(map(convert_to_conversation, train1_dataset))
val_dataset = list(map(convert_to_conversation, val1_dataset))


In [20]:
from unsloth import is_bf16_supported
from unsloth.trainer import UnslothVisionDataCollator
from trl import SFTTrainer, SFTConfig

FastVisionModel.for_training(model) # Enable for training!

# Build the supervised fine-tuning trainer over the chat-formatted datasets.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    data_collator = UnslothVisionDataCollator(model, tokenizer),
    train_dataset = train_dataset,
    eval_dataset = val_dataset,
    args = SFTConfig(
        # Training length is controlled by max_steps alone. The previous
        # num_train_epochs = 2 was silently overridden by max_steps (see the
        # "max_steps is given" log line), so it has been removed. To train by
        # epochs instead, drop max_steps and set num_train_epochs.
        per_device_train_batch_size = 8,
        gradient_accumulation_steps = 4,  # effective batch size of 32 per step
        warmup_steps = 5,
        max_steps = 200,
        learning_rate = 1e-4,
        # Use bf16 where the GPU supports it, fp16 otherwise.
        fp16 = not is_bf16_supported(),
        bf16 = is_bf16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        # The datasets are already chat-formatted lists handled by the vision
        # collator, so the trainer's own column pruning and dataset
        # preparation are switched off.
        remove_unused_columns = False,
        dataset_text_field = "",
        dataset_kwargs = {"skip_prepare_dataset": True},
        dataset_num_proc = 4,
        max_seq_length = 2048,
        report_to="none",
        # Evaluate on the validation set every 10 optimizer steps.
        eval_strategy = "steps",
        eval_steps = 10,
    ),
)


Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
max_steps is given, it will override any value given in num_train_epochs


In [21]:
#@title Show current memory stats
# Baseline readings taken before training, expressed in GiB (bytes / 1024**3).
gpu_stats = torch.cuda.get_device_properties(0)
max_memory = round(gpu_stats.total_memory / 1024 ** 3, 3)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 ** 3, 3)

print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = NVIDIA A100-SXM4-80GB. Max memory = 79.15 GB.
21.135 GB of memory reserved.


In [None]:
# Run fine-tuning. The returned object is kept because a later cell reads
# trainer_stats.metrics['train_runtime'] from it.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 2,562 | Num Epochs = 3
O^O/ \_/ \    Batch size per device = 8 | Gradient Accumulation steps = 4
\        /    Total batch size = 32 | Total steps = 200
 "-____-"     Number of trainable parameters = 83,968,000
🦥 Unsloth needs about 1-3 minutes to load everything - please wait!


Step,Training Loss,Validation Loss
10,1.131,1.001033
20,1.0161,0.901035


In [None]:
#@title Show final memory and time stats
# Peak reserved memory after training, in GiB, and the share of it added since
# the baseline reading stored in start_gpu_memory.
used_memory = round(torch.cuda.max_memory_reserved() / 1024 ** 3, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)

# Both figures as a percentage of the device's total memory.
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)

# Wall-clock training time, in seconds and in minutes.
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")

# Memory report.
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

In [None]:
FastVisionModel.for_inference(model) # Enable for inference!

# Spot-check one validation image with the fine-tuned model.
image = val1_dataset[50]["image"]
instruction = "You are an expert radiographer. Describe accurately what you see in this image in detail."

# Reuse generate_single_summary rather than repeating its chat-template,
# tokenisation, and streaming logic here. The generation settings of the
# original cell are passed explicitly; the text is streamed to stdout by the
# helper, so the return value is discarded.
_ = generate_single_summary(
    image,
    model,
    tokenizer,
    instruction = instruction,
    max_new_tokens = 128,
    temperature = 1.5,
    min_p = 0.1,
)

In [None]:
# Generate a summary for every validation image with the current model and pair
# it with the reference text.
# NOTE(review): no instruction is passed, so the function's default prompt is used.
# That default lacks the trailing "in detail" found in the module-level `instruction`
# string used to build the training conversations — confirm this is intended.
TrainedResults = generate_dataset_summaries(val1_dataset, model, tokenizer)


In [None]:
def save_dataframe(df, save_path):
    """
    Save a DataFrame as CSV, creating the parent directory if needed.

    Args:
        df (pd.DataFrame): The DataFrame to save.
        save_path (str): The file path to save the DataFrame (including the filename).

    Returns:
        None
    """
    # A bare filename has an empty dirname, and os.makedirs("") raises, so the
    # directory is only created when the path actually names one.
    parent_dir = os.path.dirname(save_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    df.to_csv(save_path, index=False)
    print(f"DataFrame saved to {save_path}")
# UntrainedResults is not assigned anywhere in this notebook, so on a fresh
# kernel the unconditional call raised NameError. Save only when the variable
# exists; otherwise rely on the CSV already on disk, which the next cell reads.
if "UntrainedResults" in globals():
    save_dataframe(UntrainedResults,"/scratch/koehler.ale/Untrained_DF.csv")
else:
    print("UntrainedResults is not defined in this session; skipping save and relying on the existing CSV.")



In [None]:
load_path = "/scratch/koehler.ale/Untrained_DF.csv"
# load_dataframe is not defined anywhere in this notebook; read the CSV written
# by save_dataframe (DataFrame.to_csv with index=False) directly.
Untrained_df = pd.read_csv(load_path)
# rename() returns a new frame rather than modifying in place, so the result
# has to be assigned for the column to actually be renamed.
Untrained_df = Untrained_df.rename(columns={"Generated_Summary":"Untrained_Summary"})
Untrained_df.head()


In [None]:
# Join the untrained and fine-tuned generations on their shared reference text.
# NOTE(review): an inner merge on "Real_Summary" pairs every matching row, so
# repeated reference texts would multiply rows — confirm the column is unique or
# join on a row identifier instead.
# NOTE(review): if both frames still carry a "Generated_Summary" column at this
# point, pandas disambiguates them with _x/_y suffixes.
FinalMIMICDF = pd.merge(Untrained_df, TrainedResults, on="Real_Summary", how="inner")

In [None]:
# Persist the merged comparison table as CSV in the scratch directory.
save_dataframe(FinalMIMICDF,"/scratch/koehler.ale/FinalMIMICDF.csv")