<a href="https://colab.research.google.com/github/Rewcifer/llama/blob/main/Llama_2_Learning_Rate_Exp_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Fine-tune Llama 2 in Google Colab
> 🗣️ Large Language Model Course

❤️ Created by [@maximelabonne](https://twitter.com/maximelabonne), based on Younes Belkada's [GitHub Gist](https://gist.github.com/younesbelkada/9f7f75c94bdc1981c8ca5cc937d4a4da). Special thanks to Tolga HOŞGÖR for his solution to empty the VRAM.

This notebook runs on a T4 GPU. (Last update: 24 Aug 2023)


In [1]:
# Load the Drive helper and mount
from google.colab import drive

# This will prompt for authorization.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

In [3]:
import os
import torch
from datasets import load_dataset, Dataset, IterableDataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

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

> Indented block



In [4]:
# #use if just trying to load the model

# !huggingface-cli login

In [5]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [6]:
dataset = load_dataset("Rewcifer/trainset1_2000_cutoff_llama",split = "train")

In [7]:
validation_data = load_dataset("Rewcifer/validation_2000_cutoff_llama",split = "train")

In [8]:
dataset = dataset.select(range(0, 6000))

In [9]:
# The model that you want to train from the Hugging Face hub
model_name = "meta-llama/Llama-2-7b-hf"

# The instruction dataset to use
dataset_name = dataset

# Fine-tuned model name
new_model = "llama-2-7b-tyellow-2k-cutoff-LR1"

################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

################################################################################
# TrainingArguments parameters
################################################################################

# Output directory where the model predictions and checkpoints will be stored
output_dir = "./results"

# Number of training epochs
num_train_epochs = 2

# Enable fp16/bf16 training (set bf16 to True with an A100)
fp16 = False
bf16 = True

# Batch size per GPU for training
per_device_train_batch_size = 8

# Batch size per GPU for evaluation
per_device_eval_batch_size = 8

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

# Enable gradient checkpointing
gradient_checkpointing = True

# Maximum gradient normal (gradient clipping)
max_grad_norm = 0.3

# Initial learning rate (AdamW optimizer)
learning_rate = 2e-5

# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001

# Optimizer to use
optim = "paged_adamw_32bit"

# Learning rate schedule
lr_scheduler_type = "cosine"

# Number of training steps (overrides num_train_epochs)
#max_steps = -1

# Ratio of steps for a linear warmup (from 0 to learning rate)
warmup_ratio = 0.03

# Group sequences into batches with same length
# Saves memory and speeds up training considerably
group_by_length = False

# Save checkpoint every X updates steps
save_steps = 0

# Log every X updates steps
logging_steps = 100

################################################################################
# SFT parameters
################################################################################

# Maximum sequence length to use
# max_seq_length = None
#4056 is max
max_seq_length = 2000

# Pack multiple short examples in the same input sequence to increase efficiency
packing = True

# Load the entire model on the GPU 0
device_map = {"": 0}

In [None]:
# os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:100'

In [14]:
# Load dataset (you can process it here)
#dataset = load_dataset("Rewcifer/radio-llama2-resp_tag_90pct", split="train")

# Load tokenizer and model with QLoRA configuration
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

# Load base model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map,
    use_auth_token = "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"
)
model.config.use_cache = False
model.config.pretraining_tp = 1

# Load LLaMA tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, use_auth_token = "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")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right" # Fix weird overflow issue with fp16 training

# Load LoRA configuration
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)

# Set training parameters
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    #max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard"
)

# Set supervised fine-tuning parameters
#can add a validation set as well - can set early stopping parameters here as well
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)

# Train model
trainer.train()

# Save trained model
trainer.model.save_pretrained(new_model)

Your GPU supports bfloat16: accelerate training with bf16=True




Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
100,0.8673
200,0.5797
300,0.4645
400,0.428
500,0.4012
600,0.3813
700,0.3808
800,0.3669
900,0.3678


In [None]:
# if isinstance(dataset, Dataset):
#     print("The dataset is an instance of `Dataset`.")
# elif isinstance(dataset, IterableDataset):
#     print("The dataset is an instance of `IterableDataset`.")
# else:
#     print("The dataset type is not recognized.")

The dataset is an instance of `Dataset`.


In [None]:
# %load_ext tensorboard
# %tensorboard --logdir results/runs

In [None]:
# # Ignore warnings
# logging.set_verbosity(logging.CRITICAL)

# # Run text generation pipeline with our next model
# prompt = "anatomical entity_fin: {'parietal bone': {'location descriptor': 'right'}}location descriptor_fin: ['right']clinical findings_fin: {'prior examination , two burr holes have been placed within the right parietal bone': {'clinical finding': 'prior examination , two burr holes have been placed within the right parietal bone'}}anatomical entity_fin: {'subdural space': {}}location descriptor_fin: ['posterior', 'right']object_fin: ['drainage catheter']clinical findings_fin: {'more posterior burr hole': {'clinical finding': 'more posterior burr hole', 'existence': 'pos_dx', 'descriptive_term': 'more posterior burr', 'observation': 'hole', 'quantity_term': 'more'}, 'drainage catheter': {'clinical finding': 'drainage catheter', 'existence': 'pos_dx', 'descriptive_term': 'drainage', 'observation': 'catheter'}}anatomical entity_fin: {'scalp': {}}location descriptor_fin: ['right']clinical findings_fin: {'substantial right-sided scalp swelling': {'clinical finding': 'substantial right-sided scalp swelling', 'cf_snomed': {'swelling': 65124004}, 'existence': 'pos_dx', 'descriptive_term': 'substantial right-sided scalp', 'observation': 'swell'}, 'pneumocephalus': {'clinical finding': 'pneumocephalus', 'cf_snomed': {'pneumocephalus': 14415006}, 'existence': 'pos_dx', 'observation': 'pneumocephalus'}}location descriptor_fin: ['right']clinical findings_fin: {'previously seen isoattenuating right frontoparietal subdural': {'clinical finding': 'previously seen isoattenuating right frontoparietal subdural', 'existence': 'pos_dx', 'descriptive_term': 'previously seen isoattenuating right frontoparietal', 'observation': 'subdural'}}anatomical entity_fin: {'fluid': {}}anatomical entity_fin: {'fluid': {}}anatomical entity_fin: {'fluid': {}}imaging observation_fin: ['blood']object_fin: ['catheter']clinical findings_fin: {'more acute blood product': {'clinical finding': 'more acute blood product', 'existence': 'pos_dx', 'descriptive_term': 'more acute blood', 'observation': 'product', 'strength_term': 'acute', 'quantity_term': 'more', 'temporal_term': 'acute'}}size_fin: [{'size': '13 mm'}, {'size': '25 mm'}]imaging observation_fin: ['blood']clinical findings_fin: {'extra-axial blood': {'clinical finding': 'extra-axial blood', 'existence': 'pos_dx', 'descriptive_term': 'extra-axial', 'observation': 'blood'}}anatomical entity_fin: {'right cerebral hemisphere': {}}clinical findings_fin: {'mass-effect': {'clinical finding': 'mass-effect', 'existence': 'neg_dx', 'observation': 'mass'}}anatomical entity_fin: {'right lateral ventricle': {}}location descriptor_fin: ['midline', 'left']clinical findings_fin: {'ischemic injury': {'clinical finding': 'ischemic injury', 'existence': 'unc_dx', 'descriptive_term': 'ischemic', 'observation': 'injury'}}clinical findings_fin: {'new parenchymal abnormalities': {'clinical finding': 'new parenchymal abnormalities', 'existence': 'neg_dx', 'descriptive_term': 'new parenchymal', 'observation': 'abnormality', 'temporal_term': 'new'}}clinical findings_fin: {'ventricles are normal in size and morphology': {'clinical finding': 'ventricles are normal in size and morphology'}}"
# pipe = pipeline(task="text-generation", model=new_model, tokenizer=tokenizer, max_length=300)
# result = pipe(f"<s>[INST] {prompt} [/INST]")
# print(result[0]['generated_text'])

In [None]:
# result

In [15]:
# Empty VRAM
# del model
# del pipe
del trainer
import gc
gc.collect()
gc.collect()

50

In [16]:
# Reload model in FP16 and merge it with LoRA weights
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)
model = PeftModel.from_pretrained(base_model, new_model)
model = model.merge_and_unload()

# Reload tokenizer to save it
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# !huggingface-cli login

# model.push_to_hub(new_model, use_temp_dir=False)
# tokenizer.push_to_hub(new_model, use_temp_dir=False)

In [17]:
model.push_to_hub(new_model, use_auth_token=True)

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Rewcifer/llama-2-7b-tyellow-2k-cutoff-LR1/commit/6de55aa4c03447bba92fcebdf6c26df398858b26', commit_message='Upload LlamaForCausalLM', commit_description='', oid='6de55aa4c03447bba92fcebdf6c26df398858b26', pr_url=None, pr_revision=None, pr_num=None)

# Run up to here if you want the model to push to huggingface automatically

# Run here after you have restarted your rutime to login huggingface to access the model

In [10]:
# Load model directly IF JUST CALLING MODEL AND NOT TRAINING
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained("llama-2-7b-tyellow-2k-cutoff-LR1")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Load model directly USE THIS AFTER MODEL IS TRAINED
# from transformers import AutoTokenizer, AutoModelForCausalLM

# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForCausalLM.from_pretrained("llama-2-7b-tyellow-resp-tag")

In [11]:
types = [' [/RESP] ',' [/RESP]  ', '[/RESP]',' [/RESP]', ' [/RESP]', '.[/RESP]', '. [/RESP]', '}[/RESP]', '-[/RESP]', '- [/RESP]', '*[/RESP]', '* [/RESP]', '! [/RESP]', '![/RESP]', '? [/RESP]', '?[/RESP]', '= [/RESP]', '=[/RESP]', '+ [/RESP]', '+[/RESP]', '^ [/RESP]', '^[/RESP]', '& [/RESP]', ',[/RESP]', ', [/RESP]', ';[/RESP]', '; [/RESP]', ': [/RESP]', ':[/RESP]', '< [/RESP]', '<[/RESP]', '> [/RESP]', '>[/RESP]', '( [/RESP]', '([/RESP]', ') [/RESP]', ')[/RESP]', '_[/RESP]']
# test1 = ["?[/RESP]"," [/RESP]"]
# test2 = ['?[/RESP]',' [/RESP]']

stop_tokens = []
for tokens in types:
  #print("Word = " + str(tokens))
  #print("tokens = " + str(tokenizer(tokens)))
  stop_tokens.append(tokenizer(tokens)['input_ids'])

stop_tokens

#stop_tokens = tokenizer("[/RESP]")

[[1, 29871, 518, 29914, 1525, 5550, 29962, 29871],
 [1, 29871, 518, 29914, 1525, 5550, 29962, 259],
 [1, 518, 29914, 1525, 5550, 29962],
 [1, 29871, 518, 29914, 1525, 5550, 29962],
 [1, 29871, 518, 29914, 1525, 5550, 29962],
 [1, 869, 29961, 29914, 1525, 5550, 29962],
 [1, 869, 518, 29914, 1525, 5550, 29962],
 [1, 500, 29961, 29914, 1525, 5550, 29962],
 [1, 448, 29961, 29914, 1525, 5550, 29962],
 [1, 448, 518, 29914, 1525, 5550, 29962],
 [1, 334, 29961, 29914, 1525, 5550, 29962],
 [1, 334, 518, 29914, 1525, 5550, 29962],
 [1, 1738, 518, 29914, 1525, 5550, 29962],
 [1, 1738, 29961, 29914, 1525, 5550, 29962],
 [1, 1577, 518, 29914, 1525, 5550, 29962],
 [1, 1577, 29961, 29914, 1525, 5550, 29962],
 [1, 353, 518, 29914, 1525, 5550, 29962],
 [1, 353, 29961, 29914, 1525, 5550, 29962],
 [1, 718, 518, 29914, 1525, 5550, 29962],
 [1, 718, 29961, 29914, 1525, 5550, 29962],
 [1, 6228, 518, 29914, 1525, 5550, 29962],
 [1, 6228, 29961, 29914, 1525, 5550, 29962],
 [1, 669, 518, 29914, 1525, 5550, 299

In [12]:
from transformers import StoppingCriteria, StoppingCriteriaList


stop_tokens_lists = stop_tokens
class UserTokenStoppingCriteria(StoppingCriteria):
  def __init__(self, stop_tokens_lists):
      super().__init__()
      self.stop_tokens_lists = stop_tokens_lists

  def __call__(self, input_ids, scores):
      for stop_token_ids in self.stop_tokens_lists:
          tail_of_input = input_ids[0, -len(stop_token_ids):]
          if torch.equal(tail_of_input, torch.tensor(stop_token_ids).to(input_ids.device)):
              return True
      return False


stopping_criteria_list = StoppingCriteriaList([UserTokenStoppingCriteria(stop_tokens_lists=stop_tokens_lists)])

In [13]:
torch.cuda.empty_cache()

from transformers import TextStreamer
torch.cuda.empty_cache()
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = model.to(device)
# inputs = inputs.to(device)

def stream(user_prompt):
    runtimeFlag = device
    system_prompt = "Write the finding section for a radiology report based on the following information:"
    # B_INST, E_INST = "[INST]", "[/INST]"
    prompt = f"<s>[INST] {system_prompt}{user_prompt} [/INST] [RESP]"
    # prompt = f"{system_prompt}{B_INST}{user_prompt.strip()}\n{E_INST}"
    inputs = tokenizer([prompt], return_tensors="pt").to(runtimeFlag)
    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    _ = model.generate(**inputs, streamer=streamer, max_new_tokens=2000)

# stream("'input': location descriptor_fin: ['right', 'left']object_fin: ['shunt catheter']clinical findings_fin: 'right parietal approach ventricular shunt catheter': 'clinical finding': 'right parietal approach ventricular shunt catheter', 'existence': 'pos_dx', 'descriptive_term': 'right parietal approach ventricular shunt', 'observation': 'catheter'anatomical entity_fin: 'right lateral ventricle': location descriptor_fin: ['left']clinical findings_fin: 'significant interval change': 'clinical finding': 'significant interval change', 'existence': 'neg_dx', 'descriptive_term': 'significant interval', 'observation': 'change', 'collapsed_1': 'clinical finding': 'collapsed', 'cf_snomed': 'collapse': 271787007, 'existence': 'pos_dx', 'observation': 'collapse', 'collapsed_2': 'clinical finding': 'collapsed', 'cf_snomed': 'collapse': 271787007, 'existence': 'pos_dx', 'observation': 'collapse'object_fin: ['shunt catheter']clinical findings_fin: 'shunt catheter': 'clinical finding': 'shunt catheter', 'existence': 'pos_dx', 'descriptive_term': 'shunt', 'observation': 'catheter', 'intact': 'clinical finding': 'intact', 'existence': 'neg_dx', 'observation': 'intact'anatomical entity_fin: 'tonsils': location descriptor_fin: ['posterior']clinical findings_fin: 'stable postoperative': 'clinical finding': 'stable postoperative', 'existence': 'pos_dx', 'descriptive_term': 'stable', 'observation': 'postoperative', 'strength_term': 'stable', 'change_term': 'stable', 'suboccipital craniectomy': 'clinical finding': 'suboccipital craniectomy', 'existence': 'pos_dx', 'descriptive_term': 'suboccipital', 'observation': 'craniectomy'location descriptor_fin: ['midline']clinical findings_fin: 'mass': 'clinical finding': 'mass', 'existence': 'neg_dx', 'observation': 'mass'clinical findings_fin: 'intracranial hemorrhage': 'clinical finding': 'intracranial hemorrhage', 'cf_snomed': 'intracranial hemorrhage': 1386000, 'existence': 'neg_dx', 'descriptive_term': 'intracranial', 'observation': 'hemorrhage'clinical findings_fin: 'extraaxial fluid collection': 'clinical finding': 'extraaxial fluid collection', 'existence': 'neg_dx', 'descriptive_term': 'extraaxial fluid', 'observation': 'collection'anatomical entity_fin: 'paranasal sinuses': , 'middle ears': clinical findings_fin: 'clear': 'clinical finding': 'clear', 'existence': 'neg_dx', 'observation': 'clear'")

In [None]:
# device = "cuda:0" if torch.cuda.is_available() else "cpu"
# model = model.to(device)
# # tokenizer = tokenizer.to(device)  # Only if your tokenizer requires it, typically it doesn't.
# # inputs = inputs.to(device)

In [14]:
torch.cuda.empty_cache()
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# model = model.to(device)

# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model

streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
prompt = "Write the finding section for a radiology report based on the following information: input: anatomical entity_fin: 'right lung': , 'base': clinical findings_fin: 'atelectatic changes': 'clinical finding': 'atelectatic changes', 'existence': 'pos_dx', 'descriptive_term': 'atelectatic', 'observation': 'change'anatomical entity_fin: 'right atrium': non-anatomical substance_fin: ['leads']object_fin: ['pacemaker']imaging observation_fin: ['infiltrates']clinical findings_fin: 'acute infiltrates': 'clinical finding': 'acute infiltrates', 'existence': 'neg_dx', 'descriptive_term': 'acute', 'observation': 'infiltrate', 'strength_term': 'acute', 'temporal_term': 'acute', 'effusions': 'clinical finding': 'effusions', 'existence': 'neg_dx', 'observation': 'effusion'clinical findings_fin: 'large hiatal hernia': 'clinical finding': 'large hiatal hernia', 'existence': 'pos_dx', 'descriptive_term': 'large hiatal', 'observation': 'hernia', 'strength_term': 'large'anatomical entity_fin: 'esophagus': clinical findings_fin: 'patulous esophagus': 'clinical finding': 'patulous esophagus'anatomical entity_fin: 'gallbladder': clinical findings_fin: 'cholelithiasis': 'clinical finding': 'cholelithiasis', 'existence': 'pos_dx', 'observation': 'cholelithiasis', 'gallbladder sludge': 'clinical finding': 'gallbladder sludge', 'existence': 'pos_dx', 'descriptive_term': 'gallbladder', 'observation': 'sludge'clinical findings_fin: 'significant abnormality': 'clinical finding': 'significant abnormality', 'existence': 'neg_dx', 'descriptive_term': 'significant', 'observation': 'abnormality'clinical findings_fin: 'significant abnormality': 'clinical finding': 'significant abnormality', 'existence': 'neg_dx', 'descriptive_term': 'significant', 'observation': 'abnormality'size_fin: ['size': '1.8 cm', 'size_descriptor': 'adrenal']location descriptor_fin: ['left']location descriptor_fin: ['right']clinical findings_fin: 'multiple right renal calculus': 'clinical finding': 'multiple right renal calculus', 'existence': 'pos_dx', 'descriptive_term': 'multiple right renal', 'observation': 'calculus', 'quantity_term': 'multiple', 'right hydronephrosis': 'clinical finding': 'right hydronephrosis', 'existence': 'pos_dx', 'descriptive_term': 'right', 'observation': 'hydronephrosis', 'hydroureter': 'clinical finding': 'hydroureter', 'existence': 'pos_dx', 'observation': 'hydroureter'size_fin: ['size': '8-mm', 'size_descriptor': 'calculus']anatomical entity_fin: 'ureter': 'location descriptor': 'right proximal'location descriptor_fin: ['right', 'proximal']clinical findings_fin: '8-mm diameter calculus': 'clinical finding': '8-mm diameter calculus', 'existence': 'pos_dx', 'descriptive_term': '8-mm diameter', 'observation': 'calculus'clinical findings_fin: 'obstruction': 'clinical finding': 'obstruction', 'existence': 'unc_dx', 'observation': 'obstruction'size_fin: ['size': '3.7 cm', 'size_descriptor': 'mass']location descriptor_fin: ['right']clinical findings_fin: 'amorphous mass': 'clinical finding': 'amorphous mass', 'existence': 'pos_dx', 'descriptive_term': 'amorphous', 'observation': 'mass', 'calcifications': 'clinical finding': 'calcifications', 'existence': 'pos_dx', 'observation': 'calcification'clinical findings_fin: 'atypical cyst': 'clinical finding': 'atypical cyst', 'existence': 'unc_dx', 'descriptive_term': 'atypical', 'observation': 'cyst', 'xanthogranulomatous pyelonephritis': 'clinical finding': 'xanthogranulomatous pyelonephritis', 'existence': 'unc_dx', 'descriptive_term': 'xanthogranulomatous', 'observation': 'pyelonephritis'location descriptor_fin: ['bilateral']clinical findings_fin: 'multiple additional bilateral renal cortical hypodensities': 'clinical finding': 'multiple additional bilateral renal cortical hypodensities', 'existence': 'pos_dx', 'descriptive_term': 'multiple additional bilateral renal cortical', 'observation': 'hypodensity', 'quantity_term': 'multiple', 'cysts': 'clinical finding': 'cysts', 'existence': 'pos_dx', 'observation': 'cyst'clinical findings_fin: 'significant abnormality': 'clinical finding': 'significant abnormality', 'existence': 'neg_dx', 'descriptive_term': 'significant', 'observation': 'abnormality'clinical findings_fin: 'large hiatal hernia': 'clinical finding': 'large hiatal hernia', 'existence': 'pos_dx', 'descriptive_term': 'large hiatal', 'observation': 'hernia', 'strength_term': 'large'anatomical entity_fin: 'esophagus': clinical findings_fin: 'patulous esophagus': 'clinical finding': 'patulous esophagus'clinical findings_fin: 'significant abnormality': 'clinical finding': 'significant abnormality', 'existence': 'neg_dx', 'descriptive_term': 'significant', 'observation': 'abnormality'clinical findings_fin: 'significant abnormality': 'clinical finding': 'significant abnormality', 'existence': 'neg_dx', 'descriptive_term': 'significant', 'observation': 'abnormality'clinical findings_fin: 'significant abnormality': 'clinical finding': 'significant abnormality', 'existence': 'neg_dx', 'descriptive_term': 'significant', 'observation': 'abnormality'clinical findings_fin: 'significant abnormality': 'clinical finding': 'significant abnormality', 'existence': 'neg_dx', 'descriptive_term': 'significant', 'observation': 'abnormality'clinical findings_fin: 'significant abnormality': 'clinical finding': 'significant abnormality', 'existence': 'neg_dx', 'descriptive_term': 'significant', 'observation': 'abnormality'clinical findings_fin: 'diverticulosis': 'clinical finding': 'diverticulosis', 'existence': 'pos_dx', 'observation': 'diverticulosis', 'diverticulitis': 'clinical finding': 'diverticulitis', 'existence': 'neg_dx', 'observation': 'diverticulitis'anatomical entity_fin: 'hip': location descriptor_fin: ['bilateral']clinical findings_fin: 'bilateral total hip replacement prostheses': 'clinical finding': 'bilateral total hip replacement prostheses', 'existence': 'pos_dx', 'descriptive_term': 'bilateral total hip replacement', 'observation': 'prosthes'location descriptor_fin: ['left']clinical findings_fin: 'bone remodeling': 'clinical finding': 'bone remodeling', 'existence': 'pos_dx', 'descriptive_term': 'bone', 'observation': 'remodeling'location descriptor_fin: ['adjacent']clinical findings_fin: 'metal streak artifact': 'clinical finding': 'metal streak artifact', 'existence': 'pos_dx', 'descriptive_term': 'metal streak', 'observation': 'artifact'anatomical entity_fin: 'lumbosacral spine': , 'pelvis': clinical findings_fin: 'degenerative changes': 'clinical finding': 'degenerative changes', 'existence': 'pos_dx', 'descriptive_term': 'degenerative', 'observation': 'change'clinical findings_fin: 'significant abnormality': 'clinical finding': 'significant abnormality', 'existence': 'neg_dx', 'descriptive_term': 'significant', 'observation': 'abnormality'"
pipe = pipeline(task="text-generation",
                model=model,
                tokenizer=tokenizer,
                max_length=4000,
                stopping_criteria = stopping_criteria_list,
                device = device,
                streamer=streamer,
                temperature = 0.1,
                top_p = 0.5,
                repetition_penalty=1.15)
result = pipe(f"<s>[INST] {prompt} [/INST] [RESP]")

. Atelectatic changes are noted in the base of the right lung, which is compatible with prior pneumonectomy. Acute infiltrates are not seen. Patulous esophagus and gallbladder sludge are also present. No significant abnormality noted. No significant abnormality noted. Multiple right renal calculi measuring 2 to 4 mm in size are again identified within the right kidney, consistent with right hydronephrosis. An 8-mm diameter calculus is again identified within the right proximal ureter, consistent with obstruction. Amorphous mass measuring approximately 5 x 6 cm is again identified within the right lower lobe, likely representing calcified granulation tissue or xanthogranulomatous pyelonephritis. Multiple additional bilateral renal cortical hypodensities are again identified, most likely representing multiple small cysts. No significant abnormality noted. Large hiatal hernia is again present. The esophageal lumen measures 90 mm in caliber. Patulous esophagus is again present. No signific

In [15]:
result

[{'generated_text': "<s>[INST] Write the finding section for a radiology report based on the following information: input: anatomical entity_fin: 'right lung': , 'base': clinical findings_fin: 'atelectatic changes': 'clinical finding': 'atelectatic changes', 'existence': 'pos_dx', 'descriptive_term': 'atelectatic', 'observation': 'change'anatomical entity_fin: 'right atrium': non-anatomical substance_fin: ['leads']object_fin: ['pacemaker']imaging observation_fin: ['infiltrates']clinical findings_fin: 'acute infiltrates': 'clinical finding': 'acute infiltrates', 'existence': 'neg_dx', 'descriptive_term': 'acute', 'observation': 'infiltrate', 'strength_term': 'acute', 'temporal_term': 'acute', 'effusions': 'clinical finding': 'effusions', 'existence': 'neg_dx', 'observation': 'effusion'clinical findings_fin: 'large hiatal hernia': 'clinical finding': 'large hiatal hernia', 'existence': 'pos_dx', 'descriptive_term': 'large hiatal', 'observation': 'hernia', 'strength_term': 'large'anatomic

In [None]:
token_ids = [60305]
# # Tokenize a text
# encoded_text = tokenizer("Hello, world!", return_tensors="pt")

# # Get token IDs
# token_ids = encoded_text['input_ids'][0].tolist()

# Decode the token IDs back to text
decoded_text = tokenizer.decode(token_ids, skip_special_tokens=False)
print(decoded_text)




In [None]:
# print(result[0]['generated_text'])

In [None]:
print(result)

[{'generated_text': "<s>[INST] Write the finding section for a radiology report based on the following information: anatomical entity_fin: {'parietal bone': {'location descriptor': 'right'}}location descriptor_fin: ['right']clinical findings_fin: {'prior examination , two burr holes have been placed within the right parietal bone': {'clinical finding': 'prior examination , two burr holes have been placed within the right parietal bone'}}anatomical entity_fin: {'subdural space': {}}location descriptor_fin: ['posterior', 'right']object_fin: ['drainage catheter']clinical findings_fin: {'more posterior burr hole': {'clinical finding': 'more posterior burr hole', 'existence': 'pos_dx', 'descriptive_term': 'more posterior burr', 'observation': 'hole', 'quantity_term': 'more'}, 'drainage catheter': {'clinical finding': 'drainage catheter', 'existence': 'pos_dx', 'descriptive_term': 'drainage', 'observation': 'catheter'}}anatomical entity_fin: {'scalp': {}}location descriptor_fin: ['right']cli