In [1]:
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig, get_peft_model
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def find_all_linear_names(model):
    cls = torch.nn.Linear
    lora_module_names = set()
    # Process of elimination: LoRA only targets on LLM backbone
    ignore_keywords = ['vision_tower', 'mm_projector', 'embed_tokens', 'lm_head', 'seg_projector', 'seg_module']
    for name, module in model.named_modules():
        if any(mm_keyword in name for mm_keyword in ignore_keywords):
            continue
        if isinstance(module, cls):
            lora_module_names.add(name)
    return list(lora_module_names)

In [3]:
device = torch.device('cuda') # 'cpu', 'cuda'
dtype = torch.bfloat16 # or bfloat16, float16, float32

model_name_or_path = '/import/c4dm-04/siyoul/Med3DLLM/checkpoint/amosmm_chatgpt_stage_1/checkpoint-100080'
proj_out_num = 256

base_model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    torch_dtype=dtype,
    device_map='auto',
    trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path,
    model_max_length=512,
    padding_side="right",
    use_fast=False,
    trust_remote_code=True
)

Some weights of the model checkpoint at /import/c4dm-04/siyoul/Med3DLLM/checkpoint/amosmm_chatgpt_stage_1/checkpoint-100080 were not used when initializing LamedPhi3ForCausalLM: ['base_model.model.lm_head.weight', 'base_model.model.model.embed_tokens.weight', 'base_model.model.model.layers.0.input_layernorm.weight', 'base_model.model.model.layers.0.mlp.down_proj.base_layer.weight', 'base_model.model.model.layers.0.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.0.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.0.mlp.gate_up_proj.base_layer.weight', 'base_model.model.model.layers.0.mlp.gate_up_proj.lora_A.default.weight', 'base_model.model.model.layers.0.mlp.gate_up_proj.lora_B.default.weight', 'base_model.model.model.layers.0.post_attention_layernorm.weight', 'base_model.model.model.layers.0.self_attn.o_proj.base_layer.weight', 'base_model.model.model.layers.0.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.0.self_attn.o

In [2]:
device = torch.device('cuda') # 'cpu', 'cuda'
dtype = torch.bfloat16 # or bfloat16, float16, float32

model_name_or_path = '/import/c4dm-04/siyoul/Med3DLLM/checkpoint/amosmm_chatgpt_stage_1/checkpoint-100080'
lora_model_path = '/import/c4dm-04/siyoul/Med3DLLM/checkpoint/amosmm_chatgpt_stage_1/model_with_lora.bin'
state_dict = torch.load(lora_model_path, map_location="cpu")

base_model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    torch_dtype=dtype,
    device_map='auto',
    trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path,
    model_max_length=512,
    padding_side="right",
    use_fast=False,
    trust_remote_code=True
)
if lora_model_path is not None:
    
    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=find_all_linear_names(base_model),
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )
    print("Adding LoRA adapters only on LLM.")
    model = get_peft_model(base_model, lora_config)
    # lamed_model.print_trainable_parameters()
    print("Load weights with LoRA")
    model.load_state_dict(state_dict, strict=True)
    print("Merge weights with LoRA")
    model = model.merge_and_unload()

Some weights of the model checkpoint at /import/c4dm-04/siyoul/Med3DLLM/checkpoint/amosmm_chatgpt_stage_1/checkpoint-100080 were not used when initializing LamedPhi3ForCausalLM: ['base_model.model.lm_head.weight', 'base_model.model.model.embed_tokens.weight', 'base_model.model.model.layers.0.input_layernorm.weight', 'base_model.model.model.layers.0.mlp.down_proj.base_layer.weight', 'base_model.model.model.layers.0.mlp.down_proj.lora_A.default.weight', 'base_model.model.model.layers.0.mlp.down_proj.lora_B.default.weight', 'base_model.model.model.layers.0.mlp.gate_up_proj.base_layer.weight', 'base_model.model.model.layers.0.mlp.gate_up_proj.lora_A.default.weight', 'base_model.model.model.layers.0.mlp.gate_up_proj.lora_B.default.weight', 'base_model.model.model.layers.0.post_attention_layernorm.weight', 'base_model.model.model.layers.0.self_attn.o_proj.base_layer.weight', 'base_model.model.model.layers.0.self_attn.o_proj.lora_A.default.weight', 'base_model.model.model.layers.0.self_attn.o

NameError: name 'find_all_linear_names' is not defined

In [4]:
for name, param in base_model.named_parameters():
    print(name, param.size())
print(len(list(base_model.parameters())))   

model.embed_tokens.weight torch.Size([32015, 3072])
model.layers.0.self_attn.o_proj.weight torch.Size([3072, 3072])
model.layers.0.self_attn.qkv_proj.weight torch.Size([9216, 3072])
model.layers.0.mlp.gate_up_proj.weight torch.Size([16384, 3072])
model.layers.0.mlp.down_proj.weight torch.Size([3072, 8192])
model.layers.0.input_layernorm.weight torch.Size([3072])
model.layers.0.post_attention_layernorm.weight torch.Size([3072])
model.layers.1.self_attn.o_proj.weight torch.Size([3072, 3072])
model.layers.1.self_attn.qkv_proj.weight torch.Size([9216, 3072])
model.layers.1.mlp.gate_up_proj.weight torch.Size([16384, 3072])
model.layers.1.mlp.down_proj.weight torch.Size([3072, 8192])
model.layers.1.input_layernorm.weight torch.Size([3072])
model.layers.1.post_attention_layernorm.weight torch.Size([3072])
model.layers.2.self_attn.o_proj.weight torch.Size([3072, 3072])
model.layers.2.self_attn.qkv_proj.weight torch.Size([9216, 3072])
model.layers.2.mlp.gate_up_proj.weight torch.Size([16384, 30

In [5]:
lora_model_path = '/import/c4dm-04/siyoul/Med3DLLM/checkpoint/amosmm_chatgpt_stage_1/model_with_lora.bin'
state_dict = torch.load(lora_model_path, map_location="cpu")
for k,v in list(state_dict.items()):
    print(k, v.size())  

base_model.model.model.embed_tokens.weight torch.Size([32015, 3072])
base_model.model.model.layers.0.self_attn.o_proj.base_layer.weight torch.Size([3072, 3072])
base_model.model.model.layers.0.self_attn.o_proj.lora_A.default.weight torch.Size([16, 3072])
base_model.model.model.layers.0.self_attn.o_proj.lora_B.default.weight torch.Size([3072, 16])
base_model.model.model.layers.0.self_attn.qkv_proj.base_layer.weight torch.Size([9216, 3072])
base_model.model.model.layers.0.self_attn.qkv_proj.lora_A.default.weight torch.Size([16, 3072])
base_model.model.model.layers.0.self_attn.qkv_proj.lora_B.default.weight torch.Size([9216, 16])
base_model.model.model.layers.0.mlp.gate_up_proj.base_layer.weight torch.Size([16384, 3072])
base_model.model.model.layers.0.mlp.gate_up_proj.lora_A.default.weight torch.Size([16, 3072])
base_model.model.model.layers.0.mlp.gate_up_proj.lora_B.default.weight torch.Size([16384, 16])
base_model.model.model.layers.0.mlp.down_proj.base_layer.weight torch.Size([3072, 8

In [6]:

if lora_model_path is not None:
        
    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=find_all_linear_names(base_model),
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )
    print("Adding LoRA adapters only on LLM.")
    model = get_peft_model(base_model, lora_config)
    # lamed_model.print_trainable_parameters()
    print("Load weights with LoRA")
    model.load_state_dict(state_dict, strict=True)
    print("Merge weights with LoRA")
    model = model.merge_and_unload()
model = model.to(device=device)


Adding LoRA adapters only on LLM.
Load weights with LoRA
Merge weights with LoRA


In [7]:
model

LamedPhi3ForCausalLM(
  (model): LamedPhi3Model(
    (embed_tokens): Embedding(32015, 3072, padding_idx=32000)
    (embed_dropout): Dropout(p=0.0, inplace=False)
    (layers): ModuleList(
      (0-31): 32 x Phi3DecoderLayer(
        (self_attn): Phi3Attention(
          (o_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (qkv_proj): Linear(in_features=3072, out_features=9216, bias=False)
          (rotary_emb): Phi3RotaryEmbedding()
        )
        (mlp): Phi3MLP(
          (gate_up_proj): Linear(in_features=3072, out_features=16384, bias=False)
          (down_proj): Linear(in_features=8192, out_features=3072, bias=False)
          (activation_fn): SiLU()
        )
        (input_layernorm): Phi3RMSNorm()
        (resid_attn_dropout): Dropout(p=0.0, inplace=False)
        (resid_mlp_dropout): Dropout(p=0.0, inplace=False)
        (post_attention_layernorm): Phi3RMSNorm()
      )
    )
    (norm): Phi3RMSNorm()
    (vision_tower): ViT3DTower(
      (vision_tow

In [8]:
from src.utils.linear_3d_transform import Linear3DTransform
l3dt = Linear3DTransform(data_type="validation")
image_file_path = "/import/c4dm-04/siyoul/Med3DLLM/datasets/AMOS-MM/imagesVa/amos_0008.nii.gz"
image = l3dt(image_file_path)
print(image.shape)



torch.Size([8, 32, 256, 256])


In [9]:
# question = "Can you provide a caption consists of findings for this medical image?"
question = "Can you provide a diagnosis based on the fingings in chest in this image?."
# question = "What is liver in this image? Please output the box."
question_ids = tokenizer(
            question, add_special_tokens=False, max_length=768, truncation=True, padding="max_length", return_tensors="pt", padding_side="right"
        )["input_ids"][0]
image_tokens = "<im_patch>" * proj_out_num
input_txt = image_tokens + question
input_id = tokenizer(input_txt, return_tensors="pt")['input_ids'].to(device=device)


# generation = model.generate(image_pt, input_id, max_new_tokens=256, do_sample=True, top_p=0.9, temperature=1.0)
with torch.cuda.amp.autocast(): 
    generation = model.generate(image.unsqueeze(0).to(device=device), input_id, question_ids=question_ids.to(device=device), max_new_tokens=768, do_sample=True, top_p=0.9, temperature=1.0)

generated_texts = tokenizer.batch_decode(generation, skip_special_tokens=True)

print('question', question)
print('generated_texts', generated_texts[0])

Keyword arguments {'padding_side': 'right'} not recognized.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token.As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
You are not running the flash-attention implementation, expect numerical differences.


question Can you provide a diagnosis based on the fingings in chest in this image?.
generated_texts The CT report indicates the presence of pericardial and pleural effusions, along with localized incomplete expansion in the lower lobes of both lungs.


In [36]:
from monai.transforms import (
        LoadImage,
        Compose,
        CropForeground,
        ToTensor,
        SaveImage,
        ScaleIntensityRangePercentiles,
        RandRotate90,
        RandFlip,
        NormalizeIntensity,
        RandScaleIntensity,
        RandShiftIntensity,
        Resize,
        Transpose,
    )
from monai.data.image_reader import NibabelReader

transforms = Compose(
                [
                LoadImage(image_only=True, ensure_channel_first=False, reader=NibabelReader()),
                # Transpose(indices=(2, 0, 1)),
                ScaleIntensityRangePercentiles(lower=0.5, upper=99.5, b_max=1.0, b_min=0.0, clip=True),
                CropForeground(source_key="image"),
                #Resize(spatial_size=[32, 256,256],mode='trilinear'),
                RandRotate90(prob=0.5, spatial_axes=(1, 2)),
                RandFlip(prob=0.10, spatial_axis=0),
                RandFlip(prob=0.10, spatial_axis=1),
                RandFlip(prob=0.10, spatial_axis=2),
                RandScaleIntensity(factors=0.1, prob=0.5),
                RandShiftIntensity(offsets=0.1, prob=0.5),
                ToTensor(),
                ]
            )



In [37]:
image = transforms(image_file_path)
print(image.shape)

RuntimeError: applying transform <monai.transforms.spatial.array.RandRotate90 object at 0x706948f0ee50>

In [32]:
image = image.transpose(2, 0, 1)
print(image.shape)

TypeError: transpose() received an invalid combination of arguments - got (int, int, int), but expected one of:
 * (int dim0, int dim1)
 * (name dim0, name dim1)


In [23]:
# question = "Can you provide a caption consists of findings for this medical image?"
question = "Can you provide a diagnosis based on the fingings in chest in this image?."
# question = "What is liver in this image? Please output the box."
question_ids = tokenizer(
            question, add_special_tokens=False, max_length=768, truncation=True, padding="max_length", return_tensors="pt", padding_side="right"
        )["input_ids"][0]
image_tokens = "<im_patch>" * proj_out_num
input_txt = image_tokens + question
input_id = tokenizer(input_txt, return_tensors="pt")['input_ids'].to(device=device)


# generation = model.generate(image_pt, input_id, max_new_tokens=256, do_sample=True, top_p=0.9, temperature=1.0)
with torch.cuda.amp.autocast(): 
    generation = model.generate(image[0].to(device=device), input_id, question_ids=question_ids.to(device=device), max_new_tokens=768, do_sample=True, top_p=0.9, temperature=1.0)

generated_texts = tokenizer.batch_decode(generation, skip_special_tokens=True)

print('question', question)
print('generated_texts', generated_texts[0])

Keyword arguments {'padding_side': 'right'} not recognized.


KeyError: 3