In [2]:
import os
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
from PIL import Image
from transformers import AutoModelForCausalLM
from deepseek_vl2.models import DeepseekVLV2Processor, DeepseekVLV2ForCausalLM
from deepseek_vl2.utils.io import load_pil_images
import torch
import gc
import traceback
from accelerate import init_empty_weights, load_checkpoint_and_dispatch
from torch.cuda.amp import autocast
import pickle


if __name__ == '__main__':
    cases_dir = '/media/RLAB-Disk01/(final)merged_images_with_labels_order_and_folders_mask_normalized/'
    cache_dir = '/media/RLAB-Disk01/Large-Language-Models-Weights'
    model_id = "deepseek-ai/deepseek-vl2"
    offload_folder = '/media/RLAB-Disk01/Large-Language-Models-Weights'
    
    cases_to_process = []
    for case in os.listdir(cases_dir):
        case_dir = os.path.join(cases_dir, case)
        if os.path.isdir(case_dir):
            response_path = os.path.join(case_dir, 'deepseek-vl2.txt')
            if not os.path.exists(response_path):
                cases_to_process.append(case)
    print(f"Found {len(cases_to_process)} cases to process.")

    processing_queue = []

    torch.set_default_tensor_type(torch.FloatTensor)
    processor = DeepseekVLV2Processor.from_pretrained(model_id, torch_dtype=torch.float32,
                                                     device_map="cpu", trust_remote_code=True, 
                                                     cache_dir=cache_dir, 
                                                     offload_folder=offload_folder)

    tokenizer = processor.tokenizer
    # tokenizer.add_special_tokens({'pad_token': '<|pad|>'})
    # tokenizer.add_special_tokens({'additional_special_tokens': ['<image>','<|ref|>', '<|/ref|>', '<|det|>', '<|/det|>', '<|grounding|>','<|User|>', '<|Assistant|>']})
    
    def build_conversation(case_dir, user_prompt,system_prompt):
        image_files = [f for f in os.listdir(case_dir) if f.lower().endswith('.png')]
        image_paths = [os.path.join(case_dir, f) for f in image_files]
        
        image_files= [image_files[-1]]
        image_paths = [image_paths[-1]]
        # Create image placeholders
        print(image_files)
        print(image_paths)
        image_tags = " ".join(["<image>"] * len(image_files))
        print(image_tags)
        
        return [
    
             {
                "role": "<|User|>",
                "content": f"{image_tags}\n<|ref|>{system_prompt}\n{user_prompt}<|/ref|>",
                "images": image_paths
            },
    
            {"role": "<|Assistant|>", "content": ""}
        ]
    
    failed_cases = []
    
    for case in cases_to_process:
        case_dir = os.path.join(cases_dir, case)
    
        clinical_path = os.path.join(case_dir, 'diagnostic_prompt.txt')
        if not os.path.exists(clinical_path):
                print(f"Missing clinical info for {case}")
                failed_cases.append(case)
                continue
                
        clinical_info = open(clinical_path).read()
            
    
        system_prompt = "Consider that you are a professional radiologist with several years of experience and you are now treating a patient. Write a fully detailed diagnosis report for this case, avoiding any potential hallucination and paying close attention to all of the batch images attached to this message.\n" +\
    "\n" +\
    "Use the following structure for the report:\n" +\
    "\n" +\
    "## Radiologist's Report\n" +\
    "\n" +\
    "### Patient Information:\n" +\
    "- *Age:* 65\n" +\
    "- *Sex:* Male\n" +\
    "- *Days from earliest imaging to surgery:* 1\n" +\
    "- *Histopathological Subtype:* Glioblastoma\n" +\
    "- *WHO Grade:* 4\n" +\
    "- *IDH Status:* Mutant\n" +\
    "- *Preoperative KPS:* 80\n" +\
    "- *Preoperative Contrast-Enhancing Tumor Volume (cm³):* 103.21\n" +\
    "- *Preoperative T2/FLAIR Abnormality (cm³):* 36.29\n" +\
    "- *Extent of Resection (EOR %):* 100.0\n" +\
    "- *EOR Type:* Gross Total Resection (GTR)\n" +\
    "- *Adjuvant Therapy:* Radiotherapy (RT) + Temozolomide (TMZ)\n" +\
    "- *Progression-Free Survival (PFS) Days:* 649\n" +\
    "- *Overall Survival (OS) Days:* 736\n" +\
    "\n" +\
    "#### Tumor Characteristics:\n" +\
    "\n" +\
    "#### Segmentation Analysis:\n" +\
    "\n" +\
    "#### Surgical Considerations:\n" +\
    "\n" +\
    "### Clinical Summary:\n" +\
    "\n" +\
    "### Recommendations:\n" +\
    "\n" +\
    "### Prognostic Considerations:\n" +\
    "\n" +\
    "### Follow-Up Plan:\n" +\
    "\n" +\
    "### Additional Notes*(if any)*:\n" +\
    "\n" +\
    "Ensure all findings from all of the images and clinical data provided. Please mention at the end of the report how many images were reviewed."
    
        user_prompt = f"You will be given batches of images, which are different sequences of MRI scans. You should do your best in answering \n" +\
    f"    The images are for patients who are likely to have a brain tumor. Each image will contain up to 10 slices for 5 different sequences and the segmentation masks for the tumor at the bottom row of the image. \n" +\
    f"    Additional clinical data about the patient is: \n" +\
    f"    {clinical_info}"
        # Prepare conversation
        conversation = build_conversation(case_dir, user_prompt,system_prompt)
        pil_images = load_pil_images(conversation)
        
        print(conversation)

        # Process inputs on CPU
        try:
            inputs = processor(
                conversations=conversation,
                images=pil_images,
                force_batchify=True,
                system_prompt=system_prompt
            )

            processing_queue.append({
                'case_dir': case_dir,
                'inputs': inputs,
                'clinical_info': clinical_info
            })

            del conversation, pil_images, inputs, clinical_info, system_prompt, user_prompt, clinical_path, case_dir
            #write a line to delete the rest of the variables
            
            torch.cuda.empty_cache()

            print(f"Inputs saved on case {case}")

        except Exception as e:
             print(f"Error during input processing for case {case}: {e}")
             traceback.print_exc()
             failed_cases.append(case)
            
        
   
        gc.collect()
        torch.cuda.empty_cache()
    
    # Save failed cases
    if failed_cases:
        with open(os.path.join(cases_dir, 'failed_deepseek-vl2.txt'), 'w') as f:
            f.write("\n".join(failed_cases))

  from .autonotebook import tqdm as notebook_tqdm
  warn(


Found 29 cases to process.


  _C._set_default_tensor_type(t)
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Add pad token = ['<｜▁pad▁｜>'] to the tokenizer
<｜▁pad▁｜>:2
Add image token = ['<image>'] to the tokenizer
<image>:128815
Add grounding-related tokens = ['<|ref|>', '<|/ref|>', '<|det|>', '<|/det|>', '<|grounding|>'] to the tokenizer with input_ids
<|ref|>:128816
<|/ref|>:128817
<|det|>:128818
<|/det|>:128819
<|grounding|>:128820
Add chat tokens = ['<|User|>', '<|Assistant|>'] to the tokenizer with input_ids
<|User|>:128821
<|Assistant|>:128822

['RHUH-0011_batch_7.png']
['/media/RLAB-Disk01/(final)merged_images_with_labels_order_and_folders_mask_normalized/RHUH-0011/RHUH-0011_batch_7.png']
<image>
[{'role': '<|User|>', 'content': "<image>\n<|ref|>Consider that you are a professional radiologist with several years of experience and you are now treating a patient. Write a fully detailed diagnosis report for this case, avoiding any potential hallucination and paying close attention to all of the batch images attached to this message.\n\nUse the following structure for the report:\n\n## Ra

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Inputs saved on case RHUH-0011
['RHUH-0012_batch_9.png']
['/media/RLAB-Disk01/(final)merged_images_with_labels_order_and_folders_mask_normalized/RHUH-0012/RHUH-0012_batch_9.png']
<image>
[{'role': '<|User|>', 'content': "<image>\n<|ref|>Consider that you are a professional radiologist with several years of experience and you are now treating a patient. Write a fully detailed diagnosis report for this case, avoiding any potential hallucination and paying close attention to all of the batch images attached to this message.\n\nUse the following structure for the report:\n\n## Radiologist's Report\n\n### Patient Information:\n- *Age:* 65\n- *Sex:* Male\n- *Days from earliest imaging to surgery:* 1\n- *Histopathological Subtype:* Glioblastoma\n- *WHO Grade:* 4\n- *IDH Status:* Mutant\n- *Preoperative KPS:* 80\n- *Preoperative Contrast-Enhancing Tumor Volume (cm³):* 103.21\n- *Preoperative T2/FLAIR Abnormality (cm³):* 36.29\n- *Extent of Resection (EOR %):* 100.0\n- *EOR Type:* Gross Total R

In [3]:
del processor, tokenizer

In [4]:
import os
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
from transformers import AutoModelForCausalLM
from deepseek_vl2.models import DeepseekVLV2Processor, DeepseekVLV2ForCausalLM
import torch
from accelerate import init_empty_weights, load_checkpoint_and_dispatch

if __name__ == '__main__':
    cases_dir = '/media/RLAB-Disk01/(final)merged_images_with_labels_order_and_folders_mask_normalized/'
    cache_dir = '/media/RLAB-Disk01/Large-Language-Models-Weights'
    model_id = "deepseek-ai/deepseek-vl2"
    offload_folder = '/media/RLAB-Disk01/Large-Language-Models-Weights'
    
    cases_to_process = []
    for case in os.listdir(cases_dir):
        case_dir = os.path.join(cases_dir, case)
        if os.path.isdir(case_dir):
            response_path = os.path.join(case_dir, 'deepseek-vl2.txt')
            if not os.path.exists(response_path):
                cases_to_process.append(case)
    print(f"Found {len(cases_to_process)} cases to process.")

    torch.set_default_tensor_type(torch.FloatTensor)
            # Loading the Model to GPU

    torch.set_default_device('cpu')



    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float32,
        trust_remote_code=True,
        device_map="cpu",  # Let accelerate handle CPU allocation
        cache_dir=cache_dir
    ).eval()

    # Load processor on CPU
    processor = DeepseekVLV2Processor.from_pretrained(
        model_id,
        torch_dtype=torch.float32,
        cache_dir=cache_dir
    )
    tokenizer = processor.tokenizer
    # tokenizer.add_special_tokens({'pad_token': '<|pad|>'})
    # tokenizer.add_special_tokens({'additional_special_tokens': ['<image>','<|ref|>', '<|/ref|>', '<|det|>', '<|/det|>', '<|grounding|>','<|User|>', '<|Assistant|>']})    
    model = model.eval()
    failed_cases = []

Found 29 cases to process.


Loading checkpoint shards: 100%|██████████| 8/8 [15:31<00:00, 116.48s/it]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Add pad token = ['<｜▁pad▁｜>'] to the tokenizer
<｜▁pad▁｜>:2
Add image token = ['<image>'] to the tokenizer
<image>:128815
Add grounding-related tokens = ['<|ref|>', '<|/ref|>', '<|det|>', '<|/det|>', '<|grounding|>'] to the tokenizer with input_ids
<|ref|>:128816
<|/ref|>:128817
<|det|>:128818
<|/det|>:128819
<|grounding|>:128820
Add chat tokens = ['<|User|>', '<|Assistant|>'] to the tokenizer with input_ids
<|User|>:128821
<|Assistant|>:128822



In [5]:
print(processing_queue[-1])

{'case_dir': '/media/RLAB-Disk01/(final)merged_images_with_labels_order_and_folders_mask_normalized/RHUH-0040', 'inputs': BatchCollateOutput(sft_format=["Consider that you are a professional radiologist with several years of experience and you are now treating a patient. Write a fully detailed diagnosis report for this case, avoiding any potential hallucination and paying close attention to all of the batch images attached to this message.\n\nUse the following structure for the report:\n\n## Radiologist's Report\n\n### Patient Information:\n- *Age:* 65\n- *Sex:* Male\n- *Days from earliest imaging to surgery:* 1\n- *Histopathological Subtype:* Glioblastoma\n- *WHO Grade:* 4\n- *IDH Status:* Mutant\n- *Preoperative KPS:* 80\n- *Preoperative Contrast-Enhancing Tumor Volume (cm³):* 103.21\n- *Preoperative T2/FLAIR Abnormality (cm³):* 36.29\n- *Extent of Resection (EOR %):* 100.0\n- *EOR Type:* Gross Total Resection (GTR)\n- *Adjuvant Therapy:* Radiotherapy (RT) + Temozolomide (TMZ)\n- *Pr

In [None]:
for item in processing_queue:
        try:
            # Move inputs to model device
            inputs = item['inputs']
            print(inputs)
            prepared_inputs = {
                        "input_ids": inputs.input_ids.to(torch.long),  # Keep input_ids as Long
                        "attention_mask": inputs.attention_mask.to(model.device),
                         "pixel_values": inputs.images.to(torch.float32) if hasattr(inputs,'images') else None,
                         "images_seq_mask": inputs.images_seq_mask.to(model.device) if hasattr(inputs,'images_seq_mask') else None
    
                       }
            print(f"Inputs moved to model device for case {case}")
            inputs_embeds = model.prepare_inputs_embeds(**prepared_inputs)
            print(f"Messages prepared for case {case_dir[-1]}")
            # Generate with mixed precision
            with torch.no_grad():
                outputs = model.language.generate(
                        inputs_embeds=inputs_embeds,
                        attention_mask=inputs["attention_mask"],
                        pad_token_id=tokenizer.pad_token_id,
                        bos_token_id=tokenizer.bos_token_id,
                        eos_token_id=tokenizer.eos_token_id,
                        max_new_tokens=4096,
                        temperature=0.7,
                        top_p=0.9,
                        do_sample=False,
                        use_cache=True,
                    )
                
            # Decode and save
            print(outputs.cpu().tolist())
            print("----------------------------------------------------------------------------------")
            report = processor.tokenizer.decode(outputs[0].cpu().tolist(), skip_special_tokens=True)
            print(report)
            print("----------------------------------------------------------------------------------")
            print(f"Report generated for case {case}")
            with open(os.path.join(item['case_dir'], 'deepseek-vl2.txt'), 'w') as f:
                print("writing report now")
                print(os.path.join(item['case_dir']))
                f.write(report)
                print("report written")
                
            # Cleanup
            del inputs, outputs
            torch.cuda.empty_cache()
            
        except Exception as e:
            failed_cases.append(item['case_dir'])
            print(f"Failed generation for {item['case_dir']}: {str(e)}")
            print(traceback.format_exc())
            break


    # Final cleanup
gc.collect()
torch.cuda.empty_cache()
    
    # Save failure log
if failed_cases:
        with open(os.path.join(cases_dir, 'failed_deepseek-vl2.txt'), 'w') as f:
            f.write("\n".join(failed_cases))

BatchCollateOutput(sft_format=["Consider that you are a professional radiologist with several years of experience and you are now treating a patient. Write a fully detailed diagnosis report for this case, avoiding any potential hallucination and paying close attention to all of the batch images attached to this message.\n\nUse the following structure for the report:\n\n## Radiologist's Report\n\n### Patient Information:\n- *Age:* 65\n- *Sex:* Male\n- *Days from earliest imaging to surgery:* 1\n- *Histopathological Subtype:* Glioblastoma\n- *WHO Grade:* 4\n- *IDH Status:* Mutant\n- *Preoperative KPS:* 80\n- *Preoperative Contrast-Enhancing Tumor Volume (cm³):* 103.21\n- *Preoperative T2/FLAIR Abnormality (cm³):* 36.29\n- *Extent of Resection (EOR %):* 100.0\n- *EOR Type:* Gross Total Resection (GTR)\n- *Adjuvant Therapy:* Radiotherapy (RT) + Temozolomide (TMZ)\n- *Progression-Free Survival (PFS) Days:* 649\n- *Overall Survival (OS) Days:* 736\n\n#### Tumor Characteristics:\n\n#### Segme



[[361, 342, 4571, 19054, 14, 790, 342, 4571, 554, 3645, 16, 342, 4571, 19054, 14, 790, 342, 4571, 554, 3645, 16, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 4571, 19054, 14, 342, 45