In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Hub repository id of the checkpoint; reused below for the tokenizer.
model_name = "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF"

# Load the causal-LM weights in bfloat16 and let `device_map="auto"` pick the placement.
# NOTE(review): `cache_dir` is a machine-specific absolute path; consider moving it to a
# config constant or an environment variable so the notebook can run on another machine.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    cache_dir='/media/elboardy/RLAB-Disk01/Large-Language-Models-Weights',
)

# Tokenizer from the same repository. No `cache_dir` is passed here, so it is resolved
# through the library's default cache location rather than the directory used above.
tokenizer = AutoTokenizer.from_pretrained(model_name)

  from .autonotebook import tqdm as notebook_tqdm

A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/home/elboardy/anaconda3/envs/nvidia-nemotron/lib/python3.9/runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/elboardy/anaconda3/envs/nvidia-nemotron/lib/python3.9/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/home/elboardy/anaconda3/envs/nvidia-nemotron/lib/python3.9/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/home/elboardy/anaconda3/

In [None]:
# Build the work list: every case directory under `cases_dir` that does not yet
# contain a saved 'nvidia-neumtron-70b-response.txt' file.
cases_to_process = []
for case in os.listdir(cases_dir):
    case_dir = os.path.join(cases_dir, case)
    # Entries that are not directories are not cases.
    if not os.path.isdir(case_dir):
        continue
    response_path = os.path.join(case_dir, 'nvidia-neumtron-70b-response.txt')
    # A response file already on disk means this case needs no further work.
    if os.path.exists(response_path):
        continue
    cases_to_process.append(case)
print(f"Found {len(cases_to_process)} cases to process.")

A sweet question!

Let's count the "R"s in "strawberry":

1. S
2. T
3. R
4. A
5. W
6. B
7. E
8. R
9. R
10. Y

There are **3 "R"s** in the word "strawberry".


In [None]:
# Show the banner and the pending case list on separate lines.
print("Processing cases...--->", cases_to_process, sep="\n")

In [None]:
# Cases whose generation raised an error; written to disk after the loop.
failed_cases = []

# File name under which each response is stored. It must be the same name the
# pending-case filter checks when it builds `cases_to_process`; otherwise finished
# cases are never recognised as done and get regenerated on every run.
response_filename = 'nvidia-neumtron-70b-response.txt'

# Report template sent as the system message. It is identical for every case, so it
# is built once here instead of on every loop iteration.
# NOTE(review): the "Patient Information" section contains concrete values (age, sex,
# volumes, survival days) that look like they belong to one specific patient; the model
# may copy them into other reports. Confirm whether these should be placeholders.
system_prompt = """Consider that you are a professional radiologist with several years of experience and you are now treating a patient. Write a fully detailed diagnosis report for this case, avoiding any potential hallucination and paying close attention to all of the batch images attached to this message.

Use the following structure for the report:

## Radiologist's Report

### Patient Information:
- *Age:* 65
- *Sex:* Male
- *Days from earliest imaging to surgery:* 1
- *Histopathological Subtype:* Glioblastoma
- *WHO Grade:* 4
- *IDH Status:* Mutant
- *Preoperative KPS:* 80
- *Preoperative Contrast-Enhancing Tumor Volume (cm³):* 103.21
- *Preoperative T2/FLAIR Abnormality (cm³):* 36.29
- *Extent of Resection (EOR %):* 100.0
- *EOR Type:* Gross Total Resection (GTR)
- *Adjuvant Therapy:* Radiotherapy (RT) + Temozolomide (TMZ)
- *Progression-Free Survival (PFS) Days:* 649
- *Overall Survival (OS) Days:* 736

#### Tumor Characteristics:

#### Segmentation Analysis:

#### Surgical Considerations:

### Clinical Summary:

### Recommendations:

### Prognostic Considerations:

### Follow-Up Plan:

### Additional Notes*(if any)*:

Ensure all findings from all of the images and clinical data provided. Please mention at the end of the report how many images were reviewed."""

# Iterate through the pending cases
for case in cases_to_process:
    case_dir = os.path.join(cases_dir, case)
    if not os.path.isdir(case_dir):
        continue  # Skip non-directory files

    # os.listdir() returns entries in arbitrary order; sort so that the
    # "last image" chosen below is the same on every run.
    image_files = sorted(f for f in os.listdir(case_dir) if f.lower().endswith('.png'))
    print(f"Found {len(image_files)} image files for case {case}")

    clinical_information_path = os.path.join(case_dir, 'diagnostic_prompt.txt')
    if not os.path.exists(clinical_information_path):
        print(f"Missing clinical information file for case: {case}")
        continue

    print(f"Reading clinical information for case {case}")
    # Context manager so the file handle is closed as soon as the text is read.
    with open(clinical_information_path, 'r', encoding='utf-8') as info_file:
        clinical_information = info_file.read()

    user_prompt = f"""You will be given batches of images, which are different sequences of MRI scans. 
    The images are for patients who are likely to have a brain tumor. Each image will contain up to 10 slices for 5 different sequences and the segmentation masks for the tumor at the bottom row of the image. 
    Additional clinical data about the patient is: 
    {clinical_information}"""

    # Every image is loaded, but only the last one that loads successfully is
    # kept and sent to the model.
    last_image = None
    for image_file in image_files:
        image_path = os.path.join(case_dir, image_file)
        img = load_image(image_path)
        if img is not None:
            last_image = {"type": "image", "image": img}
            print(f"Loaded image: {image_file} for case {case}")
    if last_image is None:
        print(f"No valid images found for case: {case}")
        continue

    # Prepare messages for Qwen2-VL
    # NOTE(review): the message layout and the `processor` / `process_vision_info`
    # calls below follow the Qwen2-VL vision pipeline. Neither name is defined in the
    # cells visible here, and the first cell loads a text-only causal LM with a plain
    # tokenizer. Confirm that `processor` and `model` actually belong together.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": [last_image, {"type": "text", "text": user_prompt}]},
    ]
    print(f"Messages prepared for case {case}")

    try:
        # Prepare inputs for Qwen2-VL
        print(f"Processing inputs for case {case}")
        text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        image_inputs, video_inputs = process_vision_info(messages)

        # Generate response with gradient calculations disabled
        with torch.no_grad():
            inputs = processor(
                text=[text],
                images=image_inputs,
                videos=video_inputs,
                padding=True,
                return_tensors="pt",
            ).to(model.device)

            # NOTE(review): `temperature` / `top_p` presumably only take effect when
            # sampling is enabled (`do_sample=True`, possibly via the model's
            # generation config) — confirm.
            generated_ids = model.generate(**inputs, max_new_tokens=4096, temperature=0.7, top_p=0.9)
            # Strip the prompt tokens so that only the newly generated part is decoded.
            generated_ids_trimmed = [
                out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
            ]
            response_text = processor.batch_decode(
                generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
            )[0]

        print(f"Response generated for case {case}, length: {len(response_text)} characters")
        # Save the response
        response_path = os.path.join(case_dir, response_filename)
        with open(response_path, 'w', encoding='utf-8') as f:
            f.write(response_text)
        print(f"Response saved for case {case}.")

    except torch.cuda.OutOfMemoryError:
        print(f"CUDA out of memory error for case {case}. Skipping this case.")
        failed_cases.append(case)
        continue
    except KeyboardInterrupt:
        # Deliberate: an interrupt skips only the current case, it does not stop the loop.
        print("Interrupted by user. Proceeding to the next case.")
        continue
    except Exception as e:
        print(f"Error processing case {case}: {e}")
        failed_cases.append(case)
        continue
    finally:
        # Memory management. This runs on success and on every failure path, so a
        # case that ran out of memory releases what it held before the next one starts.
        inputs = generated_ids = generated_ids_trimmed = response_text = None
        gc.collect()
        torch.cuda.empty_cache()
        print(f"Memory cleared for case {case}")

# Write the names collected in `failed_cases` to a text file inside `cases_dir`,
# one name per line. Mode 'w' replaces any previous content of the file.
failed_cases_path = os.path.join(cases_dir, 'failed_cases_72.txt')
with open(failed_cases_path, 'w', encoding='utf-8') as log_file:
    log_file.writelines(f"{name}\n" for name in failed_cases)
print(f"Failed cases logged in {failed_cases_path}")