# **Transcript to SOAP Note Generation using m42-health/Llama3-Med42-8B**

**1. Introduction**

This notebook converts medical transcripts into detailed SOAP notes using `m42-health/Llama3-Med42-8B`.
It leverages the Hugging Face `transformers` library with 8-bit quantization for reduced memory footprint compared to full precision, while aiming for good performance.
A GPU is highly recommended for running this 8B parameter model effectively.

In [None]:
# Install setup libraries Libraries
!pip install -U bitsandbytes
! pip install -q -U bitsandbytes accelerate transformers torch sentencepiece tqdm


In [None]:
# Import necessary libraries
import os
import re
from datetime import datetime

import torch
from tqdm.auto import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from transformers import BitsAndBytesConfig

2. **Model Configuration and Loading (8-bit Quantization)**

Loads the `m42-health/Llama3-Med42-8B` model and tokenizer with 8-bit quantization.

- `load_in_8bit=True` enables 8-bit quantization via `bitsandbytes`.
- `device_map="auto"` automatically places the model on the available GPU(s) or CPU.
- `torch_dtype` sets the precision for layers that are not converted to 8-bit and for computations.


In [None]:
MODEL_NAME_OR_PATH = "m42-health/Llama3-Med42-8B"

# Global handles for the pipeline and tokenizer. They are populated only when
# loading succeeds; later cells check MODEL_LOADED_SUCCESSFULLY before use.
LLM_PIPELINE = None
TOKENIZER = None
MODEL_LOADED_SUCCESSFULLY = False

# Whether the current CUDA device supports bfloat16; stays False without CUDA.
BF16_SUPPORTED = False

print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA device count: {torch.cuda.device_count()}")
    print(f"Current CUDA device: {torch.cuda.current_device()}")
    current_device_name = torch.cuda.get_device_name(torch.cuda.current_device())
    print(f"Device name: {current_device_name}")

    # Ask PyTorch directly whether the device supports bfloat16. The previous
    # probe searched for "cuda" in the string form of a tensor dtype (for
    # example "torch.float32"), which can never match, so bfloat16 was always
    # reported as unsupported.
    try:
        BF16_SUPPORTED = bool(torch.cuda.is_bf16_supported())
    except (RuntimeError, AssertionError):
        BF16_SUPPORTED = False

    if BF16_SUPPORTED:
        print(f"Device {current_device_name} appears to support bfloat16.")
    else:
        print(f"Device {current_device_name} does NOT appear to support bfloat16. Will use float16 for torch_dtype if CUDA available.")

try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME_OR_PATH)

    # Generation needs a padding token; fall back to EOS when none is defined.
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id
        print("Set tokenizer.pad_token_id to tokenizer.eos_token_id")

    # Determine torch_dtype based on availability
    if torch.cuda.is_available():
        model_dtype = torch.bfloat16 if BF16_SUPPORTED else torch.float16
    else:
        model_dtype = torch.float32 # Default for CPU
    print(f"Using torch_dtype: {model_dtype}")

    # Passing `load_in_8bit=True` straight to `from_pretrained` is deprecated in
    # transformers; the supported way is an explicit quantization config.
    quantization_config = BitsAndBytesConfig(load_in_8bit=True)

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME_OR_PATH,
        quantization_config=quantization_config,  # Enable 8-bit quantization
        device_map="auto",                        # Automatically distribute model layers
        torch_dtype=model_dtype                   # For non-quantized layers and computations
    )
    print("Model loaded successfully with 8-bit quantization.")
    print(f"Model memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")

    # Create the text-generation pipeline
    llm_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )
    LLM_PIPELINE = llm_pipeline
    TOKENIZER = tokenizer
    MODEL_LOADED_SUCCESSFULLY = True
    print("Text-generation pipeline created successfully.")

except Exception as e:
    # Keep the notebook running: later cells check MODEL_LOADED_SUCCESSFULLY
    # and skip generation when loading failed.
    print(f"Error loading model or creating pipeline: {e}")
    print("Please ensure 'bitsandbytes' and 'accelerate' are installed and compatible.")
    print("A GPU is highly recommended. Check CUDA setup and GPU memory.")
    if "CUDA out of memory" in str(e):
        print("CUDA out of memory. This 8B model, even 8-bit quantized, requires significant VRAM.")
    elif "load_in_8bit" in str(e):
        print("Error related to 8-bit loading. Double-check library versions and GPU compatibility.")

**3. Data Preparation:** Define Paths and Transcript Files

The transcripts input directory (`TRANSCRIPTS_INPUT_DIR`) must exist and contain the transcript files.

In [None]:
TRANSCRIPTS_INPUT_DIR = "./transcripts_data"
SOAP_OUTPUT_DIR = "./soap_notes_output"

# Make sure there is a directory to write the generated notes into.
if not os.path.exists(SOAP_OUTPUT_DIR):
    os.makedirs(SOAP_OUTPUT_DIR)
    print(f"Created directory: {SOAP_OUTPUT_DIR}")

# Processing goes ahead only when the input directory exists and is non-empty.
if os.path.exists(TRANSCRIPTS_INPUT_DIR) and os.listdir(TRANSCRIPTS_INPUT_DIR):
    PROCEED_WITH_PROCESSING = True
    print(f"Transcripts will be read from: {TRANSCRIPTS_INPUT_DIR}")
elif os.path.exists(TRANSCRIPTS_INPUT_DIR):
    PROCEED_WITH_PROCESSING = False
    print(f"WARNING: Transcripts input directory '{TRANSCRIPTS_INPUT_DIR}' is empty.")
else:
    PROCEED_WITH_PROCESSING = False
    print(f"ERROR: Transcripts input directory '{TRANSCRIPTS_INPUT_DIR}' not found.")

# encounter_1.txt ... encounter_10.txt, in that order.
TRANSCRIPT_FILENAMES = [f"encounter_{number}.txt" for number in range(1, 11)]

4. **Helper Functions: Transcript Loading and Preprocessing**

In [None]:
def load_transcript(file_path: str) -> str:
    """Return the full UTF-8 text of the transcript stored at ``file_path``.

    Raises:
        FileNotFoundError: if nothing exists at ``file_path``.
    """
    if os.path.exists(file_path):
        with open(file_path, 'r', encoding='utf-8') as transcript_file:
            contents = transcript_file.read()
        return contents
    raise FileNotFoundError(f"Transcript file not found at {file_path}")

def preprocess_transcript(transcript_text: str) -> str:
    """Normalize blank-line spacing in a transcript.

    Any pair of newlines separated only by whitespace becomes exactly one
    blank line, leading and trailing whitespace is stripped, and any remaining
    run of three or more newlines is reduced to two.
    """
    blank_gap = re.compile(r'\n\s*\n')
    newline_run = re.compile(r'\n{3,}')

    collapsed = blank_gap.sub('\n\n', transcript_text)
    trimmed = collapsed.strip()
    return newline_run.sub('\n\n', trimmed)

5. **Prompt Engineering (Llama 3 Format - Detailed for Medical SOAP Note)**

This prompt is designed to elicit a detailed SOAP note, similar in structure and content
to the "Medical-Visit-SOAP-Note.pdf" example. It emphasizes handling transcripts with
potentially irregular or missing speaker labels.

In [None]:
def create_llama3_soap_prompt(tokenizer, transcript_text: str, patient_name: str = "Not specified in transcript", patient_dob: str = "Not specified in transcript", date_of_service: "str | None" = None) -> str:
    """Build the chat-formatted prompt that asks the model for a detailed SOAP note.

    Args:
        tokenizer: Object exposing ``apply_chat_template(messages, tokenize=...,
            add_generation_prompt=...)``; it renders the system and user
            messages into the final prompt string.
        transcript_text: The (preprocessed) encounter transcript to embed.
        patient_name: Value placed in the "Client Full Name" field.
        patient_dob: Value placed in the "Client Date of Birth" field.
        date_of_service: Value placed in the "Date of Service" field. When
            ``None`` (the default) today's date in ``YYYY-MM-DD`` form is used,
            which matches the previous behaviour. Pass the real encounter date
            when processing stored transcripts, so the note is not stamped with
            the generation date.

    Returns:
        The prompt string produced by ``tokenizer.apply_chat_template``.
    """
    if date_of_service is None:
        date_of_service = datetime.now().strftime("%Y-%m-%d")

    system_prompt_content = (
        "You are a helpful, respectful and honest medical assistant. "
        "Always answer as helpfully as possible, while being safe. "
        "Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. "
        "Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. "
        "If you don’t know the answer to a question, please don’t share false information. "
        "Your primary task is to generate a comprehensive and detailed clinical SOAP note based on the provided medical encounter transcript. "
        "The transcript may have irregular formatting or missing speaker labels (e.g., 'Doctor:', 'Patient:'); you must infer roles and extract information from the dialogue flow. "
        "Focus on accuracy and strict adherence to the detailed SOAP structure outlined below. Use only information explicitly present or directly and confidently inferable from the transcript. "
        "Structure the output exactly as specified in the 'SOAP Note Format to Follow', including all headers and sub-headers."
    )

    user_instructions_content = f"""
**Medical Encounter Details (Extract from transcript if available, otherwise state "Not mentioned in transcript"):**
Client Full Name: {patient_name}
Client Date of Birth: {patient_dob}
Date of Service: {date_of_service}
Exact start time and end time: [Fill or state "Not mentioned in transcript"]
Session Location: [Fill or state "Not mentioned in transcript"]
Diagnosis (Primary, from assessment or clearly stated): [Fill or state "To be determined by clinician" or use assessment]

**Transcript to Process:**
\"\"\"
{transcript_text}
\"\"\"

**Instructions for SOAP Note Generation:**
Based **only** on the information present in the transcript provided above, generate a detailed SOAP note.
If information for a specific field or sub-section is not present in the transcript, explicitly state "Not mentioned in transcript" or "N/A". Do not invent information.
Follow the structure and level of detail exemplified by high-quality medical SOAP notes.

**SOAP Note Format to Follow:**

**S (Subjective):**
    - Chief Complaint (CC): (Patient's main reason for visit, ideally in their own words. E.g., "Experiencing increased anxiety symptoms.")
    - History of Present Illness (HPI): (Detailed chronological account of the CC. Include onset, duration, frequency, and severity of symptoms. Describe the nature of symptoms, e.g., "constantly on edge," "overwhelming feelings of doom." Note specific episodes like panic attacks, including their duration and accompanying physical sensations like shortness of breath, increased heart rate, trembling. Detail the impact on daily life, e.g., sleep disruption – "taking nearly an hour to fall asleep and waking frequently," difficulty concentrating, effects on work performance and relationships. Mention patient's attempts at coping strategies and their perceived effectiveness, e.g., "attempting to use breathing techniques... found it challenging." Include direct, illustrative patient quotes if available and pertinent, e.g., "I feel like I'm letting everyone down.")
    - Past Medical History (PMH): (Relevant chronic illnesses, significant past illnesses, surgeries, hospitalizations as reported by patient or mentioned in conversation).
    - Medications: (Current medications, including name, dosage, and frequency, as reported by patient).
    - Allergies: (Drug, food, environmental allergies and reactions, as reported by patient).
    - Family History (FHx): (Relevant medical conditions in family members, if mentioned).
    - Social History (SHx): (Relevant lifestyle factors, occupation, living situation, habits like smoking/alcohol, and stressors such as workplace issues, if mentioned).
    - Review of Systems (ROS): (Briefly cover other symptoms by body system if discussed by the patient during the encounter).

**O (Objective):**
    - General Appearance and Presentation: (e.g., "Presented to the session on time, appropriately dressed, and well-groomed.").
    - Affect and Mood: (Describe observed affect, e.g., "anxious and somewhat constricted." Note visible signs of emotional state like "visible tension in her shoulders and frequent hand wringing.").
    - Speech: (Describe characteristics, e.g., "rapid at times but normal in volume and content.").
    - Behavior and Cooperation during session: (e.g., "maintained good eye contact throughout most of the session, though she looked down when discussing her perceived failures." "receptive to feedback and actively participated.").
    - Cognitive Functioning: (Assess and describe, e.g., "demonstrated intact cognitive functioning with clear and logical thought processes." Note orientation, memory, attention, insight).
    - Psychomotor Activity: (e.g., normal, agitated, retarded, presence of tics or tremors).
    - Evidence of Psychosis, Suicidal Ideation, or Homicidal Ideation: (State if assessed and the findings, e.g., "No evidence of psychosis, suicidal ideation, or homicidal ideation was present.").
    - Standardized Assessment Scores: (Report any specific assessment tools used and scores, e.g., "Completed the GAD-7 assessment with a score of 16, indicating severe anxiety symptoms." Note any change from previous scores if mentioned, e.g., "increased from score of 12 at previous session.").
    - Vital Signs: (If measured and mentioned in the transcript, list them: e.g., BP, HR, RR, Temp, SpO2).
    - Physical Examination Findings: (If a physical exam was performed and findings mentioned, detail them by system).
    - Other Clinician Observations: (Any other pertinent objective observations made by the clinician during the encounter).

**A (Assessment):**
    - Clinical Summary and Impression: (Concisely summarize the patient's current clinical status. State if they continue to meet criteria for any diagnoses, e.g., "Alexis continues to meet criteria for Generalized Anxiety Disorder (F41.1)...").
    - Symptom Progression and Severity: (Note any intensification, exacerbation, stability, or improvement in symptoms since the last encounter or over the relevant period, e.g., "symptoms that have intensified since our last session." "The increase in panic attacks and sleep disturbances indicates a significant exacerbation...").
    - Contributing Factors and Triggers: (Identify likely triggers or factors contributing to the current presentation, e.g., "likely triggered by increased workplace demands and her perfectionist tendencies.").
    - Synthesis of S & O: (Briefly explain how subjective reports and objective findings support the clinical impression. Describe any observed patterns or cycles, e.g., "Her anxiety appears to be creating a self-reinforcing cycle, where worry about performance leads to physiological symptoms...").
    - Patient Strengths and Protective Factors: (Note any positive factors that can be leveraged in treatment, e.g., "willingness to engage in therapy," "her developing awareness of anxiety triggers," "motivation to implement coping strategies.").
    - Significance of Quantitative Measures: (Reiterate any objective scores and briefly explain their clinical significance, e.g., "Her GAD-7 score increase from 12 to 16 quantitatively confirms her subjective report of worsening symptoms.").
    - Areas for Therapeutic Focus: (Highlight specific challenges or areas needing therapeutic intervention, e.g., "The client's difficulty in effectively utilizing learned coping techniques during high-stress situations suggests the need for more practice...").

**P (Plan):**
    (List specific, actionable items related to treatment, monitoring, and follow-up. Number each item clearly).
    1. Therapeutic Interventions: (e.g., "Continue weekly individual therapy sessions focusing on Cognitive Behavioral Therapy interventions for anxiety management.").
    2. Skill Development/Reinforcement: (e.g., "Review and refine previously taught breathing techniques, with emphasis on practicing during periods of lower anxiety to build proficiency.").
    3. Introduction of New Coping Strategies: (e.g., "Introduce progressive muscle relaxation as an additional coping strategy, with in-session demonstration and daily practice assignments.").
    4. Specific Therapeutic Techniques: (e.g., "Implement thought challenging exercises targeting catastrophic thinking patterns related to work performance.").
    5. Patient Homework/Assignments: (e.g., "Assign homework to create and maintain a daily anxiety journal to better identify triggers and patterns.").
    6. Medication Management/Consultation: (e.g., "Discuss the potential benefits of consulting with her primary care physician regarding a medication evaluation if symptoms don't improve within 2-3 weeks.").
    7. Psychoeducation Topics: (e.g., "Provide psychoeducation about the relationship between sleep hygiene and anxiety, with specific recommendations for establishing a more consistent sleep routine.").
    8. Follow-up Scheduling: (e.g., "Schedule next appointment for [Date, e.g., 5/15/2025] at [Time, e.g., 10:15 am].").
    9. Other Referrals or Coordination of Care: (Any other planned actions, referrals, or consultations).

---
Please provide the **Generated SOAP Note** strictly following this detailed structure and ensuring all relevant information from the transcript is captured in the appropriate sections:
"""

    # System message carries the role and guardrails; user message carries the
    # encounter details, the transcript and the required output format.
    messages = [
        {"role": "system", "content": system_prompt_content.strip()},
        {"role": "user", "content": user_instructions_content.strip()}
    ]

    prompt_string = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True # Essential for Llama 3 to know it's the assistant's turn
    )
    return prompt_string

6. **LLM Interaction: Generating the SOAP Note**

In [None]:
def generate_soap_note_with_llm(pipe, tokenizer_instance, prompt_text: str) -> str:
    """Run the text-generation pipeline on ``prompt_text`` and return its output.

    Args:
        pipe: Callable text-generation pipeline.
        tokenizer_instance: Tokenizer used to look up the stop-token ids.
        prompt_text: Fully formatted prompt to send to the model.

    Returns:
        The stripped generated text, or a message starting with ``"Error"`` if
        the pipeline is unavailable or generation raises.
    """
    if not (pipe and MODEL_LOADED_SUCCESSFULLY):
        return "Error: LLM pipeline is not initialized or model failed to load."

    MAX_NEW_TOKENS = 3500
    sampling_options = {
        "do_sample": True,
        # Low temperature limits randomness/hallucination, since accuracy
        # matters in the field of medicine.
        "temperature": 0.15,
        # Nucleus sampling still leaves some room for variation.
        "top_p": 0.9,
    }

    # Stop on the tokenizer's EOS token or on the "<|eot_id|>" marker; ids the
    # tokenizer reports as None are dropped.
    candidate_stop_ids = (
        tokenizer_instance.eos_token_id,
        tokenizer_instance.convert_tokens_to_ids("<|eot_id|>"),
    )
    stop_token_ids = [token_id for token_id in candidate_stop_ids if token_id is not None]

    print(f"Sending prompt to LLM pipeline. Max new tokens: {MAX_NEW_TOKENS}...")
    try:
        outputs = pipe(
            prompt_text,
            max_new_tokens=MAX_NEW_TOKENS,
            eos_token_id=stop_token_ids,
            return_full_text=False,
            **sampling_options,
        )
        first_result = outputs[0]
        generated_text = first_result['generated_text'].strip()
        print("LLM generation complete.")
        return generated_text
    except Exception as e:
        print(f"Error during LLM generation with pipeline: {e}")
        return f"Error during LLM generation. Check console. Exception: {str(e)}"

7. **Post-processing**

In [None]:
def postprocess_soap_note(llm_output: str) -> str:
    """Clean raw model output into the final SOAP note text.

    Strips surrounding whitespace, removes the ``<|eot_id|>`` and
    ``<|end_of_text|>`` marker strings, and drops a leading
    "Generated SOAP Note:" heading (bold or plain, any letter case).
    """
    note_text = llm_output.strip()
    for marker in ("<|eot_id|>", "<|end_of_text|>"):
        note_text = note_text.replace(marker, "")
    note_text = note_text.strip()

    # (lower-cased prefix to detect, regex used to cut the heading off)
    heading_variants = (
        ("**generated soap note:**", r"\*\*Generated SOAP Note:\*\*"),
        ("generated soap note:", r"Generated SOAP Note:"),
    )
    lowered = note_text.lower()
    for prefix, pattern in heading_variants:
        if lowered.startswith(prefix):
            remainder = re.split(pattern, note_text, maxsplit=1, flags=re.IGNORECASE)[-1]
            return remainder.strip()

    return note_text

9. **Main Pipeline Orchestration**


In [None]:
def transcript_to_soap_pipeline(transcript_file_path: str, llm_pipe_instance, tokenizer_instance) -> str:
    """Turn one transcript file into a SOAP note: load, clean, prompt, generate, post-process.

    Returns:
        The post-processed SOAP note, or a status/error string when the model
        is unavailable, the transcript cannot be loaded, or generation reports
        an error.
    """
    file_label = os.path.basename(transcript_file_path)
    print(f"\n--- Starting processing for: {file_label} ---")

    if not (MODEL_LOADED_SUCCESSFULLY and llm_pipe_instance):
        return "LLM not available. Skipping."

    try:
        raw_transcript = load_transcript(transcript_file_path)
    except Exception as e:
        return f"Error loading transcript: {e}"

    prompt = create_llama3_soap_prompt(
        tokenizer_instance,
        preprocess_transcript(raw_transcript),
    )
    llm_generated_soap_note = generate_soap_note_with_llm(llm_pipe_instance, tokenizer_instance, prompt)

    # The generation step reports failures as strings; pass them through unchanged.
    generation_failed = (
        llm_generated_soap_note.startswith("Error:")
        or "Error during LLM generation" in llm_generated_soap_note
    )
    if generation_failed:
        return llm_generated_soap_note

    final_soap_note = postprocess_soap_note(llm_generated_soap_note)
    print(f"--- Finished processing for: {file_label} ---")
    return final_soap_note

10. **Execution and Results**


In [None]:
# Maps each transcript filename to its generated SOAP note, or to the
# status/error string explaining why no note was produced.
generated_soap_notes_all = {}

# Prefixes of the status strings the pipeline returns instead of a note.
# Results that start with one of these (or are blank) are failures: they are
# recorded in the dict but never written to disk as a "SOAP note".
SOAP_FAILURE_PREFIXES = (
    "Error:",
    "Error loading transcript",
    "Error during LLM generation",
    "LLM not available",
)

if not PROCEED_WITH_PROCESSING:
    print("\nSkipping SOAP note generation: Input directory issue.")
elif not MODEL_LOADED_SUCCESSFULLY or not LLM_PIPELINE or not TOKENIZER:
    print("\nSkipping SOAP note generation: Model or Tokenizer not loaded successfully.")
else:
    print(f"\nStarting SOAP note generation for up to {len(TRANSCRIPT_FILENAMES)} transcripts...")
    print(f"Model: {MODEL_NAME_OR_PATH} (8-bit Quantized)")

    files_processed_count = 0  # notes generated successfully
    files_failed_count = 0     # transcripts found but no usable note produced
    for filename in tqdm(TRANSCRIPT_FILENAMES, desc="Processing Transcripts"):
        full_file_path = os.path.join(TRANSCRIPTS_INPUT_DIR, filename)
        if not os.path.exists(full_file_path):
            print(f"\nWarning: Transcript file {filename} not found. Skipping.")
            generated_soap_notes_all[filename] = f"Error: File {filename} not found."
            continue

        soap_note_output = transcript_to_soap_pipeline(full_file_path, LLM_PIPELINE, TOKENIZER)
        generated_soap_notes_all[filename] = soap_note_output

        # Do not save a failure message under a "SOAP Note" header: a reader
        # of the output directory could mistake it for a real clinical note.
        if not soap_note_output.strip() or soap_note_output.startswith(SOAP_FAILURE_PREFIXES):
            files_failed_count += 1
            print(f"\nWarning: No SOAP note generated for {filename}: {soap_note_output or 'empty model output'}")
            print("\n" + "="*80 + "\n")
            continue

        files_processed_count += 1

        output_filepath = os.path.join(SOAP_OUTPUT_DIR, f"soap_note_{os.path.splitext(filename)[0]}.txt")
        try:
            with open(output_filepath, "w", encoding="utf-8") as f:
                header = (
                    f"SOAP Note for Transcript: {filename}\n"
                    f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
                    f"Model: {MODEL_NAME_OR_PATH} (8-bit Quantized)\n"
                    f"---\n\n"
                )
                f.write(header)
                f.write(soap_note_output)
            print(f"SOAP note for {filename} saved to: {output_filepath}")
        except Exception as e:
            print(f"Error saving SOAP note for {filename}: {e}")
        print("\n" + "="*80 + "\n")

    if files_processed_count == 0:
        print(f"No matching transcript files processed from the list in '{TRANSCRIPTS_INPUT_DIR}'.")
    else:
        print(f"Processed {files_processed_count} transcript(s).")
    if files_failed_count:
        print(f"{files_failed_count} transcript(s) failed; see the warnings above.")
    print("All specified transcripts handled.")

11. **Display Example Generated SOAP Note**

In [None]:
# Show one generated note as an example. Every status string the pipeline can
# return in place of a note is treated as a failure. Previously only the
# "error:" prefix was checked, so messages such as "Error loading transcript..."
# or "LLM not available..." were displayed as if they were SOAP notes.
example_failure_prefixes = (
    "error:",
    "error loading transcript",
    "error during llm generation",
    "llm not available",
)

if PROCEED_WITH_PROCESSING and MODEL_LOADED_SUCCESSFULLY and generated_soap_notes_all:
    # Prefer the first entry that holds a real note.
    first_key = next(
        (
            name
            for name, note in generated_soap_notes_all.items()
            if note.strip() and not note.lower().startswith(example_failure_prefixes)
        ),
        None,
    )
    if first_key is not None:
        print(f"\nExample - SOAP Note for '{first_key}':\n")
        print(generated_soap_notes_all[first_key])
    else:
        # Nothing succeeded: show the first recorded failure.
        first_key = next(iter(generated_soap_notes_all))
        print(f"\nExample - Error for '{first_key}':\n{generated_soap_notes_all[first_key]}")
else:
    # Reached when processing was skipped or produced no entries; in the
    # original layout this message could not be reached in practice.
    print("\nNo SOAP notes were generated or an error occurred that prevented display.")
