In [1]:
!pip install transformers datasets torch seqeval

Collecting datasets
  Downloading datasets-3.4.1-py3-none-any.whl.metadata (19 kB)
Collecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)


# Dataset Loading and Splitting

* This section loads the dataset and splits it into training, validation, and test sets. The dataset contains medical dialogue transcripts and corresponding clinical notes

In [2]:
from datasets import load_dataset
import numpy as np

# Load the dataset
dataset = load_dataset("har1/MTS_Dialogue-Clinical_Note")

# Split dataset into train (70%), validation (15%), test (15%)
train_temp_split = dataset["train"].train_test_split(test_size=0.3, shuffle=True, seed=42)
val_test_split = train_temp_split["test"].train_test_split(test_size=0.5, shuffle=True, seed=42)

dataset["train"] = train_temp_split["train"]
dataset["validation"] = val_test_split["train"]
dataset["test"] = val_test_split["test"]

print("Updated dataset splits:", dataset.keys())
print("Train size:", len(dataset["train"]))
print("Validation size:", len(dataset["validation"]))
print("Test size:", len(dataset["test"]))

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/1.67k [00:00<?, ?B/s]

MTS-Dialog-TrainingSet%20%28SDHP%29.csv:   0%|          | 0.00/1.02M [00:00<?, ?B/s]

(…)Dialog-Validation%20Set%20%28SDHP%29.csv:   0%|          | 0.00/74.8k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1301 [00:00<?, ? examples/s]

Updated dataset splits: dict_keys(['train', 'validation', 'test'])
Train size: 910
Validation size: 195
Test size: 196


# Tokenizer and Label Configuration

* This section initializes the BioBERT tokenizer and defines the labels for Named Entity Recognition (NER). The labels include categories like SYMPTOM, DIAGNOSIS, TREATMENT, and PROGNOSIS.

In [3]:
from transformers import BertTokenizerFast

# Load BioBERT tokenizer
tokenizer = BertTokenizerFast.from_pretrained("dmis-lab/biobert-base-cased-v1.1")

# Define labels for NER
label_list = ["O", "B-SYMPTOM", "I-SYMPTOM", "B-DIAGNOSIS", "I-DIAGNOSIS", "B-TREATMENT", "I-TREATMENT", "B-PROGNOSIS", "I-PROGNOSIS"]
label2id = {label: idx for idx, label in enumerate(label_list)}
id2label = {idx: label for label, idx in label2id.items()}

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/313 [00:00<?, ?B/s]

# Preprocessing for Named Entity Recognition (NER)

* This section preprocesses the dataset by tokenizing the dialogue and aligning entities (e.g., symptoms, diagnoses) with their corresponding tokens. It uses exact offset mapping to ensure accurate token-level labeling.

In [4]:
def preprocess_mts_for_ner(examples):
    dialogues = examples["dialogue"]
    section_texts = examples["section_text"]
    tokenized_inputs = {"input_ids": [], "attention_mask": [], "labels": []}

    for dialogue, section_text in zip(dialogues, section_texts):
        # Tokenize dialogue with offset mapping
        encodings = tokenizer(dialogue, truncation=True, padding="max_length", max_length=512, return_offsets_mapping=True)
        input_ids = encodings["input_ids"]
        attention_mask = encodings["attention_mask"]
        offsets = encodings["offset_mapping"]
        labels = [label2id["O"]] * len(input_ids)
        dialogue_lower = dialogue.lower()

        # Parse section_text for entities
        section_lines = section_text.lower().split("\n")
        entities = []

        # Symptoms
        for line in section_lines:
            if "symptoms:" in line:
                symptoms = [s.strip() for s in line.split("symptoms:")[1].strip().split(",")]
                for symptom in symptoms:
                    if symptom and symptom in dialogue_lower:
                        start = dialogue_lower.index(symptom)
                        end = start + len(symptom)
                        entities.append((start, end, "SYMPTOM"))


        for line in section_lines:
            if "diagnosis:" in line:
                diagnosis_text = line.split("diagnosis:")[1].strip()
                diagnoses = [d.strip() for d in diagnosis_text.split(",")]
                for diagnosis in diagnoses:
                    if diagnosis in dialogue_lower:
                        start = dialogue_lower.index(diagnosis)
                        end = start + len(diagnosis)
                        entities.append((start, end, "DIAGNOSIS"))
                    else:

                        keywords = diagnosis.split()
                        for keyword in keywords:
                            if keyword in dialogue_lower:
                                start = dialogue_lower.index(keyword)
                                end = start + len(keyword)
                                entities.append((start, end, "DIAGNOSIS"))


        treatment_keywords = ["prescribed", "sessions", "plan of action:"]
        for line in section_lines:
            if any(kw in line for kw in treatment_keywords):
                if "plan of action:" in line and "n/a" not in line:
                    treatments = [t.strip() for t in line.split("plan of action:")[1].strip().split(",")]
                else:
                    treatments = [line.strip()]  # Treat the whole line as a potential treatment if keywords match
                for treatment in treatments:
                    if treatment in dialogue_lower:
                        start = dialogue_lower.index(treatment)
                        end = start + len(treatment)
                        entities.append((start, end, "TREATMENT"))
                    else:
                        # Check for partial matches (e.g., "physiotherapy" from "ten sessions of physiotherapy")
                        keywords = treatment.split()
                        for keyword in keywords:
                            if keyword in dialogue_lower:
                                start = dialogue_lower.index(keyword)
                                end = start + len(keyword)
                                entities.append((start, end, "TREATMENT"))

        # Prognosis
        prognosis_terms = ["recover", "improving", "better", "heal", "resolve"]
        for term in prognosis_terms:
            if term in dialogue_lower:
                start = dialogue_lower.index(term)
                end = start + len(term)
                entities.append((start, end, "PROGNOSIS"))

        # Align entities to tokens
        entities.sort(key=lambda x: x[0])  # Sort by start position
        for start, end, entity_type in entities:
            for i, (offset_start, offset_end) in enumerate(offsets):
                if offset_start >= start and offset_end <= end and offset_end > 0:
                    if labels[i] == label2id["O"]:
                        labels[i] = label2id[f"B-{entity_type}"]
                    # Extend to subsequent tokens if entity spans multiple
                    j = i + 1
                    while j < len(offsets) and offsets[j][0] < end and offsets[j][1] > 0:
                        if labels[j] == label2id["O"]:
                            labels[j] = label2id[f"I-{entity_type}"]
                        j += 1

        tokenized_inputs["input_ids"].append(input_ids)
        tokenized_inputs["attention_mask"].append(attention_mask)
        tokenized_inputs["labels"].append(labels)

    return tokenized_inputs

# Apply preprocessing
tokenized_dataset = dataset.map(
    preprocess_mts_for_ner,
    batched=True,
    remove_columns=dataset["train"].column_names
)

tokenized_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

Map:   0%|          | 0/910 [00:00<?, ? examples/s]

Map:   0%|          | 0/195 [00:00<?, ? examples/s]

Map:   0%|          | 0/196 [00:00<?, ? examples/s]

# Tuning the BioBERT NER Model

* This section initializes the BioBERT model for token classification and trains it on the preprocessed dataset. It uses a custom compute_metrics function to evaluate the model's performance during training. The model is fine-tuned to recognize medical entities like symptoms, diagnoses, and treatments.

In [5]:
from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer, DataCollatorForTokenClassification
import numpy as np
from seqeval.metrics import classification_report, precision_score, recall_score, f1_score

# Load BioBERT model
model = AutoModelForTokenClassification.from_pretrained(
    "dmis-lab/biobert-base-cased-v1.1",
    num_labels=len(label_list),
    label2id=label2id,
    id2label=id2label
)

# Training arguments
training_args = TrainingArguments(
    output_dir="./biobert_ner_results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=3e-5,  # Lowered for better convergence
    per_device_train_batch_size=8,  # Increased for faster training
    per_device_eval_batch_size=8,
    num_train_epochs=10,  # Reduced to prevent overfitting
    weight_decay=0.01,
    logging_steps=50,
    load_best_model_at_end=True,
    metric_for_best_model="f1",
)

# Compute metrics
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    true_labels = [[id2label[l] for l in label if l != -100] for label in labels]
    pred_labels = [[id2label[p] for p, l in zip(pred, label) if l != -100] for pred, label in zip(predictions, labels)]
    return {
        "precision": precision_score(true_labels, pred_labels),
        "recall": recall_score(true_labels, pred_labels),
        "f1": f1_score(true_labels, pred_labels),
        "report": classification_report(true_labels, pred_labels)
    }

# Data collator
data_collator = DataCollatorForTokenClassification(tokenizer)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# Train and evaluate
trainer.train()
test_results = trainer.evaluate(tokenized_dataset["test"])
print("Test Results:", test_results)

# Save model
trainer.save_model("./biobert_ner_finetuned")
tokenizer.save_pretrained("./biobert_ner_finetuned")

pytorch_model.bin:   0%|          | 0.00/436M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-base-cased-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mashoksai-gspl245[0m ([33mashoksai-gspl245-jeppiaar-engineering-college[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Report
1,0.043,0.032496,0.0,0.0,0.0,precision recall f1-score support  DIAGNOSIS 0.00 0.00 0.00 120  PROGNOSIS 0.00 0.00 0.00 9  SYMPTOM 0.00 0.00 0.00 53  TREATMENT 0.00 0.00 0.00 249  micro avg 0.00 0.00 0.00 431  macro avg 0.00 0.00 0.00 431 weighted avg 0.00 0.00 0.00 431
2,0.0366,0.02789,0.450704,0.074246,0.12749,precision recall f1-score support  DIAGNOSIS 0.03 0.01 0.01 120  PROGNOSIS 0.00 0.00 0.00 9  SYMPTOM 0.00 0.00 0.00 53  TREATMENT 0.82 0.12 0.22 249  micro avg 0.45 0.07 0.13 431  macro avg 0.21 0.03 0.06 431 weighted avg 0.48 0.07 0.13 431
3,0.0296,0.025538,0.382353,0.180974,0.245669,precision recall f1-score support  DIAGNOSIS 0.06 0.03 0.04 120  PROGNOSIS 1.00 0.22 0.36 9  SYMPTOM 0.25 0.08 0.12 53  TREATMENT 0.56 0.27 0.37 249  micro avg 0.38 0.18 0.25 431  macro avg 0.47 0.15 0.22 431 weighted avg 0.39 0.18 0.25 431
4,0.0186,0.027276,0.342541,0.287703,0.312736,precision recall f1-score support  DIAGNOSIS 0.12 0.10 0.11 120  PROGNOSIS 0.62 0.89 0.73 9  SYMPTOM 0.29 0.32 0.31 53  TREATMENT 0.45 0.35 0.39 249  micro avg 0.34 0.29 0.31 431  macro avg 0.37 0.41 0.38 431 weighted avg 0.34 0.29 0.31 431
5,0.0153,0.035356,0.252174,0.403712,0.310437,precision recall f1-score support  DIAGNOSIS 0.16 0.19 0.18 120  PROGNOSIS 0.62 0.89 0.73 9  SYMPTOM 0.19 0.53 0.28 53  TREATMENT 0.30 0.46 0.36 249  micro avg 0.25 0.40 0.31 431  macro avg 0.32 0.52 0.39 431 weighted avg 0.25 0.40 0.31 431
6,0.0118,0.030583,0.323308,0.299304,0.310843,precision recall f1-score support  DIAGNOSIS 0.13 0.14 0.14 120  PROGNOSIS 0.67 0.89 0.76 9  SYMPTOM 0.31 0.40 0.35 53  TREATMENT 0.43 0.33 0.38 249  micro avg 0.32 0.30 0.31 431  macro avg 0.39 0.44 0.41 431 weighted avg 0.34 0.30 0.31 431
7,0.009,0.032891,0.301969,0.320186,0.310811,precision recall f1-score support  DIAGNOSIS 0.11 0.11 0.11 120  PROGNOSIS 0.73 0.89 0.80 9  SYMPTOM 0.32 0.36 0.34 53  TREATMENT 0.37 0.39 0.38 249  micro avg 0.30 0.32 0.31 431  macro avg 0.38 0.44 0.41 431 weighted avg 0.30 0.32 0.31 431
8,0.0068,0.035424,0.280335,0.310905,0.294829,precision recall f1-score support  DIAGNOSIS 0.11 0.11 0.11 120  PROGNOSIS 0.80 0.89 0.84 9  SYMPTOM 0.23 0.40 0.29 53  TREATMENT 0.35 0.37 0.36 249  micro avg 0.28 0.31 0.29 431  macro avg 0.37 0.44 0.40 431 weighted avg 0.28 0.31 0.29 431
9,0.0055,0.036727,0.286713,0.285383,0.286047,precision recall f1-score support  DIAGNOSIS 0.12 0.11 0.11 120  PROGNOSIS 0.80 0.89 0.84 9  SYMPTOM 0.29 0.36 0.32 53  TREATMENT 0.34 0.33 0.34 249  micro avg 0.29 0.29 0.29 431  macro avg 0.39 0.42 0.40 431 weighted avg 0.28 0.29 0.28 431
10,0.0061,0.037129,0.286697,0.290023,0.288351,precision recall f1-score support  DIAGNOSIS 0.12 0.11 0.11 120  PROGNOSIS 0.80 0.89 0.84 9  SYMPTOM 0.28 0.36 0.31 53  TREATMENT 0.35 0.34 0.34 249  micro avg 0.29 0.29 0.29 431  macro avg 0.38 0.42 0.40 431 weighted avg 0.28 0.29 0.29 431


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Test Results: {'eval_loss': 0.029980653896927834, 'eval_precision': 0.3725, 'eval_recall': 0.28875968992248063, 'eval_f1': 0.32532751091703055, 'eval_report': '              precision    recall  f1-score   support\n\n   DIAGNOSIS       0.15      0.13      0.14       142\n   PROGNOSIS       0.76      0.76      0.76        17\n     SYMPTOM       0.25      0.25      0.25        53\n   TREATMENT       0.51      0.35      0.41       304\n\n   micro avg       0.37      0.29      0.33       516\n   macro avg       0.42      0.37      0.39       516\nweighted avg       0.39      0.29      0.33       516\n', 'eval_runtime': 7.0897, 'eval_samples_per_second': 27.646, 'eval_steps_per_second': 3.526, 'epoch': 10.0}


('./biobert_ner_finetuned/tokenizer_config.json',
 './biobert_ner_finetuned/special_tokens_map.json',
 './biobert_ner_finetuned/vocab.txt',
 './biobert_ner_finetuned/added_tokens.json',
 './biobert_ner_finetuned/tokenizer.json')

In [10]:
import shutil
save_directory = "/content/biobert_ner_finetuned"
import os
if os.path.exists(save_directory):
    print(f"Model saved successfully to {save_directory}")

    # Zip the model folder for easy download
    shutil.make_archive("physician_notetaker_model", "zip", save_directory)
    print("Model zipped as physician_notetaker_model.zip for download")
else:
    print("Error: Model save failed")

Model saved successfully to /content/biobert_ner_finetuned
Model zipped as physician_notetaker_model.zip for download


In [11]:
import shutil

# Path to the folder you want to zip
folder_to_zip = '/content/biobert_ner_finetuned'

# Path and name for the zip file
zip_file_path = 'notemaker.zip'

# Create the zip file
shutil.make_archive('/content/biobert_ner_finetuned', 'zip', '/content', 'biobert_ner_finetuned')


'/content/biobert_ner_finetuned.zip'

# Processing and Pipeline Setup

* This section sets up the processing pipeline for NER, sentiment analysis, and intent detection. It uses the fine-tuned BioBERT model for NER and a medical-specific sentiment model for sentiment analysis

In [6]:
from transformers import pipeline, BertForTokenClassification, BertTokenizerFast
from sklearn.feature_extraction.text import TfidfVectorizer

# Load fine-tuned NER model
ner_model = BertForTokenClassification.from_pretrained("./biobert_ner_finetuned")
ner_tokenizer = BertTokenizerFast.from_pretrained("./biobert_ner_finetuned")
ner_pipeline = pipeline("ner", model=ner_model, tokenizer=ner_tokenizer, aggregation_strategy="simple")

# Other pipelines
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
sentiment_analyzer = pipeline("text-classification", model="bhadresh-savani/distilbert-base-uncased-emotion")# Medical-specific model
zero_shot = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
intents = ["Seeking reassurance", "Reporting symptoms", "Expressing concern"]

Device set to use cuda:0


config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

KeyboardInterrupt: 

# Command-Line Interface (CLI) for Transcript Processing

* This section provides a CLI for users to input medical transcripts and receive structured outputs, including medical details, sentiment analysis, and SOAP notes. It uses the processing pipeline to generate results in real-time.

In [None]:
def process_transcript(transcript):
    # NER
    ner_results = ner_pipeline(transcript)
    entity_dict = {"Symptoms": [], "Diagnosis": [], "Treatments": [], "Prognosis": []}
    for entity in ner_results:
        label = entity["entity_group"]
        text = entity["word"]
        if "SYMPTOM" in label:
            entity_dict["Symptoms"].append(text)
        elif "DIAGNOSIS" in label:
            entity_dict["Diagnosis"].append(text)
        elif "TREATMENT" in label:
            entity_dict["Treatments"].append(text)
        elif "PROGNOSIS" in label:
            entity_dict["Prognosis"].append(text)

    # Summary and keywords
    cleaned = " ".join(transcript.split("\n"))
    summary = summarizer(cleaned, max_length=50, min_length=20)[0]["summary_text"]
    vectorizer = TfidfVectorizer(stop_words="english", max_features=5)
    tfidf_matrix = vectorizer.fit_transform([cleaned])
    keywords = vectorizer.get_feature_names_out().tolist()

    medical_details = {
        "Patient_Name": "Unknown" if "Mr." not in transcript and "Ms." not in transcript else transcript.split("Mr." if "Mr." in transcript else "Ms.")[1].split()[0],
        "Symptoms": list(set(entity_dict["Symptoms"])),
        "Diagnosis": list(set(entity_dict["Diagnosis"])),
        "Treatments": list(set(entity_dict["Treatments"])),
        "Current_Status": "Occasional symptoms" if "occasional" in cleaned.lower() else "Improving",
        "Prognosis": list(set(entity_dict["Prognosis"])) or ["Full recovery expected"]
    }

    # Sentiment and Intent with emotion-based mapping
    patient_lines = [line.split("Patient:")[1].strip() for line in transcript.split("\n") if "Patient:" in line]
    sentiment_intent = []
    for line in patient_lines:
        sentiment_result = sentiment_analyzer(line)[0]
        emotion = sentiment_result["label"]
        score = sentiment_result["score"]

        # Map emotions to your sentiment classes
        if emotion in ["fear", "sadness"] and score > 0.7:
            sentiment = "Anxious"
        elif emotion in ["joy", "love"] and score > 0.7:
            sentiment = "Reassured"
        else:
            sentiment = "Neutral"

        # Intent detection with zero-shot classification
        intent_result = zero_shot(line, intents)
        intent = intent_result["labels"][0]
        sentiment_intent.append({"Line": f"Patient: {line}", "Sentiment": sentiment, "Intent": intent})

    # SOAP Note
    soap = {
        "Subjective": {
            "Chief_Complaint": " and ".join(medical_details["Symptoms"]) or "Not specified",
            "History_of_Present_Illness": summary
        },
        "Objective": {
            "Physical_Exam": next((line.split("Physician:")[1].strip() for line in transcript.split("\n") if "examination" in line.lower() or "check" in line.lower() or "vitals" in line.lower()), "Not recorded"),
            "Observations": "Patient appears stable."
        },
        "Assessment": {
            "Diagnosis": medical_details["Diagnosis"] or ["Pending evaluation"],
            "Severity": "Mild, improving" if "better" in cleaned.lower() else "Under evaluation"
        },
        "Plan": {
            "Treatment": medical_details["Treatments"] or ["Monitor and manage symptoms"],
            "Follow-Up": "Return if symptoms worsen or persist beyond six months."
        }
    }

    return medical_details, sentiment_intent, soap

In [None]:
def run_cli():
    print("Physician Notetaker CLI with Fine-Tuned BioBERT NER")

    # Smaller sample transcript
    sample_transcript = """Physician: Good morning, Ms. Smith. How are you feeling today?
Patient: Morning, doctor. I’ve had a sore throat and fever for two days.
Physician: Any treatment so far?
Patient: I’ve been taking ibuprofen, but it’s not helping much.
Physician: Let’s check your throat. [Pause] It looks like a mild infection.
Physician: Rest and fluids should help you recover in a few days."""

    print("\nProcessing sample transcript:")
    print(sample_transcript)

    # Process the transcript with error handling
    try:
        med_details, sent_intent, soap = process_transcript(sample_transcript)
        print("\nMedical Details:")
        print(med_details)
        print("\nSentiment & Intent:")
        for item in sent_intent:
            print(item)
        print("\nSOAP Note:")
        print(soap)
    except Exception as e:
        print(f"Error processing sample transcript: {e}")

    # Interactive CLI loop with error handling
    while True:
        print("\nEnter your own transcript (or 'exit' to quit):")
        user_input = input("Transcript: ")
        if user_input.lower() == "exit":
            break
        if not user_input.strip():
            print("Please enter a valid transcript.")
            continue

        try:
            med_details, sent_intent, soap = process_transcript(user_input)
            print("\nMedical Details:")
            print(med_details)
            print("\nSentiment & Intent:")
            for item in sent_intent:
                print(item)
            print("\nSOAP Note:")
            print(soap)
        except Exception as e:
            print(f"Error processing user transcript: {e}")

if __name__ == "__main__":
    run_cli()