In [16]:
import torch
import json
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Pick the compute device once, up front: CUDA when PyTorch can see a GPU,
# otherwise fall back to the CPU. The chosen device is reported so the
# notebook output records where inference actually ran.
if not torch.cuda.is_available():
    print("GPU not found")
    device = torch.device("cpu")
else:
    print(f"GPU found, using: {torch.cuda.get_device_name(0)}")
    device = torch.device("cuda")

GPU found, using: NVIDIA GeForce RTX 3070


In [17]:
def load_model_and_mappings(model_path, mappings):
    """Load a sequence-classification model, its tokenizer and the label maps.

    Args:
        model_path: Path (or identifier) handed to ``from_pretrained`` for
            both the model and the tokenizer.
        mappings: Path to a JSON file holding an ``id_to_label`` object and a
            ``label_to_id`` object.

    Returns:
        ``(model, tokenizer, id_to_label, label_to_id)`` on success, where the
        keys of ``id_to_label`` have been converted to ``int`` (JSON object
        keys are always strings). On any failure a message is printed and
        ``(None, None, None, None)`` is returned so callers can test
        ``model is None``.
    """
    try:
        model = AutoModelForSequenceClassification.from_pretrained(model_path)
        tokenizer = AutoTokenizer.from_pretrained(model_path)
    except Exception as e:
        print(f"Error loading model or tokenizer, make sure the model's path does exist: {e}")
        return None, None, None, None

    try:
        # JSON is UTF-8 by specification; do not depend on the platform default.
        with open(mappings, 'r', encoding='utf-8') as f:
            specialty_and_id_map = json.load(f)

        id_to_label = {int(k): v for k, v in specialty_and_id_map['id_to_label'].items()}
        label_to_id = specialty_and_id_map['label_to_id']
    except OSError as e:
        # Missing / unreadable file. Return the failure sentinel instead of
        # falling through to code that needs the (unbound) parsed mapping.
        print(f"Data not found, make sure to run the specialty_data_preprocessing.ipynb file in its entirety to retrieve the data: {e}")
        return None, None, None, None
    except (ValueError, KeyError, TypeError, AttributeError) as e:
        # Invalid JSON (JSONDecodeError is a ValueError) or unexpected layout.
        print(f"Label mappings file is malformed ({mappings}): {e}")
        return None, None, None, None

    return model, tokenizer, id_to_label, label_to_id

def predict_specialty(text_input, model_path, mappings, device):
    """Classify ``text_input`` and return the predicted label with its probability.

    The model, tokenizer and label map are loaded via
    ``load_model_and_mappings`` on every call.

    Args:
        text_input: Text to classify.
        model_path: Forwarded to ``load_model_and_mappings``.
        mappings: Forwarded to ``load_model_and_mappings``.
        device: Device the model and the tokenized inputs are moved to.

    Returns:
        ``(label, confidence)`` where ``confidence`` is the softmax
        probability of the top class as a Python float, or ``(None, None)``
        when loading did not produce a model. A class id absent from the
        label map is reported as ``"Unknown Label"``.
    """
    model, tokenizer, id_to_label, _unused_label_to_id = load_model_and_mappings(
        model_path, mappings
    )
    if model is None:
        return None, None

    # Inference only: place the model on the target device and switch to eval mode.
    model.to(device)
    model.eval()

    encoded = tokenizer(
        text_input,
        padding="max_length",
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )
    # Every input tensor must live on the same device as the model.
    batch = {name: tensor.to(device) for name, tensor in encoded.items()}

    with torch.no_grad():
        logits = model(**batch).logits

    # Index 0 selects the first row of the softmax output.
    class_probs = torch.softmax(logits, dim=-1)[0]

    top_class_id = class_probs.argmax().item()
    top_prob = class_probs[top_class_id].item()

    return id_to_label.get(top_class_id, "Unknown Label"), top_prob


In [18]:
# Label-mapping JSON, given as a relative path (resolved against the current
# working directory when the file is opened).
MAPPINGS = '../../Data/Specialty-Data/specialty_data_label_mappings.json'
# MODEL should be an absolute path to the saved model directory.
# Change MODEL to point at the model you want to test.
MODEL = r"E:\Trend-Interview\Codebase\Model-Development\Specialty-Models\Saved-Models\BERT\microsoft\BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext\5-classes-unweighted-training_run_2025-11-08_20-52-22\final_model"

# Sample transcription text that is passed to predict_specialty below.
TRANSCRIPTION = """
2-D M-MODE: , ,1.  Left atrial enlargement with left atrial diameter of 4.7 cm.,2.  Normal size right and left ventricle.,3.  Normal LV systolic function with left ventricular ejection fraction of 51%.,4.  Normal LV diastolic function.,5.
No pericardial effusion.,6.  Normal morphology of aortic valve, mitral valve, tricuspid valve, and pulmonary valve.,7.  PA systolic pressure is 36 mmHg.,DOPPLER: , ,1.  Mild mitral and tricuspid regurgitation.,2.  Trace aortic and pulmonary regurgitation.
"""

# Classify the sample transcription with the configured model and mappings.
specialty, confidence = predict_specialty(TRANSCRIPTION, MODEL, MAPPINGS, device)

# predict_specialty signals failure with (None, None); report the result otherwise.
if specialty is not None and confidence is not None:
    print(
        "PREDICTION",
        "-------------------------------------",
        f"Specialty:   {specialty}",
        f"Confidence:  {confidence*100:.2f}%",
        "-------------------------------------",
        sep="\n",
    )
else:
    print("Nothing returned, make sure that the  directory is correct")

PREDICTION
-------------------------------------
Specialty:    Cardiovascular / Pulmonary
Confidence:  64.94%
-------------------------------------
