In [21]:
!pip install -U sacremoses  # biogpt tokenizer



In [22]:
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW
from sklearn.model_selection import train_test_split
import re
import pandas as pd
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from tqdm.auto import tqdm

In [42]:
import os
from google.colab import drive
drive.mount('/content/gdrive')

path = "/content/gdrive/MyDrive/"
os.listdir(path)
data = pd.read_csv(path+'test.csv')
data.head()

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


Unnamed: 0,text,readmission_30
0,Left periprosthetic femur fracture,1.0
1,"Primary: Acute otitis media, mastoiditis, Bell...",1.0
2,Elevated LFTsHA stenosisBiliary stricture with...,1.0
3,"Pharyngitis, supraglottitisAcute Kidney Injury...",1.0
4,Chronic distal common bile duct stricture,1.0


In [50]:
data['readmission_30'] = data['readmission_30'].astype('int64')
data['readmission_30'].value_counts()

0    6232
1    3768
Name: readmission_30, dtype: int64

In [51]:
def extract_and_combine_conditions(text):
    parts = text.split('Secondary:')
    primary_conditions = parts[0].replace('Primary:', '').strip()
    secondary_conditions = parts[1].strip() if len(parts) > 1 else ''
    combined_conditions = primary_conditions + ', ' + secondary_conditions if secondary_conditions else primary_conditions
    return combined_conditions

In [52]:
def clean_text(text):
    text = text.lower() 
    text = re.sub(r'\b(patientname|patientid):\s*\S+', '', text)
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    text = extract_and_combine_conditions(text)
    return text

In [53]:
class ReadmissionDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings  
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(self.encodings[key][idx]) for key in self.encodings}
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

    def __len__(self):
        return len(self.labels)

In [54]:
data['clean_text']=data['text'].apply(clean_text)

In [55]:
test_texts, test_labels = data['clean_text'], data['readmission_30']

In [56]:
blue_tokenizer = AutoTokenizer.from_pretrained('Tolerblanc/blue-BERT-Readmission')
bioC_tokenizer = AutoTokenizer.from_pretrained('Tolerblanc/bioClinical-BERT-Readmission')
sapB_tokenizer = AutoTokenizer.from_pretrained('Tolerblanc/sapBERT-Readmission')
bgpt_tokenizer = AutoTokenizer.from_pretrained('Tolerblanc/biogpt_Readmission')
cg_tokenizer = AutoTokenizer.from_pretrained('medicalai/ClinicalBERT')

In [57]:
# 토크나이저 사용
blue_test_encodings = blue_tokenizer(test_texts.tolist(), truncation=True, padding=True, max_length=128)
bioC_test_encodings = bioC_tokenizer(test_texts.tolist(), truncation=True, padding=True, max_length=128)
sapB_test_encodings = sapB_tokenizer(test_texts.tolist(), truncation=True, padding=True, max_length=128)
bgpt_test_encodings = bgpt_tokenizer(test_texts.tolist(), truncation=True, padding=True, max_length=128)
cg_test_encodings = cg_tokenizer(test_texts.tolist(), truncation=True, padding=True, max_length=128)

In [58]:
# 데이터셋 생성
blue_test_dataset = ReadmissionDataset(blue_test_encodings, test_labels.tolist())
bioC_test_dataset = ReadmissionDataset(bioC_test_encodings, test_labels.tolist())
sapB_test_dataset = ReadmissionDataset(sapB_test_encodings, test_labels.tolist())
bgpt_test_dataset = ReadmissionDataset(bgpt_test_encodings, test_labels.tolist())
cg_test_dataset = ReadmissionDataset(cg_test_encodings, test_labels.tolist())

In [59]:
blue_test_loader = DataLoader(blue_test_dataset, batch_size=16, shuffle=False)
bioC_test_loader = DataLoader(bioC_test_dataset, batch_size=16, shuffle=False)
sapB_test_loader = DataLoader(sapB_test_dataset, batch_size=16, shuffle=False)
bgpt_test_loader = DataLoader(bgpt_test_dataset, batch_size=16, shuffle=False)
cg_test_loader = DataLoader(cg_test_dataset, batch_size=16, shuffle=False)

In [60]:
blue_model = AutoModelForSequenceClassification.from_pretrained('Tolerblanc/blue-BERT-Readmission', num_labels=data['readmission_30'].nunique())
bioC_model = AutoModelForSequenceClassification.from_pretrained('Tolerblanc/bioClinical-BERT-Readmission', num_labels=data['readmission_30'].nunique())
sapB_model = AutoModelForSequenceClassification.from_pretrained('Tolerblanc/sapBERT-Readmission', num_labels=data['readmission_30'].nunique())
bgpt_model = AutoModelForSequenceClassification.from_pretrained('Tolerblanc/biogpt_Readmission', num_labels=data['readmission_30'].nunique())
cg_model = AutoModelForSequenceClassification.from_pretrained('medicalai/ClinicalBERT', num_labels=data['readmission_30'].nunique())

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at medicalai/ClinicalBERT and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [61]:
# 추론에 GPU사용 시
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
blue_model.to(device)
bioC_model.to(device)
sapB_model.to(device)
bgpt_model.to(device)
cg_model.to(device)

criterion = nn.CrossEntropyLoss()

In [62]:
from sklearn.metrics import f1_score, recall_score, accuracy_score, precision_score
import torch

def evaluate(model, test_loader, criterion, device):
    model.eval()
    total_loss = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in test_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids, attention_mask=attention_mask)
            logits = outputs.logits

            loss = criterion(logits, labels)
            total_loss += loss.item()

            preds = torch.argmax(logits, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    average_loss = total_loss / len(test_loader)
    f1 = f1_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='weighted')

    return average_loss, f1, recall, accuracy, precision


In [63]:
test_loss, test_f1, test_recall, test_acc, test_prec = evaluate(blue_model, blue_test_loader, criterion, device)
print(':: :: Finetuned Blue-BERT Benchmark  :: :: ')
print(f'Test Loss: {test_loss:.4f}, F1 Score: {test_f1:.4f}, Recall: {test_recall:.4f}, Accuracy: {test_acc:.4f}, Precision: {test_prec:.4f}')

:: :: Finetuned Blue-BERT Benchmark  :: :: 
Test Loss: 0.6153, F1 Score: 0.6308, Recall: 0.6666, Accuracy: 0.6666, Precision: 0.6554


In [64]:
test_loss, test_f1, test_recall, test_acc, test_prec = evaluate(bioC_model, bioC_test_loader, criterion, device)
print(':: :: Finetuned bioClinical-BERT Benchmark  :: :: ')
print(f'Test Loss: {test_loss:.4f}, F1 Score: {test_f1:.4f}, Recall: {test_recall:.4f}, Accuracy: {test_acc:.4f}, Precision: {test_prec:.4f}')

:: :: Finetuned bioClinical-BERT Benchmark  :: :: 
Test Loss: 0.6189, F1 Score: 0.6568, Recall: 0.6654, Accuracy: 0.6654, Precision: 0.6554


In [65]:
test_loss, test_f1, test_recall, test_acc, test_prec = evaluate(sapB_model, sapB_test_loader, criterion, device)
print(':: :: Finetuned Sap-BERT Benchmark  :: :: ')
print(f'Test Loss: {test_loss:.4f}, F1 Score: {test_f1:.4f}, Recall: {test_recall:.4f}, Accuracy: {test_acc:.4f}, Precision: {test_prec:.4f}')

:: :: Finetuned Sap-BERT Benchmark  :: :: 
Test Loss: 0.6213, F1 Score: 0.6688, Recall: 0.6816, Accuracy: 0.6816, Precision: 0.6710


In [66]:
test_loss, test_f1, test_recall, test_acc, test_prec = evaluate(bgpt_model, bgpt_test_loader, criterion, device)
print(':: :: Finetuned biogpt Benchmark  :: :: ')
print(f'Test Loss: {test_loss:.4f}, F1 Score: {test_f1:.4f}, Recall: {test_recall:.4f}, Accuracy: {test_acc:.4f}, Precision: {test_prec:.4f}')

:: :: Finetuned biogpt Benchmark  :: :: 
Test Loss: 0.6278, F1 Score: 0.5854, Recall: 0.6532, Accuracy: 0.6532, Precision: 0.6510


In [67]:
test_loss, test_f1, test_recall, test_acc, test_prec = evaluate(cg_model, cg_test_loader, criterion, device)
print(':: :: [Control Group] clinicalBERT Benchmark  :: :: ')
print(f'Test Loss: {test_loss:.4f}, F1 Score: {test_f1:.4f}, Recall: {test_recall:.4f}, Accuracy: {test_acc:.4f}, Precision: {test_prec:.4f}')

:: :: [Control Group] clinicalBERT Benchmark  :: :: 
Test Loss: 0.6985, F1 Score: 0.2313, Recall: 0.3844, Accuracy: 0.3844, Precision: 0.5829
