In [1]:
import os
import torch
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.metrics import f1_score, roc_auc_score
from torch.utils.data import DataLoader
from transformers import RobertaTokenizer, RobertaModel
from transformers import RobertaTokenizer, RobertaForSequenceClassification
import deepchem as dc

No normalization for SPS. Feature removed!
No normalization for AvgIpc. Feature removed!
Skipped loading some Tensorflow models, missing a dependency. No module named 'tensorflow'
Skipped loading modules with pytorch-geometric dependency, missing a dependency. No module named 'torch_geometric'
Skipped loading modules with pytorch-geometric dependency, missing a dependency. cannot import name 'DMPNN' from 'deepchem.models.torch_models' (/HDD1/bbq9088/miniconda3/envs/molberta/lib/python3.10/site-packages/deepchem/models/torch_models/__init__.py)
Skipped loading modules with pytorch-lightning dependency, missing a dependency. No module named 'lightning'
Skipped loading some Jax models, missing a dependency. No module named 'jax'
Skipped loading some PyTorch models, missing a dependency. No module named 'tensorflow'


In [2]:
# GPU 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")

Device: cuda


In [3]:
# 로컬에서 Roberta 모델과 Tokenizer 로드
tokenizer_path = "./origin_model/roberta/tokenizer_folder"
model_path = "./origin_model/roberta"

In [4]:
# 토크나이저 로드
tokenizer = RobertaTokenizer.from_pretrained(tokenizer_path)

In [5]:
# 모델 로드 함수 (동적으로 num_labels 설정)
def load_model(model_path, num_labels):
    return RobertaForSequenceClassification.from_pretrained(model_path, num_labels=num_labels).to(device)

In [6]:
# 데이터셋 로드 함수
def load_dataset(dataset_name, featurizer):
    if dataset_name == "ClinTox":
        tasks, datasets, transformers = dc.molnet.load_clintox(featurizer=featurizer, splitter=None, transformers=[], reload=True)
        dataset = datasets[0]
        df = pd.DataFrame({
            'smiles': dataset.ids,
            'FDA_APPROVED': dataset.y[:, 0],
            'CT_TOX': dataset.y[:, 1]
        }).dropna()
        return df

    elif dataset_name == "SIDER":
        tasks, datasets, transformers = dc.molnet.load_sider(featurizer=featurizer, splitter=None, transformers=[], reload=True)
        dataset = datasets[0]
        df = pd.DataFrame(data=dataset.y, columns=tasks)
        df['smiles'] = dataset.ids
        df = df.dropna()
        return df

    elif dataset_name == "Tox21":
        tasks, datasets, transformers = dc.molnet.load_tox21(featurizer=featurizer, splitter=None, transformers=[], reload=True)
        dataset = datasets[0]
        df = pd.DataFrame(data=dataset.y, columns=tasks)
        df['smiles'] = dataset.ids
        df = df.dropna()
        return df

    else:
        raise ValueError(f"Unknown dataset: {dataset_name}")

In [7]:
def prepare_data(df):
    texts = [f"SMILES: {smiles}" for smiles in df['smiles']]
    label_columns = df.columns.difference(['smiles'])
    labels = df[label_columns].values.astype(np.float32)
    return texts, labels

In [8]:
class SMILESDataset(torch.utils.data.Dataset):
    def __init__(self, texts, labels):
        self.texts = texts
        self.labels = labels

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        inputs = tokenizer(text, return_tensors="pt", padding="max_length", truncation=True, max_length=512)
        inputs['labels'] = torch.tensor(label, dtype=torch.float)
        return {key: val.squeeze(0) for key, val in inputs.items()}

In [9]:
def evaluate_dataset(dataset_name, featurizer, model_path, tokenizer, batch_size=16):
    print(f"\n=== Evaluating {dataset_name} ===")

    # 데이터셋 로드
    df = load_dataset(dataset_name, featurizer)
    num_labels = len(df.columns) - 1  # 'smiles' 제외한 컬럼 수
    print(f"Number of labels for {dataset_name}: {num_labels}")

    # 모델 로드
    model = load_model(model_path, num_labels)
    model.eval()

    # 데이터 준비
    texts, labels = prepare_data(df)

    # DataLoader 생성
    dataset = SMILESDataset(texts, labels)
    data_loader = DataLoader(dataset, batch_size=batch_size)

    all_preds, all_labels = [], []

    # 모델 추론
    with torch.no_grad():
        for batch in tqdm(data_loader, desc=f"Predicting {dataset_name}"):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            preds = torch.sigmoid(logits).cpu().numpy()

            all_preds.extend(preds)
            all_labels.extend(labels.cpu().numpy())

    # 데이터 크기 확인
    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)
    print("=== Data Shapes ===")
    print(f"all_labels shape: {all_labels.shape}")
    print(f"all_preds shape: {all_preds.shape}")

    # 예측값 0.5 기준으로 이진화
    all_preds = (all_preds > 0.5).astype(int)

    # 성능 평가
    roc_auc = roc_auc_score(all_labels, all_preds, average='macro')
    f1_micro = f1_score(all_labels, all_preds, average='micro')
    f1_macro = f1_score(all_labels, all_preds, average='macro')

    print(f"ROC-AUC (Macro): {roc_auc:.4f}")
    print(f"F1-Score (Micro): {f1_micro:.4f}")
    print(f"F1-Score (Macro): {f1_macro:.4f}")

In [10]:
# Featurizer 설정
featurizer = dc.feat.CircularFingerprint(radius=2, size=2048)

In [11]:
# 데이터셋별 평가
for dataset_name in ["ClinTox", "SIDER", "Tox21"]:
    evaluate_dataset(dataset_name, featurizer, model_path, tokenizer, batch_size=16)

  return torch.load(checkpoint_file, map_location="cpu")



=== Evaluating ClinTox ===
Number of labels for ClinTox: 2


Some weights of the model checkpoint at ./origin_model/roberta were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ./origin_model/roberta and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.

=== Data Shapes ===
all_labels shape: (1480, 2)
all_preds shape: (1480, 2)
ROC-AUC (Macro): 0.4861
F1-Score (Micro): 0.0000
F1-Score (Macro): 0.0000

=== Evaluating SIDER ===
Number of labels for SIDER: 27


Some weights of the model checkpoint at ./origin_model/roberta were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ./origin_model/roberta and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.

=== Data Shapes ===
all_labels shape: (1427, 27)
all_preds shape: (1427, 27)
ROC-AUC (Macro): 0.5092
F1-Score (Micro): 0.5550
F1-Score (Macro): 0.4083

=== Evaluating Tox21 ===
Number of labels for Tox21: 12


Some weights of the model checkpoint at ./origin_model/roberta were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ./origin_model/roberta and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.

=== Data Shapes ===
all_labels shape: (7823, 12)
all_preds shape: (7823, 12)
ROC-AUC (Macro): 0.4954
F1-Score (Micro): 0.1064
F1-Score (Macro): 0.0759



