### Master testing notebook
Notebook for computing test-set evaluation metrics for the trained models; the model to evaluate is selected with `MULTIMOLECULE_MODEL`.

In [1]:
%%capture
!pip install datasets evaluate multimolecule==0.0.5

In [2]:
import os
import pandas as pd
import torch
from transformers import (
    DataCollatorForTokenClassification,
    TrainingArguments,
    Trainer
)
from datasets import Dataset
from google.colab import drive

import matplotlib.pyplot as plt

In [4]:
drive.mount('/content/drive')

WORKING_DIRECTORY = '/content/drive/MyDrive/Machine_Learning_(CS-433)/Project_2'
DATASET_PATH = 'data/dataset.txt'

%cd {WORKING_DIRECTORY}

Mounted at /content/drive
/content/drive/MyDrive/Machine_Learning_(CS-433)/Project_2


In [5]:
from BP_LM.scripts.data_preprocessing import *
from BP_LM.scripts.trainer_datasets_creation import create_dataset
from BP_LM.scripts.compute_metrics import compute_metrics_test, precision_recall_data
from BP_LM.scripts.model_choice import set_multimolecule_model

# Switch Weights & Biases off through its environment variable.
os.environ["WANDB_MODE"] = "disabled"

# Report which device PyTorch can see: the GPU when CUDA is available,
# otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

Using device: cuda


In [6]:
# Evaluation configuration.
# Supported model keys: rnafm, rnamsm, ernierna, utrlm-te_el, splicebert, rnabert.
MULTIMOLECULE_MODEL = "rnabert"
# Number of data points to sample; set to None to use the full dataset.
SAMPLE_N_DATAPOINTS = 10_000
SEED = 32  # forwarded to create_dataset
BATCH_SIZE = 16  # used as the per-device evaluation batch size

In [7]:
# Load the model chosen in the configuration cell in testing mode. A saved
# model is required at
# f"{MULTIMOLECULE_MODEL}-finetuned-secondary-structure/best_model".
(
    trained_model,
    tokenizer,
    MODEL_MAX_INPUT_SIZE,
    ideal_threshold,
) = set_multimolecule_model(MULTIMOLECULE_MODEL, for_testing=True)

# Read the tab-separated dataset and build the train / validation / test splits.
df = pd.read_csv(DATASET_PATH, sep="\t")
train_dataset, val_dataset, test_dataset = create_dataset(
    df,
    tokenizer,
    trained_model,
    MODEL_MAX_INPUT_SIZE,
    SEED,
    SAMPLE_N_DATAPOINTS,
)

# Token-classification collator that is handed to the Trainer.
data_collator = DataCollatorForTokenClassification(tokenizer)

Chromosomes in train set: {'chrY', 'chr2', 'chr18', 'chr19', 'chr7', 'chr5', 'chr1', 'chr17', 'chrX', 'chr14', 'chr13', 'chr16', 'chr3', 'chr20', 'chr6', 'chr12', 'chr4', 'chr22', 'chr21', 'chr15'}
Chromosomes in validation set: {'chr9', 'chr10'}
Chromosomes in test set: {'chr8', 'chr11'}

Total data points: 10000
Train set contains 8287 data points (82.87%)
Validation set contains 807 data points (8.07%)
Test set contains 906 data points (9.06%)


In [9]:
def metric(eval_prediction):
    """Compute the test metrics for one evaluation pass.

    Adapter passed to the Trainer as ``compute_metrics``. It forwards the
    object the Trainer supplies to ``compute_metrics_test`` together with the
    selected model name and the ``ideal_threshold`` returned by
    ``set_multimolecule_model``.
    """
    # Uses the imported `compute_metrics_test`. The earlier
    # `compute_metrics_test1` is not imported or defined anywhere in this
    # notebook, so it raised NameError on a fresh kernel.
    # NOTE(review): the third positional argument (False) is kept exactly as in
    # the original call — confirm its meaning against compute_metrics_test.
    return compute_metrics_test(eval_prediction, MULTIMOLECULE_MODEL, False, ideal_threshold)


# Evaluation-only arguments. `no_cuda=False` was dropped: it is the default and
# the argument is deprecated in favour of `use_cpu`.
# NOTE(review): output_dir is an absolute path at the filesystem root — confirm
# this is intended rather than a project-relative 'results' folder.
testing_args = TrainingArguments(
    output_dir='/results',
    per_device_eval_batch_size=BATCH_SIZE,
    do_eval=True,
)

# The Trainer is used purely as an evaluation harness over the test split.
# NOTE(review): the captured warning on the `Trainer(` call presumably comes
# from the `tokenizer=` argument being deprecated in recent transformers
# releases; it is kept for compatibility with the installed version.
tester = Trainer(
    model=trained_model,
    args=testing_args,
    eval_dataset=test_dataset,
    compute_metrics=metric,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

metrics = tester.evaluate()

print("Evaluation Metrics:")
for key, value in metrics.items():
    print(f"{key}: {value}")

  tester = Trainer(




Evaluation Metrics:
eval_loss: 0.008999074809253216
eval_model_preparation_time: 0.0018
eval_F1: 0.3345145287030475
eval_seq_accuracy: 0.24172185430463577
eval_AP: 0.31030180517800665
eval_MCC: 0.3476570999211874
eval_AUC: 0.9868648603397809
eval_runtime: 7.1467
eval_samples_per_second: 126.771
eval_steps_per_second: 7.976
