In [1]:
import torch
from torch.utils.data import Dataset
import numpy as np
from datasets import Dataset as HFDataset, DatasetDict
from medmnist import BreastMNIST, PneumoniaMNIST
from transformers import AutoModelForImageClassification, AutoImageProcessor, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score, roc_auc_score
import random
from collections import Counter
from sklearn.utils import resample
import datasets

  from .autonotebook import tqdm as notebook_tqdm


# Dataset Parsing
The fist step was to convert the MedMNIST dataset into a format suitable for Hugging Face model, either the datsets suffering by strong unbalancing so we implemented a balanced version for each dataset

In [2]:
class MedMNISTtoHF(Dataset):
    def __init__(self, medmnist_dataset):
        """
        Convert MedMNIST dataset to a format compatible with HuggingFace models
        Args:
            medmnist_dataset: The original MedMNIST dataset
            transform: Optional transforms to be applied to the images
        """
        self.dataset = medmnist_dataset
        

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        img, label = self.dataset[idx]

        return {
            "image": img,
            "label": torch.tensor(label, dtype=torch.long)
        }

def convert_medmnist_to_hf(medmnist_dataset):
    """
    Convert a MedMNIST dataset to a HuggingFace dataset
    Args:
        medmnist_dataset: The original MedMNIST dataset
    Returns:
        The HuggingFace dataset
    """
    # Create wrapper dataset
    wrapper_dataset = MedMNISTtoHF(medmnist_dataset)
    
    # Convert to HF format
    images = []
    labels = []
    
    for i in range(len(wrapper_dataset)):
        sample = wrapper_dataset[i]
        images.append(sample["image"])
        labels.append(sample["label"].item())
    
    # Create HF dataset
    hf_dataset = HFDataset.from_dict({
        "image": images,
        "label": labels
    })
    
    return hf_dataset

def dataset_balancing(dataset, alpha):
    """	
    Balance the dataset by oversampling the minority class
    Args:
        dataset: The original dataset
        alpha: The oversampling factor
    Returns:
        The balanced dataset
    """

    mj_class = Counter(dataset['label']).most_common(1)[0][0] # majority class
    mn_class = abs(mj_class-1) # minority class
    data = dataset['image'] # images extracted from the dataset

    mask = [lb== mj_class for lb in dataset['label']] # mask to separate majority and minority class
    X_majority = [img for img,flag in zip(data, mask) if flag] # majority class images
    X_minority = [img for img,flag in zip(data, mask) if not flag] # minority class images
    new_len_majority = len(X_minority) + int(alpha*len(X_minority)) # new length of the majority class
    X_majority_resampled = resample(X_majority, 
                                    replace=False,  # No replacement
                                    n_samples=new_len_majority,  # Match minority class size
                                    random_state=42)
    X_resampled = X_majority_resampled + X_minority # resampled dataset
    y_resampled = [mj_class]*new_len_majority + [mn_class]*len(X_minority) # resampled labels
    random.seed(42)
    random.shuffle(X_resampled) # shuffle the dataset
    random.seed(42)
    random.shuffle(y_resampled) # shuffle the labels

    dict_blanced_dataset = {
        "image": X_resampled,
        "label": y_resampled
    } # dictionary of the balanced dataset
    balanced_dataset = datasets.Dataset.from_dict(dict_blanced_dataset) # convert the dictionary to a HuggingFace dataset
    return balanced_dataset

def load_dataset_medmnist(dataset_name, size, balancing):
    """
    Load a MedMNIST dataset and convert it to a HuggingFace dataset
    Args:
        dataset_name: The name of the MedMNIST dataset
        size: The size of the images
        balancing: Whether to balance the dataset
    Returns:
        The HuggingFace dataset
    """
    # Load MedMNIST dataset
    train_dataset = dataset_name(split='train', download=True, size=size) 
    val_dataset = dataset_name(split='val', download=True, size=size) 
    test_dataset = dataset_name(split='test', download=True, size=size)

    # Convert to HuggingFace dataset
    hf_train_dataset = convert_medmnist_to_hf(train_dataset)
    hf_val_dataset = convert_medmnist_to_hf(val_dataset)
    hf_test_dataset = convert_medmnist_to_hf(test_dataset)

    # Balancing
    if balancing: 
        hf_train_dataset_balanced = dataset_balancing(hf_train_dataset, 0.5) # balance the training dataset
        dataset = DatasetDict({"train": hf_train_dataset_balanced, "validation": hf_val_dataset, "test": hf_test_dataset}) 
        return dataset
    else:
        dataset = DatasetDict({"train": hf_train_dataset, "validation": hf_val_dataset, "test": hf_test_dataset})
        return dataset

# Dataset Loading

In [3]:
breast_dataset_28 = load_dataset_medmnist(BreastMNIST, 28, False)
breast_dataset_balanced_28 = load_dataset_medmnist(BreastMNIST, 28, True)
breast_dataset_224 = load_dataset_medmnist(BreastMNIST, 224, False)
breast_dataset_balanced_224 = load_dataset_medmnist(BreastMNIST, 224, True)

pneumonia_dataset_28 = load_dataset_medmnist(PneumoniaMNIST, 28, False)
pneumonia_dataset_balanced_28 = load_dataset_medmnist(PneumoniaMNIST, 28, True)
pneumonia_dataset_224 = load_dataset_medmnist(PneumoniaMNIST, 224, False)
pneumonia_dataset_balanced_224 = load_dataset_medmnist(PneumoniaMNIST, 224, True)

Using downloaded and verified file: C:\Users\baiet\.medmnist\breastmnist.npz
Using downloaded and verified file: C:\Users\baiet\.medmnist\breastmnist.npz
Using downloaded and verified file: C:\Users\baiet\.medmnist\breastmnist.npz
Using downloaded and verified file: C:\Users\baiet\.medmnist\breastmnist.npz
Using downloaded and verified file: C:\Users\baiet\.medmnist\breastmnist.npz
Using downloaded and verified file: C:\Users\baiet\.medmnist\breastmnist.npz
Using downloaded and verified file: C:\Users\baiet\.medmnist\breastmnist_224.npz
Using downloaded and verified file: C:\Users\baiet\.medmnist\breastmnist_224.npz
Using downloaded and verified file: C:\Users\baiet\.medmnist\breastmnist_224.npz
Using downloaded and verified file: C:\Users\baiet\.medmnist\breastmnist_224.npz
Using downloaded and verified file: C:\Users\baiet\.medmnist\breastmnist_224.npz
Using downloaded and verified file: C:\Users\baiet\.medmnist\breastmnist_224.npz
Using downloaded and verified file: C:\Users\baiet\.

# Freezer Functions

In [4]:
def beit_freezer(model):
    for name, param in model.named_parameters():
        if not name.startswith("classifier") \
            and not name.startswith("beit.pooler")\
            and not name.startswith("beit.encoder.layer.23")\
            and not name.startswith("beit.encoder.layer.22")\
            and not name.startswith("beit.encoder.layer.21")\
            and not name.startswith("beit.encoder.layer.20")\
            and not name.startswith("beit.encoder.layer.19"):
            param.requires_grad = False

# Reproducibility Function

In [5]:
def set_reproducibility(seed=42):
    # Set seeds for reproducibility
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed) 
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False 

# Model Function

In [6]:
def T_and_T(dataset, model_name, output_dir, batch_size, weight_decay, Training, Testing, freezer):
    # Set Seed
    set_reproducibility()

    # Set device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # Check if GPU is available
    
    # Load the BEiT-large model and image processor
    model = AutoModelForImageClassification.from_pretrained(model_name, num_labels=2, ignore_mismatched_sizes=True).to(device) # Load the model
    processor = AutoImageProcessor.from_pretrained(model_name) # Load the image processor

    # Freeze some layers of the model
    freezer(model) 

    # Verify which layers are trainable
    trainable_params = [name for name, param in model.named_parameters() if param.requires_grad]
    print(f"Trainable parameters: {trainable_params}")

    # Define preprocessing function
    def preprocess_images(examples):
        images = [processor(image.convert("RGB"), return_tensors="pt") for image in examples["image"]] # Convert images to RGB format
        pixel_values = torch.stack([image["pixel_values"].squeeze() for image in images]) # Stack the pixel values
        labels = torch.tensor(examples["label"]) # Get the labels
        return {"pixel_values": pixel_values, "labels": labels} # Return the pixel values and labels

    # Preprocess the dataset
    train_dataset = dataset["train"].with_transform(preprocess_images)
    validation_dataset = dataset["validation"].with_transform(preprocess_images)
    test_dataset = dataset["test"].with_transform(preprocess_images)

    # Define training arguments
    training_args = TrainingArguments(
        output_dir=output_dir,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        learning_rate=3e-4,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=5,
        weight_decay=weight_decay, # Regularization
        logging_dir="./logs", 
        logging_steps=10,
        save_total_limit=1,
        remove_unused_columns=False,
        push_to_hub=False,
        seed=42,
    )

    # Define Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=validation_dataset,
        tokenizer=processor,
    )
    if Training:
        trainer.train()
        model.save_pretrained(output_dir)
        processor.save_pretrained(output_dir)

    if Testing:
        predictions = trainer.predict(test_dataset)
        pred_labels = predictions.predictions.argmax(axis=1)
        true_labels = predictions.label_ids

        # Calculate the metrics
        accuracy = accuracy_score(true_labels, pred_labels)
        auc = roc_auc_score(true_labels, pred_labels)

        # Display the metrics
        print(f"Accuracy: {accuracy:.4f}")
        print(f"AUC: {auc:.4f}")
        


# Training and Testing

In [10]:
T_and_T(breast_dataset_28, "microsoft/beit-large-patch16-224-pt22k", "beit_breast_28", 32, 0.01, True, True, beit_freezer)

Some weights of BeitForImageClassification were not initialized from the model checkpoint at microsoft/beit-large-patch16-224-pt22k and are newly initialized: ['beit.pooler.layernorm.bias', 'beit.pooler.layernorm.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Trainable parameters: ['beit.encoder.layer.19.lambda_1', 'beit.encoder.layer.19.lambda_2', 'beit.encoder.layer.19.attention.attention.query.weight', 'beit.encoder.layer.19.attention.attention.query.bias', 'beit.encoder.layer.19.attention.attention.key.weight', 'beit.encoder.layer.19.attention.attention.value.weight', 'beit.encoder.layer.19.attention.attention.value.bias', 'beit.encoder.layer.19.attention.output.dense.weight', 'beit.encoder.layer.19.attention.output.dense.bias', 'beit.encoder.layer.19.intermediate.dense.weight', 'beit.encoder.layer.19.intermediate.dense.bias', 'beit.encoder.layer.19.output.dense.weight', 'beit.encoder.layer.19.output.dense.bias', 'beit.encoder.layer.19.layernorm_before.weight', 'beit.encoder.layer.19.layernorm_before.bias', 'beit.encoder.layer.19.layernorm_after.weight', 'beit.encoder.layer.19.layernorm_after.bias', 'beit.encoder.layer.20.lambda_1', 'beit.encoder.layer.20.lambda_2', 'beit.encoder.layer.20.attention.attention.query.weight', 'beit.encoder

  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,1.2333,0.567551
2,0.5926,0.543375
3,0.4712,0.417184
4,0.3856,0.368475
5,0.3299,0.29384


Accuracy: 0.8910
AUC: 0.8202


In [51]:
T_and_T(breast_dataset_balanced_28, "microsoft/beit-large-patch16-224-pt22k", "beit_breast_balanced_28", 16, 0, True, True, beit_freezer)

Some weights of BeitForImageClassification were not initialized from the model checkpoint at microsoft/beit-large-patch16-224-pt22k and are newly initialized: ['beit.pooler.layernorm.bias', 'beit.pooler.layernorm.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Trainable parameters: ['beit.encoder.layer.19.lambda_1', 'beit.encoder.layer.19.lambda_2', 'beit.encoder.layer.19.attention.attention.query.weight', 'beit.encoder.layer.19.attention.attention.query.bias', 'beit.encoder.layer.19.attention.attention.key.weight', 'beit.encoder.layer.19.attention.attention.value.weight', 'beit.encoder.layer.19.attention.attention.value.bias', 'beit.encoder.layer.19.attention.output.dense.weight', 'beit.encoder.layer.19.attention.output.dense.bias', 'beit.encoder.layer.19.intermediate.dense.weight', 'beit.encoder.layer.19.intermediate.dense.bias', 'beit.encoder.layer.19.output.dense.weight', 'beit.encoder.layer.19.output.dense.bias', 'beit.encoder.layer.19.layernorm_before.weight', 'beit.encoder.layer.19.layernorm_before.bias', 'beit.encoder.layer.19.layernorm_after.weight', 'beit.encoder.layer.19.layernorm_after.bias', 'beit.encoder.layer.20.lambda_1', 'beit.encoder.layer.20.lambda_2', 'beit.encoder.layer.20.attention.attention.query.weight', 'beit.encoder

  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,0.6828,0.711752
2,0.6289,0.44271
3,0.5924,0.359735
4,0.4519,0.278309
5,0.3733,0.27798


Accuracy: 0.8846
AUC: 0.8459


In [52]:
T_and_T(breast_dataset_224, "microsoft/beit-large-patch16-224-pt22k", "beit_breast_224", 16, 0.1, True, True, beit_freezer)

Some weights of BeitForImageClassification were not initialized from the model checkpoint at microsoft/beit-large-patch16-224-pt22k and are newly initialized: ['beit.pooler.layernorm.bias', 'beit.pooler.layernorm.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Trainable parameters: ['beit.encoder.layer.19.lambda_1', 'beit.encoder.layer.19.lambda_2', 'beit.encoder.layer.19.attention.attention.query.weight', 'beit.encoder.layer.19.attention.attention.query.bias', 'beit.encoder.layer.19.attention.attention.key.weight', 'beit.encoder.layer.19.attention.attention.value.weight', 'beit.encoder.layer.19.attention.attention.value.bias', 'beit.encoder.layer.19.attention.output.dense.weight', 'beit.encoder.layer.19.attention.output.dense.bias', 'beit.encoder.layer.19.intermediate.dense.weight', 'beit.encoder.layer.19.intermediate.dense.bias', 'beit.encoder.layer.19.output.dense.weight', 'beit.encoder.layer.19.output.dense.bias', 'beit.encoder.layer.19.layernorm_before.weight', 'beit.encoder.layer.19.layernorm_before.bias', 'beit.encoder.layer.19.layernorm_after.weight', 'beit.encoder.layer.19.layernorm_after.bias', 'beit.encoder.layer.20.lambda_1', 'beit.encoder.layer.20.lambda_2', 'beit.encoder.layer.20.attention.attention.query.weight', 'beit.encoder

  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,0.5811,0.421221
2,0.5019,0.303188
3,0.4039,0.276487
4,0.1224,0.250388
5,0.1555,0.296184


Accuracy: 0.8782
AUC: 0.7964


In [53]:
T_and_T(breast_dataset_balanced_224, "microsoft/beit-large-patch16-224-pt22k", "beit_breast_balanced_224", 16, 0.1, True, True, beit_freezer)

Some weights of BeitForImageClassification were not initialized from the model checkpoint at microsoft/beit-large-patch16-224-pt22k and are newly initialized: ['beit.pooler.layernorm.bias', 'beit.pooler.layernorm.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Trainable parameters: ['beit.encoder.layer.19.lambda_1', 'beit.encoder.layer.19.lambda_2', 'beit.encoder.layer.19.attention.attention.query.weight', 'beit.encoder.layer.19.attention.attention.query.bias', 'beit.encoder.layer.19.attention.attention.key.weight', 'beit.encoder.layer.19.attention.attention.value.weight', 'beit.encoder.layer.19.attention.attention.value.bias', 'beit.encoder.layer.19.attention.output.dense.weight', 'beit.encoder.layer.19.attention.output.dense.bias', 'beit.encoder.layer.19.intermediate.dense.weight', 'beit.encoder.layer.19.intermediate.dense.bias', 'beit.encoder.layer.19.output.dense.weight', 'beit.encoder.layer.19.output.dense.bias', 'beit.encoder.layer.19.layernorm_before.weight', 'beit.encoder.layer.19.layernorm_before.bias', 'beit.encoder.layer.19.layernorm_after.weight', 'beit.encoder.layer.19.layernorm_after.bias', 'beit.encoder.layer.20.lambda_1', 'beit.encoder.layer.20.lambda_2', 'beit.encoder.layer.20.attention.attention.query.weight', 'beit.encoder

  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,0.733,0.631534
2,0.6625,0.700514
3,0.5628,0.300123
4,0.3045,0.285789
5,0.2129,0.257943


Accuracy: 0.8782
AUC: 0.8264


In [7]:
T_and_T(pneumonia_dataset_28, "microsoft/beit-large-patch16-224-pt22k", "beit_pneumonia_28", 64, 0.1, True, True, beit_freezer)

Some weights of BeitForImageClassification were not initialized from the model checkpoint at microsoft/beit-large-patch16-224-pt22k and are newly initialized: ['beit.pooler.layernorm.bias', 'beit.pooler.layernorm.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
  trainer = Trainer(


Trainable parameters: ['beit.encoder.layer.19.lambda_1', 'beit.encoder.layer.19.lambda_2', 'beit.encoder.layer.19.attention.attention.query.weight', 'beit.encoder.layer.19.attention.attention.query.bias', 'beit.encoder.layer.19.attention.attention.key.weight', 'beit.encoder.layer.19.attention.attention.value.weight', 'beit.encoder.layer.19.attention.attention.value.bias', 'beit.encoder.layer.19.attention.output.dense.weight', 'beit.encoder.layer.19.attention.output.dense.bias', 'beit.encoder.layer.19.intermediate.dense.weight', 'beit.encoder.layer.19.intermediate.dense.bias', 'beit.encoder.layer.19.output.dense.weight', 'beit.encoder.layer.19.output.dense.bias', 'beit.encoder.layer.19.layernorm_before.weight', 'beit.encoder.layer.19.layernorm_before.bias', 'beit.encoder.layer.19.layernorm_after.weight', 'beit.encoder.layer.19.layernorm_after.bias', 'beit.encoder.layer.20.lambda_1', 'beit.encoder.layer.20.lambda_2', 'beit.encoder.layer.20.attention.attention.query.weight', 'beit.encoder

Epoch,Training Loss,Validation Loss
1,0.2592,0.116012
2,0.1199,0.14533
3,0.0751,0.085481
4,0.0612,0.066247
5,0.0239,0.05915


Accuracy: 0.8846
AUC: 0.8487


In [9]:
T_and_T(pneumonia_dataset_balanced_28, "microsoft/beit-large-patch16-224-pt22k", "beit_pneumonia_balanced_28", 32, 0.1, True, True, beit_freezer)

Some weights of BeitForImageClassification were not initialized from the model checkpoint at microsoft/beit-large-patch16-224-pt22k and are newly initialized: ['beit.pooler.layernorm.bias', 'beit.pooler.layernorm.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Trainable parameters: ['beit.encoder.layer.19.lambda_1', 'beit.encoder.layer.19.lambda_2', 'beit.encoder.layer.19.attention.attention.query.weight', 'beit.encoder.layer.19.attention.attention.query.bias', 'beit.encoder.layer.19.attention.attention.key.weight', 'beit.encoder.layer.19.attention.attention.value.weight', 'beit.encoder.layer.19.attention.attention.value.bias', 'beit.encoder.layer.19.attention.output.dense.weight', 'beit.encoder.layer.19.attention.output.dense.bias', 'beit.encoder.layer.19.intermediate.dense.weight', 'beit.encoder.layer.19.intermediate.dense.bias', 'beit.encoder.layer.19.output.dense.weight', 'beit.encoder.layer.19.output.dense.bias', 'beit.encoder.layer.19.layernorm_before.weight', 'beit.encoder.layer.19.layernorm_before.bias', 'beit.encoder.layer.19.layernorm_after.weight', 'beit.encoder.layer.19.layernorm_after.bias', 'beit.encoder.layer.20.lambda_1', 'beit.encoder.layer.20.lambda_2', 'beit.encoder.layer.20.attention.attention.query.weight', 'beit.encoder

  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,0.2087,0.222284
2,0.1077,0.099022
3,0.1426,0.081268
4,0.0611,0.140866
5,0.0374,0.097864


Accuracy: 0.9183
AUC: 0.8962


In [9]:
T_and_T(pneumonia_dataset_224, "microsoft/beit-large-patch16-224-pt22k", "beit_pneumonia_224", 32, 0.1, True, True, beit_freezer)

Some weights of BeitForImageClassification were not initialized from the model checkpoint at microsoft/beit-large-patch16-224-pt22k and are newly initialized: ['beit.pooler.layernorm.bias', 'beit.pooler.layernorm.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Trainable parameters: ['beit.encoder.layer.19.lambda_1', 'beit.encoder.layer.19.lambda_2', 'beit.encoder.layer.19.attention.attention.query.weight', 'beit.encoder.layer.19.attention.attention.query.bias', 'beit.encoder.layer.19.attention.attention.key.weight', 'beit.encoder.layer.19.attention.attention.value.weight', 'beit.encoder.layer.19.attention.attention.value.bias', 'beit.encoder.layer.19.attention.output.dense.weight', 'beit.encoder.layer.19.attention.output.dense.bias', 'beit.encoder.layer.19.intermediate.dense.weight', 'beit.encoder.layer.19.intermediate.dense.bias', 'beit.encoder.layer.19.output.dense.weight', 'beit.encoder.layer.19.output.dense.bias', 'beit.encoder.layer.19.layernorm_before.weight', 'beit.encoder.layer.19.layernorm_before.bias', 'beit.encoder.layer.19.layernorm_after.weight', 'beit.encoder.layer.19.layernorm_after.bias', 'beit.encoder.layer.20.lambda_1', 'beit.encoder.layer.20.lambda_2', 'beit.encoder.layer.20.attention.attention.query.weight', 'beit.encoder

  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,0.0829,0.078521
2,0.0511,0.038238
3,0.0188,0.039926
4,0.0099,0.043594
5,0.0039,0.031939


Accuracy: 0.9038
AUC: 0.8726


In [59]:
T_and_T(pneumonia_dataset_balanced_224, "microsoft/beit-large-patch16-224-pt22k", "beit_pneumonia_balanced_224", 32, 0.1, True, True, beit_freezer)

Some weights of BeitForImageClassification were not initialized from the model checkpoint at microsoft/beit-large-patch16-224-pt22k and are newly initialized: ['beit.pooler.layernorm.bias', 'beit.pooler.layernorm.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Trainable parameters: ['beit.encoder.layer.19.lambda_1', 'beit.encoder.layer.19.lambda_2', 'beit.encoder.layer.19.attention.attention.query.weight', 'beit.encoder.layer.19.attention.attention.query.bias', 'beit.encoder.layer.19.attention.attention.key.weight', 'beit.encoder.layer.19.attention.attention.value.weight', 'beit.encoder.layer.19.attention.attention.value.bias', 'beit.encoder.layer.19.attention.output.dense.weight', 'beit.encoder.layer.19.attention.output.dense.bias', 'beit.encoder.layer.19.intermediate.dense.weight', 'beit.encoder.layer.19.intermediate.dense.bias', 'beit.encoder.layer.19.output.dense.weight', 'beit.encoder.layer.19.output.dense.bias', 'beit.encoder.layer.19.layernorm_before.weight', 'beit.encoder.layer.19.layernorm_before.bias', 'beit.encoder.layer.19.layernorm_after.weight', 'beit.encoder.layer.19.layernorm_after.bias', 'beit.encoder.layer.20.lambda_1', 'beit.encoder.layer.20.lambda_2', 'beit.encoder.layer.20.attention.attention.query.weight', 'beit.encoder

  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,0.1028,0.106134
2,0.0408,0.054539
3,0.0601,0.050496
4,0.0037,0.061132
5,0.0166,0.052801


Accuracy: 0.9231
AUC: 0.8991
