In [139]:
import torch
from torch.utils.data import Dataset
import numpy as np
from torchvision import transforms
from datasets import Dataset as HFDataset, load_dataset, DatasetDict
from typing import Optional, Tuple, Dict, Any
from medmnist import BreastMNIST, RetinaMNIST
from transformers import AutoModelForImageClassification, AutoImageProcessor, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, recall_score
import random
from imblearn.under_sampling import NearMiss
import pandas as pd
from collections import Counter
from sklearn.utils import resample
import datasets


In [3]:
# Seed every random number generator the notebook uses (Python, NumPy, torch
# CPU and all CUDA devices) with one shared value.
SEED = 42
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_fn(SEED)

# cuDNN: request deterministic algorithms and switch off the benchmark autotuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [4]:
class MedMNISTtoHF(Dataset):
    """Dict-style view over a MedMNIST dataset.

    Every item of the wrapped dataset is expected to unpack as
    ``(image, label)`` and is re-emitted as
    ``{"image": image, "label": <int64 tensor>}``.
    """

    def __init__(self,
                 medmnist_dataset,
                 transform: Optional[transforms.Compose] = None):
        """
        Store the wrapped dataset and a transform pipeline.

        Args:
            medmnist_dataset: The original MedMNIST dataset
            transform: Pipeline kept on ``self.transform``. When it is falsy,
                a default ``ToTensor`` + ``Normalize(mean=0.5, std=0.5)``
                pipeline is stored instead. Note that ``__getitem__`` does
                not apply it: images are passed through unchanged.
        """
        self.dataset = medmnist_dataset
        self.transform = transform or transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5], std=[0.5])
        ])

    def __len__(self) -> int:
        """Number of samples in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx: int) -> Dict[str, Any]:
        """Return sample ``idx`` as a dict with the untouched image and a long-tensor label."""
        image, target = self.dataset[idx]

        # The image is forwarded as-is; only the label is converted.
        sample: Dict[str, Any] = {"image": image}
        sample["label"] = torch.tensor(target, dtype=torch.long)
        return sample

def convert_medmnist_to_hf(medmnist_dataset, 
                          split: str = "train") -> HFDataset:
    """
    Materialise a MedMNIST dataset as an in-memory HuggingFace dataset.

    Args:
        medmnist_dataset: Original MedMNIST dataset
        split: Dataset split name ("train", "test", or "val"). Accepted for
            call-site readability only; the function body does not use it.

    Returns:
        HuggingFace dataset with an "image" column (items exactly as the
        wrapper yields them) and a "label" column of plain Python scalars.
    """
    wrapped = MedMNISTtoHF(medmnist_dataset)

    # Pull every sample once, then split the dicts into two parallel columns.
    samples = [wrapped[position] for position in range(len(wrapped))]
    columns = {
        "image": [sample["image"] for sample in samples],
        # .item() unwraps the single-element label tensor into a Python number
        "label": [sample["label"].item() for sample in samples],
    }

    return HFDataset.from_dict(columns)

# BreastMNIST Dataset

In [146]:
# Load the three BreastMNIST splits at 224x224 and convert each to a HF dataset
train_dataset = BreastMNIST(split='train', download=True, size=224)
val_dataset = BreastMNIST(split='val', download=True, size=224)
test_dataset = BreastMNIST(split='test', download=True, size=224)

hf_train_dataset = convert_medmnist_to_hf(train_dataset, split='train')
hf_val_dataset = convert_medmnist_to_hf(val_dataset, split='val')
hf_test_dataset = convert_medmnist_to_hf(test_dataset, split='test')

# Balancing (training split only): undersample the majority class so that it
# keeps the minority size plus MAJORITY_EXTRA_FRACTION of it (i.e. 1.5x the
# minority count). Validation and test splits are left untouched.
MAJORITY_EXTRA_FRACTION = 0.5

train_labels = list(hf_train_dataset['label'])
mj_class = Counter(train_labels).most_common(1)[0][0]
mn_class = abs(mj_class-1)  # binary task: the label that is not the majority one
data = hf_train_dataset['image']

mask = [lb == mj_class for lb in train_labels]
X_majority = [img for img, flag in zip(data, mask) if flag]
X_minority = [img for img, flag in zip(data, mask) if not flag]

# Sampling without replacement cannot return more items than the pool holds,
# so the target size is capped at the number of available majority samples.
new_len_majority = min(
    len(X_majority),
    len(X_minority) + int(MAJORITY_EXTRA_FRACTION*len(X_minority)),
)
X_majority_resampled = resample(X_majority,
                                replace=False,  # No replacement
                                n_samples=new_len_majority,  # 1.5x the minority class size (capped)
                                random_state=SEED)

# Shuffle images and labels together as pairs so they cannot drift out of
# alignment. One seeded shuffle of the pairs applies the same permutation the
# two separate same-seed shuffles used to apply to each list.
paired_samples = list(zip(
    X_majority_resampled + X_minority,
    [mj_class]*new_len_majority + [mn_class]*len(X_minority),
))
random.seed(SEED)
random.shuffle(paired_samples)
X_resampled = [img for img, _ in paired_samples]
y_resampled = [lb for _, lb in paired_samples]

dict_train_blanced_dataset = {
    "image": X_resampled,
    "label": y_resampled
}

train_blanced_dataset = datasets.Dataset.from_dict(dict_train_blanced_dataset)

dataset = DatasetDict({"train": train_blanced_dataset, "validation": hf_val_dataset, "test": hf_test_dataset})
dataset

Using downloaded and verified file: C:\Users\baiet\.medmnist\breastmnist_224.npz
Using downloaded and verified file: C:\Users\baiet\.medmnist\breastmnist_224.npz
Using downloaded and verified file: C:\Users\baiet\.medmnist\breastmnist_224.npz


DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 367
    })
    validation: Dataset({
        features: ['image', 'label'],
        num_rows: 78
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 156
    })
})

In [155]:
# Use the GPU when one is visible, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# BEiT-large checkpoint with a 2-class classification head, plus its image processor
model_name = "microsoft/beit-large-patch16-224-pt22k"
model = AutoModelForImageClassification.from_pretrained(model_name, num_labels=2)
model = model.to(device)
processor = AutoImageProcessor.from_pretrained(model_name)

# Freeze everything except the classifier head, the pooler and the last
# encoder block (layer 23).
trainable_prefixes = ("classifier", "beit.pooler", "beit.encoder.layer.23")
for name, param in model.named_parameters():
    if not name.startswith(trainable_prefixes):
        param.requires_grad = False

# Report which parameters are still trainable
trainable_params = [name for name, param in model.named_parameters() if param.requires_grad]
print(f"Trainable parameters: {trainable_params}")

def preprocess_images(examples):
    """Batch transform applied lazily by the dataset.

    Converts every image of the batch to RGB, runs the image processor on the
    whole batch, and returns the tensors under the keys the Trainer feeds to
    the model: "pixel_values" and "labels".
    """
    rgb_images = [image.convert("RGB") for image in examples["image"]]
    pixel_values = processor(rgb_images, return_tensors="pt")["pixel_values"]
    return {"pixel_values": pixel_values, "labels": torch.tensor(examples["label"])}

# Attach the transform to each split (applied when rows are accessed)
train_dataset, validation_dataset, test_dataset = (
    dataset[split_name].with_transform(preprocess_images)
    for split_name in ("train", "validation", "test")
)

# Training configuration: evaluate and checkpoint once per epoch
training_args = TrainingArguments(
    output_dir="./beit_breastMNIST",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=3e-4,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    weight_decay=0.5,
    logging_dir="./logs",
    logging_steps=10,
    save_total_limit=2,
    # keep "image"/"label" columns: the lazy transform needs them
    remove_unused_columns=False,
    push_to_hub=False,
    seed=42,
)

# Trainer validates on the validation split after every epoch
trainer_beit_breastMNIST = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    tokenizer=processor,
)

Some weights of BeitForImageClassification were not initialized from the model checkpoint at microsoft/beit-large-patch16-224-pt22k and are newly initialized: ['beit.pooler.layernorm.bias', 'beit.pooler.layernorm.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Trainable parameters: ['beit.encoder.layer.23.lambda_1', 'beit.encoder.layer.23.lambda_2', 'beit.encoder.layer.23.attention.attention.query.weight', 'beit.encoder.layer.23.attention.attention.query.bias', 'beit.encoder.layer.23.attention.attention.key.weight', 'beit.encoder.layer.23.attention.attention.value.weight', 'beit.encoder.layer.23.attention.attention.value.bias', 'beit.encoder.layer.23.attention.output.dense.weight', 'beit.encoder.layer.23.attention.output.dense.bias', 'beit.encoder.layer.23.intermediate.dense.weight', 'beit.encoder.layer.23.intermediate.dense.bias', 'beit.encoder.layer.23.output.dense.weight', 'beit.encoder.layer.23.output.dense.bias', 'beit.encoder.layer.23.layernorm_before.weight', 'beit.encoder.layer.23.layernorm_before.bias', 'beit.encoder.layer.23.layernorm_after.weight', 'beit.encoder.layer.23.layernorm_after.bias', 'beit.pooler.layernorm.weight', 'beit.pooler.layernorm.bias', 'classifier.weight', 'classifier.bias']


  trainer_beit_breastMNIST = Trainer(


In [156]:
# Train the model
trainer_beit_breastMNIST.train()

# Save through the trainer rather than the notebook-level `model`/`processor`
# names: those globals are rebound by every setup cell, so after out-of-order
# execution they may belong to a different experiment. save_model() writes the
# trainer's own model together with the processor that was handed to it
# (and the training arguments).
trainer_beit_breastMNIST.save_model("./beit_breastMNIST")

# Evaluate on the test set
test_results = trainer_beit_breastMNIST.evaluate(test_dataset)
print(f"Test Results: {test_results}")

Epoch,Training Loss,Validation Loss
1,0.5668,0.498798
2,0.5653,0.410432
3,0.4298,0.3863
4,0.2574,0.433248
5,0.1455,0.448869


Test Results: {'eval_loss': 0.36107274889945984, 'eval_runtime': 4.4894, 'eval_samples_per_second': 34.748, 'eval_steps_per_second': 4.455, 'epoch': 5.0}


In [157]:
# Predict on the test split; the predicted class is the arg-max over the logits
predictions = trainer_beit_breastMNIST.predict(test_dataset)
true_labels = predictions.label_ids
pred_labels = predictions.predictions.argmax(axis=1)

# Support-weighted precision / recall / F1 (the fourth returned value is not needed)
precision, recall, f1, _ = precision_recall_fscore_support(
    true_labels, pred_labels, average='weighted'
)
accuracy = accuracy_score(true_labels, pred_labels)
# Recall of class 0.
# NOTE(review): this equals specificity only when class 1 is regarded as the
# positive class — confirm against the BreastMNIST label mapping.
specificity = recall_score(true_labels, pred_labels, pos_label=0)

# Report
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision (weighted): {precision:.4f}")
print(f"Recall (weighted): {recall:.4f}")
print(f"F1-Score (weighted): {f1:.4f}")
print(f"Specificity: {specificity:.4f}")

Accuracy: 0.8718
Precision (weighted): 0.8690
Recall (weighted): 0.8718
F1-Score (weighted): 0.8697
Specificity: 0.7143


In [158]:
# Labels that actually occur in the ground truth
classes = np.unique(true_labels)

# Per-class accuracy in percent: among the samples whose true label is c,
# the share that was also predicted as c.
per_class_accuracy = {
    c: (np.sum(pred_labels[true_labels == c] == c) / np.sum(true_labels == c)) * 100
    for c in classes
}

# Print per-class accuracy
for cls, acc in per_class_accuracy.items():
    print(f"Class {cls}: {acc:.2f}%")

Class 0: 71.43%
Class 1: 92.98%


In [161]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the ResNet-50 model and its image processor. The checkpoint ships a
# 1000-class head, so ignore_mismatched_sizes lets the 2-class head be
# created fresh instead of failing on the shape mismatch.
model_name = "microsoft/resnet-50"
model = AutoModelForImageClassification.from_pretrained(model_name, num_labels=2, ignore_mismatched_sizes=True).to(device)
processor = AutoImageProcessor.from_pretrained(model_name)

# No layers are frozen in this run: the whole network is fine-tuned.
# (An earlier variant, now disabled, froze everything except the classifier
# and "resnet.encoder.stages.3.layers.1" / "...layers.2".)

# Verify which layers are trainable
trainable_params = [name for name, param in model.named_parameters() if param.requires_grad]
print(f"Trainable parameters: {trainable_params}")

# Define preprocessing function
def preprocess_images(examples):
    """Batch transform applied lazily by the dataset.

    Converts every image of the batch to RGB, runs the image processor on the
    whole batch, and returns "pixel_values" plus "labels" — the same key names
    the BEiT cells of this notebook emit.
    """
    rgb_images = [image.convert("RGB") for image in examples["image"]]
    pixel_values = processor(rgb_images, return_tensors="pt")["pixel_values"]
    labels = torch.tensor(examples["label"])
    return {"pixel_values": pixel_values, "labels": labels}

# Preprocess the dataset
train_dataset = dataset["train"].with_transform(preprocess_images)
validation_dataset = dataset["validation"].with_transform(preprocess_images)
test_dataset = dataset["test"].with_transform(preprocess_images)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./resnet_breastMNIST",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=3e-4,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    save_total_limit=2,
    # keep "image"/"label" columns: the lazy transform needs them
    remove_unused_columns=False,
    push_to_hub=False,
    seed=42,  # stated explicitly, matching the BEiT runs
)

# Define Trainer
trainer_resnet_breastMNIST = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    tokenizer=processor,
)

Some weights of ResNetForImageClassification were not initialized from the model checkpoint at microsoft/resnet-50 and are newly initialized because the shapes did not match:
- classifier.1.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([2]) in the model instantiated
- classifier.1.weight: found shape torch.Size([1000, 2048]) in the checkpoint and torch.Size([2, 2048]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Trainable parameters: ['resnet.embedder.embedder.convolution.weight', 'resnet.embedder.embedder.normalization.weight', 'resnet.embedder.embedder.normalization.bias', 'resnet.encoder.stages.0.layers.0.shortcut.convolution.weight', 'resnet.encoder.stages.0.layers.0.shortcut.normalization.weight', 'resnet.encoder.stages.0.layers.0.shortcut.normalization.bias', 'resnet.encoder.stages.0.layers.0.layer.0.convolution.weight', 'resnet.encoder.stages.0.layers.0.layer.0.normalization.weight', 'resnet.encoder.stages.0.layers.0.layer.0.normalization.bias', 'resnet.encoder.stages.0.layers.0.layer.1.convolution.weight', 'resnet.encoder.stages.0.layers.0.layer.1.normalization.weight', 'resnet.encoder.stages.0.layers.0.layer.1.normalization.bias', 'resnet.encoder.stages.0.layers.0.layer.2.convolution.weight', 'resnet.encoder.stages.0.layers.0.layer.2.normalization.weight', 'resnet.encoder.stages.0.layers.0.layer.2.normalization.bias', 'resnet.encoder.stages.0.layers.1.layer.0.convolution.weight', 'res

  trainer_resnet_breastMNIST = Trainer(


In [162]:
# Train the model
trainer_resnet_breastMNIST.train()

# Save through the trainer rather than the notebook-level `model`/`processor`
# names: those globals are rebound by every setup cell, so after out-of-order
# execution they may belong to a different experiment. save_model() writes the
# trainer's own model together with the processor that was handed to it
# (and the training arguments).
trainer_resnet_breastMNIST.save_model("./resnet_breastMNIST")

# Evaluate on the test set
test_results = trainer_resnet_breastMNIST.evaluate(test_dataset)
print(f"Test Results: {test_results}")

Epoch,Training Loss,Validation Loss
1,0.6328,0.625107
2,0.5864,0.562325
3,0.5418,0.71963
4,0.4272,0.537232
5,0.3303,0.498492


Test Results: {'eval_loss': 0.42436087131500244, 'eval_runtime': 6.3165, 'eval_samples_per_second': 24.697, 'eval_steps_per_second': 3.166, 'epoch': 5.0}


In [163]:
# Predict on the test split; the predicted class is the arg-max over the logits
predictions = trainer_resnet_breastMNIST.predict(test_dataset)
true_labels = predictions.label_ids
pred_labels = predictions.predictions.argmax(axis=1)

# Support-weighted precision / recall / F1 (the fourth returned value is not needed)
precision, recall, f1, _ = precision_recall_fscore_support(
    true_labels, pred_labels, average='weighted'
)
accuracy = accuracy_score(true_labels, pred_labels)
# Recall of class 0.
# NOTE(review): this equals specificity only when class 1 is regarded as the
# positive class — confirm against the BreastMNIST label mapping.
specificity = recall_score(true_labels, pred_labels, pos_label=0)

# Report
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision (weighted): {precision:.4f}")
print(f"Recall (weighted): {recall:.4f}")
print(f"F1-Score (weighted): {f1:.4f}")
print(f"Specificity: {specificity:.4f}")

Accuracy: 0.8269
Precision (weighted): 0.8268
Recall (weighted): 0.8269
F1-Score (weighted): 0.8083
Specificity: 0.4524


In [164]:
# Labels that actually occur in the ground truth
classes = np.unique(true_labels)

# Per-class accuracy in percent: among the samples whose true label is c,
# the share that was also predicted as c.
per_class_accuracy = {
    c: (np.sum(pred_labels[true_labels == c] == c) / np.sum(true_labels == c)) * 100
    for c in classes
}

# Print per-class accuracy
for cls, acc in per_class_accuracy.items():
    print(f"Class {cls}: {acc:.2f}%")

Class 0: 45.24%
Class 1: 96.49%


# RetinaMNIST Dataset

In [None]:
# Fetch the three RetinaMNIST splits at 224x224 resolution
train_dataset, val_dataset, test_dataset = (
    RetinaMNIST(split=split_name, download=True, size=224)
    for split_name in ('train', 'val', 'test')
)

# Convert each split to a HuggingFace dataset
hf_train_dataset = convert_medmnist_to_hf(train_dataset, split='train')
hf_val_dataset = convert_medmnist_to_hf(val_dataset, split='val')
hf_test_dataset = convert_medmnist_to_hf(test_dataset, split='test')

# Bundle the splits; unlike the BreastMNIST cell, no class re-balancing is done here
dataset = DatasetDict({
    "train": hf_train_dataset,
    "validation": hf_val_dataset,
    "test": hf_test_dataset,
})

Using downloaded and verified file: C:\Users\baiet\.medmnist\retinamnist_224.npz
Using downloaded and verified file: C:\Users\baiet\.medmnist\retinamnist_224.npz
Using downloaded and verified file: C:\Users\baiet\.medmnist\retinamnist_224.npz


In [29]:
# Use the GPU when one is visible, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# BEiT-large checkpoint with a 5-class classification head, plus its image processor
model_name = "microsoft/beit-large-patch16-224-pt22k"
model = AutoModelForImageClassification.from_pretrained(model_name, num_labels=5)
model = model.to(device)
processor = AutoImageProcessor.from_pretrained(model_name)

# Freeze everything except the classifier head, the pooler and the last two
# encoder blocks (layers 22 and 23).
trainable_prefixes = (
    "classifier",
    "beit.pooler",
    "beit.encoder.layer.23",
    "beit.encoder.layer.22",
)
for name, param in model.named_parameters():
    if not name.startswith(trainable_prefixes):
        param.requires_grad = False

# Report which parameters are still trainable
trainable_params = [name for name, param in model.named_parameters() if param.requires_grad]
print(f"Trainable parameters: {trainable_params}")

def preprocess_images(examples):
    """Batch transform applied lazily by the dataset.

    Converts every image of the batch to RGB, runs the image processor on the
    whole batch, and returns the tensors under the keys the Trainer feeds to
    the model: "pixel_values" and "labels".
    """
    rgb_images = [image.convert("RGB") for image in examples["image"]]
    pixel_values = processor(rgb_images, return_tensors="pt")["pixel_values"]
    return {"pixel_values": pixel_values, "labels": torch.tensor(examples["label"])}

# Attach the transform to each split (applied when rows are accessed)
train_dataset, validation_dataset, test_dataset = (
    dataset[split_name].with_transform(preprocess_images)
    for split_name in ("train", "validation", "test")
)

# Training configuration: evaluate and checkpoint once per epoch
training_args = TrainingArguments(
    output_dir="./beit_retinaMNIST",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=3e-4,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    weight_decay=0.1,
    logging_dir="./logs",
    logging_steps=10,
    save_total_limit=2,
    # keep "image"/"label" columns: the lazy transform needs them
    remove_unused_columns=False,
    push_to_hub=False,
    seed=42
)

# Trainer validates on the validation split after every epoch
trainer_beit_retinaMNIST = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    tokenizer=processor,
)

Some weights of BeitForImageClassification were not initialized from the model checkpoint at microsoft/beit-large-patch16-224-pt22k and are newly initialized: ['beit.pooler.layernorm.bias', 'beit.pooler.layernorm.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Trainable parameters: ['beit.encoder.layer.22.lambda_1', 'beit.encoder.layer.22.lambda_2', 'beit.encoder.layer.22.attention.attention.query.weight', 'beit.encoder.layer.22.attention.attention.query.bias', 'beit.encoder.layer.22.attention.attention.key.weight', 'beit.encoder.layer.22.attention.attention.value.weight', 'beit.encoder.layer.22.attention.attention.value.bias', 'beit.encoder.layer.22.attention.output.dense.weight', 'beit.encoder.layer.22.attention.output.dense.bias', 'beit.encoder.layer.22.intermediate.dense.weight', 'beit.encoder.layer.22.intermediate.dense.bias', 'beit.encoder.layer.22.output.dense.weight', 'beit.encoder.layer.22.output.dense.bias', 'beit.encoder.layer.22.layernorm_before.weight', 'beit.encoder.layer.22.layernorm_before.bias', 'beit.encoder.layer.22.layernorm_after.weight', 'beit.encoder.layer.22.layernorm_after.bias', 'beit.encoder.layer.23.lambda_1', 'beit.encoder.layer.23.lambda_2', 'beit.encoder.layer.23.attention.attention.query.weight', 'beit.encoder

  trainer_beit_retinaMNIST = Trainer(


In [30]:
# Train the model
trainer_beit_retinaMNIST.train()

# Save through the trainer rather than the notebook-level `model`/`processor`
# names: those globals are rebound by every setup cell, so after out-of-order
# execution they may belong to a different experiment. save_model() writes the
# trainer's own model together with the processor that was handed to it
# (and the training arguments).
trainer_beit_retinaMNIST.save_model("./beit_retinaMNIST")

# Evaluate on the test set
test_results = trainer_beit_retinaMNIST.evaluate(test_dataset)
print(f"Test Results: {test_results}")

Epoch,Training Loss,Validation Loss
1,1.2175,1.056499
2,1.2497,1.063681
3,1.1893,0.905668
4,0.8632,0.994372
5,0.6613,1.032848


Test Results: {'eval_loss': 1.0201350450515747, 'eval_runtime': 11.2566, 'eval_samples_per_second': 35.535, 'eval_steps_per_second': 4.442, 'epoch': 5.0}


In [31]:
# Predict on the test split; the predicted class is the arg-max over the logits
predictions = trainer_beit_retinaMNIST.predict(test_dataset)
pred_labels = predictions.predictions.argmax(axis=1)
true_labels = predictions.label_ids

# Calculate additional metrics (support-weighted averages over the classes).
# zero_division=0 states explicitly that a class which is never predicted
# contributes a precision of 0, instead of relying on the warn-and-use-0 default.
accuracy = accuracy_score(true_labels, pred_labels)
precision, recall, f1, _ = precision_recall_fscore_support(
    true_labels, pred_labels, average='weighted', zero_division=0
)
# No specificity here: the binary `pos_label` recall used in the BreastMNIST
# cells does not carry over to this multi-class task.

# Display the metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision (weighted): {precision:.4f}")
print(f"Recall (weighted): {recall:.4f}")
print(f"F1-Score (weighted): {f1:.4f}")

Accuracy: 0.6200
Precision (weighted): 0.6308
Recall (weighted): 0.6200
F1-Score (weighted): 0.6143


In [32]:
# Labels that actually occur in the ground truth
classes = np.unique(true_labels)

# Per-class accuracy in percent: among the samples whose true label is c,
# the share that was also predicted as c.
per_class_accuracy = {
    c: (np.sum(pred_labels[true_labels == c] == c) / np.sum(true_labels == c)) * 100
    for c in classes
}

# Print per-class accuracy
for cls, acc in per_class_accuracy.items():
    print(f"Class {cls}: {acc:.2f}%")

Class 0: 78.16%
Class 1: 30.43%
Class 2: 59.78%
Class 3: 58.82%
Class 4: 15.00%


In [21]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the ResNet-50 model and its image processor. The checkpoint ships a
# 1000-class head, so ignore_mismatched_sizes lets the 5-class head be
# created fresh instead of failing on the shape mismatch.
model_name = "microsoft/resnet-50"
model = AutoModelForImageClassification.from_pretrained(model_name, num_labels=5, ignore_mismatched_sizes=True).to(device)
processor = AutoImageProcessor.from_pretrained(model_name)

# Freeze all parameters except the classifier head and the parameters under
# "resnet.encoder.stages.3.layers.2"
trainable_prefixes = ("classifier", "resnet.encoder.stages.3.layers.2")
for name, param in model.named_parameters():
    if not name.startswith(trainable_prefixes):
        param.requires_grad = False

# Verify which layers are trainable
trainable_params = [name for name, param in model.named_parameters() if param.requires_grad]
print(f"Trainable parameters: {trainable_params}")

# Define preprocessing function
def preprocess_images(examples):
    """Batch transform applied lazily by the dataset.

    Converts every image of the batch to RGB, runs the image processor on the
    whole batch, and returns "pixel_values" plus "labels" — the same key names
    the BEiT cells of this notebook emit.
    """
    rgb_images = [image.convert("RGB") for image in examples["image"]]
    pixel_values = processor(rgb_images, return_tensors="pt")["pixel_values"]
    labels = torch.tensor(examples["label"])
    return {"pixel_values": pixel_values, "labels": labels}

# Preprocess the dataset
train_dataset = dataset["train"].with_transform(preprocess_images)
validation_dataset = dataset["validation"].with_transform(preprocess_images)
test_dataset = dataset["test"].with_transform(preprocess_images)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./resnet_retinaMNIST",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=3e-4,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    save_total_limit=2,
    # keep "image"/"label" columns: the lazy transform needs them
    remove_unused_columns=False,
    push_to_hub=False,
    seed=42,  # stated explicitly, matching the BEiT runs
)

# Define Trainer
trainer_resnet_retinaMNIST = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    tokenizer=processor,
)

Some weights of ResNetForImageClassification were not initialized from the model checkpoint at microsoft/resnet-50 and are newly initialized because the shapes did not match:
- classifier.1.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([5]) in the model instantiated
- classifier.1.weight: found shape torch.Size([1000, 2048]) in the checkpoint and torch.Size([5, 2048]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Trainable parameters: ['resnet.encoder.stages.3.layers.2.layer.0.convolution.weight', 'resnet.encoder.stages.3.layers.2.layer.0.normalization.weight', 'resnet.encoder.stages.3.layers.2.layer.0.normalization.bias', 'resnet.encoder.stages.3.layers.2.layer.1.convolution.weight', 'resnet.encoder.stages.3.layers.2.layer.1.normalization.weight', 'resnet.encoder.stages.3.layers.2.layer.1.normalization.bias', 'resnet.encoder.stages.3.layers.2.layer.2.convolution.weight', 'resnet.encoder.stages.3.layers.2.layer.2.normalization.weight', 'resnet.encoder.stages.3.layers.2.layer.2.normalization.bias', 'classifier.1.weight', 'classifier.1.bias']


  trainer_resnet_retinaMNIST = Trainer(


In [22]:
# Train the model
trainer_resnet_retinaMNIST.train()

# Save through the trainer rather than the notebook-level `model`/`processor`
# names: those globals are rebound by every setup cell, so after out-of-order
# execution they may belong to a different experiment. save_model() writes the
# trainer's own model together with the processor that was handed to it
# (and the training arguments).
trainer_resnet_retinaMNIST.save_model("./resnet_retinaMNIST")

# Evaluate on the test set
test_results = trainer_resnet_retinaMNIST.evaluate(test_dataset)
print(f"Test Results: {test_results}")

Epoch,Training Loss,Validation Loss
1,1.1589,1.244556
2,1.1888,1.146783
3,1.109,1.132084
4,1.0783,1.108217
5,1.0903,1.112605


Test Results: {'eval_loss': 1.1273815631866455, 'eval_runtime': 2.3279, 'eval_samples_per_second': 171.829, 'eval_steps_per_second': 21.479, 'epoch': 5.0}


In [23]:
# Predict on the test split; the predicted class is the arg-max over the logits
predictions = trainer_resnet_retinaMNIST.predict(test_dataset)
pred_labels = predictions.predictions.argmax(axis=1)
true_labels = predictions.label_ids

# Calculate additional metrics (support-weighted averages over the classes).
# Some classes are never predicted by this model, which leaves their precision
# undefined; zero_division=0 scores them as 0 explicitly (the value the
# default already used) without emitting the undefined-metric warning.
accuracy = accuracy_score(true_labels, pred_labels)
precision, recall, f1, _ = precision_recall_fscore_support(
    true_labels, pred_labels, average='weighted', zero_division=0
)
# No specificity here: the binary `pos_label` recall used in the BreastMNIST
# cells does not carry over to this multi-class task.

# Display the metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision (weighted): {precision:.4f}")
print(f"Recall (weighted): {recall:.4f}")
print(f"F1-Score (weighted): {f1:.4f}")

Accuracy: 0.5550
Precision (weighted): 0.4491
Recall (weighted): 0.5550
F1-Score (weighted): 0.4697


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [24]:
# Labels that actually occur in the ground truth
classes = np.unique(true_labels)

# Per-class accuracy in percent: among the samples whose true label is c,
# the share that was also predicted as c.
per_class_accuracy = {
    c: (np.sum(pred_labels[true_labels == c] == c) / np.sum(true_labels == c)) * 100
    for c in classes
}

# Print per-class accuracy
for cls, acc in per_class_accuracy.items():
    print(f"Class {cls}: {acc:.2f}%")

Class 0: 95.98%
Class 1: 0.00%
Class 2: 21.74%
Class 3: 51.47%
Class 4: 0.00%
