In [1]:
import torch
from torchvision import datasets, transforms
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torch.utils.data import DataLoader
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

In [2]:
# Number of foreground categories the detector must tell apart.
num_classes = 10  # MNIST has 10 classes

# Start from the pretrained Faster R-CNN (ResNet-50 backbone with FPN).
model = fasterrcnn_resnet50_fpn(pretrained=True)

# Replace the box-prediction head so it scores our own classes instead of
# the ones the pretrained weights were trained on. The new head keeps the
# input width of the old one and gets one extra output for the background.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(
    in_features,
    num_classes + 1,  # +1 for the background class
)




In [4]:
# Image pipeline: upscale every digit to 300x300 pixels, then convert the
# PIL image to a float tensor.
transform = transforms.Compose(
    [
        transforms.Resize((300, 300)),
        transforms.ToTensor(),
    ]
)

# Training split first, then the held-out test split; both are downloaded
# into ./data on first use and share the same transform.
trainset, testset = (
    datasets.MNIST(root="./data", train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Mini-batches of four images; only the training data is shuffled.
trainloader = DataLoader(trainset, shuffle=True, batch_size=4)
testloader = DataLoader(testset, shuffle=False, batch_size=4)


In [5]:
# Train the Faster R-CNN model
def mnist_detection_targets(images, labels, threshold=0.1):
    """Convert a classification batch into Faster R-CNN training inputs.

    MNIST only supplies one digit label per image, while the detection model
    expects, for every image, a dict with "boxes" (float tensor of shape
    (N, 4) in x1, y1, x2, y2 order) and "labels" (int64 tensor of shape (N,)).
    The single box is the tight bounding box of all pixels brighter than
    ``threshold``; the class id is ``digit + 1`` because id 0 is reserved for
    the background class.

    Args:
        images: batch tensor of shape (B, C, H, W) with values in [0, 1].
        labels: integer tensor of shape (B,) holding the digit of each image.
        threshold: minimum intensity for a pixel to count as part of the digit.

    Returns:
        A pair ``(image_list, target_list)`` of equal length. Images with no
        pixel above ``threshold`` are dropped from both lists because no box
        can be derived for them.
    """
    image_list, target_list = [], []
    for image, label in zip(images, labels):
        # Collapse the channel axis so the foreground mask is always (H, W).
        rows, cols = torch.nonzero(image.max(dim=0).values > threshold, as_tuple=True)
        if rows.numel() == 0:
            continue
        # +1 on the far edges keeps width and height strictly positive.
        box = torch.stack(
            [cols.min(), rows.min(), cols.max() + 1, rows.max() + 1]
        ).to(torch.float32)
        # Replicate the grayscale channel so the backbone gets 3-channel input
        # explicitly (no-op for images that already have three channels).
        image_list.append(image.expand(3, -1, -1))
        target_list.append(
            {
                "boxes": box.unsqueeze(0),
                "labels": (label.reshape(1) + 1).to(torch.int64),
            }
        )
    return image_list, target_list


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9)
num_epochs = 5

for epoch in range(num_epochs):
    model.train()
    steps = 0
    for images, labels in trainloader:
        # The loader yields (image batch, digit labels); build the per-image
        # target dicts the detection model requires.
        images, targets_list = mnist_detection_targets(
            images.to(device), labels.to(device)
        )
        if not targets_list:
            continue

        optimizer.zero_grad()
        # In train mode the model returns a dict of loss terms.
        loss_dict = model(images, targets_list)
        losses = sum(loss_dict.values())
        losses.backward()
        optimizer.step()
        steps += 1

    # Fail loudly instead of reporting a "completed" epoch that never
    # updated the weights.
    if steps == 0:
        raise RuntimeError("No training step was performed: no usable targets were built.")

    print(f"Epoch {epoch + 1}/{num_epochs} complete.")

print("Classifier training complete.")


Epoch 1/5 complete.
Epoch 2/5 complete.
Epoch 3/5 complete.
Epoch 4/5 complete.
Epoch 5/5 complete.
Classifier training complete.


In [None]:
# Overall test accuracy: the predicted digit of an image is the class of its
# most confident detection.
model.eval()

all_labels = []
all_predictions = []

with torch.no_grad():
    for images, targets in testloader:
        # `targets` holds one integer digit label per image.
        # Replicate the grayscale channel so the model gets 3-channel input.
        images = [img.expand(3, -1, -1).to(device) for img in images]

        # In eval mode the model returns, per image, a dict with
        # "boxes", "labels" and "scores".
        outputs = model(images)

        for output, target in zip(outputs, targets):
            if output["scores"].numel() > 0:
                best = output["scores"].argmax()
                # Class 0 is the background, so detector class ids are
                # assumed to be digit + 1 (must match the training targets).
                predicted = int(output["labels"][best]) - 1
            else:
                # No detection at all: count it as a wrong prediction.
                predicted = -1
            all_labels.append(int(target))
            all_predictions.append(predicted)


# Calculate accuracy
correct = sum(1 for true, pred in zip(all_labels, all_predictions) if true == pred)
total = len(all_labels)
# Guard against an empty test loader.
accuracy = correct / total if total else 0.0
print(f'Accuracy on test set: {accuracy * 100:.2f}%')


In [None]:
from sklearn.metrics import f1_score
import torch.nn as nn

# Evaluate the trained `model` on the test set: per-class accuracy, weighted
# F1 score and the mean detection loss per image.
model.eval()

# Pixels brighter than this are treated as part of the digit when deriving
# the ground-truth box needed for the loss.
FOREGROUND_THRESHOLD = 0.1

# prepare to count predictions for each class (MNIST digits 0-9)
correct_pred = {classname: 0 for classname in range(10)}
total_pred = {classname: 0 for classname in range(10)}

# Labels and predictions for the F1 score
all_labels = []
all_predictions = []

# Running sum of the per-image loss and number of images it covers
test_loss = 0.0
total_samples = 0

with torch.no_grad():
    for images, targets in testloader:
        # `targets` holds one integer digit label per image.
        # Replicate the grayscale channel so the model gets 3-channel input.
        images = [img.expand(3, -1, -1).to(device) for img in images]

        # --- predictions (eval mode returns boxes / labels / scores) ---
        outputs = model(images)
        for output, target in zip(outputs, targets):
            label = int(target)
            if output["scores"].numel() > 0:
                best = output["scores"].argmax()
                # Class 0 is the background, so detector class ids are
                # assumed to be digit + 1 (must match the training targets).
                predicted = int(output["labels"][best]) - 1
            else:
                # No detection: guaranteed mismatch with any digit.
                predicted = -1

            if predicted == label:
                correct_pred[label] += 1
            total_pred[label] += 1

            all_labels.append(label)
            all_predictions.append(predicted)

        # --- loss ---
        # The detector exposes no logits in eval mode, so a classification
        # criterion cannot be applied to its outputs. Build box targets from
        # the digit pixels and ask the model itself for its losses.
        loss_images, loss_targets = [], []
        for image, target in zip(images, targets):
            rows, cols = torch.nonzero(
                image.max(dim=0).values > FOREGROUND_THRESHOLD, as_tuple=True
            )
            if rows.numel() == 0:
                continue  # blank image: no box can be derived
            box = torch.stack(
                [cols.min(), rows.min(), cols.max() + 1, rows.max() + 1]
            ).to(torch.float32)
            loss_images.append(image)
            loss_targets.append(
                {
                    "boxes": box.unsqueeze(0),
                    "labels": (target.reshape(1) + 1).to(device=device, dtype=torch.int64),
                }
            )

        if loss_targets:
            # torchvision detection models only return losses in train mode.
            # NOTE(review): this assumes the pretrained backbone uses frozen
            # batch-norm layers, so no running statistics change - confirm.
            model.train()
            try:
                loss_dict = model(loss_images, loss_targets)
            finally:
                model.eval()
            test_loss += sum(loss_dict.values()).item() * len(loss_targets)
            total_samples += len(loss_targets)

# Calculate F1 score over the ten digit classes only, so the "no detection"
# placeholder (-1) is not treated as a class of its own.
f1 = f1_score(all_labels, all_predictions, labels=list(range(10)), average='weighted')

# Calculate average test loss per image
average_test_loss = test_loss / total_samples if total_samples else float("nan")

# Print accuracy, F1 score, and loss
for classname, correct_count in correct_pred.items():
    if total_pred[classname] == 0:
        print(f'Accuracy for class {classname} is undefined (no samples)')
        continue
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class {classname} is {accuracy:.1f}%')

print(f'Weighted F1 score: {f1:.4f}')
print(f'Average Test Loss: {average_test_loss:.4f}')