In [13]:
! pip install torch torchvision



In [27]:
import os
import torch
import torchvision
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torch.optim as optim
from torch.optim import lr_scheduler
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import time
import copy
from sklearn.metrics import confusion_matrix, classification_report

In [28]:
# Define custom dataset class
class CactusDataset(Dataset):
    """Folder-per-class image dataset that yields detection-style targets.

    ``root`` is expected to contain one sub-directory per class; every image
    file found directly inside a class directory becomes one sample. Because
    no real bounding boxes exist, each sample is annotated with a single box
    covering the whole image.

    Args:
        root: Directory holding the per-class sub-directories.
        transforms: Optional callable applied to the loaded RGB PIL image.
        label_offset: Integer added to the 0-based class index when building
            the ``labels`` tensor. Defaults to 0 (labels equal the index into
            ``self.classes``). Use 1 for models that reserve label 0 for
            background; the model must then be built with one extra class.

    Attributes:
        classes: Sorted list of class directory names.
        imgs: List of ``(image_path, class_idx)`` tuples.
    """

    # File extensions (compared case-insensitively) treated as images.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, root, transforms=None, label_offset=0):
        self.root = root
        self.transforms = transforms
        self.label_offset = label_offset
        self.classes = sorted(
            d for d in os.listdir(root) if os.path.isdir(os.path.join(root, d))
        )
        self.imgs = []
        for class_idx, class_name in enumerate(self.classes):
            class_path = os.path.join(root, class_name)
            # Sort the listing so sample order does not depend on the filesystem.
            for img_name in sorted(os.listdir(class_path)):
                if img_name.lower().endswith(self.IMAGE_EXTENSIONS):
                    self.imgs.append((os.path.join(class_path, img_name), class_idx))

    @staticmethod
    def _full_image_box(img):
        """Return a ``(1, 4)`` float32 box ``[0, 0, width, height]`` spanning ``img``."""
        if isinstance(img, torch.Tensor):
            # Tensor images are assumed to be laid out (..., H, W), as produced
            # by torchvision's ToTensor.
            height, width = img.shape[-2], img.shape[-1]
        else:
            # PIL images expose ``size`` as a (width, height) tuple.
            width, height = img.size
        return torch.tensor([[0, 0, width, height]], dtype=torch.float32)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, target)``.

        ``target`` is a dict with ``boxes`` (one full-image box in
        ``[x1, y1, x2, y2]`` order), ``labels`` (class index plus
        ``label_offset``) and ``image_id``.
        """
        img_path, class_idx = self.imgs[idx]
        img = Image.open(img_path).convert("RGB")
        if self.transforms:
            img = self.transforms(img)
        target = {
            "boxes": self._full_image_box(img),  # Full image
            "labels": torch.tensor([class_idx + self.label_offset], dtype=torch.int64),
            "image_id": torch.tensor([idx])
        }
        return img, target

    def __len__(self):
        """Number of image files discovered under ``root``."""
        return len(self.imgs)

In [29]:
# Define transformations
# Preprocessing applied to each PIL image, in order: resize to a fixed
# 224x224, flip left-right at random, then convert to a tensor.
data_transforms = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor(),
    ]
)

In [30]:
# Load dataset and create train/validation split
# The dataset location can be overridden with the CACTUS_DATA_DIR environment
# variable; the original local path is kept as the fallback.
data_dir = os.environ.get(
    'CACTUS_DATA_DIR',
    '/Users/buzz/Documents/Cactus-main/Cactus Identification/Cactus Pictures',
)
dataset = CactusDataset(data_dir, transforms=data_transforms)
train_size = int(0.7 * len(dataset))
val_size = len(dataset) - train_size
# Seed the split so the same images land in train/val on every run; without
# this, validation numbers from different runs are not comparable.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    dataset, [train_size, val_size], generator=split_generator
)


def detection_collate(batch):
    """Turn a list of ``(image, target)`` samples into ``(images, targets)`` tuples.

    Detection models take per-image lists rather than stacked tensors, so the
    samples are transposed instead of being collated into a single batch tensor.
    """
    return tuple(zip(*batch))


# NOTE: with num_workers > 0 the collate function is handed to worker
# processes; if worker start-up fails on your platform, set num_workers=0.
dataloaders = {
    'train': DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=4, collate_fn=detection_collate),
    # Validation order does not affect the metrics, so it is not shuffled.
    'val': DataLoader(val_dataset, batch_size=4, shuffle=False, num_workers=4, collate_fn=detection_collate)
}
dataset_sizes = {'train': len(train_dataset), 'val': len(val_dataset)}
class_names = dataset.classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


In [31]:
# Load a pre-trained Faster R-CNN model and modify it
# `weights="DEFAULT"` replaces the deprecated `pretrained=True` flag and loads
# the pre-trained detection weights.
model = fasterrcnn_resnet50_fpn(weights="DEFAULT")
# Swap the box-classification head for one sized to this dataset.
in_features = model.roi_heads.box_predictor.cls_score.in_features
# NOTE(review): torchvision detection models count background as class 0 in
# `num_classes`. The dataset emits labels starting at 0, so the first class
# shares the background id. Fixing this means shifting the dataset labels by
# one AND passing len(class_names) + 1 here (and un-shifting predictions when
# scoring) - confirm and change these together.
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, len(class_names))

model = model.to(device)

# Optimise only the parameters that are still trainable.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
# Multiply the learning rate by 0.1 every 7 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [33]:
# Train the model
num_epochs = 20
# Label recorded when the model returns no detection for an image. It never
# equals a real class index, so such images are counted as misclassified
# instead of raising IndexError.
NO_DETECTION = -1

for epoch in range(num_epochs):
    print(f'Epoch {epoch}/{num_epochs - 1}')
    print('-' * 10)

    for phase in ['train', 'val']:
        is_train = phase == 'train'
        if is_train:
            model.train()
        else:
            model.eval()

        running_loss = 0.0
        running_corrects = 0

        for images, targets in dataloaders[phase]:
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            if is_train:
                # In train mode the model returns a dict of losses.
                optimizer.zero_grad()
                loss_dict = model(images, targets)
                losses = sum(loss_dict.values())
                losses.backward()
                optimizer.step()
                # Weight by batch size so the epoch figure is a per-image mean.
                running_loss += losses.item() * len(images)
            else:
                # In eval mode the model returns one dict of detections per image.
                with torch.no_grad():
                    outputs = model(images)
                # The first detection is used as the image-level prediction
                # (torchvision appears to order detections by score - confirm).
                preds = [
                    output['labels'][0].item() if output['labels'].numel() else NO_DETECTION
                    for output in outputs
                ]
                true_labels = [target['labels'][0].item() for target in targets]
                running_corrects += sum(p == t for p, t in zip(preds, true_labels))

        if is_train:
            epoch_loss = running_loss / dataset_sizes[phase]
            print(f'{phase} Loss: {epoch_loss:.4f}')
        else:
            epoch_acc = running_corrects / dataset_sizes[phase]
            print(f'{phase} Acc: {epoch_acc:.4f}')

    # One scheduler step per epoch.
    exp_lr_scheduler.step()

print('Training complete')

Epoch 0/19
----------
train Loss: 0.1457
val Acc: 0.1183
Epoch 1/19
----------
train Loss: 0.0791
val Acc: 0.1720
Epoch 2/19
----------
train Loss: 0.0691
val Acc: 0.1720
Epoch 3/19
----------
train Loss: 0.0640
val Acc: 0.2043
Epoch 4/19
----------
train Loss: 0.0633
val Acc: 0.2849
Epoch 5/19
----------
train Loss: 0.0597
val Acc: 0.2903
Epoch 6/19
----------
train Loss: 0.0557
val Acc: 0.3172
Epoch 7/19
----------
train Loss: 0.0489
val Acc: 0.3333
Epoch 8/19
----------
train Loss: 0.0472
val Acc: 0.3441
Epoch 9/19
----------
train Loss: 0.0476
val Acc: 0.3441
Epoch 10/19
----------
train Loss: 0.0474
val Acc: 0.3387
Epoch 11/19
----------
train Loss: 0.0471
val Acc: 0.3226
Epoch 12/19
----------
train Loss: 0.0468
val Acc: 0.3387
Epoch 13/19
----------
train Loss: 0.0461
val Acc: 0.3817
Epoch 14/19
----------
train Loss: 0.0456
val Acc: 0.3656
Epoch 15/19
----------
train Loss: 0.0454
val Acc: 0.3495
Epoch 16/19
----------
train Loss: 0.0459
val Acc: 0.3495
Epoch 17/19
----------
t

In [None]:
# Save the model
# Only the state_dict is written, so loading it later requires rebuilding the
# same architecture first.
torch.save(
    obj=model.state_dict(),
    f='faster_rcnn_cactus_classification.pth',
)

In [None]:
# Evaluate the model
model.eval()
all_preds = []
all_labels = []
# Placeholder prediction for images on which the model detects nothing, so an
# empty detection list is scored as a miss instead of raising IndexError.
NO_DETECTION = -1
with torch.no_grad():
    for images, targets in dataloaders['val']:
        images = [image.to(device) for image in images]
        outputs = model(images)
        # One image-level prediction per image: the first returned detection.
        preds = [
            output['labels'][0].item() if output['labels'].numel() else NO_DETECTION
            for output in outputs
        ]
        labels = [target['labels'][0].item() for target in targets]
        all_preds.extend(preds)
        all_labels.extend(labels)

In [None]:
# Calculate confusion matrix and plot it
# Pin the label set to the known classes so the matrix is always
# len(class_names) x len(class_names), even when a class is absent from the
# validation data. Samples whose label is outside this set are left out.
class_ids = list(range(len(class_names)))
conf_matrix = confusion_matrix(all_labels, all_preds, labels=class_ids)

fig, ax = plt.subplots()
heatmap = ax.imshow(conf_matrix, interpolation='nearest', cmap=plt.cm.Blues)
ax.set_title('Confusion Matrix')
fig.colorbar(heatmap, ax=ax)
# Show class names on the axes instead of bare indices.
ax.set_xticks(class_ids)
ax.set_xticklabels(class_names, rotation=90)
ax.set_yticks(class_ids)
ax.set_yticklabels(class_names)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
fig.tight_layout()
plt.show()

In [None]:
# Print classification report
# `labels` is given explicitly so it always lines up with `target_names`, even
# when some class never appears in the validation labels or predictions.
# `zero_division=0` reports 0 for classes with no predicted/true samples.
print(classification_report(
    all_labels,
    all_preds,
    labels=list(range(len(class_names))),
    target_names=class_names,
    zero_division=0,
))

In [None]:
import time
import torch
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Evaluate the model
def evaluate_model(model, dataloader, device):
    """Run ``model`` over ``dataloader`` and collect labels per image.

    Args:
        model: Callable with an ``eval()`` method that takes a list of image
            tensors and returns one dict per image containing a ``'labels'``
            tensor.
        dataloader: Iterable of ``(images, targets)`` batches, where each
            target is a dict with a ``'labels'`` tensor.
        device: Device the images are moved to before inference.

    Returns:
        ``(all_preds, all_targets)``: two lists with one NumPy array per
        image. ``all_preds[i]`` holds every label the model returned for
        image ``i`` (possibly empty); ``all_targets[i]`` holds its
        ground-truth labels.
    """
    model.eval()
    all_preds = []
    all_targets = []

    with torch.no_grad():
        for images, targets in dataloader:
            images = [image.to(device) for image in images]

            # Perform inference
            outputs = model(images)

            # Extract predictions. Targets are only read back as NumPy arrays,
            # so they are not copied to the device first.
            all_preds.extend(output['labels'].cpu().numpy() for output in outputs)
            all_targets.extend(target['labels'].cpu().numpy() for target in targets)

    return all_preds, all_targets

# Measure inference time
# The measured interval covers the whole pass over the loader (data loading
# included), averaged over the number of batches.
start_time = time.perf_counter()
all_preds, all_targets = evaluate_model(model, dataloaders['val'], device)
inference_time = (time.perf_counter() - start_time) / max(len(dataloaders['val']), 1)

# Reduce each image to one predicted label and one true label.
# The previous "binary" conversion mapped every element to 1 (each item is
# trivially a member of its own list), and flattening all detections gave a
# prediction list whose length differed from the target list.
NO_DETECTION = -1  # stands in for images with no detections; never a real class
y_pred = [int(p[0]) if len(p) else NO_DETECTION for p in all_preds]
y_true = [int(t[0]) for t in all_targets]

# Calculate metrics
# Multi-class scores, weighted by how often each class occurs in y_true.
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)

print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision * 100:.2f}%")
print(f"Recall: {recall * 100:.2f}%")
print(f"F1 Score: {f1 * 100:.2f}%")
print(f"Average Inference Time per Batch: {inference_time:.6f} seconds")
