In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms


class MultiScaleCNN(nn.Module):
    def __init__(self, num_classes, dropout_prob=0.3):
        super(MultiScaleCNN, self).__init__()
        
        # Convolutional Blocks
        self.conv1 = self._conv_block(3, 32)
        self.conv2 = self._conv_block(32, 64)
        self.conv3 = self._conv_block(64, 128)
        self.conv4 = self._conv_block(128, 256)
        self.conv5 = self._conv_block(256, 512)
        
        # Adaptive Pooling to ensure fixed-size feature maps
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))
        
        # Fully Connected Layer with Multi-Scale Feature Fusion
        self.fc = nn.Linear(32 + 64 + 128 + 256 + 512, num_classes)
        
        # Dropout for regularization
        self.dropout = nn.Dropout(dropout_prob)
    
    def _conv_block(self, in_channels, out_channels):
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)  # Reduce spatial dimensions
        )
    
    def forward(self, x):
        # Feature Extraction
        x1 = self.conv1(x)  # (B, 32, H/2, W/2)
        x2 = self.conv2(x1) # (B, 64, H/4, W/4)
        x3 = self.conv3(x2) # (B, 128, H/8, W/8)
        x4 = self.conv4(x3) # (B, 256, H/16, W/16)
        x5 = self.conv5(x4) # (B, 512, H/32, W/32)
        
        # Global Pooling for Multi-Scale Feature Fusion
        x1_pool = self.global_pool(x1).view(x.size(0), -1)
        x2_pool = self.global_pool(x2).view(x.size(0), -1)
        x3_pool = self.global_pool(x3).view(x.size(0), -1)
        x4_pool = self.global_pool(x4).view(x.size(0), -1)
        x5_pool = self.global_pool(x5).view(x.size(0), -1)
        
        # Concatenate Multi-Scale Features
        x_fused = torch.cat([x1_pool, x2_pool, x3_pool, x4_pool, x5_pool], dim=1)
        x_fused = self.dropout(x_fused)
        
        # Classification Layer
        out = self.fc(x_fused)
        return out

# Data Preparation
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

dataset = datasets.ImageFolder(root='/Users/arnavkarnik/Documents/Recognition-of-Medicinal-Plant-Species-Deep-Learning-Project--Sem6/Medicinal Leaf Dataset/Segmented Medicinal Leaf Images/', transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Example Usage
if __name__ == "__main__":
    num_classes = len(dataset.classes)  # Dynamically determine number of species
    model = MultiScaleCNN(num_classes=num_classes)
    sample_input, _ = next(iter(dataloader))  # Get a batch of images
    output = model(sample_input)
    print(output.shape)  # Expected output shape: (batch_size, num_classes)


torch.Size([32, 30])


In [2]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
from tqdm import tqdm

# Set device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data Augmentation and Normalization
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Dataset and Dataloaders
dataset_path = '/Users/arnavkarnik/Documents/Recognition-of-Medicinal-Plant-Species-Deep-Learning-Project--Sem6/Medicinal Leaf Dataset/Segmented Medicinal Leaf Images/'
dataset = datasets.ImageFolder(root=dataset_path, transform=transform)

# Split into train and validation sets (80-20 split)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)

# Define Model
class MultiScaleCNN(nn.Module):
    def __init__(self, num_classes, dropout_prob=0.3):
        super(MultiScaleCNN, self).__init__()
        
        # Convolutional Blocks
        self.conv1 = self._conv_block(3, 32)
        self.conv2 = self._conv_block(32, 64)
        self.conv3 = self._conv_block(64, 128)
        self.conv4 = self._conv_block(128, 256)
        self.conv5 = self._conv_block(256, 512)
        
        # Adaptive Pooling to ensure fixed-size feature maps
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))
        
        # Fully Connected Layer with Multi-Scale Feature Fusion
        self.fc = nn.Linear(32 + 64 + 128 + 256 + 512, num_classes)
        
        # Dropout for regularization
        self.dropout = nn.Dropout(dropout_prob)
    
    def _conv_block(self, in_channels, out_channels):
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)  # Reduce spatial dimensions
        )
    
    def forward(self, x):
        # Feature Extraction
        x1 = self.conv1(x)  # (B, 32, H/2, W/2)
        x2 = self.conv2(x1) # (B, 64, H/4, W/4)
        x3 = self.conv3(x2) # (B, 128, H/8, W/8)
        x4 = self.conv4(x3) # (B, 256, H/16, W/16)
        x5 = self.conv5(x4) # (B, 512, H/32, W/32)
        
        # Global Pooling for Multi-Scale Feature Fusion
        x1_pool = self.global_pool(x1).view(x.size(0), -1)
        x2_pool = self.global_pool(x2).view(x.size(0), -1)
        x3_pool = self.global_pool(x3).view(x.size(0), -1)
        x4_pool = self.global_pool(x4).view(x.size(0), -1)
        x5_pool = self.global_pool(x5).view(x.size(0), -1)
        
        # Concatenate Multi-Scale Features
        x_fused = torch.cat([x1_pool, x2_pool, x3_pool, x4_pool, x5_pool], dim=1)
        x_fused = self.dropout(x_fused)
        
        # Classification Layer
        out = self.fc(x_fused)
        return out

# Initialize model, loss, optimizer
num_classes = len(dataset.classes)
model = MultiScaleCNN(num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training Loop
num_epochs = 20
best_val_acc = 0.0
checkpoint_path = "best_multiscale_cnn.pth"

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct, total = 0, 0

    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}")
    for images, labels in loop:
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
        loop.set_postfix(loss=loss.item(), acc=100 * correct / total)

    train_acc = 100 * correct / total
    avg_train_loss = running_loss / len(train_loader)

    # Validation Loop
    model.eval()
    val_correct, val_total = 0, 0
    val_loss = 0.0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    val_acc = 100 * val_correct / val_total
    avg_val_loss = val_loss / len(val_loader)

    print(f"Epoch [{epoch+1}/{num_epochs}] | "
          f"Train Loss: {avg_train_loss:.4f}, Train Acc: {train_acc:.2f}% | "
          f"Val Loss: {avg_val_loss:.4f}, Val Acc: {val_acc:.2f}%")

    # Save best model
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), checkpoint_path)
        print(f"✅ Best model saved with val acc: {best_val_acc:.2f}%")

print("🎉 Training Complete! Best model saved at:", checkpoint_path)

# Load best model for evaluation
model.load_state_dict(torch.load(checkpoint_path))
model.eval()
print("✅ Best model loaded for final evaluation.")


Epoch 1/20: 100%|██████████| 46/46 [02:55<00:00,  3.82s/it, acc=13.1, loss=2.64]


Epoch [1/20] | Train Loss: 3.0523, Train Acc: 13.08% | Val Loss: 5.3954, Val Acc: 6.81%
✅ Best model saved with val acc: 6.81%


Epoch 2/20: 100%|██████████| 46/46 [03:01<00:00,  3.95s/it, acc=20.7, loss=2.59]


Epoch [2/20] | Train Loss: 2.6126, Train Acc: 20.71% | Val Loss: 2.7117, Val Acc: 19.35%
✅ Best model saved with val acc: 19.35%


Epoch 3/20: 100%|██████████| 46/46 [03:12<00:00,  4.19s/it, acc=29.8, loss=2.41]


Epoch [3/20] | Train Loss: 2.3196, Train Acc: 29.84% | Val Loss: 2.5826, Val Acc: 29.43%
✅ Best model saved with val acc: 29.43%


Epoch 4/20: 100%|██████████| 46/46 [03:15<00:00,  4.26s/it, acc=34.8, loss=2.52]


Epoch [4/20] | Train Loss: 2.1308, Train Acc: 34.81% | Val Loss: 2.2987, Val Acc: 28.61%


Epoch 5/20: 100%|██████████| 46/46 [03:18<00:00,  4.30s/it, acc=41.3, loss=1.8] 


Epoch [5/20] | Train Loss: 1.9495, Train Acc: 41.35% | Val Loss: 2.3369, Val Acc: 28.61%


Epoch 6/20: 100%|██████████| 46/46 [03:20<00:00,  4.35s/it, acc=43.5, loss=1.87]


Epoch [6/20] | Train Loss: 1.8516, Train Acc: 43.53% | Val Loss: 1.8082, Val Acc: 43.60%
✅ Best model saved with val acc: 43.60%


Epoch 7/20: 100%|██████████| 46/46 [03:21<00:00,  4.38s/it, acc=45.8, loss=1.75]


Epoch [7/20] | Train Loss: 1.7385, Train Acc: 45.84% | Val Loss: 1.7214, Val Acc: 45.50%
✅ Best model saved with val acc: 45.50%


Epoch 8/20: 100%|██████████| 46/46 [03:20<00:00,  4.36s/it, acc=44.7, loss=1.61]


Epoch [8/20] | Train Loss: 1.7290, Train Acc: 44.69% | Val Loss: 2.0691, Val Acc: 35.97%


Epoch 9/20: 100%|██████████| 46/46 [03:23<00:00,  4.43s/it, acc=51.5, loss=1.45]


Epoch [9/20] | Train Loss: 1.5766, Train Acc: 51.50% | Val Loss: 2.0577, Val Acc: 39.78%


Epoch 10/20: 100%|██████████| 46/46 [03:21<00:00,  4.38s/it, acc=50.6, loss=2.27]


Epoch [10/20] | Train Loss: 1.5422, Train Acc: 50.61% | Val Loss: 2.2008, Val Acc: 34.33%


Epoch 11/20: 100%|██████████| 46/46 [03:19<00:00,  4.33s/it, acc=55.3, loss=1.24] 


Epoch [11/20] | Train Loss: 1.4299, Train Acc: 55.31% | Val Loss: 1.6308, Val Acc: 45.78%
✅ Best model saved with val acc: 45.78%


Epoch 12/20: 100%|██████████| 46/46 [03:22<00:00,  4.41s/it, acc=58.2, loss=1.16] 


Epoch [12/20] | Train Loss: 1.3618, Train Acc: 58.17% | Val Loss: 1.3635, Val Acc: 55.86%
✅ Best model saved with val acc: 55.86%


Epoch 13/20: 100%|██████████| 46/46 [03:24<00:00,  4.45s/it, acc=61.8, loss=1.19] 


Epoch [13/20] | Train Loss: 1.1862, Train Acc: 61.78% | Val Loss: 1.1956, Val Acc: 60.76%
✅ Best model saved with val acc: 60.76%


Epoch 14/20: 100%|██████████| 46/46 [03:08<00:00,  4.10s/it, acc=64.8, loss=1.36] 


Epoch [14/20] | Train Loss: 1.1099, Train Acc: 64.78% | Val Loss: 1.0518, Val Acc: 68.94%
✅ Best model saved with val acc: 68.94%


Epoch 15/20: 100%|██████████| 46/46 [03:15<00:00,  4.25s/it, acc=67.8, loss=0.773]


Epoch [15/20] | Train Loss: 1.0202, Train Acc: 67.78% | Val Loss: 1.5910, Val Acc: 53.41%


Epoch 16/20: 100%|██████████| 46/46 [03:24<00:00,  4.44s/it, acc=69.9, loss=1.32] 


Epoch [16/20] | Train Loss: 0.9745, Train Acc: 69.89% | Val Loss: 0.9600, Val Acc: 66.49%


Epoch 17/20: 100%|██████████| 46/46 [03:24<00:00,  4.44s/it, acc=71.9, loss=0.642]


Epoch [17/20] | Train Loss: 0.8824, Train Acc: 71.87% | Val Loss: 1.2272, Val Acc: 61.04%


Epoch 18/20: 100%|██████████| 46/46 [03:22<00:00,  4.40s/it, acc=73.9, loss=0.819]


Epoch [18/20] | Train Loss: 0.8239, Train Acc: 73.91% | Val Loss: 0.8039, Val Acc: 74.93%
✅ Best model saved with val acc: 74.93%


Epoch 19/20: 100%|██████████| 46/46 [03:24<00:00,  4.45s/it, acc=73.1, loss=0.521]


Epoch [19/20] | Train Loss: 0.8201, Train Acc: 73.09% | Val Loss: 0.8935, Val Acc: 70.30%


Epoch 20/20: 100%|██████████| 46/46 [03:24<00:00,  4.44s/it, acc=76.4, loss=0.416]


Epoch [20/20] | Train Loss: 0.7348, Train Acc: 76.36% | Val Loss: 1.3930, Val Acc: 55.31%
🎉 Training Complete! Best model saved at: best_multiscale_cnn.pth
✅ Best model loaded for final evaluation.
