In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
import torch.nn.functional as F
import numpy as np
from tqdm import tqdm 

### #1: Dataset Class and Dataloader

In [2]:
class PalmsDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data = ImageFolder(data_dir,transform=transform)
        self.transform=transform
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        return self.data[index]
    
    @property
    def classes(self):
        return self.data.classes

In [3]:
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [4]:
train_set = PalmsDataset("train",transform=train_transform)

#### Dataset vs Dataloader

In [5]:
for image, label in train_set:
    break

image.shape, label

(torch.Size([3, 224, 224]), 0)

In [6]:
train_loader = DataLoader(batch_size=32, dataset=train_set, shuffle=True)

In [None]:
Dat

TypeError: 'DataLoader' object is not subscriptable

### #2: Classifier

In [19]:
class PalmClassifier(nn.Module):
    def __init__(self, number_of_classes=9):
        super(PalmClassifier, self).__init__()
        
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1, stride=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1, stride=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1, stride=1)
        # self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1, stride=1)
        
        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(64)
        self.bn3 = nn.BatchNorm2d(128)
        # self.bn4 = nn.BatchNorm2d(256)
        
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.gap = nn.AdaptiveAvgPool2d(1)
        
        # self.fc1 = nn.Linear(256*14*14, 512)
        # self.fc2 = nn.Linear(512,number_of_classes)
        
        self.fc1 = nn.Linear(128, number_of_classes)
        
        self.drop1 = nn.Dropout(0.5)
    
    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.pool(x)
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.pool(x)
        x = F.relu(self.bn3(self.conv3(x)))
        x = self.pool(x)
        # x = F.relu(self.bn4(self.conv4(x)))
        # x = self.pool(x)
        x = self.gap(x)
        
        # x = x.view(-1, 256 * 14 * 14)
        # x = F.relu(self.fc1(x))
        x = x.view(x.size(0), -1)
        x = self.drop1(x)
        x = self.fc1(x)
        
        return x
        
        
        

In [20]:
model = PalmClassifier()

In [21]:
model(images)

tensor([[ 2.4466e-01, -2.1185e-01,  8.0985e-01, -3.4444e-01,  5.7754e-01,
          2.6155e-01,  1.8683e-01, -5.6259e-01,  3.9111e-01],
        [-5.0799e-01, -7.5958e-01,  5.2727e-01,  2.0145e-01,  1.0653e+00,
         -4.1785e-01,  1.8691e-02, -1.1615e-02,  5.4840e-01],
        [-1.3935e-01, -1.7275e+00,  1.9763e+00, -5.3913e-01,  1.6228e+00,
          4.9126e-01, -5.7103e-02, -1.2463e+00,  6.9972e-02],
        [-2.5647e-02, -1.2291e-01,  9.8184e-01, -8.2864e-01, -1.9743e-02,
          7.1930e-01,  2.7903e-01,  8.1783e-02,  1.4960e+00],
        [-8.4101e-02, -3.0808e-01,  2.3071e-01, -2.1043e-01,  8.0294e-01,
          7.0575e-01,  1.5922e-01, -2.8176e-01,  4.3374e-01],
        [-6.2572e-02,  5.5191e-01,  4.3906e-01,  1.2550e-01,  7.0677e-01,
          4.7213e-01, -9.4063e-02,  7.7910e-01,  8.3635e-01],
        [-9.3516e-02,  1.6707e-01,  1.1808e-01, -6.8341e-01,  1.3686e+00,
          1.8557e-01, -1.1320e+00, -9.0969e-01,  6.9037e-01],
        [-4.0727e-01, -4.4076e-01,  4.6988e-01, 

In [22]:
ex_out = model(images)
ex_out.shape

torch.Size([32, 9])

In [27]:
print(str(model))

PalmClassifier(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (gap): AdaptiveAvgPool2d(output_size=1)
  (fc1): Linear(in_features=128, out_features=9, bias=True)
  (drop1): Dropout(p=0.5, inplace=False)
)


In [None]:
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor()
])

In [None]:
val_dataset = PalmsDataset("valid", transform=transform)
test_dataset = PalmsDataset("test", transform=transform)

val_loader = DataLoader(val_dataset,batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
device = torch.device("cuda")

In [7]:
train_set.data.class_to_idx

{'Black Scorch': 0,
 'Fusarium Wilt': 1,
 'Healthy sample': 2,
 'Leaf Spots': 3,
 'Magnesium Deficiency': 4,
 'Manganese Deficiency': 5,
 'Parlatoria Blanchardi': 6,
 'Potassium Deficiency': 7,
 'Rachis Blight': 8}

In [None]:
model = PalmClassifier(number_of_classes=9).to(device=device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)

scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer,
                                                 mode="max",
                                                 patience=3,
                                                 verbose=True)

In [None]:
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=25):
    best_acc = 0.0
    
    for epoch in range(epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        
        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs} (Train)"):
            images, labels = images.to(device), labels.to(device)
            
            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            # Statistics
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        
        train_loss = running_loss / len(train_loader)
        train_acc = 100 * correct / total
        
        # Validation phase
        val_loss, val_acc = validate(model, val_loader, criterion)
        
        # Update LR scheduler
        scheduler.step(val_acc)
        
        # Print epoch results
        print(f"Epoch {epoch+1}: "
              f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}% | "
              f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")
        
        # Save best model
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), 'best_palm_cnn.pth')
            print(f"Saved new best model (Acc: {best_acc:.2f}%)")

def validate(model, val_loader, criterion):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc="Validating"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    val_loss = running_loss / len(val_loader)
    val_acc = 100 * correct / total
    return val_loss, val_acc

# Start training
train_model(model, train_loader, val_loader, criterion, optimizer, epochs=25)

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
model.eval()
y_test = []
predictions = []
correct = 0
total = 0
with torch.no_grad():
    for images, labels in tqdm(test_loader, desc="testing"):
        y_test.append(labels)
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        
        total += labels.size(0)
        _, predicted = torch.max(outputs.data, 1)
        predictions.append(pred)
        correct += (predicted == labels).sum().item()
        
test_acc = 100 * correct / total

cm = confusion_matrix(y_test, predictions)
ConfusionMatrixDisplay(cm).plot()
        

In [None]:
model = model.load_state_dict(torch.load())