In [9]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [10]:
batch_size = 64
num_epochs = 10 
learning_rate = 0.0001
data_dir = 'tomato'

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [11]:
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(128),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomVerticalFlip(),
    transforms.RandomAffine(degrees=10, translate=(0.1, 0.1)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

transform_val = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [12]:
train_dataset = datasets.ImageFolder(root='tomato/train', transform=transform_train)
val_dataset = datasets.ImageFolder(root='tomato/val', transform=transform_val)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [13]:
class EnhancedCNN(nn.Module):
    def __init__(self):
        super(EnhancedCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(256)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(256 * 16 * 16, 1024)
        self.fc2 = nn.Linear(1024, len(train_dataset.classes))
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        x = x.view(-1, 256 * 16 * 16)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x


In [14]:
model = EnhancedCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.7)

In [15]:
def train_model(model, train_loader, criterion, optimizer, scheduler, num_epochs=25):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct_predictions = 0
        total_samples = 0
        
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            
            optimizer.zero_grad()
            
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            
            _, predicted = torch.max(outputs, 1)
            
            total_samples += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()
        
        scheduler.step()
        
        average_loss = running_loss / len(train_loader)
        accuracy = (correct_predictions / total_samples) * 100
        
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {average_loss:.4f}, Accuracy: {accuracy:.2f}%')

train_model(model, train_loader, criterion, optimizer, scheduler, num_epochs=num_epochs)


Epoch 1/10, Loss: 2.0588, Accuracy: 34.82%
Epoch 2/10, Loss: 1.5190, Accuracy: 46.41%
Epoch 3/10, Loss: 1.3927, Accuracy: 51.55%
Epoch 4/10, Loss: 1.3421, Accuracy: 52.99%
Epoch 5/10, Loss: 1.2875, Accuracy: 54.86%
Epoch 6/10, Loss: 1.2230, Accuracy: 57.59%
Epoch 7/10, Loss: 1.2069, Accuracy: 57.79%
Epoch 8/10, Loss: 1.1622, Accuracy: 60.06%
Epoch 9/10, Loss: 1.1319, Accuracy: 60.54%
Epoch 10/10, Loss: 1.1057, Accuracy: 61.34%


In [16]:
def evaluate_model(model, val_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Validation Accuracy: {accuracy:.2f}%')
    
evaluate_model(model, val_loader)

Validation Accuracy: 76.00%


In [17]:
torch.save(model.state_dict(), './models/torch_cnn_v2.pth')