Senior Project: Training a CNN to identify plant saplings

Data Loading and Preprocessing

Import dataset

In [1]:
from typing import Callable

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as datasets

import numpy as np
import matplotlib.pyplot as plt
import tqdm


import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os
import pandas as pd

from sklearn.preprocessing import LabelEncoder
torch.manual_seed(42)

<torch._C.Generator at 0x10fc66cb0>

In [2]:

class PlantDataset(Dataset):
    """Image-classification dataset driven by a pandas DataFrame manifest.

    The manifest must have an ``image_path`` column and must carry the class
    label in its second column (positional index 1). Labels are converted to
    integer class ids and stored in an ``encoded_label`` column of a private
    copy of the manifest.

    Args:
        dataframe: manifest with one row per image. It is copied, so the
            caller's frame is left untouched.
        image_dir: directory used as a fallback location when a manifest
            path does not exist on disk (looked up by file name).
        transform: optional callable applied to the loaded RGB PIL image.
        label_encoder: optional, already fitted encoder exposing
            ``transform``. Pass the training dataset's ``label_encoder`` to
            validation/test datasets so that every split maps a class name
            to the same integer. When omitted, a new ``LabelEncoder`` is
            fitted on this manifest alone.
    """

    def __init__(self, dataframe, image_dir, transform=None, label_encoder=None):
        # Copy first: adding the encoded column must not mutate the frame the
        # caller handed in (and avoids pandas' SettingWithCopy pitfalls).
        self.dataframe = dataframe.copy()
        self.image_dir = image_dir
        self.transform = transform

        labels = self.dataframe.iloc[:, 1]
        if label_encoder is None:
            self.label_encoder = LabelEncoder()
            encoded = self.label_encoder.fit_transform(labels)
        else:
            # Reuse the supplied mapping instead of refitting, so class ids
            # stay consistent across splits.
            self.label_encoder = label_encoder
            encoded = label_encoder.transform(labels)
        self.dataframe["encoded_label"] = encoded

    def __len__(self):
        """Return the number of rows (images) in the manifest."""
        return len(self.dataframe)

    def _resolve_path(self, image_path):
        """Return ``image_path`` if it exists, else try ``image_dir/<file name>``.

        Falls back to the original path when neither exists, so the eventual
        ``FileNotFoundError`` still names the path from the manifest.
        """
        if not self.image_dir or os.path.exists(image_path):
            return image_path
        candidate = os.path.join(self.image_dir, os.path.basename(image_path))
        return candidate if os.path.exists(candidate) else image_path

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        ``image`` is the transformed RGB image (or the PIL image itself when
        no transform is set); ``label`` is a 0-dim ``torch.long`` tensor.
        """
        row = self.dataframe.iloc[idx]

        # The context manager closes the underlying file as soon as the
        # pixel data has been converted into an independent RGB image.
        with Image.open(self._resolve_path(row["image_path"])) as handle:
            image = handle.convert("RGB")

        if self.transform:
            image = self.transform(image)
        return image, torch.tensor(int(row["encoded_label"]), dtype=torch.long)


# Preprocessing applied to every image: resize to 224x224, convert to a
# tensor, then normalise the three channels with fixed per-channel mean/std.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            [0.485, 0.456, 0.406],
            [0.229, 0.224, 0.225],
        ),
    ]
)


In [3]:
# Read the three split manifests (train / validation / test).
train, val, test = map(
    pd.read_csv,
    ("data/train.csv", "data/val.csv", "data/test.csv"),
)

# One dataset per split, all sharing the same preprocessing pipeline.
# NOTE(review): each PlantDataset built this way fits its own label encoder
# on its own manifest, so class ids only line up across splits if every
# split contains the same set of labels -- confirm.
train_dataset, val_dataset, test_dataset = (
    PlantDataset(dataframe=frame, image_dir="inaturalist", transform=transform)
    for frame in (train, val, test)
)

# Only the training loader shuffles; evaluation loaders keep manifest order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
val_loader, test_loader = (
    DataLoader(split, shuffle=False, batch_size=32)
    for split in (val_dataset, test_dataset)
)




Visualize image:

In [4]:

# --- Single sample, with the normalisation undone so colours look natural ---
image, label = train_dataset[0]

# Move the first tensor axis to the end so imshow receives height x width x channel.
image = image.permute(1, 2, 0).numpy()

# Must match the mean/std passed to transforms.Normalize above.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
image = image * std + mean
image = np.clip(image, 0, 1)  # guard against tiny float overshoot outside [0, 1]

plt.imshow(image)
plt.title(f"Label: {label}")
plt.axis("off")
plt.show()

# --- Thumbnail grid: first channel of the (still normalised) first samples ---
# Bounded by the dataset size so a manifest with fewer than 100 rows does not
# raise an IndexError part-way through the grid.
num_preview = min(100, len(train_dataset))
for i in range(num_preview):
    plt.subplot(10, 10, i + 1)
    plt.imshow(train_dataset[i][0][0], cmap='gray')
    plt.axis('off')
plt.show()

FileNotFoundError: [Errno 2] No such file or directory: '/Users/itembematiku/Desktop/Senior Project/Model/inaturalist_photos/9010247_479913848.jpg'

Baseline Model: CNN

In [None]:
num_classes = len(train_dataset.label_encoder.classes_)

class SimpleCNN(nn.Module):
    """Small baseline CNN: two 3x3 convolutions followed by two linear layers.

    The layer sizes are fixed: 3 input channels -> 4 -> 8 feature maps
    (stride 1, padding 1, so the spatial size is preserved), a flattened
    ``8 * 224 * 224`` feature vector -> 128 hidden units -> ``num_classes``
    logits, where ``num_classes`` is the module-level value computed from the
    training label encoder. Inputs are therefore expected to be
    ``(batch, 3, 224, 224)``.

    Args:
        input_dim: stored on the instance for reference; not used to size
            the layers.
        output_dim: stored on the instance for reference; the output size is
            taken from ``num_classes`` instead.
        hidden_dim_list: stored on the instance for reference; defaults to
            ``[4, 8]``, matching the fixed channel widths.
    """

    def __init__(
        self,
        input_dim: int = 1,
        output_dim: int = 10,
        hidden_dim_list: list = None,
    ):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        # A fresh list per instance: a mutable default argument would be
        # shared (and mutable) across every model created without this arg.
        self.hidden_dim_list = [4, 8] if hidden_dim_list is None else list(hidden_dim_list)

        self.conv_1 = nn.Conv2d(3, 4, 3, 1, 1)
        self.conv_2 = nn.Conv2d(4, 8, 3, 1, 1)
        self.fc_1 = nn.Linear(8 * 224 * 224, 128)
        self.fc_2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for ``x``."""
        # ReLU between layers: without a non-linearity the stacked
        # convolutions and linear layers reduce to a single linear map.
        x = F.relu(self.conv_1(x))
        x = F.relu(self.conv_2(x))
        x = torch.flatten(x, 1)
        x = F.relu(self.fc_1(x))
        # No activation on the last layer: CrossEntropyLoss expects raw logits.
        return self.fc_2(x)

In [None]:
def plot_metrics(train_metrics, test_metrics, xlabel, ylabel, title):
    """Plot a train curve and a test curve on the current axes, then show.

    Args:
        train_metrics: sequence of per-epoch values, labelled 'Train'.
        test_metrics: sequence of per-epoch values, labelled 'Test'.
        xlabel: x-axis label.
        ylabel: y-axis label.
        title: axes title.
    """
    ax = plt.gca()
    for series, name in ((train_metrics, 'Train'), (test_metrics, 'Test')):
        ax.plot(series, label=name)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    plt.show()

def train(model, loss_fn, train_loader, test_loader, optimizer, epochs=5):
    """Run the full training loop and collect per-epoch metrics.

    Every epoch performs one call to ``train_epoch`` followed by one call to
    ``test_epoch``; the tqdm progress bar is updated with the latest values.

    Args:
        model: the model to optimise
        loss_fn: the loss function
        train_loader: loader passed to ``train_epoch``
        test_loader: loader passed to ``test_epoch``
        optimizer: the optimizer
        epochs: how many epochs to run
    Returns:
        A 4-tuple of lists, one entry per epoch, in this order:
        train_losses, test_losses, train_accuracies, test_accuracies
    """
    # Order matters: it is the order of the returned tuple.
    history = ([], [], [], [])

    progress = tqdm.tqdm(range(1, epochs + 1))
    for epoch in progress:
        tr_loss, tr_acc = train_epoch(model, loss_fn, train_loader, optimizer)
        te_loss, te_acc = test_epoch(model, loss_fn, test_loader)

        for series, value in zip(history, (tr_loss, te_loss, tr_acc, te_acc)):
            series.append(value)

        progress.set_description(f'Epoch {epoch}')
        progress.set_postfix(
            train_loss=tr_loss,
            test_loss=te_loss,
            train_accuracy=tr_acc,
            test_accuracy=te_acc,
        )

    return history


def train_epoch(model, loss_fn, train_loader, optimizer):
    """Train the model for exactly one pass over ``train_loader``.

    Args:
        model: the model
        loss_fn: the loss function
        train_loader: iterable of ``(data, target)`` batches
        optimizer: the optimizer
    Returns:
        train_loss: mean of the per-batch losses over the epoch
        train_accuracy: percentage (0-100) of correctly classified samples
        Both are 0.0 when the loader yields no samples.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0

    # Single loop: every batch is visited once per epoch. (Nesting a second
    # loop over the same loader would repeat the whole pass once per batch.)
    for data, target in train_loader:
        optimizer.zero_grad()
        output = model(data)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(output.detach(), 1)
        total += target.size(0)
        correct += (predicted == target).sum().item()
        num_batches += 1

    if num_batches == 0 or total == 0:
        return 0.0, 0.0

    train_loss = running_loss / num_batches
    train_accuracy = 100. * correct / total
    return train_loss, train_accuracy

def test_epoch(model, loss_fn, test_loader):
    """Test the model for one epoch.
    Args:
        model: the model
        loss_fn: the loss function
        test_loader: the testing data loader
    Returns:
        test_loss: the loss of the epoch
    """
    model.eval()
    test_loss = 0
    test_accuracy = 0

    with torch.no_grad():
        for data, target in test_loader:

            output = model(data)
            test_loss += loss_fn(output, target).item()
            _, predicted = torch.max(output.data, 1)
            test_accuracy += (predicted == target).float().mean().item()

    test_loss /= len(test_loader)
    test_accuracy *= 100 / len(test_loader)

    return test_loss, test_accuracy

In [None]:
# Hyperparameters for the baseline run.
batch_size = 64
learning_rate = 1e-4
epochs = 10
input_dim = 3             # images are loaded as RGB
hidden_dim_list = [4, 8]
output_dim = num_classes  # one logit per class found by the training label encoder

model = SimpleCNN(input_dim=input_dim, output_dim=output_dim, hidden_dim_list=hidden_dim_list)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# Rebuild the loaders with this run's batch size.
# NOTE(review): per-epoch evaluation uses the test split; val_dataset is
# available if the test set should stay untouched until the end -- confirm.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

train_losses, test_losses, train_accuracies, test_accuracies = train(model, loss_fn, train_loader, test_loader, optimizer, epochs=epochs)

# plot_metrics calls plt.show() itself, so each call renders its own figure;
# wrapping the calls in plt.subplot(2, 1, k) cannot combine them into one.
plot_metrics(train_losses, test_losses, xlabel="Epoch", ylabel="Loss", title="Loss")
plot_metrics(train_accuracies, test_accuracies, xlabel="Epoch", ylabel="Accuracy", title="Accuracy")

  0%|          | 0/10 [10:15<?, ?it/s]


KeyboardInterrupt: 

Training

Validation