In [1]:
import numpy as np
import torch
import torchvision 
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torchvision import datasets
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

In [2]:
# Use the first CUDA GPU when PyTorch can see one; otherwise stay on the CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
print(f'Device: {device}')

Device: cuda:0


In [3]:
# Per-channel normalisation constants shared by both pipelines.
# NOTE(review): these look like the usual ImageNet channel statistics - confirm that is intended.
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Training pipeline: random geometric and colour augmentation, then tensor conversion
# and normalisation.
train_steps = [
    transforms.RandomResizedCrop(size=224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
]
train_transform = transforms.Compose(train_steps)

# Evaluation pipeline: deterministic resize + centre crop to the same 224 px size,
# followed by the same tensor conversion and normalisation.
eval_steps = [
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
]
test_transform = transforms.Compose(eval_steps)

In [4]:
# One ImageFolder per split. Only the training split gets the augmenting transform;
# test and validation share the deterministic one.
train_dataset = datasets.ImageFolder(
    root="./Data/train/dataset2/",
    transform=train_transform,
)
test_dataset = datasets.ImageFolder(
    root="./Data/test/dataset2/",
    transform=test_transform,
)
validation_dataset = datasets.ImageFolder(
    root="./Data/validation/dataset2/",
    transform=test_transform,
)

# Quick sanity report: split sizes and the class names found for the training split.
print(f"Length of Train Data : {len(train_dataset)}")
print(f"Length of Test Data : {len(test_dataset)}")
print(f"Length of Validation Data : {len(validation_dataset)}")
print(f"Classes: {train_dataset.classes}")

Length of Train Data : 1200
Length of Test Data : 150
Length of Validation Data : 150
Classes: ['bird', 'car', 'house']


In [5]:
# Loader settings shared by all three splits.
num_workers = 10
batch_size = 32

print(num_workers)

# Only the training split is shuffled; test and validation are read in dataset order.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    num_workers=num_workers,
)
# NOTE: the `validation_loadder` spelling is kept on purpose - the training cell
# further down refers to this exact name.
validation_loadder = DataLoader(
    validation_dataset,
    batch_size=batch_size,
    num_workers=num_workers,
)

10


In [26]:
import torch.nn as nn
import torch.optim as optim
import pickle

# Define the CNN architecture
class Net(nn.Module):
    """Five-stage convolutional classifier with a three-layer fully connected head.

    Every convolutional stage is ``Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU ->
    MaxPool2d(2, 2)``, so each stage halves height and width and the five stages
    together shrink them by a factor of 32 (floor division). No dropout is applied
    in the convolutional stages; the head uses ``Dropout(0.5)`` after each hidden
    layer.

    All layers live in one flat ``nn.Sequential`` called ``network``. The layer
    order fixes the ``state_dict`` keys (``network.0.weight`` ...), so it must not
    be changed if previously saved checkpoints are to stay loadable.

    Args:
        in_shape: ``(channels, height, width)`` of a single input image.
        out_classes: number of logits produced by the last linear layer.

    Raises:
        ValueError: if height or width is below 32, because the feature map would
            be empty after the five pooling steps.
    """

    # Output channels of the successive convolutional stages.
    _STAGE_CHANNELS = (32, 64, 128, 128, 256)

    def __init__(self, in_shape, out_classes=3):
        super().__init__()
        in_channels, height, width = in_shape[0], in_shape[1], in_shape[2]

        # One MaxPool2d(2, 2) per stage -> overall reduction of 2 ** number_of_stages.
        reduction = 2 ** len(self._STAGE_CHANNELS)
        if height < reduction or width < reduction:
            raise ValueError(
                f"Net needs inputs of at least {reduction}x{reduction} pixels, "
                f"got {height}x{width}"
            )

        layers = []
        previous = in_channels
        for stage_channels in self._STAGE_CHANNELS:
            layers += [
                nn.Conv2d(previous, stage_channels, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(stage_channels),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            previous = stage_channels

        # Size of the flattened feature map that feeds the first linear layer.
        flat_features = previous * (height // reduction) * (width // reduction)
        layers += [
            nn.Flatten(),
            nn.Linear(flat_features, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, out_classes),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw class logits for a batch of images ``x``."""
        return self.network(x)

    def save(self, filename):
        """Write this model's ``state_dict`` to ``filename`` with ``torch.save``.

        The file has the same format as the checkpoints produced by
        ``torch.save(model.state_dict(), path)``, so both can be read by ``load``.
        """
        torch.save(self.state_dict(), filename)

    def load(self, filename):
        """Restore parameters and buffers from a ``state_dict`` file.

        The tensors are first mapped to the CPU and then copied into the existing
        parameters, so a checkpoint written on a GPU machine loads on a CPU-only
        one and the model stays on whatever device it is already on.

        Note: files written by the earlier implementation, which pickled
        ``self.__dict__`` directly, are not in this format and must be re-saved.
        """
        state = torch.load(filename, map_location='cpu')
        self.load_state_dict(state)

In [21]:
import torch.optim as optim

def _run_epoch(model, dataloader, criterion, run_device, optimizer=None):
    """Run one pass over ``dataloader`` and return ``(mean_loss, accuracy_percent)``.

    With an ``optimizer`` the model is put in training mode and updated after every
    batch; without one the model is put in eval mode and gradients are disabled.
    Both values are averaged over the samples actually seen, and ``(0.0, 0.0)`` is
    returned for an empty loader.
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    with torch.set_grad_enabled(is_training):
        for images, labels in dataloader:
            images = images.to(run_device)
            labels = labels.to(run_device)

            if is_training:
                optimizer.zero_grad()

            outputs = model(images)
            loss = criterion(outputs, labels)

            if is_training:
                loss.backward()
                optimizer.step()

            batch_items = labels.size(0)
            # criterion returns the batch mean, so weight it by the batch size.
            loss_sum += loss.item() * batch_items
            n_correct += (outputs.argmax(dim=1) == labels).sum().item()
            n_seen += batch_items

    if n_seen == 0:
        return 0.0, 0.0
    return loss_sum / n_seen, 100 * n_correct / n_seen


def train(model, train_dataloader, test_dataloader, num_epochs=10, learning_rate=0.001,
          val_every=5, checkpoint_dir='Data/params'):
    """Train ``model`` with Adam and cross-entropy, checkpointing the best validation result.

    Args:
        model: the network to optimise; batches are moved to the device its
            parameters are on.
        train_dataloader: yields ``(images, labels)`` batches used for optimisation.
        test_dataloader: yields ``(images, labels)`` batches used for the periodic
            validation pass (despite the name it is only used for validation here).
        num_epochs: number of passes over ``train_dataloader``.
        learning_rate: Adam learning rate (weight decay is fixed at 0.001).
        val_every: validate after every ``val_every``-th epoch; a falsy value
            disables validation and therefore checkpointing.
        checkpoint_dir: directory that receives the ``state_dict`` checkpoints; it
            is created on demand.

    Returns:
        None. Progress is printed, and each time the validation accuracy improves
        the model's ``state_dict`` is saved under a name that encodes the rounded
        train loss, train accuracy and validation accuracy.
    """
    import os

    # Define the loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.001)

    # Follow the model's own device instead of relying on a notebook-level global.
    run_device = next(model.parameters()).device

    # Track the best validation accuracy
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)

        train_loss, train_acc = _run_epoch(model, train_dataloader, criterion, run_device, optimizer)
        print(f'Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}')

        if val_every and (epoch + 1) % val_every == 0:
            val_loss, val_acc = _run_epoch(model, test_dataloader, criterion, run_device)
            print(f'Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}')

            # Keep a checkpoint whenever the validation accuracy improves.
            if val_acc > best_acc:
                best_acc = val_acc
                os.makedirs(checkpoint_dir, exist_ok=True)
                checkpoint_name = (
                    f'best_model_tLoss{round(train_loss,2)}'
                    f'_tAcc{round(train_acc,2)}_vAcc{round(val_acc,2)}.pt'
                )
                torch.save(model.state_dict(), os.path.join(checkpoint_dir, checkpoint_name))

        print()

In [27]:
# Size the network from the first training sample, then move it to the selected device.
first_image = train_dataset[0][0]
img_shape = first_image.shape
model = Net(img_shape).to(device)

# Count only the parameters that will receive gradients.
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'Parameter count: {trainable_params:,}')

Parameter count: 13,912,707


In [28]:
# Full training run. The validation loader goes in the slot that train() names
# `test_dataloader`; the held-out test split is not used here.
train(
    model,
    train_loader,
    validation_loadder,
    num_epochs=100,
    learning_rate=0.7e-3,
)

Epoch 1/100
----------
Train Loss: 1.0563 | Train Acc: 45.67

Epoch 2/100
----------
Train Loss: 1.0158 | Train Acc: 51.75

Epoch 3/100
----------
Train Loss: 0.9306 | Train Acc: 56.42

Epoch 4/100
----------
Train Loss: 0.9174 | Train Acc: 57.33

Epoch 5/100
----------
Train Loss: 0.8575 | Train Acc: 61.83
Val Loss: 0.7809 | Val Acc: 62.00

Epoch 6/100
----------
Train Loss: 0.8463 | Train Acc: 61.08

Epoch 7/100
----------
Train Loss: 0.8059 | Train Acc: 65.08

Epoch 8/100
----------
Train Loss: 0.7878 | Train Acc: 68.00

Epoch 9/100
----------
Train Loss: 0.7925 | Train Acc: 65.08

Epoch 10/100
----------
Train Loss: 0.7590 | Train Acc: 67.17
Val Loss: 1.1323 | Val Acc: 50.00

Epoch 11/100
----------
Train Loss: 0.7124 | Train Acc: 68.50

Epoch 12/100
----------
Train Loss: 0.7223 | Train Acc: 68.58

Epoch 13/100
----------
Train Loss: 0.7053 | Train Acc: 71.08

Epoch 14/100
----------
Train Loss: 0.6831 | Train Acc: 70.00

Epoch 15/100
----------
Train Loss: 0.6848 | Train Acc: 70.

In [31]:
# Restore the weights of a previously saved checkpoint onto the current device.
best_checkpoint_path = "Data/params/best_model_tLoss0.36_tAcc85.92_vAcc87.33.pt"
best_state = torch.load(best_checkpoint_path, map_location=device)
model.load_state_dict(best_state)

<All keys matched successfully>

In [32]:
# Pickle the whole module object (not only its state_dict). Loading such a file
# later requires the Net class definition to be available in the loading process.
# The directory is spelled `Data/params`, as in every other cell; `Data/Params`
# is a different folder on case-sensitive filesystems.
torch.save(model, "Data/params/best_model_tLoss0.36_vAcc87.33.model")

In [12]:
# print(model)

In [13]:
# torch.save(model.state_dict(), "Data/params/best_model_loss0.31_vAcc88(for further training).pt")

In [None]:
def _as_index_array(values):
    """Return ``values`` (tensor or sequence of class ids) as a flat NumPy integer array."""
    if torch.is_tensor(values):
        # detach().cpu() works for tensors on any device and on older PyTorch versions.
        values = values.detach().cpu().numpy()
    return np.asarray(values, dtype=np.intp).reshape(-1)


def calculate_confusion(predictions, labels, matrix=None):
    """Add one batch of results to a confusion matrix, in place.

    Args:
        predictions: predicted class indices (tensor or sequence).
        labels: true class indices, same length as ``predictions``.
        matrix: 2-D counter indexed as ``matrix[true][predicted]``. When omitted,
            the notebook-level global ``arr`` is updated, as before.

    Returns:
        Always ``0`` (kept for compatibility); the result is the mutated matrix.

    Raises:
        ValueError: if ``predictions`` and ``labels`` differ in length.
    """
    target = arr if matrix is None else matrix

    predicted_idx = _as_index_array(predictions)
    true_idx = _as_index_array(labels)
    if predicted_idx.shape[0] != true_idx.shape[0]:
        raise ValueError(
            f"predictions and labels differ in length: "
            f"{predicted_idx.shape[0]} vs {true_idx.shape[0]}"
        )

    # Rows are true labels, columns are predictions.
    for true_cls, predicted_cls in zip(true_idx, predicted_idx):
        target[true_cls][predicted_cls] += 1
    return 0

In [None]:
def print_array(arr, class_names=None):
    """Print a confusion matrix as a text table.

    Rows are the true labels and columns are the predictions. Each data column is
    as wide as its class name and the row-label column is as wide as the longest
    class name. Every printed line ends with ``" | "`` and is followed by a blank
    line.

    Args:
        arr: square table of counts indexed as ``arr[true][predicted]``.
        class_names: names for the rows/columns, in index order. When omitted,
            ``train_dataset.classes`` from the notebook is used, as before.
    """
    names = list(train_dataset.classes if class_names is None else class_names)
    label_width = max((len(name) for name in names), default=0)

    # Header row: an empty label cell followed by the class names.
    header_cells = [" " * label_width + " |"] + [f"{name} |" for name in names]
    print(" ".join(header_cells) + " \n")

    # One row per true label; counts are right-aligned under their column header.
    for row_index, row_name in enumerate(names):
        row_cells = [row_name.rjust(label_width) + " |"]
        for col_index, col_name in enumerate(names):
            count = int(arr[row_index][col_index])
            row_cells.append(str(count).rjust(len(col_name)) + " |")
        print(" ".join(row_cells) + " \n")


In [33]:
# Fresh confusion matrix (rows: true labels, columns: predictions). It is rebuilt
# on every run of this cell so that re-running does not double the counts.
num_classes = len(train_dataset.classes)
arr = np.zeros((num_classes, num_classes), dtype=int)

# Evaluation must not depend on which cell ran last: switch BatchNorm to its
# running statistics and turn Dropout off explicitly.
model.eval()

correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # The arg-max over the class dimension is the predicted label; it already
        # lives on the same device as `labels`.
        predicted = model(inputs).argmax(dim=1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        calculate_confusion(predicted, labels)

# Guard against an empty test loader.
accuracy = 100 * correct / total if total else 0.0
print('Accuracy on the test set: {:.2f}%'.format(accuracy))
print("\n")
print_array(arr)

Accuracy on the test set: 85.33%


      | bird | car | house | 

 bird |   46 |   1 |     3 | 

  car |    5 |  41 |     4 | 

house |    7 |   2 |    41 | 

