In [None]:
pip install openimages

In [None]:
pip install torch torchvision

In [19]:
import os
import glob
import PIL
import matplotlib.pyplot as plt
import numpy as np

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from openimages.download import download_dataset
from PIL import Image

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
import seaborn as sns

Defining transformations

In [20]:
# Training pipeline: random augmentation followed by conversion to a tensor.
# The image is resized to 128x128, padded by 2 px on every side and randomly
# cropped back to 128x128, then rotated by a random angle of up to 10 degrees.
transformTrain = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(),
        transforms.Resize(size=(128, 128)),
        transforms.Pad(padding=2),
        transforms.RandomCrop(size=(128, 128)),
        transforms.RandomRotation(degrees=10),
        transforms.ToTensor(),
    ]
)

# Evaluation pipeline: deterministic resize and tensor conversion only.
transformTest = transforms.Compose(
    [
        transforms.Resize(size=(128, 128)),
        transforms.ToTensor(),
    ]
)

Loading datasets

In [None]:
data_dir = "data"
number_for_samples = 500
classes = ["Orange", "Umbrella", "Strawberry"]

os.makedirs(data_dir, exist_ok=True)

# A class counts as downloaded only when its image folder already holds at
# least one .jpg (layout: <data_dir>/<class name lowercased>/images/*.jpg).
# Checking per class, instead of only checking that `data_dir` exists, lets an
# interrupted or failed download be completed on the next run.
missing_classes = [
    c for c in classes
    if not glob.glob(os.path.join(data_dir, c.lower(), "images", "*.jpg"))
]

if missing_classes:
    download_dataset(data_dir, missing_classes, limit=number_for_samples)
else:
  print("Dataset already exists")

We build a dictionary that maps each class name to the list of its image file paths

In [22]:
images_dir = "./data"

# Map each class name to the paths of its downloaded images.
# glob returns paths in arbitrary, filesystem-dependent order, so they are
# sorted to make the later positional train/test split reproducible.
files = {
    c: sorted(glob.glob(images_dir + "/{}/images/*.jpg".format(c.lower())))
    for c in classes
}

Splitting each class's file list into train files and test files

In [23]:
# Split every class's file list at the 80% mark: the first part is used for
# training, the remainder for testing. A single split index guarantees that
# the two parts never overlap and that no file is dropped (slicing the test
# part with a negative count would return the whole list when that count
# rounds down to zero).
trainFiles = {}
testFiles = {}
for c in classes:
    split_at = int(len(files[c]) * 0.8)
    trainFiles[c] = files[c][:split_at]
    testFiles[c] = files[c][split_at:]

In [24]:
class CustomDataset(Dataset):
    """Image-classification dataset built from a ``{class_name: [paths]}`` mapping.

    Each class is labelled with its position in ``files`` (0 for the first
    key, 1 for the second, ...), so any number of classes is supported and the
    labels do not depend on module-level state. Labels are plain integers.

    Args:
        files: mapping from class name to a list of image file paths.
        transform: callable applied to the loaded RGB ``PIL.Image``; when
            ``None`` the image is returned untransformed.

    Attributes:
        length: mapping from class name to its number of files.
        all_files: every file path, in shuffled order.
        labels: integer class index for each entry of ``all_files``.
        order: the permutation that was applied to the unshuffled lists.
    """

    def __init__(self, files, transform):
        self.files = files
        self.transform = transform

        # get each class lengths
        self.length = {name: len(paths) for name, paths in files.items()}

        # Flatten the per-class lists, recording the class index of every file.
        ordered_files = []
        ordered_labels = []
        for class_index, paths in enumerate(files.values()):
            ordered_files.extend(paths)
            ordered_labels.extend([class_index] * len(paths))

        # shuffle data (files and labels share the same permutation)
        self.order = list(np.random.permutation(len(ordered_labels)))
        self.all_files = [ordered_files[x] for x in self.order]
        self.labels = [ordered_labels[x] for x in self.order]

    def __len__(self):
        """Return the total number of images across all classes."""
        return len(self.all_files)

    def __getitem__(self, i):
        """Load the i-th image as RGB, apply the transform, and return ``(image, label)``."""
        # convert() returns a new, fully loaded image, so the underlying file
        # can be closed as soon as the `with` block exits.
        with Image.open(self.all_files[i]) as raw:
            image = raw.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        return (image, self.labels[i])

Creating test and train datasets, dataloaders

In [25]:
# Wrap the per-class file lists in datasets, each with its own transform.
train_dataset = CustomDataset(files=trainFiles, transform=transformTrain)
test_dataset = CustomDataset(files=testFiles, transform=transformTest)

num_workers = 2
batch_size = 8

# Settings shared by both loaders; only the training loader reshuffles.
loader_kwargs = dict(batch_size=batch_size, num_workers=num_workers)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

In [None]:
# Preview the first training sample: the dataset yields channel-first image
# tensors, while imshow expects the channel axis last.
first_image, _first_label = train_dataset[0]
img = first_image.numpy()
plt.imshow(np.transpose(img, (1, 2, 0)))

In [None]:
class CNN(nn.Module):
    """Small convolutional classifier: two conv + max-pool stages, then three linear layers.

    The first linear layer expects 32 x 29 x 29 features per sample, which is
    what 128x128 inputs produce (128 -> 124 -> 62 -> 58 -> 29 through the two
    5x5 convolutions and 2x2 poolings).

    Args:
        classes_count: number of output logits (one per class).
        in_channels: number of channels of the input images. Defaults to 3;
            previously the class count was used here, which only worked
            because both happened to equal 3.
    """

    def __init__(self, classes_count, in_channels=3):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, 16, 5)
        self.conv2 = nn.Conv2d(16, 32, 5)

        self.pool = nn.MaxPool2d(2, 2)

        self.fc1 = nn.Linear(32 * 29 * 29, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, classes_count)

    def forward(self, x):
        """Return unnormalised class scores of shape ``(batch, classes_count)``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike view(-1, n),
        # this can never fold a feature-size mismatch into the batch axis; a
        # wrong input size fails loudly in fc1 instead.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# One output logit per dataset class; derived from `classes` so the model
# stays in sync if the class list changes.
num_classes = len(classes)
net = CNN(num_classes)
print(net)

Defining helper methods

In [29]:
def get_acc(outputs, labels):
    """Count the correct predictions in a batch.

    Args:
        outputs: per-class scores, one row per batch element.
        labels: true class index of each batch element.

    Returns:
        A 0-dim float tensor holding the number of rows whose highest-scoring
        class equals the label.
    """
    with torch.no_grad():
        predicted = outputs.argmax(dim=1)  # index of the top score in every row
        hits = (predicted == labels).float()
        return hits.sum()

In [30]:
def set_to_device(data, device):
    """Lazily move every element of ``data`` to ``device``.

    Returns a single-pass generator that yields ``element.to(device)`` for
    each element of ``data``; it is meant to be unpacked directly, e.g.
    ``images, labels = set_to_device(batch, device)``.
    """
    source = iter(data)

    def _moved():
        for element in source:
            yield element.to(device)

    return _moved()

In [33]:
# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs end to end.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

N_train = len(train_dataset)
N_test = len(test_dataset)


def train(network, n_epochs, lr):
    """Train ``network`` with Adam and cross-entropy, evaluating after every epoch.

    For each epoch the function makes one pass over ``train_loader`` with
    gradient updates, then one pass over ``test_loader`` without gradients,
    and prints the per-sample average loss and accuracy of both passes. When
    all epochs are done, the network's ``state_dict`` is written to
    ``trained_model_parameters.pth``.

    Args:
        network: the model to train; it is moved to ``DEVICE`` and updated in place.
        n_epochs: number of epochs to run.
        lr: learning rate for the Adam optimizer.
    """
    network.to(DEVICE)

    optimizer = torch.optim.Adam(network.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(n_epochs):
        train_loss = 0.0
        train_acc = 0.0
        test_loss = 0.0
        test_acc = 0.0

        # Training pass. Always go through `network`, the model passed in,
        # rather than whatever the global `net` happens to be.
        network.train()
        for data in train_loader:
            images, labels = set_to_device(data, DEVICE)

            optimizer.zero_grad()
            outputs = network(images)

            # CrossEntropyLoss needs integer class indices.
            loss = criterion(outputs, labels.long())

            # The loss is a batch mean; weight it by the batch size so the
            # epoch total can be divided by the number of samples.
            train_loss += loss.item() * images.size(0)
            train_acc += get_acc(outputs, labels).item()

            loss.backward()
            optimizer.step()

        # Evaluation pass: no gradients, layers in inference mode.
        network.eval()
        with torch.no_grad():
            for data in test_loader:
                images, labels = set_to_device(data, DEVICE)
                outputs = network(images)

                loss = criterion(outputs, labels.long())
                test_loss += loss.item() * images.size(0)
                test_acc += get_acc(outputs, labels).item()

        print('Epoch: {} | Train Loss: {:.6f} | Train Acc: {:.3f} | Test Loss: {:.6f} | Test Acc: {:.3f}'.format(
              epoch,
              train_loss / N_train,
              train_acc / N_train,
              test_loss / N_test,
              test_acc / N_test,
              ))

    # Only meaningful (and only valid) when the work actually ran on a GPU.
    if torch.device(DEVICE).type == 'cuda':
        torch.cuda.synchronize()

    print('Finished Training')
    torch.save(network.state_dict(), 'trained_model_parameters.pth')

# This is where all the changes were implemented

In [None]:
# smaller learning rate
# Three consecutive stages on the same network; each stage runs fewer epochs
# with a learning rate ten times smaller than the previous one.
for stage_epochs, stage_lr in ((30, 1e-5), (20, 1e-6), (10, 1e-7)):
    train(net, stage_epochs, stage_lr)

# Persist the final weights.
torch.save(net.state_dict(), 'trained_model_parameters.pth')

# statistics


In [74]:
def printConfusionMatrix(conf_matrix, classes):
    """Draw ``conf_matrix`` as an annotated heatmap and show the figure.

    Args:
        conf_matrix: square matrix of integer counts.
        classes: class names used as tick labels on both axes.
    """
    plt.figure(figsize=(8, 6))

    heatmap_options = dict(
        annot=True,
        fmt="d",
        cmap="Reds",
        cbar=False,
        xticklabels=classes,
        yticklabels=classes,
    )
    axes = sns.heatmap(conf_matrix, **heatmap_options)

    axes.set_xlabel("Predicted Labels")
    axes.set_ylabel("True Labels")
    axes.set_title("Confusion Matrix")
    plt.show()

In [None]:
# Collect the true and predicted class of every test sample.
true_values, predicted_values = [], []

with torch.no_grad():
    for data in test_loader:
        images, labels = set_to_device(data, DEVICE)
        outputs = net(images)
        # torch.max over dim 1 returns (values, indices); the indices are the
        # predicted classes.
        predicted = torch.max(outputs.data, 1)[1]
        true_values.extend(labels.cpu().numpy())
        predicted_values.extend(predicted.cpu().numpy())

# Overall accuracy plus class-frequency-weighted recall, precision and F1.
accuracy = accuracy_score(true_values, predicted_values)
recall = recall_score(true_values, predicted_values, average='weighted')
precision = precision_score(true_values, predicted_values, average='weighted')
f1 = f1_score(true_values, predicted_values, average='weighted')

metric_report = (
    ("Accuracy", accuracy),
    ("Recall", recall),
    ("Precision", precision),
    ("F1 Score", f1),
)
for metric_name, metric_value in metric_report:
    print(f"{metric_name}: {metric_value:.5f}")

In [None]:
# Rows are the true classes, columns the predicted ones.
conf_matrix = confusion_matrix(y_true=true_values, y_pred=predicted_values)
printConfusionMatrix(conf_matrix=conf_matrix, classes=classes)