In [None]:
pip install openimages

In [None]:
pip install torch torchvision

In [3]:
import os
from openimages.download import download_dataset
import torch
import torchvision
from torchvision import datasets
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import numpy as np
import glob
import PIL
import matplotlib.pyplot as plt
from PIL import Image

In [4]:
data_dir = "data"
number_for_samples = 334 # for each category
classes = ["Strawberry", "Fountain", "Lemon"]

if not os.path.exists(data_dir):
    os.makedirs(data_dir)

download_dataset(data_dir, classes, limit=number_for_samples)

100%|██████████| 334/334 [00:07<00:00, 45.47it/s]
100%|██████████| 334/334 [00:07<00:00, 47.67it/s]
100%|██████████| 334/334 [00:06<00:00, 50.76it/s]


{'strawberry': {'images_dir': 'data/strawberry/images'},
 'fountain': {'images_dir': 'data/fountain/images'},
 'lemon': {'images_dir': 'data/lemon/images'}}

In [9]:
import requests
import zipfile

# url of imagenet classes because resent model is pre-trained on imagenet
# https://gist.github.com/yrevar/942d3a0ac09ec9e5eb3a#file-imagenet1000_clsidx_to_labels-txt
url = "https://gist.github.com/yrevar/942d3a0ac09ec9e5eb3a/archive/238f720ff059c1f82f368259d1ca4ffa5dd8f9f5.zip"


response = requests.get(url)
zip_file_path = "imagenet_labels.zip"
with open(zip_file_path, "wb") as zip_file:
    zip_file.write(response.content)

with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall() # unzip the downloaded file

original_folder_name = "942d3a0ac09ec9e5eb3a-238f720ff059c1f82f368259d1ca4ffa5dd8f9f5"
new_folder_name = "imagenet_labels"
os.rename(original_folder_name, new_folder_name)

classes_file_path = os.path.join(new_folder_name, "imagenet1000_clsidx_to_labels.txt") # image classes that also exist in imagenet
with open(classes_file_path) as cf:
    class_list = [line.split('\'')[1].lower().split(", ") for line in cf.readlines()]
class_indexes = { c: [idx for idx, s in enumerate(class_list) if c.lower() in s][0] for c in classes } # index of my classes in imagenet

In [11]:
transform = transforms.Compose([
    torchvision.transforms.Resize((224,224)),
    # this can help in data augmentation, making the model more robust to different lighting conditions and color variations.
    torchvision.transforms.ColorJitter(hue=.05, saturation=.05),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.RandomRotation(20, expand=False, center=None, fill=None),
    transforms.ToTensor()  # convert the transformed image into a PyTorch tensor
])

In [None]:
from torchvision.models import resnet50, ResNet50_Weights
from tqdm import tqdm as tqdm

# preparation of the existing pre-trained model
if torch.cuda.is_available():
    device = 'cuda' # cuda = gpu
else:
    device = 'cpu'

weights = ResNet50_Weights.IMAGENET1K_V2
model = resnet50(weights=weights)
model.to(device)
model.eval()

In [14]:
class CustomDataset(Dataset):
    def __init__(self, images_dir, transform):
      self.images_dir = images_dir
      self.transform = transform

      self.files = []
      self.labels = []

      self.class1_files = glob.glob(self.images_dir + "/{}/images/*.jpg".format(classes[0].lower()))
      self.class2_files = glob.glob(self.images_dir + "/{}/images/*.jpg".format(classes[1].lower()))
      self.class3_files = glob.glob(self.images_dir + "/{}/images/*.jpg".format(classes[2].lower()))
      self.class1 = len(self.class1_files)
      self.class2 = len(self.class2_files)
      self.class3 = len(self.class3_files)

      self.files = self.class1_files + self.class2_files + self.class3_files

      self.labels = np.zeros(len(self.files))
      self.labels[:self.class1] = 0 # 0 until the last index of class 1
      self.labels[self.class1:self.class1+self.class2] = 1 # selects a portion of this array, starting from index self.class1 and ending just before index self.class2
      self.labels[self.class1+self.class2:] = 2

      # shuffle data
      self.order = np.random.permutation(len(self.labels))
      self.files = [self.files[i] for i in self.order]
      self.labels = self.labels[self.order]

    def __len__(self):
        return (len(self.files)) # length

    def __getitem__(self, i):
        file = self.files[i]
        label = self.labels[i]

        image = Image.open(file).convert('RGB')
        img = self.transform(image)

        label = int(label.item())

        return (img, label)

In [15]:
dataset = CustomDataset(data_dir, transform)
data_loader = torch.utils.data.DataLoader(dataset, batch_size=25, shuffle=True) # loads data in batches of 25, # num_workers asynchronously load data, helps with speed

In [16]:
predictions = {c: [] for c in classes}
truth_values = {c: [] for c in classes}

for image, label in data_loader:
  with torch.no_grad():
    image.to(device)
    outputs = model(image).softmax(dim=1) # a list of an image's likelihood (probability) to belong to any of the 1000 classes

    for c_name in classes:
      predictions[c_name].extend([o.detach().numpy()[class_indexes[c_name]] for o in outputs])
      truth_values[c_name].extend([l.item() == classes.index(c_name) for l in label])

In [18]:
def statistics(predictions, truth_values, thresh = 0.5):
  pred_after_thresh = (predictions >= thresh) # we set the threshold and count anything above it as a success

  TP = np.sum(np.logical_and(pred_after_thresh == 1, truth_values == 1))
  TN = np.sum(np.logical_and(pred_after_thresh == 0, truth_values == 0))
  FP = np.sum(np.logical_and(pred_after_thresh == 1, truth_values == 0))
  FN = np.sum(np.logical_and(pred_after_thresh == 0, truth_values == 1))

  statistics = {}
  statistics["accuracy"] = (TP + TN) / (TP + TN + FP + FN)
  statistics["precision"] = TP / (TP + FP)
  statistics["recall"] = TP / (TP + FN)
  statistics["F1"] = 2 / (1 / statistics["precision"] + 1 / statistics["recall"])

  return statistics

In [19]:
results = {}
results_average = {}

for c_name in classes:
  results = statistics(np.array(predictions[c_name]), np.array(truth_values[c_name]))
  print(f"{c_name} statistics:")

  for r in results:
    print(f"  {r}: {results[r]: .3f}")
    results_average[r] = (results_average[r] if r in results_average else 0) + results[r]


print('Average:')
for r in results_average:
  print(f"  {r}: {results_average[r] / len(classes): .3f}")

Strawberry statistics:
  accuracy:  0.684
  precision:  1.000
  recall:  0.051
  F1:  0.097
Fountain statistics:
  accuracy:  0.716
  precision:  1.000
  recall:  0.147
  F1:  0.256
Lemon statistics:
  accuracy:  0.686
  precision:  1.000
  recall:  0.057
  F1:  0.108
Average:
  accuracy:  0.695
  precision:  1.000
  recall:  0.085
  F1:  0.153
