In [None]:
!pip install split-folders

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting split-folders
  Downloading split_folders-0.5.1-py3-none-any.whl (8.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.5.1


In [None]:
# Commented out IPython magic to ensure Python compatibility.
import torch
import torch.nn as nn
import random
import glob
import os
import matplotlib.pyplot as plt
import pathlib
import numpy as np
import torchvision
from tqdm import tqdm
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from splitfolders import splitfolders
from torch.optim import Adam
from PIL import Image
from typing import Tuple, List, Dict

# Target device used throughout this file: the GPU when CUDA is available,
# otherwise the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

#Util functions and calls
def show_random_img(directory):
  """Open one randomly chosen ``.jpg`` found one folder level below ``directory``.

  Returns the image object produced by ``Image.open``. ``random.choice``
  raises ``IndexError`` when no file matches the pattern.
  """
  pattern = f'{directory}/*/*.jpg'
  candidates = glob.glob(pattern)
  picked = random.choice(candidates)
  return Image.open(picked)

def walk_through_directory(directory):
  """Print how many ``.jpg`` photos each folder below ``directory`` contains.

  Walks ``directory`` recursively and prints one line for every folder that
  holds at least one file whose name ends in ``.jpg`` (case-insensitive).

  Args:
    directory: Root folder to walk.
  """
  for dir_path, _dir_names, files in os.walk(directory):
    # Count the image files themselves rather than looking only at files[0]:
    # os.walk yields entries in arbitrary order, so a stray non-image file
    # could otherwise hide the folder or be counted as a photo.
    photo_count = sum(1 for name in files if name.lower().endswith(".jpg"))
    if photo_count:
      print(f"There are {photo_count} photos in {dir_path}")

def plot_transformed_images(directory, n,transformation):
  """Plot ``n`` random images next to their transformed versions.

  Picks ``n`` distinct ``.jpg`` files found one folder level below
  ``directory`` and, for each one, draws a two-panel figure: the original on
  the left and ``transformation(image)`` on the right.

  Args:
    directory: Folder whose sub-folders contain the ``.jpg`` files.
    n: Number of images to sample (``random.sample`` raises ``ValueError``
      when fewer than ``n`` files are available).
    transformation: Callable applied to the opened image; its result must
      support ``.permute(1,2,0)``.
  """
  jpg_paths = glob.glob(f'{directory}/*/*.jpg')
  for img_path in random.sample(jpg_paths, k = n):
    with Image.open(img_path) as original:
      _, (left_ax, right_ax) = plt.subplots(1,2)

      left_ax.imshow(original)
      left_ax.set_title("Original image")
      left_ax.axis("off")

      # The result is reordered with permute(1,2,0) before plotting
      # (presumably channels-first -> channels-last for matplotlib).
      right_ax.imshow(transformation(original).permute(1,2,0))
      right_ax.set_title("Transformed image")
      right_ax.axis("off")

def split_folder(input_directory, output_directory, train_ratio, test_ratio, validation_ratio):
  """Split a class-per-folder image directory into train/val/test folders.

  Thin wrapper around ``splitfolders.ratio``. Files are MOVED (``move = True``),
  not copied, so ``input_directory`` is emptied by the call.

  Args:
    input_directory: Folder containing one sub-folder per class.
    output_directory: Destination folder for the split.
    train_ratio: Fraction of files assigned to the training split.
    test_ratio: Fraction of files assigned to the test split.
    validation_ratio: Fraction of files assigned to the validation split.
  """
  # splitfolders expects the tuple ordered as (train, val, test); passing the
  # test ratio second would silently swap the test and validation splits.
  splitfolders.ratio(input_directory,
                     output_directory,
                     ratio=(train_ratio, validation_ratio, test_ratio),
                     move = True)

def test_model_single(model,tr_loader,input_dimension):
  """Run the first image of one batch through ``model`` and print the result.

  Takes the first batch from ``tr_loader``, keeps its first image (with a
  batch axis of size one) and prints the raw output, the softmax over
  ``dim=1``, the arg-max label and the true label.

  Args:
    model: Module to evaluate; it is switched to eval mode.
    tr_loader: Iterable yielding ``(image_batch, label_batch)`` pairs.
    input_dimension: Accepted for interface compatibility; not used.
  """
  images, labels = next(iter(tr_loader))

  single_image = images[0].unsqueeze(dim=0)
  single_label = labels[0]
  print(f"Single image shape: {single_image.shape}\n")

  model.eval()
  with torch.inference_mode():
    logits = model(single_image.to(device))

  print(f"Output logits:\n{logits}\n")
  probabilities = torch.softmax(logits, dim=1)
  print(f"Output prediction probabilities:\n{probabilities}\n")
  predicted_label = torch.argmax(torch.softmax(logits, dim=1), dim=1)
  print(f"Output prediction label:\n{predicted_label}\n")
  print(f"Actual label:\n{single_label}")

def prediction(model, image_path,class_names, image_size=(64, 64)):
  """Classify one image file with ``model`` and plot it with the verdict.

  The image is read from disk, scaled to ``[0, 1]``, resized to ``image_size``
  and passed through ``model`` in eval mode. The resized image is then shown
  with the predicted class name and its probability as the title.

  Args:
    model: Trained module; it is moved to the module-level ``device``.
    image_path: Path of the image file to classify.
    class_names: Sequence mapping a class index to its display name.
    image_size: ``(height, width)`` the image is resized to before inference.
      Defaults to the 64x64 size used by the transforms in this file.
  """
  # float32 is the default parameter dtype of nn.Module layers; feeding a
  # float64 image into such a model raises a dtype-mismatch error.
  target_image = torchvision.io.read_image(str(image_path)).type(torch.float32)
  target_image = target_image/255
  # The image is already a tensor, so only Resize is applied (ToTensor accepts
  # PIL images / ndarrays, not tensors).
  target_image = transforms.Resize(image_size)(target_image)

  model.to(device)

  model.eval()
  with torch.inference_mode():
    target_image = target_image.unsqueeze(dim=0)
    target_image_pred = model(target_image.to(device))

  if target_image_pred.shape[1] == 1:
    # Single-output (binary) model. NOTE(review): the value is treated as the
    # probability of class 1 and thresholded at 0.5, mirroring `accuracy` in
    # this file; confirm if a model emitting raw logits is ever passed in.
    positive_prob = target_image_pred.squeeze().item()
    label_index = int(positive_prob > 0.5)
    label_prob = positive_prob if label_index == 1 else 1 - positive_prob
  else:
    target_image_pred_probs = torch.softmax(target_image_pred, dim=1)
    label_index = int(torch.argmax(target_image_pred_probs, dim=1).item())
    label_prob = target_image_pred_probs.max().item()

  # Drop only the batch axis, then reorder to height x width x channels.
  plt.imshow(target_image.squeeze(dim=0).permute(1, 2, 0))

  title = f"Pred: {class_names[label_index]} | Prob: {label_prob:.3f}"

  plt.title(title)
  plt.axis(False)

class CustomDataSet(Dataset):
  """Image dataset over a ``<dir>/<class_name>/*.jpg`` folder layout.

  Every sub-folder of ``dir`` is a class; class indices follow the sorted
  folder names. ``__getitem__`` returns ``(tensor, class_index)`` with the
  tensor already moved to the module-level ``device``.
  """

  def __init__(self, dir, transformer=None):
    """Collect the image paths and build the class mapping.

    Args:
      dir: Root folder containing one sub-folder per class.
      transformer: Optional callable applied to each loaded PIL image.

    Raises:
      FileNotFoundError: If ``dir`` contains no sub-folders.
    """
    # Backslashes are normalised so the class folder can always be read with
    # split("/"); sorting makes the index -> file mapping reproducible.
    self.paths = sorted(path.replace("\\", "/") for path in glob.glob(f'{dir}/*/*.jpg'))
    self.transform = transformer
    self.classes, self.class_to_idx = self.find_class(dir)

  def __len__(self):
    """Return the number of image paths currently held."""
    return len(self.paths)

  def __loadimage__(self,index):
    """Load the image at ``index`` as an RGB PIL image."""
    # convert("RGB") forces the pixel data to be read (so the file can be
    # closed right away) and guarantees three channels for every image.
    with Image.open(self.paths[index]) as img:
      return img.convert("RGB")

  def __getitem__(self, index):
    """Return ``(tensor_on_device, class_index)`` for the image at ``index``."""
    img = self.__loadimage__(index)
    class_name = self.paths[index].split("/")[-2]
    class_index = self.class_to_idx[class_name]
    if self.transform is not None:
      tensor = self.transform(img)
    else:
      # torch.tensor cannot consume a PIL image directly; go through numpy.
      # The result keeps PIL's height x width x channels uint8 layout.
      tensor = torch.tensor(np.array(img))
    return tensor.to(device), class_index

  def __limit__(self,number):
    """Keep at most ``number`` randomly chosen images per class.

    Images are drawn without replacement; a class holding fewer than
    ``number`` images keeps all of them. ``self.paths`` is replaced in place.
    """
    paths_by_class = {}
    for path in self.paths:
      paths_by_class.setdefault(path.split("/")[-2], []).append(path)

    limited = []
    for class_name in self.classes:
      available = paths_by_class.get(class_name, [])
      limited.extend(random.sample(available, min(number, len(available))))
    self.paths = limited
    print("adjustment complete")

  def find_class(self,directory) -> Tuple[List[str], Dict[str, int]]:
    """Return the sorted class folder names and a name -> index mapping.

    Raises:
      FileNotFoundError: If ``directory`` has no sub-folders.
    """
    classes = sorted(entry.name for entry in os.scandir(directory) if entry.is_dir())
    if not classes:
      raise FileNotFoundError(f"Couldn't find any classes in {directory}.")
    class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)}
    return classes, class_to_idx

#Define your network here
class CNN(nn.Module):
  """Two convolutional blocks followed by a single-output sigmoid classifier.

  Each block keeps the spatial size in its convolutions (3x3, stride 1,
  padding 1) and halves it with ``MaxPool2d(2)``, so an ``image_size`` input
  reaches the classifier at ``image_size // 4`` per side.
  """

  def __init__(self,input_shape,hidden_units, image_size=64):
    """Build the network.

    Args:
      input_shape: Number of channels of the input images.
      hidden_units: Number of channels produced by every convolution.
      image_size: Side length of the (square) input images. Defaults to 64,
        matching the previously hard-coded 16x16 classifier input.
    """
    super().__init__()
    self.conv1 = self.conv_block(input_shape, hidden_units,3,1,1)
    self.conv2 = self.conv_block(hidden_units,hidden_units,3,1,1)
    self.classifier = self.classification(hidden_units, image_size)

  def forward(self,x):
    """Return one sigmoid output per input image."""
    x = self.conv1(x)
    x = self.conv2(x)
    x = self.classifier(x)
    return x

  def conv_block(self,ni,no,kernel_size,stride,padding):
    """Return Conv-ReLU-Conv-ReLU-BatchNorm-MaxPool mapping ``ni`` to ``no`` channels."""
    return nn.Sequential(
      nn.Conv2d(ni, no, kernel_size, stride,padding),
      nn.ReLU(),
      nn.Conv2d(no, no, kernel_size, stride,padding),
      nn.ReLU(),
      nn.BatchNorm2d(no),
      nn.MaxPool2d(2)
    )

  def classification(self,hidden_units, image_size=64):
    """Return the Flatten-Linear-Sigmoid head sized for ``image_size`` inputs."""
    # Two MaxPool2d(2) layers shrink each side to image_size // 4.
    feature_side = image_size // 4
    return nn.Sequential(
        nn.Flatten(),
        nn.Linear(hidden_units*feature_side*feature_side,1),
        nn.Sigmoid()
    )

#training related functions
def train_batch(x,y,model,optimizer,loss_function):
  """Run one optimisation step on a single batch and return its loss.

  Args:
    x: Batch of inputs for ``model``.
    y: Batch of labels; reshaped with ``unsqueeze(-1)`` and cast to float so
      it lines up with a one-column model output.
    model: Module being trained; switched to train mode.
    optimizer: Optimizer holding the parameters of ``model``.
    loss_function: Callable ``(prediction, target) -> loss tensor``.

  Returns:
    The batch loss as a Python float.
  """
  model.train()
  # Move the inputs as well as the labels, so loaders that yield CPU tensors
  # also work when the model lives on the GPU (no-op if already on `device`).
  x = x.to(device)
  y = y.unsqueeze(-1).float().to(device)
  # Clear gradients before the backward pass so nothing left over from an
  # earlier computation leaks into this step.
  optimizer.zero_grad()
  output = model(x)
  loss_value = loss_function(output,y)
  loss_value.backward()
  optimizer.step()
  return loss_value.item()

def accuracy(x,y,model):
  """Return, per sample, whether the thresholded model output matches ``y``.

  The forward pass runs in eval mode with autograd disabled, and the model's
  previous train/eval mode is restored afterwards.

  Args:
    x: Batch of inputs for ``model``.
    y: Batch of 0/1 labels, one per sample.
    model: Module whose output is interpreted as a probability and
      thresholded at 0.5.

  Returns:
    A flat list of booleans, one per sample.
  """
  was_training = model.training
  model.eval()
  try:
    with torch.no_grad():
      output = model(x)
  finally:
    model.train(was_training)
  # Flatten both sides: comparing a (batch, 1) output with (batch,) labels
  # would broadcast to a (batch, batch) matrix and corrupt the accuracy.
  predicted = (output > 0.5).reshape(-1)
  labels = y.to(output.device).reshape(-1)
  is_correct = predicted == labels
  return is_correct.cpu().numpy().tolist()

def training_process(trn_dl,te_dl,model,optimizer,loss_fn,epoch_range):
  """Train ``model`` for ``epoch_range`` epochs and track loss and accuracy.

  Each epoch trains on every batch of ``trn_dl`` (via ``train_batch``),
  measures accuracy on the training and validation batches (via
  ``accuracy``) and writes ``Epoch_<n>_model.pth`` whenever the mean training
  loss reaches a new minimum.

  Args:
    trn_dl: Iterable of ``(x, y)`` training batches.
    te_dl: Iterable of ``(x, y)`` validation batches.
    model: Module to train.
    optimizer: Optimizer holding the parameters of ``model``.
    loss_fn: Loss callable forwarded to ``train_batch``.
    epoch_range: Number of epochs to run.

  Returns:
    ``(train_losses, train_accuracies, val_accuracies)``: three lists with
    one entry per epoch.
  """
  train_losses, train_accuracies = [], []
  val_accuracies = []
  min_training_loss = float("inf")
  for epoch in range(epoch_range):
    train_epoch_losses, train_epoch_correct = [], []
    val_epoch_correct = []

    for x, y in tqdm(trn_dl,desc="TRAINING LOSS"):
      batch_loss = train_batch(x, y, model, optimizer, loss_fn)
      train_epoch_losses.append(batch_loss)
      train_epoch_correct.extend(accuracy(x, y, model))
    train_epoch_loss = np.mean(train_epoch_losses)
    train_epoch_accuracy = np.mean(train_epoch_correct)

    # Validation must not update BatchNorm statistics or build autograd
    # graphs; train_batch switches back to train mode on the next epoch.
    model.eval()
    with torch.no_grad():
      for x, y in tqdm(te_dl,desc="VALIDATION"):
        val_epoch_correct.extend(accuracy(x, y, model))
    val_epoch_accuracy = np.mean(val_epoch_correct)

    train_losses.append(train_epoch_loss)
    train_accuracies.append(train_epoch_accuracy)
    val_accuracies.append(val_epoch_accuracy)

    print("Epoch: {}/{} | Average Loss: {:.4f} | Accuracy: {}".format(
      epoch+1,
      epoch_range,
      train_epoch_loss,
      train_epoch_accuracy
    ))

    if train_epoch_loss < min_training_loss:
      # Remember the new best loss so that only improving epochs are saved.
      min_training_loss = train_epoch_loss
      torch.save(model.state_dict(),f'Epoch_{epoch+1}_model.pth')
      print("New Model Saved!")

    print("---------------------------------------------------------")
  return train_losses, train_accuracies, val_accuracies

# Script entry point: hyper-parameters, data loading, training and plotting
if __name__ == '__main__':
    # Hyper-parameters and dataset locations.
    batch_size = 16
    epoch = 20
    # Not passed to the DataLoaders: CustomDataSet moves tensors to `device`
    # inside __getitem__, so loading stays in the main process.
    num_of_workers = os.cpu_count()
    test_directory = "/content/drive/MyDrive/archive/test_set/test_set"
    train_directory = "/content/drive/MyDrive/archive/training_set/training_set"

    # Shared by both pipelines so the model sees identically scaled inputs
    # during training and evaluation.
    normalize = transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225))

    # Training pipeline: resize, random flips for augmentation, normalise.
    tr_transform = transforms.Compose([
        transforms.Resize((64,64)),
        transforms.RandomHorizontalFlip(0.2),
        transforms.RandomVerticalFlip(0.2),
        transforms.ToTensor(),
        normalize
    ])

    # Evaluation pipeline: no augmentation, same resize and normalisation.
    te_transform = transforms.Compose([
        transforms.Resize((64,64)),
        transforms.ToTensor(),
        normalize
    ])

    print("Loading Dataset: Training")
    tr_set = CustomDataSet(train_directory, tr_transform)

    print("Loading Dataset: Testing")
    te_set = CustomDataSet(test_directory, te_transform)

    tr_loader = DataLoader(tr_set, batch_size = batch_size, shuffle = True, drop_last = True)
    # Evaluate on every test image, in a fixed order.
    te_loader = DataLoader(te_set, batch_size = batch_size, shuffle = False, drop_last = False)
    print("Dataloaders established")

    model = CNN(3,10).to(device)
    # BCELoss pairs with the Sigmoid output layer of CNN.
    loss = nn.BCELoss()
    optimizer = Adam(model.parameters(),lr = 1e-6)

    print("Hello World!")
    print("Beginning Training")
    training_loss, training_acc, validation_acc = training_process(tr_loader, te_loader, model, optimizer, loss, epoch)
    plt.plot(training_acc,label = "Training accuracy")
    plt.plot(validation_acc,label = "Validation accuracy")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.legend()
    plt.show()

Loading Dataset: Training
Loading Dataset: Testing
