In [None]:
import torch # Main PyTorch Library
from torch import nn # Used for creating the layers and loss function
from torch.optim import Adam # Adam Optimizer
import torchvision.transforms as transforms # Transform function used to modify and preprocess all the images
from torch.utils.data import Dataset, DataLoader # Dataset class and DataLoader for creating the objects
from sklearn.preprocessing import LabelEncoder # Label Encoder to encode the classes from strings to numbers
import matplotlib.pyplot as plt # Used for visualizing the images and plotting the training progress
from PIL import Image # Used to read the images from the directory
import pandas as pd # Used to read/create dataframes (csv) and process tabular data
import numpy as np # preprocessing and numerical/mathematical operations
import os # Used to read the images path from the directory

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
# (On a Mac, replace "cuda" with "mps" by hand.)
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"
print("Device available: ", device)

In [None]:
import os

# Dataset root, laid out as <root>/<split>/<class>/<image file>.
DATA_ROOT = "DATA/afhq"

image_path = []
labels = []

# Walk <root>/<split>/<class>/ and record one (path, class name) pair per image.
# - Entries are sorted so the frame has the same row order on every run.
# - Anything that is not a directory at the split/class level, and any hidden or
#   non-file entry at the image level (e.g. a stray .DS_Store), is skipped instead
#   of crashing os.listdir or ending up in the dataset.
for split in sorted(os.listdir(DATA_ROOT)):
  split_dir = os.path.join(DATA_ROOT, split)
  if not os.path.isdir(split_dir):
    continue
  for label in sorted(os.listdir(split_dir)):
    label_dir = os.path.join(split_dir, label)
    if not os.path.isdir(label_dir):
      continue
    for image in sorted(os.listdir(label_dir)):
      file_path = os.path.join(label_dir, image)
      if image.startswith(".") or not os.path.isfile(file_path):
        continue
      image_path.append(file_path)
      labels.append(label)

# One row per image: where it lives and which class folder it came from.
data = pd.DataFrame({"image_path": image_path, "labels": labels})
data.head()


In [None]:
# Fixed seed: for a given row order of `data`, the same rows land in the same split on every run.
SEED = 42

# 70% of the rows go to training; the remaining 30% is halved into validation and test.
train = data.sample(frac=0.7, random_state=SEED)
holdout = data.drop(train.index)
val = holdout.sample(frac=0.5, random_state=SEED)
test = holdout.drop(val.index)

# Every row must end up in exactly one of the three splits.
assert len(train) + len(val) + len(test) == len(data), "splits do not partition the data"

print(train.shape,"\n",test.shape,"\n",val.shape)

In [None]:
# Learn the class-name -> integer mapping from every label in the dataset.
label_encoder = LabelEncoder().fit(data["labels"])

# Preprocessing applied to each image, in order:
# resize to 128x128, convert to a tensor, cast to float.
_preprocessing_steps = [
  transforms.Resize((128,128)),
  transforms.ToTensor(),
  transforms.ConvertImageDtype(torch.float),
]
transform = transforms.Compose(_preprocessing_steps)

In [None]:
class CustomImageDataset(Dataset):
  """Dataset backed by a dataframe of image paths and class names.

  The image path is read from the first column of ``df`` and the class name from
  its ``"labels"`` column. Labels are encoded once, at construction time, with the
  notebook-level ``label_encoder`` and stored as a tensor on ``device``.
  """

  def __init__(self, df, transform=None):
    """Keep the frame and the optional transform, and pre-encode all labels."""
    self.df = df
    self.transform = transform
    encoded_labels = label_encoder.transform(df["labels"])
    self.labels = torch.tensor(encoded_labels).to(device)

  def __len__(self):
    """Number of rows in the backing dataframe."""
    return len(self.df)

  def __getitem__(self, idx):
    """Return ``(image, label)`` for the row at position ``idx``.

    The image is opened as RGB. When a transform was given, its result is moved to
    ``device``; otherwise the PIL image is returned unchanged.
    """
    path = self.df.iloc[idx, 0]
    target = self.labels[idx]
    picture = Image.open(path).convert('RGB')

    if not self.transform:
      return picture, target
    return self.transform(picture).to(device), target

In [None]:
# Wrap each split in a dataset; all three share the same preprocessing pipeline.
train_dataset, test_dataset, val_dataset = (
  CustomImageDataset(df=split_df, transform=transform)
  for split_df in (train, test, val)
)

In [None]:
# Number of training samples (displayed as the cell output).
len(train_dataset)

In [None]:
n_rows = 3
n_cols = 3

# Draw all preview images in a single sample so no picture can appear twice in the
# grid; cap the count at the dataset size so a tiny dataset does not raise.
n_preview = min(n_rows * n_cols, len(data))
preview_paths = data["image_path"].sample(n=n_preview).tolist()

# squeeze=False keeps axarr two-dimensional even for a 1-row or 1-column grid.
f, axarr = plt.subplots(n_rows, n_cols, squeeze=False)
for position, ax in enumerate(axarr.flat):
  ax.axis('off')
  if position < len(preview_paths):
    ax.imshow(Image.open(preview_paths[position]).convert("RGB"))

plt.show()

In [None]:
# Training hyperparameters, gathered in one place so they are easy to tune.
EPOCHS = 10       # passes over the training set
BATCH_SIZE = 16   # samples per batch in every DataLoader
LR = 1e-4         # learning rate handed to the optimiser

In [None]:
# Only the training data is reshuffled each epoch. Validation and test batches are read
# in a fixed order: shuffling them adds nothing and makes evaluation runs harder to compare.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Report something useful about the loaders rather than an object repr.
print(f"Batches per epoch - train: {len(train_loader)}, val: {len(val_loader)}, test: {len(test_loader)}")

In [None]:
class Net(nn.Module):
  """Small convolutional classifier for 3x128x128 images.

  Three conv -> max-pool -> ReLU stages (32, 64 and 128 channels) each halve the
  spatial size, then the flattened feature maps pass through a 128-unit hidden
  dense layer with ReLU and a final linear layer that produces the class logits.

  Args:
    num_classes: Number of output logits. When omitted, it falls back to the number
      of distinct values in the notebook-level ``data['labels']`` column, which is
      what the network used before this parameter existed.
  """

  def __init__(self, num_classes=None):
    super().__init__()

    if num_classes is None:
      # Backward-compatible default: infer the class count from the dataset frame.
      num_classes = len(data['labels'].unique())

    self.conv1 = nn.Conv2d(3, 32, kernel_size = 3, padding = 1) # First Convolution layer
    self.conv2 = nn.Conv2d(32, 64, kernel_size = 3, padding = 1) # Second Convolution layer
    self.conv3 = nn.Conv2d(64, 128, kernel_size = 3, padding = 1) # Third Convolution layer
    self.pooling = nn.MaxPool2d(2,2) # The pooling layer, reused after each conv2d.
    self.relu = nn.ReLU() # ReLU Activation function

    self.flatten = nn.Flatten() # Flatten the feature maps that come from the final convolution layer.
    # 128 channels x 16 x 16: a 128x128 input halved by each of the three pooling steps.
    self.linear = nn.Linear((128 * 16 * 16), 128) # Hidden Dense (Linear) layer
    self.output = nn.Linear(128, num_classes) # Output Linear Layer


  def forward(self, x):
    """Map a batch of images ``(N, 3, 128, 128)`` to class logits ``(N, num_classes)``."""
    x = self.conv1(x) # -> Outputs: (32, 128, 128)
    x = self.pooling(x)# -> Outputs: (32, 64, 64)
    x = self.relu(x)
    x = self.conv2(x) # -> Outputs: (64, 64, 64)
    x = self.pooling(x) # -> Outputs: (64, 32, 32)
    x = self.relu(x)
    x = self.conv3(x) # -> Outputs: (128, 32, 32)
    x = self.pooling(x) # -> Outputs: (128, 16, 16)
    x = self.relu(x)
    x = self.flatten(x)
    x = self.linear(x)
    # Without a non-linearity here the two dense layers collapse into a single linear map.
    x = self.relu(x)
    x = self.output(x)

    return x

# Build the network, then place its parameters on the selected device.
model = Net()
model = model.to(device)

In [None]:
from torchsummary import summary

# Layer-by-layer overview of the model for one 3-channel 128x128 input.
input_shape = (3, 128, 128)
summary(model, input_size=input_shape)

In [None]:
# Adam updates every model parameter at learning rate LR;
# cross-entropy scores the model's logits against the integer class ids.
optimiser = Adam(model.parameters(), lr=LR)
criterion = nn.CrossEntropyLoss()

In [None]:
# Per-epoch history, consumed by the plotting cell.
total_loss_train_plot = []
total_loss_validation_plot = []
total_acc_train_plot = []
total_acc_validation_plot = []


for epoch in range(EPOCHS):
  total_acc_train = 0
  total_loss_train = 0
  total_loss_val = 0
  total_acc_val = 0

  # ---- training pass ----
  model.train()
  for inputs, batch_labels in train_loader:
    # No-ops when the dataset already returns tensors on `device` with integer labels.
    inputs = inputs.to(device)
    batch_labels = batch_labels.to(device=device, dtype=torch.long)

    optimiser.zero_grad()
    outputs = model(inputs)
    train_loss = criterion(outputs, batch_labels)
    train_loss.backward()
    optimiser.step()

    total_loss_train += train_loss.item()
    total_acc_train += (torch.argmax(outputs, dim=1) == batch_labels).sum().item()

  # ---- validation pass (no gradients, no weight updates) ----
  model.eval()
  with torch.no_grad():
    for inputs, batch_labels in val_loader:
      inputs = inputs.to(device)
      batch_labels = batch_labels.to(device=device, dtype=torch.long)

      outputs = model(inputs)
      val_loss = criterion(outputs, batch_labels)

      total_loss_val += val_loss.item()
      total_acc_val += (torch.argmax(outputs, dim=1) == batch_labels).sum().item()

  # The criterion returns the mean loss of a batch, so the epoch loss is the sum divided by
  # the number of batches. The same value is plotted and printed (previously the sum was
  # divided by 1000 for the plot and by 100 for the printout).
  epoch_loss_train = round(total_loss_train / len(train_loader), 4)
  epoch_loss_val = round(total_loss_val / len(val_loader), 4)
  # Accuracy: correctly classified samples over the split size, as a percentage.
  epoch_acc_train = round(total_acc_train / len(train_dataset) * 100, 4)
  epoch_acc_val = round(total_acc_val / len(val_dataset) * 100, 4)

  total_loss_train_plot.append(epoch_loss_train)
  total_loss_validation_plot.append(epoch_loss_val)
  total_acc_train_plot.append(epoch_acc_train)
  total_acc_validation_plot.append(epoch_acc_val)
  print(f'''Epoch {epoch+1}/{EPOCHS}, Train Loss: {epoch_loss_train} Train Accuracy {epoch_acc_train}
              Validation Loss: {epoch_loss_val} Validation Accuracy: {epoch_acc_val}''')
  print("="*50)

In [None]:
# Final evaluation on the held-out test split.
total_loss_test = 0
total_acc_test = 0

model.eval()
with torch.no_grad():
  for inputs, batch_labels in test_loader:
    # No-ops when the dataset already returns tensors on `device` with integer labels.
    inputs = inputs.to(device)
    batch_labels = batch_labels.to(device=device, dtype=torch.long)

    predictions = model(inputs)

    total_acc_test += (torch.argmax(predictions, dim=1) == batch_labels).sum().item()
    total_loss_test += criterion(predictions, batch_labels).item()

# Loss is averaged over batches (the criterion already averages within a batch) instead
# of being divided by a fixed 1000; accuracy is the percentage of correct samples.
print(f"Accuracy Score is: {round((total_acc_test/len(test_dataset)) * 100, 4)} and Loss is {round(total_loss_test/len(test_loader), 4)}")

In [None]:
# Two side-by-side panels: loss on the left, accuracy on the right,
# each showing the training and validation curve per epoch.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))

panels = [
  (axs[0], 'Loss', total_loss_train_plot, total_loss_validation_plot),
  (axs[1], 'Accuracy', total_acc_train_plot, total_acc_validation_plot),
]

for panel_ax, metric, train_curve, validation_curve in panels:
  panel_ax.plot(train_curve, label=f'Training {metric}')
  panel_ax.plot(validation_curve, label=f'Validation {metric}')
  panel_ax.set_title(f'Training and Validation {metric} over Epochs')
  panel_ax.set_xlabel('Epochs')
  panel_ax.set_ylabel(metric)
  panel_ax.legend()

plt.tight_layout()

plt.show()

In [None]:
# 1- read image
# 2- Transform using transform object
# 3- predict through the model
# 4- inverse transform by Label encoder

def predict_image(image_path):
  """Predict the class name of a single image file with the notebook-level ``model``.

  The image is opened as RGB, run through ``transform``, moved to ``device`` and
  passed to the model as a batch of one.

  Args:
    image_path: Path of the image file to classify.

  Returns:
    The result of ``label_encoder.inverse_transform`` for the single predicted
    class id, i.e. a one-element array holding the class name.
  """
  image = Image.open(image_path).convert('RGB')
  batch = transform(image).to(device).unsqueeze(0)  # add the batch dimension

  # Inference only: skip building the autograd graph.
  with torch.no_grad():
    logits = model(batch)

  class_id = torch.argmax(logits, dim=1).item()
  return label_encoder.inverse_transform([class_id])

# Use one path for both steps so the picture shown is the picture that gets classified
# (previously a different file was displayed than the one passed to predict_image).
sample_path = "./DATA/afhq/train/cat/flickr_cat_000004.jpg"

## Visualize the image
image = Image.open(sample_path)
plt.imshow(image)
plt.show()


## Predict
print()
print("Prediction: \n")
predict_image(sample_path)