In [1]:
import torch
from torch import nn
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [None]:
# Transfer learning on a classification problem, starting from a ResNet-18
# initialised with the IMAGENET1K_V1 weights.
from torchvision.models import ResNet18_Weights, resnet18

# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

resnet18_model = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)

device


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\mazen/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth
 83%|████████████████████████████████████████████████████████████████             | 37.1M/44.7M [00:13<00:02, 3.70MB/s]

In [None]:
# Display the module tree of the loaded network.
resnet18_model

In [None]:
# Random augmentations: horizontal flip with probability 0.7, then a random
# rotation (RandomRotation(10)).
augmentations = [
    transforms.RandomHorizontalFlip(p=0.7),
    transforms.RandomRotation(10),
]

# Full pipeline: resize -> augment -> convert to tensor.
# NOTE(review): 28x28 is a very small input for ImageNet-pretrained weights and
# no mean/std normalisation is applied - confirm this is intentional.
data_transform = transforms.Compose([
    transforms.Resize(size=(28, 28)),
    *augmentations,
    transforms.ToTensor(),
])

In [None]:
# Evaluation pipeline: the same resize / tensor conversion as `data_transform`
# but WITHOUT the random flip and rotation. Random augmentation on the test and
# validation sets makes the reported metrics change from run to run, so those
# splits must be preprocessed deterministically.
# Keep the Resize size in sync with `data_transform`.
eval_transform = transforms.Compose([
    transforms.Resize(size=(28, 28)),
    transforms.ToTensor(),
])

# Training split: augmented on the fly.
train_data = datasets.ImageFolder(root="/kaggle/input/geometric-shapes-mathematics/dataset/train",
                                  transform=data_transform,  # a transform for the data
                                  target_transform=None)  # a transform for the label/target

# Held-out splits: deterministic preprocessing only.
test_data = datasets.ImageFolder(root="/kaggle/input/geometric-shapes-mathematics/dataset/test",
                                 transform=eval_transform)

validation_data = datasets.ImageFolder(root="/kaggle/input/geometric-shapes-mathematics/dataset/val",
                                       transform=eval_transform)

train_data, test_data, validation_data

In [None]:
# Inspect the first item of the training dataset (an image/label pair).
train_data[0]

In [None]:
# Get the class names as a list (indexed by integer label further down)
class_names = train_data.classes
class_names

In [None]:
# Get the class-name -> label-index mapping as a dict
class_dict = train_data.class_to_idx
class_dict

In [None]:
# Check the lengths of the train and test datasets
# (validation_data is not included in this tuple)
len(train_data), len(test_data)

In [None]:
# Peek at one raw entry of the dataset's sample list.
# NOTE(review): index 11111 assumes the training split has at least 11112
# images - TODO confirm, otherwise this raises IndexError.
train_data.samples[11111]

In [None]:
# Index on the train_data Dataset ONCE to get a single image and its label.
# Indexing twice (train_data[100][0], train_data[100][1]) loads the file and
# runs the random transforms two separate times for no benefit.
img, label = train_data[100]
print(f"Image tensor:\n {img}")
print(f"Image shape: {img.shape}")
print(f"Image datatype: {img.dtype}")
print(f"Image label: {label}")
print(f"Label datatype: {type(label)}")
print(f"class name: {class_names[label]}")

In [None]:
import random 
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
# Move the colour axis last so the tensor can be handed to plt.imshow below.
img_permute = img.permute(1, 2, 0)

# Print out the shape before and after the permute
print(f"Original shape: {img.shape} -> [color_channels, height, width]")
print(f"Image permute: {img_permute.shape} -> [height, width, color_channels]")

# Plot the image with its class name as the title
plt.figure(figsize=(12, 7))
plt.imshow(img_permute)
plt.axis("off")
plt.title(class_names[label], fontsize=14)

In [None]:
from torch.utils.data import DataLoader

BATCH_SIZE = 32

# Options shared by both loaders; only the shuffling differs between them.
loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=1)

# Training batches are reshuffled; test batches keep a fixed order.
train_dataloader = DataLoader(dataset=train_data, shuffle=True, **loader_kwargs)
test_dataloader = DataLoader(dataset=test_data, shuffle=False, **loader_kwargs)

train_dataloader, test_dataloader

In [None]:
# Number of batches produced by each loader
len(train_dataloader), len(test_dataloader)

In [None]:
# Pull one batch from the training loader.
# Note: this rebinds `img` / `label` from a single sample to a whole batch.
img, label = next(iter(train_dataloader))

# The loader was built with batch_size=BATCH_SIZE, so the leading dimension is the batch
print(f"Image shape: {img.shape} -> [batch_size, color_channels, height, width]")
print(f"Label shape: {label.shape}")

In [None]:
# Inspect the model's current final classification layer
resnet18_model.fc

In [None]:
# Tally the element count of every parameter that will receive gradients.
total_params = 0
for param in resnet18_model.parameters():
    if param.requires_grad:
        total_params += param.numel()

print("Total number of learnable parameters:", total_params)

In [None]:

# Replace the network's final layer with a small trainable MLP head.
current_head = resnet18_model.fc
if isinstance(current_head, nn.Linear):
    # First run: the head is still the original single Linear layer.
    print(current_head.out_features)  # -> out features of the stock head (1000 neurons)
    num_features = current_head.in_features  # this is getting the 512 neurons
else:
    # The cell was re-run after the head was already replaced: read the input
    # width from the first layer of the existing Sequential instead of
    # crashing on a missing `.out_features` / `.in_features` attribute.
    num_features = current_head[0].in_features

# One output per dataset class, derived from the data instead of a hard-coded 8.
num_classes = len(class_names)

# Stacked Linear layers with nothing between them collapse into a single
# linear map, so a ReLU follows each hidden layer.
resnet18_model.fc = nn.Sequential(
    nn.Linear(num_features, 512),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(512, 256),
    nn.ReLU(),
    nn.Linear(256, num_classes),  # -> one logit per class
)

# we will take the fc layer after modification and print it out
resnet18_model.fc

In [None]:
# Fetch sample 1500 with a single dataset access (indexing twice would load and
# augment the image twice).
img, label = train_data[1500]
label_name = class_names[label]

# Channels-last view for matplotlib; both historical names stay bound.
img_permute = img.permute(1, 2, 0)
image_per = img_permute

plt.imshow(image_per)
plt.title(label_name);

In [None]:
from torch.optim import lr_scheduler

# Multi-class classification on raw logits.
loss_fn = nn.CrossEntropyLoss()

# SGD with momentum over every model parameter. This is the optimizer that was
# actually in effect: an Adam instance used to be created first and was
# immediately overwritten, so it never took a step.
optimizer = torch.optim.SGD(resnet18_model.parameters(), lr=0.01, momentum=0.9)


In [None]:
from tqdm.autonotebook import tqdm

def train_step(model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               device: torch.device = 'cpu'):
    """Run one training pass over ``dataloader``, updating ``model`` in place.

    Args:
        model: Network to train; it is switched to train mode.
        dataloader: Iterable yielding ``(X, y)`` batches.
        loss_fn: Callable taking ``(logits, y)`` and returning a scalar loss.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(train_loss, train_acc)``: the loss and the accuracy, each
        averaged over the batches. ``(0.0, 0.0)`` when the loader yields no
        batches.
    """
    model.train()
    train_loss = 0.0
    train_acc = 0.0
    num_batches = 0  # counted while iterating, so len(dataloader) is not needed

    for X, y in dataloader:
        X = X.to(device)
        y = y.to(device)

        # 1. Forward pass (model outputs logits)
        y_pred = model(X)

        # 2. Calculate the loss
        loss = loss_fn(y_pred, y)
        train_loss += loss.item()

        # 3. Reset gradients, 4. backpropagate, 5. update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accuracy of this batch. argmax is taken on the logits directly:
        # softmax is monotonic, so applying it first cannot change the winner,
        # and this matches how test_step computes its labels.
        y_pred_class = y_pred.argmax(dim=1)
        train_acc += (y_pred_class == y).sum().item() / len(y_pred)
        num_batches += 1

    # An empty loader used to raise ZeroDivisionError here.
    if num_batches == 0:
        return 0.0, 0.0

    # Average loss and accuracy per batch
    return train_loss / num_batches, train_acc / num_batches


In [None]:
def test_step(model:torch.nn.Module,
              dataloader:torch.utils.data.DataLoader,
              loss_fn:torch.nn.Module,
              device : torch.device = 'cpu'):
 # Put model in eval mode
  model.eval()

  # Setup test loss and test accuracy values
  test_loss, test_acc = 0,  0

  # Turn on inference mode
  with torch.inference_mode():
    # Loop through DataLoader batches
    for batch, (X, y) in enumerate(dataloader): 
      # Send data to the target device
      X, y = X.to(device), y.to(device)

      # 1. Forward pass
      test_pred_logits = model(X)

      # 2. Calculate the loss
      loss = loss_fn(test_pred_logits, y)
      test_loss += loss.item()
    
      # Calculate the accuracy
      test_pred_labels = test_pred_logits.argmax(dim=1)
      test_acc += ((test_pred_labels == y).sum().item()/len(test_pred_labels))
        
  # Adjust metrics to get average loss and accuracy per batch
  test_loss = test_loss / len(dataloader)
  test_acc = test_acc / len(dataloader)
  return test_loss, test_acc

In [None]:
# 1. Create a train function that takes in various model parameters + optimizer + dataloaders + loss function
def train(model: torch.nn.Module,
          train_dataloader,
          test_dataloader,
          optimizer,
          loss_fn: torch.nn.Module = nn.CrossEntropyLoss(),
          epochs: int = 5, 
          device=device):
    """Train and evaluate ``model`` for ``epochs`` epochs, collecting metrics.

    Each epoch runs ``train_step`` on ``train_dataloader`` followed by
    ``test_step`` on ``test_dataloader`` and prints a one-line summary.

    Args:
        model: Network to train.
        train_dataloader: Batches passed to ``train_step``.
        test_dataloader: Batches passed to ``test_step``.
        optimizer: Optimizer passed to ``train_step``.
        loss_fn: Loss passed to both steps.
        epochs: Number of train/evaluate rounds.
        device: Device forwarded to both steps.

    Returns:
        Dict with keys ``train_loss``, ``train_acc``, ``test_loss`` and
        ``test_acc``, each holding one value per epoch.
    """
    metric_names = ("train_loss", "train_acc", "test_loss", "test_acc")
    results = {name: [] for name in metric_names}

    for epoch in tqdm(range(epochs)):
        # One optimisation pass over the training data ...
        train_loss, train_acc = train_step(model=model,
                                           dataloader=train_dataloader,
                                           loss_fn=loss_fn,
                                           optimizer=optimizer,
                                           device=device)
        # ... followed by one evaluation pass.
        test_loss, test_acc = test_step(model=model,
                                        dataloader=test_dataloader,
                                        loss_fn=loss_fn,
                                        device=device)

        print(f"Epoch: {epoch} | Train loss: {train_loss:.4f} | Train acc: {train_acc:.4f} | Test loss: {test_loss:.4f} | Test acc: {test_acc:.4f}")

        # Record this epoch's numbers in the same order as metric_names.
        epoch_values = (train_loss, train_acc, test_loss, test_acc)
        for name, value in zip(metric_names, epoch_values):
            results[name].append(value)

    return results

In [None]:


epochs = 10
resnet18_model.to(device)

from timeit import default_timer as timer
start_time = timer()

# Train the fine-tuned ResNet-18 and keep the per-epoch metrics.
# The history is stored under `resnet18_results`, not `resnet18`: that name is
# the model constructor imported from torchvision, and rebinding it to a dict
# would break any later re-run of the cell that builds the model.
resnet18_results = train(model=resnet18_model,
                         train_dataloader=train_dataloader,
                         test_dataloader=test_dataloader,
                         optimizer=optimizer,
                         loss_fn=loss_fn,
                         epochs=epochs,
                         device=device)

# End the timer and print out how long it took
end_time = timer()
print(f"Total training time: {end_time-start_time:.3f} seconds")

In [None]:
def make_predictions(model: torch.nn.Module,
                     data: list,
                     device: torch.device = device):
    """Return the prediction probabilities of ``model`` for each item in ``data``.

    The model is moved to ``device`` and put in eval mode. Every item is run
    through the model on its own, as a batch of one.

    Args:
        model: Network producing logits.
        data: Iterable of input tensors, each without a batch dimension.
        device: Device used for the forward passes.

    Returns:
        The per-item softmax outputs, moved to the CPU and stacked into a
        single tensor.
    """
    model.to(device)
    model.eval()

    collected = []
    with torch.inference_mode():
        for item in data:
            # Add the batch dimension, then move the input next to the model.
            batched = torch.unsqueeze(item, dim=0).to(device)

            # Raw logits for this single-item batch.
            logits = model(batched)

            # Drop the size-1 dimensions and normalise the logits to probabilities.
            probs = torch.softmax(logits.squeeze(), dim=0)

            # Keep results on the CPU for later processing.
            collected.append(probs.cpu())

    return torch.stack(collected)

In [None]:
import random

# Pick up to 9 random test images. Indices are sampled first and only those
# items are loaded: `list(test_data)` would read and transform every image in
# the test split just to keep nine of them.
sample_indices = random.sample(range(len(test_data)), k=min(9, len(test_data)))

test_samples = []
test_labels = []
for idx in sample_indices:
    sample, label = test_data[idx]
    test_samples.append(sample)
    test_labels.append(label)

# View the second sample's shape and label
test_samples[1].shape, test_labels[1]

In [None]:
# Run the trained model on the sampled test images
pred_probs = make_predictions(model=resnet18_model,
                              data=test_samples)
# View first two prediction probabilities
pred_probs[:2]

In [None]:
# Predicted class index for each sample: position of the largest probability along dim 1
pred_classes = pred_probs.argmax(dim=1)
pred_classes

In [None]:
# Ground-truth labels of the sampled images, to compare against pred_classes
test_labels


In [None]:
# Draw the sampled test images in a 3x3 grid, titled with predicted vs. true class.
plt.figure(figsize=(9, 9))
nrows = 3
ncols = 3
for i, sample in enumerate(test_samples):
    plt.subplot(nrows, ncols, i+1)

    # Channels-last layout for imshow
    sample = sample.permute(1, 2, 0)
    plt.imshow(sample)

    # Human-readable names for the predicted and the true class
    pred_label = class_names[pred_classes[i]]
    truth_label = class_names[test_labels[i]]

    title_text = f"Pred: {pred_label} | Truth: {truth_label}"

    # Green title when the prediction matches the truth, red otherwise
    title_color = "g" if pred_label == truth_label else "r"
    plt.title(title_text, fontsize=10, c=title_color)

    plt.axis(False);

In [None]:
# Save the model's state dictionary (parameters only, not the module object)
# to 'resnet18_pretrained.pth' in the current working directory.
torch.save(resnet18_model.state_dict(), 'resnet18_pretrained.pth')