# Setup: Importing Libraries
In this cell, we import the libraries needed for model training and data handling.
We use `torch` for building and training the neural network, `matplotlib` for plotting, and `datasets` for loading the data; `sklearn` provides `train_test_split` for splitting datasets.

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import torch
from sklearn.model_selection import train_test_split
from datasets import load_dataset


# Report up front which hardware the training run will use.
if not torch.cuda.is_available():
    print("No GPU available. Training will run on CPU.")
else:
    print(f"GPU: {torch.cuda.get_device_name(0)} is available.")

# Load Dataset
This section loads the dataset, which consists of images of cats and dogs. The dataset is used for training and testing the neural network.

In [None]:
# Load the "microsoft/cats_vs_dogs" dataset; later cells read its 'train' split.
ds = load_dataset("microsoft/cats_vs_dogs")


In [None]:
# Plot images from the dataset

import matplotlib.pyplot as plt
import numpy as np

def plot_images(dataset, num_images=4):
    """Plot the first ``num_images`` samples of ``dataset['train']`` in one row.

    Args:
        dataset: Object whose ``'train'`` entry can be indexed by position and
            yields items with an ``'image'`` (convertible by ``np.array``) and
            a ``'labels'`` entry.
        num_images: Number of leading samples to display (at least 1).
    """
    # squeeze=False always returns a 2-D array of axes, so num_images=1 works
    # instead of failing when a single, non-indexable Axes is returned.
    fig, axs = plt.subplots(1, num_images, figsize=(15, 5), squeeze=False)
    for i, ax in enumerate(axs[0]):
        sample = dataset['train'][i]  # look each sample up only once
        ax.imshow(np.array(sample['image']))
        ax.axis('off')
        ax.set_title(f"Label: {sample['labels']}")
    plt.show()


# Preview the first four raw training images (num_images defaults to 4).
plot_images(ds)


# Prepare the Dataset
In this step, we prepare the dataset by applying transformations such as resizing the images, converting them to grayscale, and normalizing the pixel values. This ensures that the input data is standardized for the model.

In [None]:
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from PIL import Image

# Define the preprocessing pipeline, applied in order: convert the input to a
# PIL image, resize to 28x28, reduce to a single grayscale channel, convert to
# a tensor, then normalize with mean 0.5 and std 1.0 (i.e. subtract 0.5).
transform = transforms.Compose([
    transforms.ToPILImage(),  # Convert the incoming image to PIL so the steps below can run on it
    transforms.Resize((28, 28)),  # Resize to 28x28
    transforms.Grayscale(num_output_channels=1),  # Convert to grayscale
    transforms.ToTensor(),  # Convert image to Tensor
    transforms.Normalize((0.5,), (1.0,))  # One mean/std pair for the single channel
])

# Apply transformation to the dataset
def transform_dataset(example):
    """Preprocess one dataset example in place.

    The value stored under ``'image'`` is run through the module-level
    ``transform`` pipeline and written back; the same ``example`` mapping is
    returned so it can be used as a ``map`` callback.
    """
    example['image'] = transform(example['image'])
    return example

# Return torch tensors from the dataset and preprocess every example.
ds = ds.with_format("torch")
ds = ds.map(transform_dataset)

# Hold out 10% of the training split for testing. Both the shuffle and the
# split are seeded so the train/test partition is the same on every run.
ds['train'] = ds['train'].shuffle(seed=42)
ds = ds['train'].train_test_split(test_size=0.1, seed=42)

# Each loader serves (image, label) pairs in batches of 32. Only the training
# loader reshuffles, so batches differ between epochs while evaluation order
# stays fixed.
train_loader = torch.utils.data.DataLoader(
    list(zip(ds['train']['image'], ds['train']['labels'])),
    batch_size=32,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    list(zip(ds['test']['image'], ds['test']['labels'])),
    batch_size=32,
)

In [None]:
# plot the images after preprocessing

import matplotlib.pyplot as plt
def plot_transformed_images(dataset, num_images=4):
    """Plot the first ``num_images`` preprocessed samples of ``dataset['train']``.

    Args:
        dataset: Object whose ``'train'`` entry can be indexed by position and
            yields items with an ``'image'`` tensor (channel dimension first,
            as implied by the ``permute(1, 2, 0)`` below) and a ``'labels'``
            entry.
        num_images: Number of leading samples to display (at least 1).
    """
    # squeeze=False always returns a 2-D array of axes, so num_images=1 works
    # instead of failing when a single, non-indexable Axes is returned.
    fig, axs = plt.subplots(1, num_images, figsize=(15, 5), squeeze=False)
    for i, ax in enumerate(axs[0]):
        sample = dataset['train'][i]  # look each sample up only once
        image = sample['image'].permute(1, 2, 0).numpy()  # Move channel dimension to the end
        ax.imshow(image, cmap='gray')  # Display grayscale image
        ax.axis('off')
        ax.set_title(f"Label: {sample['labels']}")
    plt.show()


# Preview the first four preprocessed training images (num_images defaults to 4).
plot_transformed_images(ds)


# Define the LeNet5 Model
This section defines the LeNet5 model architecture. It includes convolutional and pooling layers for feature extraction, followed by fully connected layers for classification.

In [None]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

In [11]:
class LeNet5(nn.Module):
    """LeNet-5 style CNN for single-channel 28x28 images.

    Two convolution / ReLU / average-pooling stages produce a 16x5x5 feature
    map, which three fully connected layers turn into ``num_classes`` raw
    logits (no softmax is applied inside the model).

    Args:
        num_classes: Width of the output layer. Defaults to 10, the value that
            used to be hard-coded, so existing checkpoints keep loading; pass
            a smaller value (e.g. 2 for a two-class task) to size the head to
            the problem.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.feature = nn.Sequential(
            #1
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2),   # 28*28->32*32-->28*28
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=2, stride=2),  # 14*14

            #2
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1),  # 10*10
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=2, stride=2),  # 5*5

        )
        # Not used by forward(): the classifier starts with its own Flatten.
        # Kept so the attribute remains available on existing instances' API.
        self.flattener = nn.Flatten()
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=16*5*5, out_features=120),
            nn.ReLU(),
            nn.Linear(in_features=120, out_features=84),
            nn.ReLU(),
            nn.Linear(in_features=84, out_features=num_classes),
        )

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for ``x`` of shape (batch, 1, 28, 28)."""
        return self.classifier(self.feature(x))


# Build the model directly on the selected device.
network = LeNet5().to(device)

# Adam over every model parameter with a learning rate of 1e-3.
optimizer = optim.Adam(network.parameters(), lr=1e-3)

# Loss applied to the model's raw logits and the integer class labels.
loss_fn = nn.CrossEntropyLoss()


# Training the LeNet5 Model
In this section, we implement the training loop for the LeNet5 model. The model is trained over multiple epochs where:

1. **Forward Pass**: The input images are passed through the model to make predictions.
2. **Loss Calculation**: The predicted outputs are compared with the true labels using a loss function (e.g., CrossEntropy Loss).
3. **Backward Pass**: The gradients are computed through backpropagation to adjust the weights of the model.
4. **Optimization Step**: An optimizer (e.g., SGD or Adam) updates the weights of the model based on the computed gradients to minimize the loss.

This process is repeated for every batch of the training dataset, over several epochs, to improve the model's performance.

In [None]:

def train(epoch):
    """Train ``network`` for one pass over ``train_loader``, then report metrics.

    Relies on the module-level ``network``, ``optimizer``, ``loss_fn``,
    ``device`` and ``train_loader``. After the optimization pass, the training
    set is iterated a second time in evaluation mode to print the average
    per-sample loss and the accuracy.

    Args:
        epoch: Epoch number; used only in the progress messages.
    """
    network.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        logits = network(data)
        loss = loss_fn(logits, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(epoch, batch_idx * len(data), len(train_loader.dataset), 100. * batch_idx / len(train_loader), loss.item()))

    network.eval()
    train_loss = 0.0
    correct = 0
    with torch.no_grad():
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            logits = network(data)
            # Assumes loss_fn keeps its default mean reduction: weight each
            # batch mean by the batch size so that dividing by the dataset
            # size below yields a true per-sample average.
            train_loss += loss_fn(logits, target).item() * data.size(0)
            pred = logits.argmax(dim=1)
            # .item() keeps `correct` a plain int so it prints as a number
            # rather than as a tensor repr.
            correct += (pred == target).sum().item()
    num_samples = len(train_loader.dataset)
    train_loss /= num_samples
    print('\nTraining set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        train_loss, correct, num_samples,
        100. * correct / num_samples))


# Model Evaluation and Inference
After the model is trained, we evaluate its performance on a test set. This involves:

1. **Switch to Evaluation Mode**: The model is set to evaluation mode, disabling any layers that behave differently during training (like Dropout or BatchNorm).
2. **Forward Pass for Inference**: The test data is passed through the trained model to generate predictions.
3. **Accuracy Calculation**: The predictions are compared to the actual labels to calculate accuracy, giving insight into how well the model generalizes to unseen data.

In [None]:

def test():
    """Evaluate ``network`` on ``test_loader`` and print loss and accuracy.

    Relies on the module-level ``network``, ``device`` and ``test_loader``.
    The model is switched to evaluation mode and no gradients are tracked.
    """
    network.eval()
    test_loss = 0.0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = network(data)
            # The model ends in a plain Linear layer, so `output` holds raw
            # logits: cross_entropy (log-softmax + NLL) is the matching loss,
            # whereas nll_loss expects log-probabilities. reduction='sum'
            # replaces the deprecated size_average=False.
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.argmax(dim=1)
            # .item() keeps `correct` a plain int so it prints as a number
            # rather than as a tensor repr.
            correct += (pred == target).sum().item()
    num_samples = len(test_loader.dataset)
    test_loss /= num_samples
    print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, num_samples,
        100. * correct / num_samples))


In [None]:
# Number of full passes over the training data.
N_EPOCHS = 5

# Epochs are numbered from 1; the test set is evaluated after every epoch.
for epoch in range(1, N_EPOCHS + 1):
    train(epoch)
    test()


In [None]:
# Move the model to the CPU (this changes `network` in place), then write its
# weights to disk.
network.to('cpu')
torch.save(network.state_dict(), "./cats_vs_dogs_lenet5.pth")