In [52]:
from google.colab import drive
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.functional as F
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import Subset
import time

In [None]:
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [36]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

In [37]:
BATCH_SIZE = 8
NUM_WORKERS = 4

NUM_CLASSES = 10

NUM_EPOCHS = 5
LEARNING_RATE = 0.001
MOMENTUM = 0.9

In [38]:
train_dataset = torchvision.datasets.CIFAR10(root='/content/drive/MyDrive/data', train=True, download=True, transform=transform)

train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(train_dataset, [train_size, val_size])

test_dataset = torchvision.datasets.CIFAR10(root='/content/drive/MyDrive/data', train=False, download=True, transform=transform)


train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)



In [39]:
class ConvolutionalNeuralNetwork(nn.Module):
  def __init__(self, num_classes):
    super(ConvolutionalNeuralNetwork, self).__init__()

    self.model = nn.Sequential(
        nn.Conv2d(in_channels=3, out_channels=8, kernel_size=5),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2),

        nn.Conv2d(in_channels=8, out_channels=16, kernel_size=5),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2),

        nn.Flatten(),

        nn.Linear(16*5*5, 128),
        nn.ReLU(),

        nn.Linear(128, 64),
        nn.ReLU(),

        nn.Linear(64, num_classes)
    )

  def forward(self, X):
    return self.model(X)

In [40]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

convolutional = ConvolutionalNeuralNetwork(num_classes=NUM_CLASSES)
convolutional = convolutional.to(device)

In [41]:
def train_model(model, train_dataloader, val_dataloader, loss_function, epochs, learning_rate):
  optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

  train_epoch_losses = []
  train_epoch_accuracies = []

  val_epoch_losses = []
  val_epoch_accuracies = []

  for epoch in range(epochs):
    model.train()

    train_current_epoch_loss = 0.0
    train_current_epoch_correct_predictions = 0
    train_current_epoch_total_predictions = 0

    for batch_idx, (X_batch, y_batch) in enumerate(train_dataloader):
      X_batch = X_batch.to(device)
      y_batch = y_batch.to(device)

      optimizer.zero_grad()
      outputs = model(X_batch)
      loss = loss_function(outputs, y_batch)
      loss.backward()
      optimizer.step()

      train_current_epoch_loss += loss.item()
      train_current_epoch_correct_predictions += (outputs.argmax(dim=1) == y_batch).sum().item()
      train_current_epoch_total_predictions += y_batch.size(0)

    train_epoch_losses.append(train_current_epoch_loss / len(train_dataloader))
    train_epoch_accuracies.append(train_current_epoch_correct_predictions / train_current_epoch_total_predictions)

    print(f'Training, Epoch {epoch + 1}/{epochs}, Loss: {train_epoch_losses[-1]}, Accuracy: {train_epoch_accuracies[-1]}')

    model.eval()

    val_current_epoch_loss = 0.0
    val_current_epoch_correct_predictions = 0
    val_current_epoch_total_predictions = 0

    with torch.no_grad():
      for batch_idx, (X_batch, y_batch) in enumerate(val_dataloader):
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)

        outputs = model(X_batch)
        loss = loss_function(outputs, y_batch)

        val_current_epoch_loss += loss.item()
        val_current_epoch_correct_predictions += (outputs.argmax(dim=1) == y_batch).sum().item()
        val_current_epoch_total_predictions += y_batch.size(0)

    val_epoch_losses.append(val_current_epoch_loss / len(val_dataloader))
    val_epoch_accuracies.append(val_current_epoch_correct_predictions / val_current_epoch_total_predictions)

    print(f'Validation, Epoch {epoch + 1}/{epochs}, Loss: {val_epoch_losses[-1]}, Accuracy: {val_epoch_accuracies[-1]}')

  return train_epoch_losses, train_epoch_accuracies, val_epoch_losses, val_epoch_accuracies

In [42]:
def plot_train_results(train_epoch_losses, train_epoch_accuracies, val_epoch_losses, val_epoch_accuracies):
  plt.figure(figsize=(12, 6))

  plt.subplot(1, 2, 1)
  plt.plot(train_epoch_losses, label='Training Loss', color='blue')
  plt.plot(val_epoch_losses, label='Validation Loss', color='red')
  plt.xlabel('Epoch')
  plt.ylabel('Loss')
  plt.title('Training and Validation Losses')
  plt.legend()

  plt.subplot(1, 2, 2)
  plt.plot(train_epoch_accuracies, label='Training Accuracy', color='blue')
  plt.plot(val_epoch_accuracies, label='Validation Accuracy', color='red')
  plt.xlabel('Epoch')
  plt.ylabel('Accuracy')
  plt.title('Training and Validation Accuracies')
  plt.legend()

  plt.tight_layout()
  plt.show()

In [43]:
train_epoch_losses, train_epoch_accuracies, val_epoch_losses, val_epoch_accuracies = train_model(convolutional, train_dataloader, val_dataloader, nn.CrossEntropyLoss(), NUM_EPOCHS, LEARNING_RATE)
plot_train_results(train_epoch_losses, train_epoch_accuracies, val_epoch_losses, val_epoch_accuracies)

KeyboardInterrupt: 

In [44]:
class myLinear(nn.Module):
  def __init__(self, in_features, out_features, device=None):
    super(myLinear, self).__init__()

    self.in_features = in_features
    self.out_features = out_features

    self.weight = nn.parameter.Parameter(torch.empty(out_features, in_features, device=device))
    self.bias = nn.parameter.Parameter(torch.empty(out_features, device=device))

    self.reset_parameters()

  def reset_parameters(self):
    scale = 1.0 / np.sqrt(self.in_features)

    nn.init.uniform_(self.weight, -scale, scale)
    nn.init.uniform_(self.bias, -scale, scale)

  def forward(self, X):
    expanded_X = X.unsqueeze(dim=2)
    y = torch.matmul(self.weight, expanded_X)
    expanded_bias = self.bias.unsqueeze(dim=0).unsqueeze(dim=2)
    y = y + expanded_bias
    y = y.squeeze(dim=2)
    return y

In [45]:
linear = nn.Linear(in_features=8, out_features=16, device=device)
my_linear = myLinear(in_features=8, out_features=16, device=device)

random_input = torch.randn(8)
random_input = random_input.unsqueeze(dim=0)
random_input = random_input.to(device)

print('Mean Absolute Difference before copy:', torch.mean(torch.abs(linear(random_input) - my_linear(random_input))).item())

my_linear.load_state_dict(linear.state_dict())

print('Mean Absolute Difference after copy:', torch.mean(torch.abs(linear(random_input) - my_linear(random_input))).item())

Mean Absolute Difference before copy: 0.5196319222450256
Mean Absolute Difference after copy: 0.0


In [46]:
class myConv2d(nn.Module):
  def __init__(self, in_channels, out_channels, kernel_size, padding=0, stride=1):
    super(myConv2d, self).__init__()

    self.in_channels = in_channels
    self.out_channels = out_channels
    self.kernel_size = kernel_size
    self.padding = padding
    self.stride = stride

    scale = 1.0 / np.sqrt(in_channels * kernel_size * kernel_size)

    self.weight = nn.parameter.Parameter(torch.rand(out_channels, in_channels, kernel_size, kernel_size))
    self.bias = nn.parameter.Parameter(torch.rand(out_channels))

    self.reset_parameters()

  def reset_parameters(self):
    scale = 1.0 / np.sqrt(self.in_channels * self.kernel_size * self.kernel_size)

    nn.init.uniform_(self.weight, -scale, scale)
    nn.init.uniform_(self.bias, -scale, scale)

  def forward(self, X):
    batch_size = X.shape[0]
    num_channels = X.shape[1]
    image_height = X.shape[2]
    image_width = X.shape[3]

    y = torch.zeros(
      batch_size,
      self.out_channels,
      (image_height + 2 * self.padding - self.kernel_size // 2 - self.kernel_size // 2) // self.stride,
      (image_width + 2 * self.padding - self.kernel_size // 2 - self.kernel_size // 2) // self.stride
    )

    if self.padding > 0:
      X = nn.functional.pad(X, (self.padding, self.padding, self.padding, self.padding), mode='constant', value=0)

    for image_idx in range(batch_size):
      for i in range(self.kernel_size // 2, image_height + 2 * self.padding - self.kernel_size // 2, self.stride):
        for j in range(self.kernel_size // 2, image_width + 2 * self.padding - self.kernel_size // 2, self.stride):
          y[image_idx, : , i - self.kernel_size // 2, j - self.kernel_size // 2] = torch.sum(
              self.weight *
              X[image_idx, : ,
                i - self.kernel_size // 2 : i + self.kernel_size // 2 + 1,
                j - self.kernel_size // 2 : j + self.kernel_size // 2 + 1], dim=(1, 2, 3)
          ) + self.bias

    return y

In [51]:
conv2d = nn.Conv2d(in_channels=3, out_channels=8, kernel_size=5, padding=2, stride=1)
my_conv2d = myConv2d(in_channels=3, out_channels=8, kernel_size=5, padding=2, stride=1)

CHOSEN_IMAGES_SIZE = 16
permutation = torch.randperm(len(train_dataset))[:CHOSEN_IMAGES_SIZE]
chosen_images = Subset(train_dataset, permutation)

sum_absolute_difference = 0.0
for image, label in chosen_images:
  sum_absolute_difference += torch.abs(conv2d(image.unsqueeze(dim=0)) - my_conv2d(image.unsqueeze(dim=0))).sum().item()
print('Mean Absolute Difference before copy:', sum_absolute_difference / CHOSEN_IMAGES_SIZE)

my_conv2d.load_state_dict(conv2d.state_dict())

sum_absolute_difference = 0.0
for image, label in chosen_images:
  sum_absolute_difference += torch.abs(conv2d(image.unsqueeze(dim=0)) - my_conv2d(image.unsqueeze(dim=0))).sum().item()
print('Mean Absolute Difference after copy:', sum_absolute_difference / CHOSEN_IMAGES_SIZE)

Mean Absolute Difference before copy: 1911.7588806152344
Mean Absolute Difference after copy: 0.00024753916841291357


In [48]:
class myConvolutionalNeuralNetwork(nn.Module):
  def __init__(self, num_classes):
    super(myConvolutionalNeuralNetwork, self).__init__()

    self.model = nn.Sequential(
        myConv2d(in_channels=3, out_channels=8, kernel_size=5),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2),

        myConv2d(in_channels=8, out_channels=16, kernel_size=5),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2),

        nn.Flatten(),

        myLinear(16*5*5, 128),
        nn.ReLU(),

        myLinear(128, 64),
        nn.ReLU(),

        myLinear(64, num_classes)
    )

  def forward(self, X):
    return self.model(X)

In [50]:
my_convolutional = myConvolutionalNeuralNetwork(num_classes=NUM_CLASSES)
my_convolutional = my_convolutional.to(device)

my_convolutional.load_state_dict(convolutional.state_dict())

<All keys matched successfully>

In [59]:
def compute_accuracy_and_time(model, chosen_images):
  images = torch.stack([image[0] for image in chosen_images])
  labels = torch.tensor([image[1] for image in chosen_images])

  num_correct_predictions = 0
  num_total_predictions = 0

  if device == 'cuda':
    torch.cuda.synchronize()
  start_time = time.time()


  model.eval()

  with torch.no_grad():

    '''
    for image, label in zip(images, labels):
      image = image.unsqueeze(dim=0).to(device)
      label = label.unsqueeze(dim=0).to(device)

      outputs = model(image)

      num_correct_predictions += (outputs.argmax(dim=1) == label).sum().item()
      num_total_predictions += label.size(0)
    '''

    images = images.to(device)
    labels = labels.to(device)

    outputs = model(images)

    num_correct_predictions += (outputs.argmax(dim=1) == labels).sum().item()
    num_total_predictions += labels.size(0)

  if device == 'cuda':
    torch.cuda.synchronize()
  end_time = time.time()

  return num_correct_predictions / num_total_predictions, (end_time - start_time) / len(chosen_images)

In [60]:
CHOSEN_IMAGES_SIZE = 32
permutation = torch.randperm(len(train_dataset))[:CHOSEN_IMAGES_SIZE]
chosen_images = Subset(train_dataset, permutation)

convolutional_accuracy, convolutional_time = compute_accuracy_and_time(convolutional, chosen_images)
my_convolutional_accuracy, my_convolutional_time = compute_accuracy_and_time(my_convolutional, chosen_images)

print('Convolutional Accuracy:', convolutional_accuracy)
print('Convolutional Time:', convolutional_time)
print('My Convolutional Accuracy:', my_convolutional_accuracy)
print('My Convolutional Time:', my_convolutional_time)

print('Time Ratio:', my_convolutional_time / convolutional_time)

Convolutional Accuracy: 0.25
Convolutional Time: 0.00027588754892349243
My Convolutional Accuracy: 0.25
My Convolutional Time: 0.05421710014343262
Time Ratio: 196.51883658753948
