In [None]:
import torch
from torch import nn
import numpy as np
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
from random import randint

# **Sections A+B+C**

In [None]:
class DropNorm(nn.Module):
  """Dropout combined with a batch-wise normalisation.

  The input is flattened to ``(batch, features)``, a random half of the feature
  columns is zeroed, and a learnable scalar scale (``gamma``) and shift
  (``beta``) are applied to the surviving columns. Dropped columns are exactly
  zero in the output.

  * Training mode: each column is standardised with the mean and (biased)
    variance of the current batch.
  * Eval mode: a fixed mean of 0 and variance of 1 are used.

  NOTE(review): the random mask is applied in eval mode as well and no running
  statistics are tracked, so inference is stochastic and un-normalised.
  Confirm this matches the intended specification.

  Args:
    in_features: kept for API compatibility; the layer infers the feature
      count from its input and does not read this value.
  """

  def __init__(self, in_features):
    super().__init__()
    self.epsilon = 1e-5
    # Registered as parameters so that model.parameters() / state_dict() see
    # them, the optimizer updates them, and .to(device) moves them.
    self.beta = nn.Parameter(torch.rand(1))
    self.gamma = nn.Parameter(torch.rand(1))

  def forward(self, x):
    """Return the masked, normalised input as a ``(batch, features)`` tensor."""
    # Flatten everything after the batch dimension.
    x = torch.flatten(x, start_dim=1)
    num_features = x.shape[1]

    # Pick half of the feature columns to keep. Using torch's RNG keeps the
    # indices on the input's device and makes torch.manual_seed sufficient
    # for reproducibility.
    keep = torch.randperm(num_features, device=x.device)[: num_features // 2]
    mask = torch.zeros_like(x)
    mask[:, keep] = 1
    x_hat = mask * x

    if self.training:
      # Per-column batch statistics. The biased variance is used so that a
      # batch of one yields zeros instead of NaN.
      mu = x_hat.mean(dim=0)
      var = ((x_hat - mu) ** 2).mean(dim=0)
      x_i = (x_hat - mu) / torch.sqrt(var + self.epsilon)
    else:
      # Fixed statistics: mean 0, variance 1.
      x_i = x_hat / (1.0 + self.epsilon) ** 0.5

    # Re-apply the mask so the shift (beta) does not leak into dropped columns.
    return (x_i * self.gamma + self.beta) * mask

In [None]:
# Random (2, 3, 4) tensor used as the demo input for DropNorm below.
t = torch.randn(2, 3, 4)
print(t)

tensor([[[ 0.1079,  3.4805,  1.1454, -0.2593],
         [ 0.2618, -0.7093,  0.7214, -0.6537],
         [ 0.4830, -0.8616,  1.7557,  0.1442]],

        [[-1.7915, -0.6267,  0.1580, -0.9848],
         [-0.8256,  1.2848, -0.0918,  1.5473],
         [ 0.1307, -0.5738,  0.1145,  0.9436]]])


In [None]:
# `t` flattens to 3 * 4 = 12 features per sample. DropNorm does not currently
# read in_features, but the argument should still describe the real input.
drop = DropNorm(in_features=3 * 4)
# Call the module itself rather than .forward() so registered hooks run.
drop(t)

tensor([[ 0.0000,  1.6223,  1.2069,  1.1499,  0.0000,  0.2395,  1.1700,  0.2108,
          0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000, -0.0076,  0.4078,  0.4649,  0.0000,  1.3752,  0.4447,  1.4040,
          0.0000,  0.0000,  0.0000,  0.0000]])

In [None]:
# Switch the layer to evaluation mode and run the same input through it.
drop.eval()
# Call the module itself rather than .forward() so registered hooks run.
drop(t)

tensor([[ 0.8803,  3.1612,  0.0000,  0.0000,  0.0000,  0.0000,  1.2953,  0.0000,
          1.1340,  0.2247,  1.9947,  0.0000],
        [-0.4042,  0.3836,  0.0000,  0.0000,  0.0000,  0.0000,  0.7453,  0.0000,
          0.8958,  0.4193,  0.8848,  0.0000]])

# **Section D**

In [None]:
# Pick the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

cuda


In [None]:
# Download the FashionMNIST train / test splits (cached under ./data) and
# convert every image to a tensor.
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)

test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

BATCH_SIZE = 64

# Wrap the datasets in batched loaders. The training loader reshuffles every
# epoch so SGD does not see the same batches in the same order each time; the
# test loader keeps a fixed order.
train_dataloader = DataLoader(training_data, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE)

In [None]:
# Baseline classifier: linear layers with ReLU activations, one batch-normalization
# layer and one dropout layer.
class my_Fashion_MNIST(nn.Module):
    """Fully connected network mapping 28x28 inputs to 10 logits."""

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        stages = [
            nn.Linear(28*28, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.Dropout(0.4),
            nn.ReLU(),
            nn.Linear(512, 10),
        ]
        self.linear_relu_stack = nn.Sequential(*stages)

    def forward(self, x):
        """Flatten the input and return the logits of the layer stack."""
        flat = self.flatten(x)
        return self.linear_relu_stack(flat)

model = my_Fashion_MNIST()

In [None]:
# The training loop.
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Args:
        dataloader: iterable of ``(X, y)`` batches exposing a ``dataset`` attribute.
        model: the module being trained; it is switched to training mode.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: optimizer holding ``model``'s parameters.

    Prints the loss every 100 batches. Returns nothing.
    """
    size = len(dataloader.dataset)

    # Dropout / BatchNorm-style layers behave differently in eval mode, so make
    # sure the model is in training mode regardless of what ran before.
    model.train()

    # Feed batches on the device that holds the model's parameters (no-op when
    # everything already lives on the same device).
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    for batch, (X, y) in enumerate(dataloader):
        if target_device is not None:
            X, y = X.to(target_device), y.to(target_device)

        # Compute prediction and loss.
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")

In [None]:
#the testing loop 
def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error:\nAccuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [None]:
# Cross-entropy loss optimised with plain SGD.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

epochs = 20
# The loop variable is named `epoch` rather than `t` so it does not overwrite
# the demo tensor `t` created earlier in the notebook.
for epoch in range(epochs):
    print(f"Epoch {epoch+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.303611  [    0/60000]
loss: 2.121288  [ 6400/60000]
loss: 1.918516  [12800/60000]
loss: 1.781844  [19200/60000]
loss: 1.557962  [25600/60000]
loss: 1.448539  [32000/60000]
loss: 1.361832  [38400/60000]
loss: 1.278526  [44800/60000]
loss: 1.289499  [51200/60000]
loss: 1.181979  [57600/60000]
Test Error:
Accuracy: 69.2%, Avg loss: 1.112020 

Epoch 2
-------------------------------
loss: 1.124322  [    0/60000]
loss: 1.104470  [ 6400/60000]
loss: 0.897960  [12800/60000]
loss: 1.116582  [19200/60000]
loss: 0.882270  [25600/60000]
loss: 0.907101  [32000/60000]
loss: 0.913109  [38400/60000]
loss: 0.892682  [44800/60000]
loss: 0.929790  [51200/60000]
loss: 0.847502  [57600/60000]
Test Error:
Accuracy: 73.7%, Avg loss: 0.817646 

Epoch 3
-------------------------------
loss: 0.777251  [    0/60000]
loss: 0.817693  [ 6400/60000]
loss: 0.607776  [12800/60000]
loss: 0.918204  [19200/60000]
loss: 0.745086  [25600/60000]
loss: 0.715745  [32000/60000]


# **Section E**

In [None]:
# Same classifier as before (linear layers, ReLU, batch normalization), but with
# the custom DropNorm layer in place of nn.Dropout.
class my_Fashion_MNIST_drop(nn.Module):
    """Fully connected network mapping 28x28 inputs to 10 logits, using DropNorm."""

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Linear(512, 512),
            # The preceding Linear emits 512 features, so pass that as
            # in_features instead of the placeholder 0.
            DropNorm(512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )

    # The forward pass.
    def forward(self, x):
        """Flatten the input and return the logits of the layer stack."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = my_Fashion_MNIST_drop()

In [None]:
# The training loop.
# NOTE: this redefinition shadows the train_loop declared in section D.
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Args:
        dataloader: iterable of ``(X, y)`` batches exposing a ``dataset`` attribute.
        model: the module being trained; it is switched to training mode.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: optimizer holding ``model``'s parameters.

    Prints the loss every 100 batches. Returns nothing.
    """
    size = len(dataloader.dataset)

    # Dropout / BatchNorm-style layers behave differently in eval mode, so make
    # sure the model is in training mode regardless of what ran before.
    model.train()

    # Feed batches on the device that holds the model's parameters (no-op when
    # everything already lives on the same device).
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    for batch, (X, y) in enumerate(dataloader):
        if target_device is not None:
            X, y = X.to(target_device), y.to(target_device)

        # Compute prediction and loss.
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")

In [None]:
#the testing loop 
def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error:\nAccuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [None]:
# Cross-entropy loss optimised with plain SGD.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

epochs = 20
# The loop variable is named `epoch` rather than `t` so it does not overwrite
# the demo tensor `t` created earlier in the notebook.
for epoch in range(epochs):
    print(f"Epoch {epoch+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.437863  [    0/60000]
loss: 2.314165  [ 6400/60000]
loss: 2.147945  [12800/60000]
loss: 2.093630  [19200/60000]
loss: 1.993560  [25600/60000]
loss: 1.972484  [32000/60000]
loss: 1.864919  [38400/60000]
loss: 1.851994  [44800/60000]
loss: 1.771791  [51200/60000]
loss: 1.773345  [57600/60000]
Test Error:
Accuracy: 52.1%, Avg loss: 1.737872 

Epoch 2
-------------------------------
loss: 1.732660  [    0/60000]
loss: 1.711645  [ 6400/60000]
loss: 1.584982  [12800/60000]
loss: 1.713439  [19200/60000]
loss: 1.583695  [25600/60000]
loss: 1.560463  [32000/60000]
loss: 1.478341  [38400/60000]
loss: 1.449304  [44800/60000]
loss: 1.485398  [51200/60000]
loss: 1.433247  [57600/60000]
Test Error:
Accuracy: 62.7%, Avg loss: 1.401742 

Epoch 3
-------------------------------
loss: 1.391400  [    0/60000]
loss: 1.460500  [ 6400/60000]
loss: 1.294608  [12800/60000]
loss: 1.464745  [19200/60000]
loss: 1.317314  [25600/60000]
loss: 1.262475  [32000/60000]
