<a href="https://colab.research.google.com/github/SharlotteManganye/Deep-Learning-CNN/blob/main/Pi_Sigma_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# standard library
import math

# visualization tools
import matplotlib.pyplot as plt

# torch- Our deep learning framework
import torch
import torch.nn as nn
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
from torch import optim

# from other notebooks
# import import_ipynb
# from dataloaders import DataLoader

In [None]:
# MNIST train and test splits, stored under the 'data' directory with every
# image converted by ToTensor(). Only the training split asks for a download.
train_data = datasets.MNIST(root='data', train=True, transform=ToTensor(), download=True)
test_data = datasets.MNIST(root='data', train=False, transform=ToTensor())

In [None]:
# Mini-batch loaders over the two MNIST splits (100 samples per batch).
# Training batches are reshuffled every epoch. Evaluation gains nothing from
# shuffling, so the test loader keeps a fixed order and yields the same
# batches on every pass.
train_loader = DataLoader(train_data, batch_size=100, shuffle=True, num_workers=1)
test_loader = DataLoader(test_data, batch_size=100, shuffle=False, num_workers=1)

In [1]:


class PiSigmaCon2d(nn.Module):
    """2-D convolution layer built on ``unfold`` with an optional product response.

    The input is unfolded into patches and every patch is combined with the
    flattened kernels through a matrix product:

    * with a bias (``bias=True``) the layer computes the usual affine
      convolution ``patch @ W + b``;
    * without a bias the patch is combined multiplicatively as
      ``exp(log|patch| @ W)``, i.e. the product of ``|x_i| ** w_i``.

    NOTE(review): selecting the multiplicative branch by the absence of a
    bias mirrors the original helper; confirm this switch is intended.

    Args:
        in_channel: number of channels of the input images.
        out_channel: number of feature maps produced.
        kernel_size: kernel size, an int or a ``(kh, kw)`` pair.
        stride: stride, an int or a ``(sh, sw)`` pair.
        padding: zero padding, an int or a ``(ph, pw)`` pair.
        bias: whether to learn an additive bias (see the branches above).
    """

    def __init__(self, in_channel, out_channel, kernel_size=3, stride=1, padding=0, bias=True):
        super().__init__()
        self.k = self._pair(kernel_size)
        self.in_c = in_channel
        self.out_c = out_channel
        self.stride = self._pair(stride)
        self.padding = self._pair(padding)

        # Weights and bias. The kernel is (out, in, kh, kw) so that it can be
        # flattened against the unfolded (in * kh * kw) patches.
        self.weight = nn.Parameter(torch.empty(self.out_c, self.in_c, *self.k))
        if bias:
            self.bias = nn.Parameter(torch.zeros(self.out_c))
        else:
            # Registering None keeps ``self.bias`` defined for forward().
            self.register_parameter("bias", None)

        # Weights and bias initialization.
        nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        if self.bias is not None:
            fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight)
            if fan_in > 0:
                bound = 1 / math.sqrt(fan_in)
                nn.init.uniform_(self.bias, -bound, bound)

    @staticmethod
    def _pair(value):
        """Return ``value`` as a 2-tuple, repeating it when it is a single int."""
        if isinstance(value, int):
            return (value, value)
        pair = tuple(value)
        if len(pair) != 2:
            raise ValueError("expected an int or a pair, got {!r}".format(value))
        return pair

    @staticmethod
    def conv2d(input, weight, bias=None, stride=(1, 1), padding=(0, 0), dilation=(1, 1),
               groups=1, eps=1e-12):
        """Apply ``weight`` to ``input`` patch by patch.

        Args:
            input: tensor of shape (batch, in_channels, height, width).
            weight: tensor of shape (out_channels, in_channels, kh, kw).
            bias: per-output-channel bias, or None to select the
                multiplicative branch.
            stride, padding, dilation: (height, width) pairs.
            groups: only 1 is supported.
            eps: lower clamp applied to ``|input|`` before the logarithm.

        Returns:
            float32 tensor of shape (batch, out_channels, out_h, out_w).

        Raises:
            NotImplementedError: if ``groups`` is not 1.
            ValueError: if the channel counts of ``input`` and ``weight`` differ.
        """
        if groups != 1:
            raise NotImplementedError("only groups=1 is supported")

        batch_size, in_channels, in_h, in_w = input.shape
        out_channels, weight_in_channels, kh, kw = weight.shape
        if in_channels != weight_in_channels:
            raise ValueError(
                "input has {} channels but weight expects {}".format(in_channels, weight_in_channels)
            )

        # Same output-size rule that Unfold uses, including dilation.
        out_h = (in_h + 2 * padding[0] - dilation[0] * (kh - 1) - 1) // stride[0] + 1
        out_w = (in_w + 2 * padding[1] - dilation[1] * (kw - 1) - 1) // stride[1] + 1

        unfold = nn.Unfold(kernel_size=(kh, kw), dilation=dilation, padding=padding, stride=stride)
        patches = unfold(input).transpose(1, 2)          # (batch, L, in * kh * kw)
        kernels = weight.reshape(out_channels, -1).t()   # (in * kh * kw, out)

        if bias is None:
            # Product of |x| ** w computed in log space. The clamp keeps
            # log(0) = -inf from turning mixed-sign weights into NaN.
            out_unf = torch.exp(torch.log(patches.abs().clamp_min(eps)).matmul(kernels))
        else:
            out_unf = patches.matmul(kernels) + bias

        out = out_unf.transpose(1, 2).reshape(batch_size, out_channels, out_h, out_w)
        return out.float()

    def forward(self, x):
        """Run the layer on ``x`` using its own weight, bias, stride and padding."""
        return self.conv2d(x, self.weight, self.bias, stride=self.stride, padding=self.padding)


In [None]:
# PiSigmaCon2d requires the input and output channel counts.
# MNIST digits are single-channel 28x28 images with 10 classes, so a 28x28
# kernel reduces each image to one value per class, and Flatten turns the
# resulting (N, 10, 1, 1) map into the (N, 10) scores that CrossEntropyLoss
# and output.max(1) expect.
model = nn.Sequential(
    PiSigmaCon2d(in_channel=1, out_channel=10, kernel_size=28),
    nn.Flatten(),
)

In [None]:
# Loss criterion; train() and val() call it as loss_func(output, target).
loss_func = nn.CrossEntropyLoss()

In [None]:
# Adam over every parameter of the model, learning rate 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
def train(epoch):
    """Run one optimisation pass of the global ``model`` over ``train_loader``.

    Args:
        epoch: index of the current epoch (not used by the computation).

    Returns:
        Tuple ``(loss, accuracy)``: the loss averaged over the batches and the
        fraction of samples of ``train_loader.dataset`` predicted correctly.
    """
    # Switch the model to training mode.
    model.train()

    running_loss = 0
    correct = 0

    for n_batches, (data, target) in enumerate(train_loader, start=1):
        # Drop the gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass and loss for this batch.
        output = model(data.float())
        loss = loss_func(output, target)

        # Backward pass, then one parameter update.
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # The predicted class is the index of the largest output.
        pred = output.max(1)[1]
        correct += target.eq(pred).sum().item()

    accuracy = correct / len(train_loader.dataset)
    mean_loss = running_loss / n_batches  # Average per batch

    return mean_loss, accuracy

In [None]:
def val():
    """Evaluate the global ``model`` on ``test_loader``.

    Returns:
        Tuple ``(loss, accuracy)``: the loss averaged over the batches and the
        fraction of samples of ``test_loader.dataset`` predicted correctly.
    """
    # Switch the model to evaluation mode.
    model.eval()

    running_loss = 0
    correct = 0

    # No parameter update happens here, so gradient tracking is switched off
    # for the whole pass.
    with torch.no_grad():
        for n_batches, (data, target) in enumerate(test_loader, start=1):
            # Forward pass only.
            output = model(data.float())

            running_loss += loss_func(output, target).item()

            # The predicted class is the index of the largest output.
            pred = output.max(1)[1]
            correct += target.eq(pred).sum().item()

    mean_loss = running_loss / n_batches  # Average per batch
    accuracy = correct / len(test_loader.dataset)
    return mean_loss, accuracy

In [None]:
# Per-epoch history of the training and validation metrics.
loss_list = []
val_loss_list = []

acc_list = []
val_acc_list = []

epochs = 10
for epoch in range(1, epochs + 1):
    print("--- Epoch {} ---".format(epoch))

    # train() and val() return accuracy as a fraction in [0, 1]. The printed
    # value is scaled by 100 so that it matches the "%" suffix; the history
    # lists keep the unscaled fractions.
    train_loss, train_acc = train(epoch)
    print('\tTrain loss: {:.4f}, acc: {:.4f}%'.format(train_loss, 100 * train_acc))
    loss_list.append(train_loss)
    acc_list.append(train_acc)

    val_loss, val_acc = val()
    print('\tVal loss: {:.4f}, acc: {:.4f}%'.format(val_loss, 100 * val_acc))
    val_loss_list.append(val_loss)
    val_acc_list.append(val_acc)