<a href="https://colab.research.google.com/github/Chhavi0404/soc-/blob/main/Assignment_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import numpy as np
import math

In [14]:
class Activation(object):

    """
    Interface for activation functions (non-linearities).

    In all implementations, the state attribute must contain the result,
    i.e. the output of forward.

    Calling an instance (``act(x)``) is shorthand for ``act.forward(x)``.
    Subclasses must override both ``forward`` and ``derivative``; the base
    versions raise ``NotImplementedError``.
    """

    # No additional work is needed for this class, as it acts like an
    # abstract base class for the others

    # Note that these activation functions are scalar operations. I.e, they
    # shouldn't change the shape of the input.

    def __init__(self):
        # Holds the most recent forward output; None until forward is called.
        self.state = None

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        """Apply the non-linearity to x. Must be overridden by subclasses."""
        # NotImplemented is a comparison sentinel, not an exception; raising
        # it produces a confusing TypeError. NotImplementedError is the
        # proper signal for an abstract method.
        raise NotImplementedError

    def derivative(self):
        """Return the derivative at the last forward input. Must be overridden."""
        raise NotImplementedError

In [15]:
class Identity(Activation):

    """
    Pass-through activation: forward returns its input unchanged.
    """

    # Reference implementation showing the forward/derivative contract.

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Remember x as the state and hand it back untouched."""
        self.state = x
        return self.state

    def derivative(self):
        """The slope of the identity map is the constant 1.0."""
        return 1.0

In [16]:
class Sigmoid(Activation):

    """
    Sigmoid non-linearity: 1 / (1 + exp(-x)), applied element-wise.
    """

    # Remember do not change the function signatures as those are needed
    # to stay the same for AutoLab.

    def __init__(self):
        super(Sigmoid, self).__init__()

    def forward(self, x):
        """
        Argument:
            x (scalar or np.array): input of any shape
        Return:
            out: sigmoid of x, same shape as x (also stored in self.state)
        """
        # np.exp works element-wise on arrays as well as on scalars;
        # math.exp only accepts scalars and fails on batched input.
        self.state = 1.0 / (1.0 + np.exp(-x))
        return self.state

    def derivative(self):
        """
        Return:
            out: sigmoid'(x) = s * (1 - s), computed from the stored
                 forward output s; same shape as the last forward input.
        """
        return self.state * (1.0 - self.state)


In [17]:
class Tanh(Activation):

    """
    Tanh non-linearity, applied element-wise.
    """

    def __init__(self):
        super(Tanh, self).__init__()

    def forward(self, x):
        """
        Argument:
            x (scalar or np.array): input of any shape
        Return:
            out: tanh of x, same shape as x (also stored in self.state)
        """
        # np.tanh handles arrays element-wise; math.tanh only accepts
        # scalars and fails on batched input.
        self.state = np.tanh(x)
        return self.state

    def derivative(self):
        """
        Return:
            out: tanh'(x) = 1 - tanh(x)^2, computed from the stored
                 forward output; same shape as the last forward input.
        """
        return 1.0 - self.state ** 2


In [18]:
class ReLU(Activation):

    """
    ReLU non-linearity: max(0, x), applied element-wise.
    """

    def __init__(self):
        super(ReLU, self).__init__()

    def forward(self, x):
        """
        Argument:
            x (scalar or np.array): input of any shape
        Return:
            out: element-wise max(0, x) (also stored in self.state)
        """
        self.state = np.maximum(0, x)
        return self.state

    def derivative(self):
        """
        Return:
            out: 1.0 where the stored forward output is positive, 0.0
                 elsewhere; same shape as the last forward input.
        """
        # A plain `if self.state > 0` is ambiguous for arrays with more than
        # one element; build the mask element-wise instead.
        return np.where(self.state > 0, 1.0, 0.0)

In [19]:
class Criterion(object):
    """
    Interface for loss functions.

    Calling an instance (``crit(x, y)``) is shorthand for
    ``crit.forward(x, y)``. Subclasses must override both ``forward`` and
    ``derivative``; the base versions raise ``NotImplementedError``.
    """

    # Nothing needs done to this class, it's used by the following Criterion classes

    def __init__(self):
        # Slots for subclasses to record their latest inputs and result.
        self.logits = None
        self.labels = None
        self.loss = None

    def __call__(self, x, y):
        return self.forward(x, y)

    def forward(self, x, y):
        """Compute the loss for predictions x and targets y. Must be overridden."""
        # NotImplemented is a comparison sentinel, not an exception; raising
        # it produces a confusing TypeError. NotImplementedError is the
        # proper signal for an abstract method.
        raise NotImplementedError

    def derivative(self):
        """Return the gradient of the loss w.r.t. its input. Must be overridden."""
        raise NotImplementedError

In [20]:
class SoftmaxCrossEntropy(Criterion):
    """
    Softmax followed by cross-entropy loss, computed per sample.
    """

    def __init__(self):
        super(SoftmaxCrossEntropy, self).__init__()
        # Softmax probabilities from the latest forward call; derivative()
        # needs them, so they are kept on the instance.
        self.softmax = None

    def forward(self, x, y):
        """
        Argument:
            x (np.array): (batch size, num classes) raw scores (logits)
            y (np.array): (batch size, num classes) target distribution,
                          e.g. one-hot labels
        Return:
            out (np.array): (batch size, ) cross-entropy loss per sample
        """
        self.logits = x
        self.labels = y

        # Subtract the per-row maximum before exponentiating so np.exp
        # cannot overflow; softmax is invariant to this shift.
        row_max = np.max(x, axis=1, keepdims=True)
        shifted = x - row_max
        exp_shifted = np.exp(shifted)
        sum_exp = np.sum(exp_shifted, axis=1, keepdims=True)
        self.softmax = exp_shifted / sum_exp

        # log softmax = shifted - log(sum(exp(shifted))), all kept at
        # (batch, classes)/(batch, 1) so nothing broadcasts to (batch, batch).
        log_softmax = shifted - np.log(sum_exp)
        self.loss = -(y * log_softmax).sum(axis=1)
        return self.loss

    def derivative(self):
        """
        Return:
            out (np.array): (batch size, num classes) gradient of the loss
                            w.r.t. the logits: softmax - labels. Requires
                            a prior call to forward.
        """
        return self.softmax - self.labels



In [21]:
class Linear():
    """
    Affine layer: forward computes ``x W + b`` and backward produces the
    batch-averaged parameter gradients plus the gradient for the input.

    Attributes:
        W, b: parameters returned by weight_init_fn(in_feature, out_feature)
              and bias_init_fn(out_feature)
        dW, db: gradient buffers, zero-initialised with the shapes of W and b
        momentum_W, momentum_b: zero-initialised buffers with the shapes of
              W and b (not updated inside this class)
        x: input cached by the latest forward call
    """

    def __init__(self, in_feature, out_feature, weight_init_fn, bias_init_fn):
        # Parameters (weights are initialised before the bias).
        self.W = weight_init_fn(in_feature, out_feature)
        self.b = bias_init_fn(out_feature)

        w_shape, b_shape = self.W.shape, self.b.shape

        # Gradient buffers.
        self.dW = np.zeros(w_shape)
        self.db = np.zeros(b_shape)

        # Momentum buffers.
        self.momentum_W = np.zeros(w_shape)
        self.momentum_b = np.zeros(b_shape)

    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        """
        Argument:
            x (np.array): (batch size, in feature)
        Return:
            out (np.array): (batch size, out feature)
        """
        # Cache the input; backward needs it to form dW.
        self.x = x
        return self.x @ self.W + self.b

    def backward(self, delta):
        """
        Argument:
            delta (np.array): (batch size, out feature)
        Return:
            out (np.array): (batch size, in feature)
        """
        batch_size = delta.shape[0]
        # Parameter gradients are averaged over the batch.
        self.dW = self.x.T.dot(delta) / batch_size
        self.db = delta.sum(axis=0, keepdims=True) / batch_size
        # The gradient handed to the previous layer is not averaged.
        return delta.dot(self.W.T)


In [23]:
class MLP(object):

    """
    A simple multilayer perceptron: a stack of Linear layers, each followed
    by the activation at the same index in ``activations``.
    """

    def __init__(self, input_size, output_size, hiddens, activations, weight_init_fn,
                 bias_init_fn, criterion, lr):
        """
        Argument:
            input_size (int): width of the network input
            output_size (int): width of the network output
            hiddens (sequence of int): widths of the hidden layers (may be empty)
            activations (list): one activation object per linear layer
            weight_init_fn: called as weight_init_fn(in_feature, out_feature)
            bias_init_fn: called as bias_init_fn(out_feature)
            criterion: loss object providing __call__(x, y) and derivative()
            lr (float): step size used by step()
        """

        # Don't change this -->
        self.train_mode = True
        self.nlayers = len(hiddens) + 1
        self.input_size = input_size
        self.output_size = output_size
        self.activations = activations
        self.criterion = criterion
        self.lr = lr
        # <---------------------

        # Don't change the name of the following class attributes,
        # the autograder will check against these attributes.

        # Consecutive pairs of layer widths give each layer's (in, out) size.
        # list(hiddens) lets callers pass a tuple as well as a list.
        layer_sizes = [self.input_size] + list(hiddens) + [self.output_size]
        self.linear_layers = [
            Linear(inf, outf, weight_init_fn, bias_init_fn)
            for inf, outf in zip(layer_sizes[:-1], layer_sizes[1:])
        ]

        # Latest network output; read by error() and total_loss().
        self.output = None

    def forward(self, x):
        """
        Argument:
            x (np.array): (batch size, input_size)
        Return:
            out (np.array): (batch size, output_size)
        """
        for i, layer in enumerate(self.linear_layers):
            x = layer(x)
            x = self.activations[i](x)
        # Keep the result so error() and total_loss() can use it.
        self.output = x
        return x

    def zero_grads(self):
        """Reset dW and db of every linear layer to zero, in place."""
        for layer in self.linear_layers:
            layer.dW.fill(0.0)
            layer.db.fill(0.0)

    def step(self):
        """Apply one plain gradient-descent update to every linear layer."""
        # (You will add momentum later in the assignment to the linear layers)
        for layer in self.linear_layers:
            layer.W -= self.lr * layer.dW
            layer.b -= self.lr * layer.db

        # Do the same for batchnorm layers

    def backward(self, labels):
        """
        Backpropagate from the loss through every activation and linear
        layer, last to first. Requires a prior call to forward.

        Argument:
            labels (np.array): targets passed to the criterion
        """
        # Run the criterion on the final activation's output so that its
        # derivative() reflects the current forward pass and labels.
        self.criterion(self.activations[-1].state, labels)
        delta = self.criterion.derivative()

        for i in reversed(range(len(self.linear_layers))):
            # Activation derivative is element-wise; the linear layer's
            # backward records dW/db and returns the gradient for its input.
            delta = delta * self.activations[i].derivative()
            delta = self.linear_layers[i].backward(delta)

    def error(self, labels):
        """Count samples whose arg-max prediction differs from the arg-max label."""
        return (np.argmax(self.output, axis = 1) != np.argmax(labels, axis = 1)).sum()

    def total_loss(self, labels):
        """Sum the criterion's loss over the latest forward output."""
        return self.criterion(self.output, labels).sum()

    def __call__(self, x):
        return self.forward(x)

    def train(self):
        self.train_mode = True

    def eval(self):
        self.train_mode = False


