In [1]:
import numpy as np

# Activation Functions


In [2]:
class ReLU:
    """Rectified linear unit: keeps positive values and replaces the rest with 0."""

    def forward(self, x):
        """Cache ``x`` and return the element-wise maximum of 0 and ``x``."""
        self.input = x
        rectified = np.maximum(0, x)
        self.output = rectified
        return rectified

    def backward(self, dvalues):
        """Return a copy of ``dvalues`` zeroed wherever the cached input was <= 0.

        ``dvalues`` itself is left untouched; the result is also stored in
        ``self.dinputs``.
        """
        grad = dvalues.copy()
        blocked = self.input <= 0  # positions where the unit was inactive
        grad[blocked] = 0
        self.dinputs = grad
        return grad


class LReLU:
    """Leaky ReLU: identity for positive inputs, slope ``alpha`` everywhere else."""

    def __init__(self, alpha=0.01):
        # Slope applied to inputs that are not strictly positive.
        self.alpha = alpha

    def forward(self, x):
        """Cache ``x`` and return ``x`` where ``x > 0``, otherwise ``alpha * x``."""
        self.input = x
        is_positive = x > 0
        leaked = self.alpha * x
        self.output = np.where(is_positive, x, leaked)
        return self.output

    def backward(self, dvalues):
        """Scale ``dvalues`` by 1 where the cached input was > 0 and by ``alpha`` elsewhere."""
        is_positive = self.input > 0
        damped = dvalues * self.alpha
        self.dinputs = np.where(is_positive, dvalues, damped)
        return self.dinputs


class Sigmoid:
    """Logistic sigmoid activation, ``1 / (1 + exp(-x))``, applied element-wise."""

    def forward(self, x):
        """Cache ``x`` and return the sigmoid of every element.

        The value is computed from ``exp(-|x|)``, which always lies in (0, 1],
        so large-magnitude inputs cannot overflow ``exp`` (the naive
        ``exp(-x)`` overflows for very negative ``x``).

        Parameters:
            x: array-like of real numbers.

        Returns:
            Array of the same shape as ``x`` with values in [0, 1]; also
            stored in ``self.output``.
        """
        self.input = x
        values = np.asarray(x)
        decay = np.exp(-np.abs(values))
        # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- algebraically equal.
        self.output = np.where(values >= 0, 1 / (1 + decay), decay / (1 + decay))
        return self.output

    def backward(self, dvalues):
        """Multiply ``dvalues`` by the local derivative ``output * (1 - output)``.

        Uses the output cached by the most recent ``forward`` call and stores
        the result in ``self.dinputs``.
        """
        self.dinputs = dvalues * (self.output * (1 - self.output))
        return self.dinputs


class Tanh:
    """Hyperbolic tangent activation, applied element-wise."""

    def forward(self, x):
        """Cache ``x`` and return ``tanh(x)`` (also kept in ``self.output``)."""
        self.input = x
        activated = np.tanh(x)
        self.output = activated
        return activated

    def backward(self, dvalues):
        """Multiply ``dvalues`` by ``1 - output**2`` using the cached forward output."""
        local_slope = 1 - np.square(self.output)
        self.dinputs = dvalues * local_slope
        return self.dinputs


class Softmax:
    """Row-wise softmax activation for 2-D inputs (normalises along axis 1)."""

    def forward(self, x):
        """Cache ``x`` and return the softmax of each row.

        The row maximum is subtracted before exponentiating so that ``exp``
        never receives a positive argument; this does not change the result.

        Parameters:
            x: array with at least two dimensions; normalisation is along axis 1.

        Returns:
            Array of the same shape whose entries along axis 1 sum to 1; also
            stored in ``self.output``.
        """
        self.input = x
        shifted = x - np.max(x, axis=1, keepdims=True)
        exp_values = np.exp(shifted)
        self.output = exp_values / np.sum(exp_values, axis=1, keepdims=True)
        return self.output

    def backward(self, dvalues):
        """Back-propagate ``dvalues`` (gradient w.r.t. the softmax output).

        Applies the softmax Jacobian per row in closed form:
        ``dinputs = s * (dvalues - sum(dvalues * s))`` where ``s`` is the
        output cached by ``forward``.

        NOTE: this expects the gradient of the loss with respect to the
        probabilities (e.g. ``-y_true / (y_pred * samples)`` for categorical
        cross-entropy). A caller that already holds the fused
        softmax + cross-entropy gradient ``(y_pred - y_true) / samples``
        must not pass it through this method.
        """
        weighted_sum = np.sum(dvalues * self.output, axis=1, keepdims=True)
        self.dinputs = self.output * (dvalues - weighted_sum)
        return self.dinputs


class Linear:
    """Identity activation: the output is the input, unchanged."""

    def forward(self, x):
        """Store ``x`` as both ``self.input`` and ``self.output`` and return it."""
        self.input = self.output = x
        return x

    def backward(self, dvalues):
        """Return (and store in ``self.dinputs``) an independent copy of ``dvalues``."""
        passed_through = dvalues.copy()
        self.dinputs = passed_through
        return passed_through



In [3]:
# Sample batch shared by the activation demo: 2 rows, 3 values each.
x = np.array([
    [1, -2, 3],
    [-1, 2, -3],
])

# One instance of every activation defined above.
relu, lrelu, linear = ReLU(), LReLU(), Linear()
sigmoid, tanh, softmax = Sigmoid(), Tanh(), Softmax()

In [4]:
# Run each activation's forward pass on the shared sample batch and print the result.
for title, act in (
    ("Relu :", relu),
    ("LReLU :", lrelu),
    ("Linear :", linear),
    ("Sigmoid :", sigmoid),
    ("Tanh :", tanh),
    ("Softmax :", softmax),
):
    print(title, act.forward(x))

Relu : [[1 0 3]
 [0 2 0]]
LReLU : [[ 1.   -0.02  3.  ]
 [-0.01  2.   -0.03]]
Linear : [[ 1 -2  3]
 [-1  2 -3]]
Sigmoid : [[0.73105858 0.11920292 0.95257413]
 [0.26894142 0.88079708 0.04742587]]
Tanh : [[ 0.76159416 -0.96402758  0.99505475]
 [-0.76159416  0.96402758 -0.99505475]]
Softmax : [[0.11849965 0.00589975 0.8756006 ]
 [0.04712342 0.94649912 0.00637746]]


# Layers

In [5]:
import numpy as np

class InputLayer:
    """Pass-through layer: forwards data and gradients without modifying them."""

    def forward(self, X):
        """Record ``X`` as ``self.output`` and hand the same object back."""
        self.output = X
        return X

    def backward(self, dvalues):
        """Record ``dvalues`` as ``self.dinputs`` and hand the same object back (no copy)."""
        self.dinputs = dvalues
        return dvalues


class Dense:
    """Fully connected layer computing ``activation(X @ weights + biases)``.

    ``backward`` both computes the gradients and immediately applies a plain
    gradient-descent step to ``weights`` and ``biases`` using ``learning_rate``.

    Attributes set by ``__init__``:
        weights: array of shape (input_size, output_size).
        biases: zero-initialised array of shape (1, output_size).
        activation: optional object exposing ``forward(x)`` / ``backward(dvalues)``.
    """

    def __init__(self, input_size, output_size, activation=None, learning_rate=0.01, weight_init="random"):
        """Create the layer and draw its initial weights.

        Parameters:
            input_size: number of input features.
            output_size: number of units in the layer.
            activation: activation object, or ``None`` for a purely linear layer.
            learning_rate: step size used by the update inside ``backward``.
            weight_init: ``"random"`` (standard normal * 0.01) or ``"xavier"``
                (standard normal * sqrt(2 / (input_size + output_size))).

        Raises:
            ValueError: if ``weight_init`` is not one of the supported names.
        """
        self.input_size = input_size
        self.output_size = output_size
        self.learning_rate = learning_rate

        if weight_init == "random":
            scale = 0.01
        elif weight_init == "xavier":
            scale = np.sqrt(2 / (input_size + output_size))
        else:
            raise ValueError("Unknown weight initialization type")

        # Same RNG call as before, so seeded runs produce identical weights.
        self.weights = np.random.randn(input_size, output_size) * scale
        self.biases = np.zeros((1, output_size))
        self.activation = activation

    def forward(self, X):
        """Compute the layer output for a batch ``X``.

        ``X`` is promoted to 2-D before being cached, so a single 1-D sample
        or a nested list behaves like a batch and ``backward`` can transpose
        the cached input. A 2-D ndarray is cached as-is (no copy).

        Returns:
            ``self.output``: the activated output, or the raw affine output
            when no activation was given. The pre-activation values are kept
            in ``self.linear_output``.
        """
        self.input = np.atleast_2d(X)
        self.linear_output = np.dot(self.input, self.weights) + self.biases

        if self.activation is None:
            self.output = self.linear_output
        else:
            self.output = self.activation.forward(self.linear_output)
        return self.output

    def backward(self, dvalues):
        """Back-propagate ``dvalues`` and update the parameters in place.

        Parameters:
            dvalues: gradient of the loss with respect to this layer's output.

        Returns:
            ``self.dinputs``: gradient with respect to the layer input,
            computed with the weights as they were *before* the update.

        Side effects:
            Sets ``self.dweights`` / ``self.dbiases`` and subtracts
            ``learning_rate`` times each from ``self.weights`` / ``self.biases``.
        """
        if self.activation is None:
            dactivation = dvalues
        else:
            dactivation = self.activation.backward(dvalues)

        self.dweights = np.dot(self.input.T, dactivation)
        self.dbiases = np.sum(dactivation, axis=0, keepdims=True)
        # Must be computed before the weights are modified below.
        self.dinputs = np.dot(dactivation, self.weights.T)

        self.weights -= self.learning_rate * self.dweights
        self.biases -= self.learning_rate * self.dbiases

        return self.dinputs
        
class Dropout:
    """Inverted dropout: randomly zeroes activations and rescales the survivors.

    Note that ``self.rate`` holds the *keep* probability (``1 - rate``), not
    the drop rate that was passed to the constructor.
    """

    def __init__(self, rate):
        """Store the keep probability derived from the drop ``rate``.

        Parameters:
            rate: fraction of units to drop, in the half-open range [0, 1).

        Raises:
            ValueError: if ``rate`` is outside [0, 1); a rate of 1 would make
                the keep probability 0 and the rescaling divide by zero.
        """
        if not 0 <= rate < 1:
            raise ValueError("Dropout rate must be in the range [0, 1)")
        self.rate = 1 - rate

    def forward(self, inputs):
        """Draw a fresh random mask, apply it to ``inputs`` and return the result.

        Each mask entry is ``1 / keep_probability`` with probability
        ``keep_probability`` and 0 otherwise. The mask is kept in
        ``self.mask`` for ``backward``; the result is stored in ``self.output``.
        """
        kept = np.random.rand(*np.shape(inputs)) < self.rate
        self.mask = kept / self.rate
        self.output = inputs * self.mask
        return self.output

    def backward(self, dvalues):
        """Apply the mask from the last ``forward`` call to ``dvalues`` and return it.

        The result is also stored in ``self.dinputs``.
        """
        self.dinputs = dvalues * self.mask
        return self.dinputs


        
 
 

# Loss Functions

In [6]:
import numpy as np

class Loss:
    """Interface for loss functions; subclasses override both methods."""

    def forward(self, y_pred, y_true):
        """Return the loss for predictions ``y_pred`` against targets ``y_true``."""
        raise NotImplementedError()

    def backward(self, y_pred, y_true):
        """Return the gradient of the loss with respect to ``y_pred``."""
        raise NotImplementedError()



class MeanSquaredError(Loss):
    """Mean squared error, averaged over every element of the prediction array."""

    def forward(self, y_pred, y_true):
        """Return ``mean((y_true - y_pred) ** 2)`` over all elements.

        The arguments are stored unchanged in ``self.y_pred`` / ``self.y_true``.
        """
        self.y_true = y_true
        self.y_pred = y_pred
        residual = np.asarray(y_true) - np.asarray(y_pred)
        return np.mean(residual ** 2)

    def backward(self, y_pred, y_true):
        """Return the gradient of ``forward`` with respect to ``y_pred``.

        ``forward`` averages over *all* elements, so the gradient is divided
        by the total element count, not only by the number of rows. For
        single-column targets the two are the same; with several output
        columns, dividing by the row count alone would overstate the gradient
        by the number of columns.
        """
        residual = np.asarray(y_pred) - np.asarray(y_true)
        return (2 * residual) / residual.size



class BinaryCrossEntropy(Loss):
    """Binary cross-entropy, averaged over every element of the prediction array."""

    # Predictions are clipped to [EPSILON, 1 - EPSILON] so log() and the
    # division in backward() never see exactly 0 or 1.
    EPSILON = 1e-9

    def forward(self, y_pred, y_true):
        """Return the mean binary cross-entropy of ``y_pred`` against ``y_true``.

        ``self.y_true`` keeps the targets as passed; ``self.y_pred`` keeps the
        *clipped* predictions.
        """
        self.y_true = y_true
        self.y_pred = np.clip(y_pred, self.EPSILON, 1 - self.EPSILON)
        targets = np.asarray(y_true)
        per_element = targets * np.log(self.y_pred) + (1 - targets) * np.log(1 - self.y_pred)
        return -np.mean(per_element)

    def backward(self, y_pred, y_true):
        """Return the gradient of ``forward`` with respect to ``y_pred``.

        Per element this is ``(p - y) / (p * (1 - p))`` divided by the total
        element count, matching the all-element mean taken in ``forward``.
        For single-column targets this equals dividing by the number of rows.
        """
        clipped = np.clip(y_pred, self.EPSILON, 1 - self.EPSILON)
        difference = clipped - np.asarray(y_true)
        return difference / (clipped * (1 - clipped) * difference.size)



class CategoricalCrossEntropy(Loss):
    """Categorical cross-entropy over rows of class probabilities.

    ``y_true`` may be either a 1-D array of integer class indices or a 2-D
    one-hot array with the same shape as ``y_pred``.
    """

    def forward(self, y_pred, y_true):
        """Return the mean negative log-probability assigned to the true class.

        Predictions are clipped to [1e-9, 1 - 1e-9] before taking the log.
        """
        y_true = np.asarray(y_true)
        samples = y_pred.shape[0]
        y_pred = np.clip(y_pred, 1e-9, 1 - 1e-9)

        if y_true.ndim == 1:
            # Sparse labels: pick each row's probability at its class index.
            correct_confidences = y_pred[np.arange(samples), y_true]
        else:
            # One-hot labels: the mask keeps only the true-class probability.
            correct_confidences = np.sum(y_pred * y_true, axis=1)

        return -np.mean(np.log(correct_confidences))

    def backward(self, y_pred, y_true):
        """Return the gradient of ``forward`` with respect to ``y_pred``.

        The result is ``-y_true / (y_pred * samples)`` with ``y_true`` in
        one-hot form. Sparse labels are expanded using the number of columns
        of ``y_pred``, so the one-hot matrix always matches ``y_pred`` even
        when the batch does not contain the highest class index.
        """
        y_true = np.asarray(y_true)
        samples = y_pred.shape[0]
        y_pred = np.clip(y_pred, 1e-9, 1 - 1e-9)

        if y_true.ndim == 1:
            labels = y_pred.shape[1]
            y_true = np.eye(labels)[y_true]

        return -y_true / (y_pred * samples)


In [7]:

# Loss objects under demonstration.
mse, bce = MeanSquaredError(), BinaryCrossEntropy()

# Regression example: three samples, one output column each.
y_true_reg = np.array([2.0, 3.0, 4.0]).reshape(-1, 1)
y_pred_reg = np.array([2.5, 2.8, 3.9]).reshape(-1, 1)

# Binary-classification example: integer targets and predicted probabilities.
y_true_bin = np.array([1, 0, 1]).reshape(-1, 1)
y_pred_bin = np.array([0.9, 0.2, 0.8]).reshape(-1, 1)

# Loss values first ...
print("MSE Loss:", mse.forward(y_pred_reg, y_true_reg))
print("BCE Loss:", bce.forward(y_pred_bin, y_true_bin))

# ... then the gradients with respect to the predictions.
print("MSE Gradient:\n", mse.backward(y_pred_reg, y_true_reg))
print("BCE Gradient:\n", bce.backward(y_pred_bin, y_true_bin))


MSE Loss: 0.10000000000000003
BCE Loss: 0.18388253942874858
MSE Gradient:
 [[ 0.33333333]
 [-0.13333333]
 [-0.06666667]]
BCE Gradient:
 [[-0.37037037]
 [ 0.41666667]
 [-0.41666667]]
