In [2]:
import numpy as np
import matplotlib.pyplot as plt
import math
import abc
from abc import ABC, abstractmethod


# Seed NumPy's global RNG so every random draw below is repeatable.
np.random.seed(0)

# 3 x 4 nested list of sample input rows.
inputs = [
    [1, 2, 3, 2.5],
    [4, 5, 6, 7],
    [8, 9, 6, 5],
]

# 3 x 4 nested list of sample weight rows.
weights = [
    [0.2, 0.8, -0.5, 1.0],
    [0.5, -0.91, 0.26, -0.5],
    [-0.26, -0.27, 0.17, 0.87],
]

def create_data(points, classes):
    """Generate a 2-D spiral dataset with one noisy arm per class.

    Each class contributes ``points`` samples whose radius grows linearly
    from 0 to 1 while the angle sweeps a class-specific range perturbed by
    Gaussian noise drawn from NumPy's global random state.

    Args:
        points: number of samples generated for each class.
        classes: number of classes (spiral arms).

    Returns:
        A tuple ``(x, y)`` where ``x`` is a float array of shape
        ``(points * classes, 2)`` and ``y`` is a ``uint8`` array of shape
        ``(points * classes,)`` holding the class index of every sample.
    """
    total = points * classes
    x = np.zeros((total, 2))
    y = np.zeros(total, dtype='uint8')

    # The radius profile is identical for every arm, so build it once.
    radius = np.linspace(0.0, 1, points)

    for class_number in range(classes):
        start = points * class_number
        stop = start + points

        # One noise draw per class keeps the global RNG sequence unchanged.
        jitter = np.random.randn(points) * 0.2
        angle = np.linspace(class_number * 4, (class_number + 1) * 4, points) + jitter

        x[start:stop, 0] = radius * np.sin(angle * 2.5)
        x[start:stop, 1] = radius * np.cos(angle * 2.5)
        y[start:stop] = class_number

    return x, y

# Build the demo dataset: 100 points for each of 3 classes.
X, y = create_data(points=100, classes=3)
# Disabled plotting sketch. NOTE(review): it looks invalid as written --
# np.where takes (condition, x, y) and plt.plot has no `col` keyword.
# col = np.where(y==0, 'b', y==1, 'k', y==2, 'r')
# plt.plot(X[:, 0], X[:, 1], 'o', col=col)
        
class Layer:
    """Fully connected layer computing ``inputs . weights + biases``."""

    def __init__(self, inputSize, neuronSize):
        """Create the layer's parameters.

        Args:
            inputSize: number of values in each input row.
            neuronSize: number of neurons, i.e. columns of the output.
        """
        weight_shape = (inputSize, neuronSize)
        bias_shape = (1, neuronSize)

        # Uniform draws from [0, 1) taken from NumPy's global RNG.
        self.weights = np.random.rand(*weight_shape)
        # A single row of zeros that broadcasts across every input row.
        self.biases = np.zeros(bias_shape)

    def forward(self, inputs):
        """Compute the layer output for ``inputs`` and keep it in ``self.output``.

        Nothing is returned; callers read the result from ``self.output``.
        """
        weighted = np.dot(inputs, self.weights)
        self.output = weighted + self.biases

class ReluActivation:
    """Rectified linear unit: negative values become zero, the rest pass through."""

    def forward(self, inputs):
        """Apply ReLU element-wise and keep the result in ``self.output``.

        Nothing is returned; callers read the result from ``self.output``.
        """
        rectified = np.maximum(0, inputs)
        self.output = rectified

class SoftmaxActivation:
    """Row-wise softmax that turns each row of scores into a probability distribution."""

    def forward(self, inputs):
        """Compute softmax along axis 1 and keep the result in ``self.output``.

        Nothing is returned; callers read the result from ``self.output``.
        """
        # Subtracting each row's maximum keeps the exponent at or below zero,
        # which prevents overflow without changing the resulting ratios.
        row_max = np.amax(inputs, axis=1, keepdims=True)
        exponentials = np.exp(inputs - row_max)

        row_totals = np.sum(exponentials, axis=1, keepdims=True)
        self.output = exponentials / row_totals

class Loss(ABC):
    """Abstract interface for loss functions.

    A concrete subclass must override both the read-only ``loss`` property
    and the ``calculate_loss`` method before it can be instantiated.
    """

    @property
    @abstractmethod
    def loss(self):
        """Loss value exposed by the concrete implementation."""

    @abstractmethod
    def calculate_loss(self, y_pred, y_true):
        """Evaluate predictions ``y_pred`` against targets ``y_true``."""

class CrossEntropyLoss(Loss):
    """Categorical cross-entropy loss averaged over a batch of predictions.

    The result of the latest ``calculate_loss`` call is exposed through the
    read-only ``loss`` property.
    """

    @property
    def loss(self):
        """Mean loss stored by the most recent ``calculate_loss`` call.

        The value is only assigned inside ``calculate_loss``, so reading it
        before that method has run raises ``AttributeError``.
        """
        return self.__loss

    def calculate_loss(self, y_pred, y_true):
        """Compute the mean negative log-likelihood and store it in ``loss``.

        Args:
            y_pred: 2-D array-like of predicted class probabilities, one row
                per sample.
            y_true: either a 1-D sequence of integer class indices (one per
                sample) or a 2-D array of per-class weights, such as one-hot
                rows, with the same shape as ``y_pred``. Plain Python lists
                are accepted.

        Raises:
            ValueError: if ``y_true`` is neither 1-D nor 2-D.
        """
        # Coerce once so list labels work and the rank check is explicit.
        y_true = np.asarray(y_true)

        # Keep probabilities away from 0 and 1 so log() never returns -inf.
        clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if y_true.ndim == 1:
            # Sparse labels: pick the predicted probability of the true class.
            confidences = clipped[range(len(clipped)), y_true]
        elif y_true.ndim == 2:
            # One-hot (or weighted) labels: mask and sum across the classes.
            confidences = np.sum(clipped * y_true, axis=1)
        else:
            raise ValueError(
                f"y_true must be 1-D class indices or a 2-D one-hot array, "
                f"got {y_true.ndim} dimension(s)"
            )

        self.__loss = np.mean(-np.log(confidences))
            



        
        
# Draw a fresh three-class spiral sample, 100 points per class.
X, y = create_data(points=100, classes=3)

# Network: 2 inputs -> 3 ReLU units -> 3 softmax outputs.
# l1 is created before l2 so the random weight draws keep their order.
l1 = Layer(inputSize=2, neuronSize=3)
l2 = Layer(inputSize=3, neuronSize=3)
ac1 = ReluActivation()
ac2 = SoftmaxActivation()
loss = CrossEntropyLoss()

# Forward pass: each stage reads the previous stage's `output` attribute.
l1.forward(X)
ac1.forward(l1.output)
l2.forward(ac1.output)
ac2.forward(l2.output)

# Show the predicted class probabilities for the first five samples.
print(ac2.output[:5])

# Mean cross-entropy of the predictions against the integer class labels.
loss.calculate_loss(ac2.output, y)
print(loss.loss)
        
# relu = ReluActivation()
# l = Layer(4, 5)
# l2 = Layer(5, 10)
# l.forward(inputs)
# l2.forward(l.output)
# # print(l2.output)
# relu.forward(l2.output)
# print(relu.output)

# Attention scores in transformers show which element (e.g. which voice) relates the most to each other element.
# This is also critical in text-to-speech, because words that are spelt the same, like "saw" and "saw",
# need to be differentiated, and words like "red" and "read" are pronounced the same, so we
# need transformers to predict which word makes the most sense for what the user input.



[[0.33333333 0.33333333 0.33333333]
 [0.33332407 0.33178906 0.33488687]
 [0.33325754 0.32987808 0.33686438]
 [0.33320352 0.32848265 0.33831383]
 [0.33323116 0.32659676 0.34017208]]
1.1271195200726416
