In [None]:
import torch

## Creating Layers

In [3]:
class DenseLayer:
  """Fully connected layer computing ``inputs @ weights + biases``.

  Attributes:
    weights: tensor of shape (n_inputs, n_neurons).
    biases: tensor of shape (1, n_neurons).
    inputs, output: cached by ``forward``.
    dweights, dbiases, dinputs: gradients stored by ``backward``.
  """

  # Layer initialization
  def __init__(self, n_inputs, n_neurons):
    """Create the parameters for a layer with n_inputs inputs and n_neurons outputs."""
    # Small uniform random weights in [0, 0.01); biases start at zero
    self.weights = 0.01 * torch.rand(n_inputs, n_neurons)
    self.biases = torch.zeros((1, n_neurons))

  # Forward pass
  def forward(self, inputs):
    """Compute the layer output for a (batch, n_inputs) tensor.

    The inputs are remembered because ``backward`` needs them to compute
    the weight gradient. The result is stored in ``self.output``.
    """
    self.inputs = inputs
    self.output = torch.matmul(inputs, self.weights) + self.biases

  # Backward pass
  def backward(self, dvalues):
    """Back-propagate ``dvalues``, the gradient of the loss w.r.t. ``self.output``.

    Stores ``dweights`` (same shape as weights), ``dbiases`` (same shape as
    biases) and ``dinputs`` (same shape as the cached inputs).
    ``forward`` must have been called first.
    """
    # Gradients on parameters.
    # These are matrix products: torch.dot only accepts 1-D tensors and
    # raises on the 2-D operands used here, so torch.matmul is required.
    self.dweights = torch.matmul(self.inputs.T, dvalues)
    self.dbiases = torch.sum(dvalues, dim=0, keepdim=True)
    # Gradient on values
    self.dinputs = torch.matmul(dvalues, self.weights.T)

## Activation Functions

In [4]:
class Activation_ReLU:
  """Rectified linear unit: ``output = max(0, inputs)``, element-wise."""

  # Forward pass
  def forward(self, inputs):
    """Store ``inputs`` and set ``self.output`` to the inputs with negatives zeroed."""
    # Remember input values; backward() needs them to build the mask
    self.inputs = inputs
    # clamp keeps the input dtype and avoids mixing an integer scalar
    # tensor with floating-point inputs
    self.output = torch.clamp(inputs, min=0)

  # Backward pass
  def backward(self, dvalues):
    """Store in ``self.dinputs`` the incoming gradient, zeroed where the input was <= 0.

    ``dvalues`` itself is left untouched.
    """
    # Copy first: assigning dvalues directly would alias the caller's tensor,
    # and the masked write below would then silently modify it in place.
    self.dinputs = dvalues.clone()
    # Zero gradient where input values were not positive
    self.dinputs[self.inputs <= 0] = 0


class Activation_Softmax:
  """Row-wise softmax activation for 2-D (batch, classes) inputs."""

  # Forward pass
  def forward(self, inputs):
    """Compute softmax over dim 1 and store it in ``self.output``.

    The row maximum is subtracted before exponentiating so that large
    inputs do not overflow; this does not change the result.
    """
    shifted = inputs - torch.max(inputs, dim=1, keepdim=True).values
    exp_values = torch.exp(shifted)
    self.output = exp_values / torch.sum(exp_values, dim=1, keepdim=True)

  # Backward pass
  def backward(self, dvalues):
    """Back-propagate ``dvalues`` through the softmax into ``self.dinputs``.

    For one sample with softmax output s and upstream gradient d, the
    Jacobian is J = diag(s) - s s^T, so
        J @ d = s * d - s * (s . d) = s * (d - s . d).
    That product is evaluated for every row at once. The previous per-sample
    loop wrote only the last row (the assignment sat outside the loop) and
    used torch.dot, which rejects 2-D operands.
    """
    # s . d for each sample, kept as a column so it broadcasts over classes
    weighted_sum = torch.sum(self.output * dvalues, dim=1, keepdim=True)
    self.dinputs = self.output * (dvalues - weighted_sum)


class Activation_Sigmoid:
  """Logistic sigmoid activation, ``1 / (1 + exp(-x))``, applied element-wise."""

  # Forward pass
  def forward(self, inputs):
    """Evaluate the sigmoid of ``inputs`` and keep it in ``self.output``."""
    denominator = 1 + torch.exp(-inputs)
    self.output = 1 / denominator

  # Backward pass
  def backward(self, dvalues):
    """Scale the upstream gradient by the sigmoid slope; result goes to ``self.dinputs``.

    The slope is expressed through the cached output: s * (1 - s).
    ``forward`` must have been called first.
    """
    activated = self.output
    local_slope = activated * (1 - activated)
    # Element-wise chain rule
    self.dinputs = dvalues * local_slope

## Loss

In [5]:
class Loss_CategoricalCrossentropy:
  """Categorical cross-entropy loss over rows of class probabilities.

  Labels may be sparse (1-D tensor of class indices) or one-hot
  (2-D tensor with the same shape as the predictions).
  """

  # Forward pass
  def forward(self, y_pred, y_true):
    """Return the mean negative log-probability assigned to the true class.

    Raises:
      ValueError: if ``y_true`` is neither 1-D nor 2-D.
    """
    samples = len(y_pred)
    # Clip to keep log() away from 0.
    # NOTE(review): for float32 inputs the upper bound 1 - 1e-8 rounds to
    # 1.0, so in practice only the lower bound has an effect.
    y_pred_clipped = torch.clip(y_pred, 1e-8, 1 - 1e-8)
    label_rank = len(y_true.shape)
    if label_rank == 1:
      # Sparse labels: pick the predicted probability of the true class
      correct_confidences = y_pred_clipped[range(samples), y_true]
    elif label_rank == 2:
      # One-hot labels: mask out everything but the true class
      correct_confidences = torch.sum(y_pred_clipped * y_true, dim=1)
    else:
      # Previously this fell through and failed on an unbound local variable
      raise ValueError(
          f"y_true must be 1-D (class indices) or 2-D (one-hot), got {label_rank}-D")
    log_loss = -torch.log(correct_confidences)
    return torch.mean(log_loss)

  # Backward pass
  def backward(self, dvalues, y_true):
    """Store in ``self.dinputs`` the loss gradient w.r.t. the predictions ``dvalues``.

    The gradient is ``-y_true / dvalues`` averaged over the batch.

    Raises:
      ValueError: if ``y_true`` is neither 1-D nor 2-D.
    """
    # Number of samples and number of classes per sample
    samples = len(dvalues)
    labels = len(dvalues[0])
    label_rank = len(y_true.shape)
    if label_rank == 1:
      # Sparse labels -> one-hot rows, built with dvalues' dtype and device
      y_true = torch.eye(labels, dtype=dvalues.dtype, device=dvalues.device)[y_true]
    elif label_rank != 2:
      raise ValueError(
          f"y_true must be 1-D (class indices) or 2-D (one-hot), got {label_rank}-D")
    # Mirror the lower clip used in forward(): a predicted probability of
    # exactly 0 would otherwise produce 0/0 = nan for non-target classes
    # and inf for the target class.
    safe_dvalues = torch.clamp(dvalues, min=1e-8)
    # Calculate gradient
    self.dinputs = -y_true / safe_dvalues
    # Normalize gradient
    self.dinputs = self.dinputs / samples


class Loss_MeanSquaredError():
    """Mean squared error averaged over every element of the prediction."""

    # Forward pass
    def forward(self, y_pred, y_true):
        """Return the mean of ``(y_pred - y_true) ** 2`` over all elements."""
        data_loss = torch.mean((y_pred - y_true) ** 2)
        return data_loss

    # Backward pass
    def backward(self, dvalues, y_true):
        """Compute, store in ``self.dinputs`` and return the loss gradient.

        ``dvalues`` holds the predictions. Because ``forward`` averages over
        all elements, the gradient is ``2 * (pred - true) / n_elements``.
        Dividing by the number of rows only, as before, over-scales the
        gradient by the number of output columns whenever there is more
        than one.
        """
        difference = dvalues - y_true
        # Same element count that torch.mean divides by in forward()
        n_elements = difference.numel()
        self.dinputs = 2 * difference / n_elements
        return self.dinputs


## Accuracy

In [6]:
class Accuracy:
  """Classification accuracy: the fraction of rows whose arg-max matches the label."""

  def calculate(self, y_pred, y_true):
    """Return the accuracy as a 0-dim float tensor.

    ``y_pred`` is reduced with arg-max over dim 1. ``y_true`` may hold class
    indices (1-D) or one-hot rows (2-D); the latter are reduced the same way.
    """
    predicted_classes = torch.argmax(y_pred, dim=1)
    # One-hot targets are converted to class indices before comparing
    one_hot_targets = len(y_true.shape) == 2
    target_classes = torch.argmax(y_true, dim=1) if one_hot_targets else y_true
    hits = predicted_classes == target_classes
    return torch.mean(hits.float())

## Optimizers

In [7]:
class Optimizer_SGD:
  """Plain stochastic gradient descent with a fixed learning rate."""

  def __init__(self, learning_rate=0.01):
    """Remember the step size used by ``update_params``."""
    self.learning_rate = learning_rate

  def update_params(self, layer):
    """Take one descent step on ``layer.weights`` and ``layer.biases``, in place.

    The layer must already expose ``dweights`` and ``dbiases``.
    """
    step = self.learning_rate
    layer.weights -= step * layer.dweights
    layer.biases -= step * layer.dbiases

## Training and Testing

In [10]:
import numpy as np
from sklearn.datasets import load_iris
import torch

In [8]:
# Fetch the Iris dataset bundled with scikit-learn, then pull out the
# feature matrix (X) and the class labels (y)
iris = load_iris()
X, y = iris.data, iris.target

In [11]:
# Turn the NumPy arrays into PyTorch tensors:
# float32 features and int64 labels
X, y = (
    torch.tensor(X, dtype=torch.float32),
    torch.tensor(y, dtype=torch.int64),
)

In [12]:
# Summarize the dataset: shapes, feature/class names, then the first five rows
for caption, shown in (
    ("X shape:", X.shape),
    ("y shape:", y.shape),
    ("Feature names:", iris.feature_names),
    ("Class names:", iris.target_names),
):
  print(caption, shown)
print(X[:5])
print(y[:5])

X shape: torch.Size([150, 4])
y shape: torch.Size([150])
Feature names: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
Class names: ['setosa' 'versicolor' 'virginica']
tensor([[5.1000, 3.5000, 1.4000, 0.2000],
        [4.9000, 3.0000, 1.4000, 0.2000],
        [4.7000, 3.2000, 1.3000, 0.2000],
        [4.6000, 3.1000, 1.5000, 0.2000],
        [5.0000, 3.6000, 1.4000, 0.2000]])
tensor([0, 0, 0, 0, 0])


In [13]:
# Seed PyTorch's RNG so the random weight initialization below gives the
# same values on every Restart & Run All
SEED = 0
torch.manual_seed(SEED)

# Network dimensions: 4 Iris features -> 16 hidden units -> 3 classes
N_FEATURES, N_HIDDEN, N_CLASSES = 4, 16, 3

# Hidden layer with ReLU, then output layer with softmax
dense1 = DenseLayer(N_FEATURES, N_HIDDEN)
activation1 = Activation_ReLU()
dense2 = DenseLayer(N_HIDDEN, N_CLASSES)
activation2 = Activation_Softmax()

# Training objective and parameter update rule (default learning rate)
loss = Loss_CategoricalCrossentropy()
optimizer = Optimizer_SGD()