In [19]:
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

def mean_squared_error(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)

def mean_squared_error_derivative(y_true, y_pred):
    return y_pred - y_true

class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Initialize weights
        self.W1 = np.random.randn(input_size, hidden_size)
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size)
        self.b2 = np.zeros((1, output_size))

    def forward(self, X):
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.z2  # No activation for the output layer (regression)
        return self.a2

    def backward(self, X, y, output, learning_rate):
        m = y.shape[0]

        # Calculate the loss derivative
        loss_derivative = mean_squared_error_derivative(y, output)

        # Backpropagation
        dW2 = np.dot(self.a1.T, loss_derivative) / m
        db2 = np.sum(loss_derivative, axis=0, keepdims=True) / m
        dW1 = np.dot(X.T, (np.dot(loss_derivative, self.W2.T) * sigmoid_derivative(self.a1))) / m
        db1 = np.sum(np.dot(loss_derivative, self.W2.T) * sigmoid_derivative(self.a1), axis=0, keepdims=True) / m

        # Update weights
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2

    def train(self, X, y, epochs, learning_rate):
        for epoch in range(epochs):
            output = self.forward(X)
            self.backward(X, y, output, learning_rate)
            if (epoch+1) % 100 == 0:
                loss = mean_squared_error(y, output)
                print(f'Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}')

                

In [20]:
NN = NeuralNetwork(input_size=  100 , hidden_size =  8 , output_size = 2 )


In [22]:
NN.input_size

100

In [3]:

# Generate synthetic data
np.random.seed(42)
X = np.random.rand(1000, 3)  # 1000 samples, 3 features
y = X[:, 0] * 2 + X[:, 1] * -3 + X[:, 2] * 5 + np.random.randn(1000) * 0.1  # Target variable with some noise
y = y.reshape(-1, 1)  # Reshape y to be a column vector

# Initialize the neural network
input_size = X.shape[1]
hidden_size = 10
output_size = 1
nn = NeuralNetwork(input_size, hidden_size, output_size)

# Train the neural network
epochs = 1000
learning_rate = 0.01
nn.train(X, y, epochs, learning_rate)

Epoch 100/1000, Loss: 2.1638
Epoch 200/1000, Loss: 1.7674
Epoch 300/1000, Loss: 1.4283
Epoch 400/1000, Loss: 1.1419
Epoch 500/1000, Loss: 0.9041
Epoch 600/1000, Loss: 0.7100
Epoch 700/1000, Loss: 0.5542
Epoch 800/1000, Loss: 0.4311
Epoch 900/1000, Loss: 0.3351
Epoch 1000/1000, Loss: 0.2610


In [60]:

import numpy as np

def conv3d(input, kernel, stride=1, padding=0):
    input_padded = np.pad(input, ((0, 0), (padding, padding), (padding, padding), (padding, padding)), mode='constant', constant_values=0)
    C, D, H, W = input.shape
    K, _, kD, kH, kW = kernel.shape
    out_D = (D - kD + 2 * padding) // stride + 1
    out_H = (H - kH + 2 * padding) // stride + 1
    out_W = (W - kW + 2 * padding) // stride + 1
    output = np.zeros((K, out_D, out_H, out_W))
    for k in range(K):
        for d in range(out_D):
            for h in range(out_H):
                for w in range(out_W):
                    d_start = d * stride
                    h_start = h * stride
                    w_start = w * stride
                    d_end = d_start + kD
                    h_end = h_start + kH
                    w_end = w_start + kW
                    output[k, d, h, w] = np.sum(input_padded[:, d_start:d_end, h_start:h_end, w_start:w_end] * kernel[k, :, :, :, :])
    return output

def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return np.where(x > 0, 1, 0)

def squared_error_loss(output, target):
    return 0.5 * np.sum((output - target) ** 2)

def squared_error_loss_derivative(output, target):
    return output - target

def conv3d_backward(input, kernel, output_grad, stride=1, padding=0):
    input_padded = np.pad(input, ((0, 0), (padding, padding), (padding, padding), (padding, padding)), mode='constant', constant_values=0)
    C, D, H, W = input.shape
    K, _, kD, kH, kW = kernel.shape
    out_D, out_H, out_W = output_grad.shape[1:]
    input_grad = np.zeros_like(input_padded)
    kernel_grad = np.zeros_like(kernel)
    for k in range(K):
        for d in range(out_D):
            for h in range(out_H):
                for w in range(out_W):
                    d_start = d * stride
                    h_start = h * stride
                    w_start = w * stride
                    d_end = d_start + kD
                    h_end = h_start + kH
                    w_end = w_start + kW
                    input_grad[:, d_start:d_end, h_start:h_end, w_start:w_end] += output_grad[k, d, h, w] * kernel[k, :, :, :, :]
                    kernel_grad[k, :, :, :, :] += output_grad[k, d, h, w] * input_padded[:, d_start:d_end, h_start:h_end, w_start:w_end]
    if padding > 0:
        input_grad = input_grad[:, padding:-padding, padding:-padding, padding:-padding]
    return input_grad, kernel_grad

# Example usage
if __name__ == "__main__":
    input_volume = np.random.rand(3, 10, 10, 10)
    kernel = np.random.rand(2, 3, 3, 3, 3)
    target = np.random.rand(10)
    learning_rate = 0.01

    # Forward pass
    conv_output = conv3d(input_volume, kernel, stride=1, padding=1)
    activated_output = relu(conv_output)
    loss = squared_error_loss(activated_output, target)

    # Backward pass
    loss_grad = squared_error_loss_derivative(activated_output, target)
    relu_grad = relu_derivative(conv_output) * loss_grad
    input_grad, kernel_grad = conv3d_backward(input_volume, kernel, relu_grad, stride=1, padding=1)

    # Update weights
    kernel -= learning_rate * kernel_grad

    print("Loss:", loss)
    print("Kernel Gradient Shape:", kernel_grad.shape)
    print("Input Gradient Shape:", input_grad.shape)



Loss: 320756.6128977641
Kernel Gradient Shape: (2, 3, 3, 3, 3)
Input Gradient Shape: (3, 10, 10, 10)


In [18]:

import numpy as np

def conv2d(input, kernel, stride=1, padding=0):
    batch_size, H, W, C = input.shape
    K, kH, kW, _ = kernel.shape
    out_H = (H - kH + 2 * padding) // stride + 1
    out_W = (W - kW + 2 * padding) // stride + 1
    output = np.zeros((batch_size, out_H, out_W, K))
    input_padded = np.pad(input, ((0, 0), (padding, padding), (padding, padding), (0, 0)), mode='constant', constant_values=0)
    for b in range(batch_size):
        for k in range(K):
            for h in range(out_H):
                for w in range(out_W):
                    h_start = h * stride
                    w_start = w * stride
                    h_end = h_start + kH
                    w_end = w_start + kW
                    output[b, h, w, k] = np.sum(input_padded[b, h_start:h_end, w_start:w_end, :] * kernel[k, :, :, :])
    return output

def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return np.where(x > 0, 1, 0)

def squared_error_loss(output, target):
    return 0.5 * np.sum((output - target) ** 2)

def squared_error_loss_derivative(output, target):
    return output - target

def conv2d_backward(input, kernel, output_grad, stride=1, padding=0):
    batch_size, H, W, C = input.shape
    K, kH, kW, _ = kernel.shape
    out_H, out_W = output_grad.shape[1:3]
    input_grad = np.zeros_like(input)
    kernel_grad = np.zeros_like(kernel)
    input_padded = np.pad(input, ((0, 0), (padding, padding), (padding, padding), (0, 0)), mode='constant', constant_values=0)
    input_grad_padded = np.pad(input_grad, ((0, 0), (padding, padding), (padding, padding), (0, 0)), mode='constant', constant_values=0)
    
    for b in range(batch_size):
        for k in range(K):
            for h in range(out_H):
                for w in range(out_W):
                    h_start = h * stride
                    w_start = w * stride
                    h_end = h_start + kH
                    w_end = w_start + kW
                    input_grad_padded[b, h_start:h_end, w_start:w_end, :] += output_grad[b, h, w, k] * kernel[k, :, :, :]
                    kernel_grad[k, :, :, :] += output_grad[b, h, w, k] * input_padded[b, h_start:h_end, w_start:w_end, :]
    
    if padding > 0:
        input_grad = input_grad_padded[:, padding:-padding, padding:-padding, :]
    else:
        input_grad = input_grad_padded
    
    return input_grad, kernel_grad

def flatten(input):
    batch_size = input.shape[0]
    return input.reshape(batch_size, -1)

def dense(input, weights, biases):
    return np.dot(input, weights) + biases

def dense_backward(input, weights, output_grad):
    input_grad = np.dot(output_grad, weights.T)
    weights_grad = np.dot(input.T, output_grad)
    biases_grad = np.sum(output_grad, axis=0)
    return input_grad, weights_grad, biases_grad

def train(input_volume, target, kernel, dense_weights, dense_biases, learning_rate=0.01, epochs=10, stride=1, padding=1):
    for epoch in range(epochs):
        # Forward pass
        conv_output = conv2d(input_volume, kernel, stride=stride, padding=padding)
        activated_output = relu(conv_output)
        flattened_output = flatten(activated_output)
        dense_output = dense(flattened_output, dense_weights, dense_biases)
        loss = squared_error_loss(dense_output, target)
        
        # Backward pass
        loss_grad = squared_error_loss_derivative(dense_output, target)
        dense_input_grad, dense_weights_grad, dense_biases_grad = dense_backward(flattened_output, dense_weights, loss_grad)
        relu_grad = relu_derivative(activated_output) * dense_input_grad.reshape(activated_output.shape)
        input_grad, kernel_grad = conv2d_backward(input_volume, kernel, relu_grad, stride=stride, padding=padding)
        
        # Update weights
        kernel -= learning_rate * kernel_grad
        dense_weights -= learning_rate * dense_weights_grad
        dense_biases -= learning_rate * dense_biases_grad
        
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss}")

# Example usage
if __name__ == "__main__":
    n_samples = 100
    h = 7
    w = 7
    s = 3
    l = 64
    output_size = 1  # Assuming a single output per sample
    input_volume = np.random.rand(n_samples, h, w, s)  # Batch of 5 samples, each of shape (h, w, s)
    kernel = np.random.rand(l, 3, 3, s)  # 2 output channels, kernel size 3x3, s input channels
    target = np.random.rand(n_samples, output_size)  # Batch of 5 target samples, each of shape (output_size)
    dense_weights = np.random.rand(h * w * l, output_size)  # Fully connected layer weights
    dense_biases = np.random.rand(output_size)  # Fully connected layer biases
    learning_rate = 0.1
    epochs = 30

    train(input_volume, target, kernel, dense_weights, dense_biases, learning_rate=learning_rate , epochs=10, stride=1, padding=1)


Epoch 1/10, Loss: 3802610578.805309
Epoch 2/10, Loss: 379306239693.6465
Epoch 3/10, Loss: 30723805414799.082
Epoch 4/10, Loss: 2488628238598339.5
Epoch 5/10, Loss: 2.0157888732646506e+17
Epoch 6/10, Loss: 1.632788987344367e+19
Epoch 7/10, Loss: 1.3225590797489372e+21
Epoch 8/10, Loss: 1.071272854596639e+23
Epoch 9/10, Loss: 8.677310122232775e+24
Epoch 10/10, Loss: 7.02862119900855e+26


In [15]:


import numpy as np

class Conv2DLayer:
    def __init__(self, input_channels, output_channels, kernel_size, stride=1, padding=0):
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.kernel = np.random.rand(output_channels, kernel_size, kernel_size, input_channels)
    
    def forward(self, input):
        batch_size, H, W, C = input.shape
        K, kH, kW, _ = self.kernel.shape
        out_H = (H - kH + 2 * self.padding) // self.stride + 1
        out_W = (W - kW + 2 * self.padding) // self.stride + 1
        output = np.zeros((batch_size, out_H, out_W, K))
        input_padded = np.pad(input, ((0, 0), (self.padding, self.padding), (self.padding, self.padding), (0, 0)), mode='constant', constant_values=0)
        
        for b in range(batch_size):
            for k in range(K):
                for h in range(out_H):
                    for w in range(out_W):
                        h_start = h * self.stride
                        w_start = w * self.stride
                        h_end = h_start + kH
                        w_end = w_start + kW
                        output[b, h, w, k] = np.sum(input_padded[b, h_start:h_end, w_start:w_end, :] * self.kernel[k, :, :, :])
        return output

    def backward(self, input, output_grad):
        batch_size, H, W, C = input.shape
        K, kH, kW, _ = self.kernel.shape
        out_H, out_W = output_grad.shape[1:3]
        input_grad = np.zeros_like(input)
        kernel_grad = np.zeros_like(self.kernel)
        input_padded = np.pad(input, ((0, 0), (self.padding, self.padding), (self.padding, self.padding), (0, 0)), mode='constant', constant_values=0)
        input_grad_padded = np.pad(input_grad, ((0, 0), (self.padding, self.padding), (self.padding, self.padding), (0, 0)), mode='constant', constant_values=0)
        
        for b in range(batch_size):
            for k in range(K):
                for h in range(out_H):
                    for w in range(out_W):
                        h_start = h * self.stride
                        w_start = w * self.stride
                        h_end = h_start + kH
                        w_end = w_start + kW
                        input_grad_padded[b, h_start:h_end, w_start:w_end, :] += output_grad[b, h, w, k] * self.kernel[k, :, :, :]
                        kernel_grad[k, :, :, :] += output_grad[b, h, w, k] * input_padded[b, h_start:h_end, w_start:w_end, :]
        
        if self.padding > 0:
            input_grad = input_grad_padded[:, self.padding:-self.padding, self.padding:-self.padding, :]
        else:
            input_grad = input_grad_padded
        
        return input_grad, kernel_grad

class DenseLayer:
    def __init__(self, input_size, output_size):
        self.weights = np.random.rand(input_size, output_size)
        self.biases = np.random.rand(output_size)
    
    def forward(self, input):
        return np.dot(input, self.weights) + self.biases

    def backward(self, input, output_grad):
        input_grad = np.dot(output_grad, self.weights.T)
        weights_grad = np.dot(input.T, output_grad)
        biases_grad = np.sum(output_grad, axis=0)
        return input_grad, weights_grad, biases_grad

class ConvNet:
    def __init__(self):
        self.conv_layers = []
        self.dense_layer = None
    
    def add_conv_layer(self, input_channels, output_channels, kernel_size, stride=1, padding=0):
        self.conv_layers.append(Conv2DLayer(input_channels, output_channels, kernel_size, stride, padding))
    
    def add_dense_layer(self, input_size, output_size):
        self.dense_layer = DenseLayer(input_size, output_size)
    
    def forward(self, input):
        x = input
        for layer in self.conv_layers:
            x = relu(layer.forward(x))
        x = flatten(x)
        x = self.dense_layer.forward(x)
        return x

    def backward(self, input, target):
        # Forward pass
        x = input
        activations = [x]
        for layer in self.conv_layers:
            x = relu(layer.forward(x))
            activations.append(x)
        x = flatten(x)
        dense_input = x
        dense_output = self.dense_layer.forward(x)
        loss = squared_error_loss(dense_output, target)
        
        # Backward pass
        loss_grad = squared_error_loss_derivative(dense_output, target)
        dense_input_grad, dense_weights_grad, dense_biases_grad = self.dense_layer.backward(dense_input, loss_grad)
        
        # Reshape dense_input_grad to match the shape of the last conv layer output
        dense_input_grad = dense_input_grad.reshape(activations[-1].shape)
        
        next_grad = dense_input_grad
        for i in reversed(range(len(self.conv_layers))):
            relu_grad = relu_derivative(activations[i+1]) * next_grad
            input_grad, kernel_grad = self.conv_layers[i].backward(activations[i], relu_grad)
            next_grad = input_grad
            self.conv_layers[i].kernel -= learning_rate * kernel_grad
        
        self.dense_layer.weights -= learning_rate * dense_weights_grad
        self.dense_layer.biases -= learning_rate * dense_biases_grad
        
        return loss

    def train(self, input_volume, target, learning_rate=0.01, epochs=10):
        global learning_rate 


        

SyntaxError: name 'learning_rate' is parameter and global (3563962624.py, line 123)

In [None]:
np.random.randn(28, 28)