<a href="https://colab.research.google.com/github/KormazovaVer/git-homeworks-neuro-pr/blob/main/Lab_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import numpy as np
from sklearn.model_selection import train_test_split


def map_function(data, func):
    return [func(x) for x in data]

def create_minibatches(data, batch_size):
    minibatches = []
    for i in range(0, len(data), batch_size):
        minibatch = data[i:i+batch_size]
        minibatches.append(minibatch)
    return minibatches

def shuffle_data(X, y):
    indices = np.arange(X.shape[0])
    np.random.shuffle(indices)
    return X[indices], y[indices]

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def relu(x):
    return np.maximum(0, x)

def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

def mean_squared_error(y_true, y_pred):
    return np.mean((y_true - y_pred)**2)

def cross_entropy(y_true, y_pred):
    epsilon = 1e-10
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
    return -np.mean(np.sum(y_true[:y_pred.shape[0]] * np.log(y_pred), axis=1))

def sgd(params, gradients, learning_rate):
    for key in params:
        param = np.asarray(params[key], dtype=np.float64)
        grad = np.asarray(gradients[key], dtype=np.float64)
        params[key] -= learning_rate * grad

def momentum_sgd(params, gradients, velocities, learning_rate, momentum):
    for param_name, grad in gradients.items():
        vel = momentum * velocities[param_name] + learning_rate * grad
        params[param_name] -= vel
        velocities[param_name] = vel

def gradient_clipping(gradients, threshold):
    for name, gradient in gradients.items():
        gradients[name] = np.clip(gradient, -threshold, threshold)


In [15]:
# iris
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split


def map_function(data, func):
    return [func(x) for x in data]

def create_minibatches(data, batch_size):
    minibatches = []
    for i in range(0, len(data), batch_size):
        minibatch = data[i:i+batch_size]
        minibatches.append(minibatch)
    return minibatches

def shuffle_data(X, y):
    indices = np.arange(X.shape[0])
    np.random.shuffle(indices)
    return X[indices], y[indices]

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def relu(x):
    return np.maximum(0, x)

def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

def mean_squared_error(y_true, y_pred):
    return np.mean((y_true - y_pred)**2)

def cross_entropy(y_true, y_pred):
    epsilon = 1e-10
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
    return -np.mean(np.sum(y_true[:y_pred.shape[0]] * np.log(y_pred), axis=1))

def sgd(params, gradients, learning_rate):
    for key in params:
        param = np.asarray(params[key], dtype=np.float64)
        grad = np.asarray(gradients[key], dtype=np.float64)
        params[key] -= learning_rate * grad

def momentum_sgd(params, gradients, velocities, learning_rate, momentum):
    for param_name, grad in gradients.items():
        vel = momentum * velocities[param_name] + learning_rate * grad
        params[param_name] -= vel
        velocities[param_name] = vel

def gradient_clipping(gradients, threshold):
    for name, gradient in gradients.items():
        gradients[name] = np.clip(gradient, -threshold, threshold)

# Load iris data
iris = load_iris()
X, y = iris.data, iris.target
y_one_hot = np.eye(3)[y]
X_train, X_test, y_train, y_test = train_test_split(X, y_one_hot, test_size=0.2, random_state=42)

# Create neural network
num_hidden_units = 10
num_classes = 3
input_size = X_train.shape[1]

params = {
    'W1': np.random.randn(input_size, num_hidden_units),
    'b1': np.zeros(num_hidden_units),
    'W2': np.random.randn(num_hidden_units, num_classes),
    'b2': np.zeros(num_classes)
}

# Training loop
num_epochs = 100
learning_rate = 0.001
momentum = 0.9
threshold = 1.0
batch_size = 32

for epoch in range(num_epochs):
    X_train, y_train = shuffle_data(X_train, y_train)
    X_train_minibatches = create_minibatches(X_train, batch_size)
    y_train_minibatches = create_minibatches(y_train, batch_size)

    for X_minibatch, y_minibatch in zip(X_train_minibatches, y_train_minibatches):
        # Forward pass
        hidden_layer = relu(np.dot(X_minibatch, params['W1']) + params['b1'])
        output_layer = softmax(np.dot(hidden_layer, params['W2']) + params['b2'])

        # Backward pass
        output_error = output_layer - y_minibatch
        hidden_error = np.dot(output_error, params['W2'].T) * (hidden_layer > 0)

        output_grad_W = np.dot(hidden_layer.T, output_error)
        output_grad_b = np.sum(output_error, axis=0)

        hidden_grad_W = np.dot(X_minibatch.T, hidden_error)
        hidden_grad_b = np.sum(hidden_error, axis=0)

        # Update parameters
        gradients = {'W1': hidden_grad_W, 'b1': hidden_grad_b, 'W2': output_grad_W, 'b2': output_grad_b}
        gradient_clipping(gradients, threshold=threshold)
        velocities = {'W1': np.zeros_like(hidden_grad_W), 'b1': np.zeros_like(hidden_grad_b), 'W2': np.zeros_like(output_grad_W), 'b2': np.zeros_like(output_grad_b)}
        momentum_sgd(params, gradients=gradients, velocities=velocities, learning_rate=learning_rate, momentum=momentum)

    hidden_layer_test = relu(np.dot(X_test, params['W1']) + params['b1'])
    output_layer_test = softmax(np.dot(hidden_layer_test, params['W2']) + params['b2'])

    loss = cross_entropy(y_train, output_layer)
    test_loss = cross_entropy(y_test, output_layer_test)

    if epoch % 10 == 0:
       print("epoch: {}, train_loss: {:.3f}, test_loss: {:.3f}".format(epoch, loss , test_loss ))

epoch: 0, train_loss: 18.873, test_loss: 13.515
epoch: 10, train_loss: 16.056, test_loss: 10.824
epoch: 20, train_loss: 10.096, test_loss: 7.761
epoch: 30, train_loss: 4.861, test_loss: 4.844
epoch: 40, train_loss: 2.489, test_loss: 2.156
epoch: 50, train_loss: 1.817, test_loss: 1.272
epoch: 60, train_loss: 1.872, test_loss: 1.064
epoch: 70, train_loss: 1.999, test_loss: 0.919
epoch: 80, train_loss: 2.347, test_loss: 0.815
epoch: 90, train_loss: 2.665, test_loss: 0.730


In [18]:
# iris whith added layers
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split


def map_function(data, func):
    return [func(x) for x in data]

def create_minibatches(data, batch_size):
    minibatches = []
    for i in range(0, len(data), batch_size):
        minibatch = data[i:i+batch_size]
        minibatches.append(minibatch)
    return minibatches

def shuffle_data(X, y):
    indices = np.arange(X.shape[0])
    np.random.shuffle(indices)
    return X[indices], y[indices]

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def relu(x):
    return np.maximum(0, x)

def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

def mean_squared_error(y_true, y_pred):
    return np.mean((y_true - y_pred)**2)

def cross_entropy(y_true, y_pred):
    epsilon = 1e-10
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
    return -np.mean(y_true * np.log(y_pred))

def sgd(params, gradients, learning_rate):
    for key in params:
        param = np.asarray(params[key], dtype=np.float64)
        grad = np.asarray(gradients[key], dtype=np.float64)
        params[key] -= learning_rate * grad

def momentum_sgd(params, gradients, velocities, learning_rate, momentum):
    for param, grad, vel in zip(params, gradients, velocities):
        vel = momentum * vel + learning_rate * grad
        param -= vel

def gradient_clipping(gradients, threshold):
    for gradient in gradients:
        np.clip(gradient, -threshold, threshold, out=gradient)

# Load iris data
iris = load_iris()
X, y = iris.data, iris.target
y_one_hot = np.eye(3)[y]
X_train, X_test, y_train, y_test = train_test_split(X, y_one_hot, test_size=0.2, random_state=42)
X_train, y_train = shuffle_data(X_train, y_train)

# Create neural network
num_hidden_units = 10
num_classes = 3
input_size = X_train.shape[1]

params = {
    'W1': np.random.randn(input_size, num_hidden_units),
    'b1': np.zeros(num_hidden_units),
    'W2': np.random.randn(num_hidden_units, num_hidden_units),
    'b2': np.zeros(num_hidden_units),
    'W3': np.random.randn(num_hidden_units, num_classes),
    'b3': np.zeros(num_classes)
}

# Training loop
num_epochs = 100
learning_rate = 0.0001

for epoch in range(num_epochs):
    # Forward pass
    hidden_layer1 = relu(np.dot(X_train, params['W1']) + params['b1'])
    hidden_layer2 = relu(np.dot(hidden_layer1, params['W2']) + params['b2'])
    output_layer = softmax(np.dot(hidden_layer2, params['W3']) + params['b3'])
    hidden_layer_test1 = relu(np.dot(X_test, params['W1']) + params['b1'])
    hidden_layer_test2 = relu(np.dot(hidden_layer_test1, params['W2']) + params['b2'])
    output_layer_test = softmax(np.dot(hidden_layer_test2, params['W3']) + params['b3'])

    loss = cross_entropy(y_train, output_layer)
    test_loss = cross_entropy(y_test, output_layer_test)

    # Backward pass
    output_error = output_layer - y_train
    hidden_error2 = np.dot(output_error, params['W3'].T) * (hidden_layer2 > 0)
    hidden_error1 = np.dot(hidden_error2, params['W2'].T) * (hidden_layer1 > 0)

    output_grad_W = np.dot(hidden_layer2.T, output_error)
    output_grad_b = np.sum(output_error, axis=0)

    hidden_grad_W2 = np.dot(hidden_layer1.T, hidden_error2)
    hidden_grad_b2 = np.sum(hidden_error2, axis=0)

    hidden_grad_W1 = np.dot(X_train.T, hidden_error1)
    hidden_grad_b1 = np.sum(hidden_error1, axis=0)

    # Update parameters
    params['W1'] -= learning_rate * hidden_grad_W1
    params['b1'] -= learning_rate * hidden_grad_b1
    params['W2'] -= learning_rate * hidden_grad_W2
    params['b2'] -= learning_rate * hidden_grad_b2
    params['W3'] -= learning_rate * output_grad_W
    params['b3'] -= learning_rate * output_grad_b

    sgd(params, {'W1': hidden_grad_W1, 'b1': hidden_grad_b1, 'W2': hidden_grad_W2, 'b2': hidden_grad_b2, 'W3': output_grad_W, 'b3': output_grad_b}, learning_rate)

    if epoch % 10 == 0:
             print("epoch: {},  train_loss: {:.3f}, test_loss: {:.3f}".format(
                 epoch, loss , test_loss ))


epoch: 0,  train_loss: 4.063, test_loss: 3.876
epoch: 10,  train_loss: 0.327, test_loss: 0.331
epoch: 20,  train_loss: 0.314, test_loss: 0.289
epoch: 30,  train_loss: 0.199, test_loss: 0.187
epoch: 40,  train_loss: 0.210, test_loss: 0.196
epoch: 50,  train_loss: 0.172, test_loss: 0.162
epoch: 60,  train_loss: 0.147, test_loss: 0.141
epoch: 70,  train_loss: 0.130, test_loss: 0.127
epoch: 80,  train_loss: 0.117, test_loss: 0.116
epoch: 90,  train_loss: 0.108, test_loss: 0.108


In [104]:
# boston
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split


def map_function(data, func):
    return [func(x) for x in data]

def create_minibatches(data, batch_size):
    minibatches = []
    for i in range(0, len(data), batch_size):
        minibatch = data[i:i+batch_size]
        minibatches.append(minibatch)
    return minibatches

def shuffle_data(X, y):
    indices = np.arange(X.shape[0])
    np.random.shuffle(indices)
    return X[indices], y[indices]

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def relu(x):
    return np.maximum(0, x)

def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))  # для числовой стабильности
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

def mean_squared_error(y_true, y_pred):
    return np.mean((y_true - y_pred)**2)

def cross_entropy(y_true, y_pred):
    epsilon = 1e-10
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
    return -np.mean(np.sum(y_true[:y_pred.shape[0]] * np.log(y_pred), axis=1))

def sgd(params, gradients, learning_rate):
    for key in params:
        param = np.asarray(params[key], dtype=np.float64)
        grad = np.asarray(gradients[key], dtype=np.float64)
        params[key] -= learning_rate * grad

def momentum_sgd(params, gradients, velocities, learning_rate, momentum):
    for param_name, grad in gradients.items():
        vel = momentum * velocities[param_name] + learning_rate * grad
        params[param_name] -= vel
        velocities[param_name] = vel

def gradient_clipping(gradients, threshold):
    for name, gradient in gradients.items():
        gradients[name] = np.clip(gradient, -threshold, threshold)

# Load Boston dataset
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
target = raw_df.values[1::2, 2]


X, y = data, target
num_classes = 1
y_regression = y
y_regression = y.reshape(-1, 1)
X_train, X_test, y_train, y_test = train_test_split(X, y_regression, test_size=0.2, random_state=42)
X_train, y_train = shuffle_data(X_train, y_train)


# Create neural network
num_hidden_units = 64
input_size = X_train.shape[1]

params = {
    'W1': np.random.randn(input_size, num_hidden_units),
    'b1': np.zeros(num_hidden_units),
    'W2': np.random.randn(num_hidden_units, num_classes),
    'b2': np.zeros(num_classes)
}


# Training loop
num_epochs = 200000
learning_rate = 0.0000001


for epoch in range(num_epochs):
    # Forward pass
    hidden_layer = relu(np.dot(X_train, params['W1']) + params['b1'])
    output_layer = np.dot(hidden_layer, params['W2']) + params['b2']
    hidden_layer_test = relu(np.dot(X_test, params['W1']) + params['b1'])
    output_layer_test = np.dot(hidden_layer_test, params['W2']) + params['b2']

    loss = mean_squared_error(y_train, output_layer)
    test_loss = mean_squared_error(y_test, output_layer_test)

    # Backward pass
    output_error = output_layer - y_train
    hidden_error = np.dot(output_error, params['W2'].T)* (hidden_layer > 0)

    output_grad_W = np.dot(hidden_layer.T, output_error)
    output_grad_b = np.sum(output_error, axis=0)

    hidden_grad_W = np.dot(X_train.T, hidden_error)
    hidden_grad_b = np.sum(hidden_error, axis=0)


    # Update parameters
    params['W1'] -= learning_rate * hidden_grad_W
    params['b1'] -= learning_rate * hidden_grad_b
    params['W2'] -= learning_rate * output_grad_W
    params['b2'] -= learning_rate * output_grad_b

    sgd(params, {'W1': hidden_grad_W, 'b1': hidden_grad_b, 'W2': output_grad_W, 'b2': output_grad_b}, learning_rate)

    if epoch % 10000 == 0:
             print("epoch: {},  train_loss: {:.3f}, test_loss: {:.3f}".format(
                 epoch, loss , test_loss ))



epoch: 0,  train_loss: 19993402.570, test_loss: 21173128.479
epoch: 10000,  train_loss: 84052786.000, test_loss: 84028785.128
epoch: 20000,  train_loss: 16699585.269, test_loss: 16688880.705
epoch: 30000,  train_loss: 3317924.963, test_loss: 3313147.017
epoch: 40000,  train_loss: 659271.319, test_loss: 657135.070
epoch: 50000,  train_loss: 131052.919, test_loss: 130094.164
epoch: 60000,  train_loss: 26107.064, test_loss: 25673.158
epoch: 70000,  train_loss: 5256.536, test_loss: 5056.574
epoch: 80000,  train_loss: 1113.976, test_loss: 1018.291
epoch: 90000,  train_loss: 290.937, test_loss: 241.731
epoch: 100000,  train_loss: 127.417, test_loss: 98.928
epoch: 110000,  train_loss: 94.928, test_loss: 75.674
epoch: 120000,  train_loss: 88.474, test_loss: 73.336
epoch: 130000,  train_loss: 87.191, test_loss: 73.888
epoch: 140000,  train_loss: 86.937, test_loss: 74.451
epoch: 150000,  train_loss: 86.886, test_loss: 74.765
epoch: 160000,  train_loss: 86.876, test_loss: 74.917
epoch: 170000,  t

In [91]:
# MNIST
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_openml

def map_function(data, func):
    return [func(x) for x in data]

def create_minibatches(data, batch_size):
    minibatches = []
    for i in range(0, len(data), batch_size):
        minibatch = data[i:i+batch_size]
        minibatches.append(minibatch)
    return minibatches

def shuffle_data(X, y):
    indices = np.arange(X.shape[0])
    np.random.shuffle(indices)
    return X[indices], y[indices]

def relu(x):
    return np.maximum(0, x)

def sigmoid(x):
    x_clipped = np.clip(x, -500, 500)
    return 1 / (1 + np.exp(-x_clipped))

def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

def mean_squared_error(y_true, y_pred):
    return np.mean((y_true - y_pred)**2)

def cross_entropy(y_true, y_pred):
    epsilon = 1e-10
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
    return -np.mean(np.sum(y_true[:y_pred.shape[0]] * np.log(y_pred), axis=1))


def sgd(params, gradients, learning_rate):
    for key in params:
        param = np.asarray(params[key], dtype=np.float64)
        grad = np.asarray(gradients[key], dtype=np.float64)
        params[key] -= learning_rate * grad

def momentum_sgd(params, gradients, velocities, learning_rate, momentum):
    for param, grad, vel in zip(params, gradients, velocities):
        vel = momentum * vel + learning_rate * grad
        param -= vel

def gradient_clipping(gradients, threshold):
    for gradient in gradients:
        np.clip(gradient, -threshold, threshold, out=gradient)


# Load MNIST data
mnist = fetch_openml('mnist_784', parser='auto', version=1)
X, y = mnist.data, mnist.target.astype(int)
y_one_hot = np.eye(10)[y]
X_train, X_test, y_train, y_test = train_test_split(X, y_one_hot, test_size=0.2, random_state=42)
X_train = X_train.values
X_train, y_train = shuffle_data(X_train, y_train)


# Create neural network
num_hidden_units = 256
num_classes = 10
input_size = X_train.shape[1]

params = {
    'W1': np.random.randn(input_size, num_hidden_units),
    'b1': np.zeros(num_hidden_units),
    'W2': np.random.randn(num_hidden_units, num_classes),
    'b2': np.zeros(num_classes)
}

# Training loop
num_epochs = 200
learning_rate = 0.001

for epoch in range(num_epochs):
    # Forward pass
    hidden_layer = sigmoid(np.dot(X_train, params['W1']) + params['b1'])
    output_layer = softmax(np.dot(hidden_layer, params['W2']) + params['b2'])
    hidden_layer_test = sigmoid(np.dot(X_test, params['W1']) + params['b1'])
    output_layer_test = softmax(np.dot(hidden_layer_test, params['W2']) + params['b2'])

    loss = cross_entropy(y_train, output_layer)
    test_loss = cross_entropy(y_test, output_layer_test)


    # Backward pass
    output_error = output_layer - y_train
    hidden_error = np.dot(output_error, params['W2'].T) * (hidden_layer > 0)

    output_grad_W = np.dot(hidden_layer.T, output_error)
    output_grad_b = np.sum(output_error, axis=0)

    hidden_grad_W = np.dot(X_train.T, hidden_error)
    hidden_grad_b = np.sum(hidden_error, axis=0)

    # Update parameters
    params['W1'] -= learning_rate * hidden_grad_W
    params['b1'] -= learning_rate * hidden_grad_b
    params['W2'] -= learning_rate * output_grad_W
    params['b2'] -= learning_rate * output_grad_b

    sgd(params, {'W1': hidden_grad_W, 'b1': hidden_grad_b, 'W2': output_grad_W, 'b2': output_grad_b}, learning_rate)

    if epoch % 10 == 0:
        print("epoch: {},  train_loss: {:.3f}, test_loss: {:.3f}".format(epoch, loss , test_loss ))

epoch: 0,  train_loss: 14.346, test_loss: 14.415
epoch: 10,  train_loss: 18.746, test_loss: 18.652
epoch: 20,  train_loss: 17.030, test_loss: 16.950
epoch: 30,  train_loss: 13.423, test_loss: 13.326
epoch: 40,  train_loss: 13.677, test_loss: 13.724
epoch: 50,  train_loss: 6.818, test_loss: 6.846
epoch: 60,  train_loss: 5.581, test_loss: 5.555
epoch: 70,  train_loss: 4.402, test_loss: 4.419
epoch: 80,  train_loss: 4.186, test_loss: 4.193
epoch: 90,  train_loss: 4.960, test_loss: 4.941
epoch: 100,  train_loss: 4.259, test_loss: 4.277
epoch: 110,  train_loss: 3.936, test_loss: 3.964
epoch: 120,  train_loss: 3.227, test_loss: 3.207
epoch: 130,  train_loss: 2.885, test_loss: 2.888
epoch: 140,  train_loss: 2.591, test_loss: 2.658
epoch: 150,  train_loss: 3.237, test_loss: 3.315
epoch: 160,  train_loss: 3.100, test_loss: 3.137
epoch: 170,  train_loss: 2.926, test_loss: 2.959
epoch: 180,  train_loss: 2.983, test_loss: 2.994
epoch: 190,  train_loss: 2.899, test_loss: 2.961
