Создать нейронную сеть с нуля, т.е. не используя готовые библиотеки. Пример работы на любом табличном датасете. 
Сделать класс, в котором реализована возможность задать количество нейронов в скрытом слое и провести обучение.

In [138]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

Загружаем датасет с данными о заболеваниях сердца, делим на обучающую и тестовую выборку, в качестве целевой переменной беру Disease (0 - нет патологии, 1 - есть). Так же выполнила one-hot encoding и нормализацию

In [139]:
def load_dataset():
    df = pd.read_csv('heart_disease.csv')
    df_encoded = pd.get_dummies(df, drop_first=True)

    X = df_encoded.drop('Disease', axis=1).values
    y = df_encoded['Disease'].values.reshape(-1, 1)
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    print(f"\nРазмер обучающей выборки: {X_train.shape}")
    print(f"Размер тестовой выборки: {X_test.shape}")
    
    return X_train, X_test, y_train, y_test, df.columns.tolist()

In [140]:
# y = X·W + b
def linear_regression(X: np.ndarray, weights: np.ndarray, bias: float) -> np.ndarray:
    return np.dot(X, weights) + bias

# f(x) = 1 / (1 + exp(-x))
def activation_function(x: np.ndarray) -> np.ndarray:
    return 1 / (1 + np.exp(-x))

# производная
def activation_derivative(x: np.ndarray) -> np.ndarray:
    fx = activation_function(x)
    return fx * (1 - fx)


Определение нейросети

In [141]:
def softmax(x, axis=-1):
    exp_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)

In [142]:
class SimpleNeuron:
    def __init__(self, input_size: int, hidden_neurons: int = 0):
        self.input_size = input_size
        self.hidden_neurons = hidden_neurons
        scale = np.sqrt(2.0 / input_size)
        
        if hidden_neurons > 0:
            self.W1 = np.random.randn(input_size, hidden_neurons) * scale
            self.b1 = np.zeros((1, hidden_neurons))
            self.W2 = np.random.randn(hidden_neurons, 1) * np.sqrt(2.0 / hidden_neurons)
            self.b2 = 0.0
        else:
            self.weights = np.random.randn(input_size) * scale
            self.bias = 0.0
        
    def forward(self, X: np.ndarray) -> np.ndarray:
        self.X = X
        
        if self.hidden_neurons > 0:
            self.z1 = np.dot(X, self.W1) + self.b1
            self.a1 = activation_function(self.z1)
            
            self.z2 = np.dot(self.a1, self.W2) + self.b2
            self.output = activation_function(self.z2)
        else:
            self.linear_output = np.dot(X, self.weights) + self.bias
            self.output = activation_function(self.linear_output)
        
        if len(self.output.shape) == 1:
            self.output = self.output.reshape(-1, 1)
            
        return self.output
    
    def backward(self, y_true: np.ndarray, learning_rate: float = 0.01) -> float:
        if len(y_true.shape) == 1:
            y_true = y_true.reshape(-1, 1)

        m = self.X.shape[0]

        error = y_true - self.output
        
        if self.hidden_neurons > 0:
            delta2 = error * activation_derivative(self.z2)
            dW2 = np.dot(self.a1.T, delta2) / m
            db2 = np.mean(delta2, axis=0)

            delta1 = np.dot(delta2, self.W2.T) * activation_derivative(self.z1)
            dW1 = np.dot(self.X.T, delta1) / m
            db1 = np.mean(delta1, axis=0)

            self.W2 += learning_rate * dW2
            self.b2 += learning_rate * db2
            self.W1 += learning_rate * dW1
            self.b1 += learning_rate * db1.reshape(1, -1)
        else:

            delta = error * activation_derivative(self.linear_output)

            dW = np.zeros_like(self.weights)
            for i in range(m):
                dW += self.X[i] * delta[i, 0]
            dW /= m
            
            db = np.mean(delta)

            self.weights += learning_rate * dW
            self.bias += learning_rate * db

        return np.mean(error ** 2)
    
    def train(self, X: np.ndarray, y: np.ndarray, epochs: int = 1000, learning_rate: float = 0.01) -> list:
        loss_history = []
        
        for epoch in range(epochs):
            self.forward(X)
            loss = self.backward(y, learning_rate)
            loss_history.append(loss)
            
            if epoch % 100 == 0:
                print(f"Epoch {epoch}, loss: {loss:.6f}")
                
        return loss_history
    
    def predict(self, X: np.ndarray) -> np.ndarray:
        return self.forward(X)

In [143]:
def test_simple_neuron(X_train, X_test, y_train, y_test, hidden_neurons=0):

    print(f"{hidden_neurons} нейронов в скрытом слое\n")

    input_size = X_train.shape[1]
    neuron = SimpleNeuron(input_size=input_size, hidden_neurons=hidden_neurons)

    loss_history = neuron.train(X_train, y_train, epochs=1000, learning_rate=0.01)

    predictions = neuron.predict(X_test)
    predicted_classes = (predictions > 0.5).astype(int)

    accuracy = np.mean(predicted_classes == y_test)

    tp = np.sum((predicted_classes == 1) & (y_test == 1))
    tn = np.sum((predicted_classes == 0) & (y_test == 0))
    fp = np.sum((predicted_classes == 1) & (y_test == 0))
    fn = np.sum((predicted_classes == 0) & (y_test == 1))
    
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
    print(f"\nAccuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-score: {f1:.4f}\n")
    
    for i in range(10):
        print(f"Real {y_test[i][0]}")
        print(f"Predictions {predictions[i][0]:.4f}")
        print(f"Predicted {predicted_classes[i][0]}\n")
    
    return accuracy, precision, recall, f1

In [144]:
X_train, X_test, y_train, y_test, column_names = load_dataset()
simple_neuron_metrics = test_simple_neuron(X_train, X_test, y_train, y_test, 50)


Размер обучающей выборки: (216, 13)
Размер тестовой выборки: (54, 13)
50 нейронов в скрытом слое

Epoch 0, loss: 0.346677
Epoch 100, loss: 0.280854
Epoch 200, loss: 0.247715
Epoch 300, loss: 0.232044
Epoch 400, loss: 0.221455
Epoch 500, loss: 0.212656
Epoch 600, loss: 0.204915
Epoch 700, loss: 0.198016
Epoch 800, loss: 0.191844
Epoch 900, loss: 0.186310

Accuracy: 0.8519
Precision: 0.8824
Recall: 0.7143
F1-score: 0.7895

Real 1
Predictions 0.5084
Predicted 1

Real 1
Predictions 0.4971
Predicted 0

Real 0
Predictions 0.3578
Predicted 0

Real 0
Predictions 0.2734
Predicted 0

Real 0
Predictions 0.4322
Predicted 0

Real 1
Predictions 0.5060
Predicted 1

Real 0
Predictions 0.4216
Predicted 0

Real 0
Predictions 0.3312
Predicted 0

Real 0
Predictions 0.5260
Predicted 1

Real 0
Predictions 0.3955
Predicted 0



Реализовать GPT как в п.2 

In [145]:
def softmax(x, axis=-1):
    exp_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)

In [146]:
class Head:
    def __init__(self, input_size, head_size=None, dropout=0.0):
        self.input_size = input_size
        self.head_size = head_size if head_size is not None else input_size
        self.dropout = dropout

        scale = np.sqrt(2.0 / input_size)

        self.key_weights = np.random.randn(input_size, self.head_size) * scale
        self.query_weights = np.random.randn(input_size, self.head_size) * scale 
        self.value_weights = np.random.randn(input_size, self.head_size) * scale

        self.tril = np.tril(np.ones((input_size, input_size)))

        self.output_weights = np.random.randn(self.head_size, 1) * scale
        self.output_bias = 0.0
    
    def apply_dropout(self, x):
        if self.dropout > 0:
            mask = np.random.rand(*x.shape) > self.dropout
            return x * mask / (1 - self.dropout)
        return x
    
    def forward(self, x):
        self.X = x
        if len(x.shape) == 1:
            x = x.reshape(1, -1)
        
        batch_size = x.shape[0]
        k = np.dot(x, self.key_weights)
        q = np.dot(x, self.query_weights)
        v = np.dot(x, self.value_weights)

        wei = np.matmul(q, k.T) / np.sqrt(k.shape[1])

        wei = softmax(wei, axis=-1)

        self.attention_weights = self.apply_dropout(wei)

        self.attention_output = np.matmul(self.attention_weights, v)

        self.linear_output = np.dot(self.attention_output, self.output_weights) + self.output_bias
        self.output = activation_function(self.linear_output)

        if len(self.output.shape) == 1:
            self.output = self.output.reshape(-1, 1)
            
        return self.output
    
    def backward(self, y_true, learning_rate=0.01):
        if len(y_true.shape) == 1:
            y_true = y_true.reshape(-1, 1)
        
        batch_size = self.X.shape[0]

        error = y_true - self.output
        loss = np.mean(error ** 2)

        delta_output = error * activation_derivative(self.linear_output)

        dW_output = np.dot(self.attention_output.T, delta_output) / batch_size
        db_output = np.mean(delta_output)

        delta_attention = np.dot(delta_output, self.output_weights.T)

        dv = np.dot(self.attention_weights.T, delta_attention)
        dW_value = np.dot(self.X.T, dv) / batch_size

        dk_dq_factor = delta_attention.sum(axis=1, keepdims=True) / np.sqrt(self.head_size)
        dW_key = np.dot(self.X.T, dk_dq_factor) / batch_size
        dW_query = np.dot(self.X.T, dk_dq_factor) / batch_size
        
        self.output_weights += learning_rate * dW_output
        self.output_bias += learning_rate * db_output
        self.value_weights += learning_rate * dW_value
        self.key_weights += learning_rate * dW_key
        self.query_weights += learning_rate * dW_query
        
        return loss
    
    def train(self, X, y, epochs=1000, learning_rate=0.01):
        loss_history = []
        
        for epoch in range(epochs):
            self.forward(X)
            loss = self.backward(y, learning_rate)
            loss_history.append(loss)
            if epoch % 100 == 0:
                print(f"Epoch {epoch}, loss: {loss:.6f}")
                
        print(f"Epoch {epochs}, loss: {loss:.6f}")
        
        return loss_history
    
    def predict(self, X):
        return self.forward(X)
    
    def evaluate(self, X_test, y_test):
        predictions = self.predict(X_test)
        predicted_classes = (predictions > 0.5).astype(int)
        
        accuracy = np.mean(predicted_classes == y_test)
        
        tp = np.sum((predicted_classes == 1) & (y_test == 1))
        tn = np.sum((predicted_classes == 0) & (y_test == 0))
        fp = np.sum((predicted_classes == 1) & (y_test == 0))
        fn = np.sum((predicted_classes == 0) & (y_test == 1))
        
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0

        print(f"\nAccuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}\n")

        for i in range(10):
            print(f"Real {y_test[i][0]}")
            print(f"Predictions {predictions[i][0]:.4f}")
            print(f"Predicted {predicted_classes[i][0]}\n")
            
        return accuracy, precision, recall, f1

In [147]:
def test_simple_head_heart(X_train, X_test, y_train, y_test, head_size=None, dropout=0.0):
    input_size = X_train.shape[1]
    model = Head(input_size=input_size, head_size=head_size, dropout=dropout)
    loss_history = model.train(X_train, y_train, epochs=1000, learning_rate=0.01)
    metrics = model.evaluate(X_test, y_test)
    
    return metrics

In [148]:
head_size = X_train.shape[1] // 2
simple_head_metrics = test_simple_head_heart(X_train, X_test, y_train, y_test, head_size=head_size, dropout=0.1)

Epoch 0, loss: 0.348695
Epoch 100, loss: 0.316806
Epoch 200, loss: 0.261612
Epoch 300, loss: 0.225564
Epoch 400, loss: 0.203725
Epoch 500, loss: 0.187189
Epoch 600, loss: 0.176765
Epoch 700, loss: 0.168139
Epoch 800, loss: 0.163878
Epoch 900, loss: 0.160143
Epoch 1000, loss: 0.157642

Accuracy: 0.6852, Precision: 0.6667, Recall: 0.3810, F1: 0.4848

Real 1
Predictions 0.4467
Predicted 0

Real 1
Predictions 0.8549
Predicted 1

Real 0
Predictions 0.1879
Predicted 0

Real 0
Predictions 0.3854
Predicted 0

Real 0
Predictions 0.4727
Predicted 0

Real 1
Predictions 0.4989
Predicted 0

Real 0
Predictions 0.3272
Predicted 0

Real 0
Predictions 0.2779
Predicted 0

Real 0
Predictions 0.2113
Predicted 0

Real 0
Predictions 0.2837
Predicted 0

