# Первая нейронная сеть

In [1]:
import os
# Set TensorFlow's C++ log-level variable; presumably done in the first cell so it is in place
# before TensorFlow is imported further down.
os.environ.update(TF_CPP_MIN_LOG_LEVEL='2')

In [2]:
import sys
# Show which Python interpreter the kernel runs on.
# NOTE(review): the braces build a one-element set around the path, so the cell displays a set
# rather than a plain string.
{sys.executable}

{'/home/andrew/dl_studying/venv/bin/python'}

In [3]:
# One-time setup, kept disabled: install/upgrade scikit-learn in the kernel's own environment.
# %pip install -U scikit-learn

## Нейронная сеть, состоящая из одного нейрона

In [42]:
from keras.layers import Dense, Input
from keras.models import Sequential
from keras import ops

In [57]:
# Single-neuron network: one Dense unit with ReLU activation on a scalar input.
# The input shape is declared with an explicit Input object; passing `input_shape=` to the
# first layer is the older spelling that Keras 3 warns about.
model = Sequential([
    Input(shape=(1,)),
    Dense(1, activation='relu', name='layer1')
])


In [58]:
# Print the layer-by-layer summary of the model.
model.summary()

In [59]:
# List the layer objects the model holds.
model.layers

[<Dense name=layer1, built=True>]

In [60]:
# Inspect the trainable parameters (weights) of the model.
model.get_weights()

[array([[1.6179944]], dtype=float32), array([0.], dtype=float32)]

При этом инициализация весов каждый раз разная. Для фиксации значений весов используем библиотеку TensorFlow:

In [8]:
import keras
import tensorflow as tf

# Pin the weight initialisation. keras.utils.set_random_seed seeds Python's `random`, NumPy and
# the backend in one call; tf.random.set_seed on its own left the initial weights changing
# from run to run.
keras.utils.set_random_seed(1)

# Same single-neuron network as above, with the input shape given by an explicit Input object.
model = Sequential([
    Input(shape=(1,)),
    Dense(1, activation='relu')
])

model.get_weights()

[array([[-1.4028273]], dtype=float32), array([0.], dtype=float32)]

!!! Странно, но ничего не зафиксировалось... Почему?

## Нейрон для умножения на 3

Создаем обучающие данные:

In [9]:
import numpy as np

# Training set for "multiply by 3": seven inputs as a (7, 1) column and their tripled targets.
X = np.array([1, 3, 2, 10, 4, 7, 8]).reshape(-1, 1)
y = 3 * X

In [49]:
# Confirm the array shapes; for the seven-sample data defined above both are (7, 1).
X.shape, y.shape

((7, 1), (7, 1))

### Какая архитектура?

In [10]:
from keras.layers import Dense, Input
from keras.models import Sequential

# One linear neuron on a scalar input. The input shape is declared with an explicit Input
# object; passing `input_shape=` to the first layer is the older spelling that Keras 3 warns about.
model = Sequential([
    Input(shape=(1,)),
    Dense(1, activation='linear')
])

model.summary()

In [11]:
# Keep the untrained parameters for later comparison. get_weights() returns two arrays here,
# presumably the kernel (w1) and the bias (w0).
w1, w0 = model.get_weights()
w1, w0

(array([[0.54572666]], dtype=float32), array([0.], dtype=float32))

Сделаем предсказание этой моделью на одном объекте:

In [12]:
# First training sample, sliced so it stays two-dimensional.
X[:1]

array([[1]])

In [13]:
# Prediction of the untrained model for that single sample.
model.predict(X[:1])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step


array([[0.54572666]], dtype=float32)

In [14]:
# Reproduce the prediction by hand from the stored weights: w1 * x + w0.
w1 * X[:1] + w0

array([[0.54572666]])

In [15]:
from keras.activations import linear
# Pass the hand-computed value through the layer's activation; the displayed result equals the input.
linear(w1 * X[:1] + w0)

array([[0.54572666]])

### Что оптимизируем?

В данном случае оптимизируем функцию потерь для задачи регрессии, к примеру MSE.

### Как оптимизируем?

Выбираем способ изменения весов

In [16]:
# Configure training: SGD optimizer, mean-squared-error loss, MAE reported as an extra metric.
model.compile(optimizer='sgd', loss='mse', metrics=['mae'])

Запускаем обучение

In [17]:
%%time
# Train for 100 epochs on the training arrays; the cell magic above reports the elapsed time.
model.fit(X, y, epochs=100)

Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 241ms/step - loss: 209.1000 - mae: 12.2714
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 17.7615 - mae: 3.5061
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - loss: 1.5366 - mae: 0.9717
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 0.1604 - mae: 0.3109
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 0.0434 - mae: 0.1916
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - loss: 0.0332 - mae: 0.1648
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 0.0320 - mae: 0.1564
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 0.0316 - mae: 0.1533
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss:

<keras.src.callbacks.history.History at 0x7f8b4b595e90>

Проверим, как работает модель:

In [18]:
# Try the trained neuron on two values that are not in the training set (one of them negative).
user_input1, user_input2 = 5, -9
new_samples = np.array([[user_input1], [user_input2]])
print(f"Проверка на новых данных: {user_input1} {user_input2}")
print("Предсказание нейронной сети: ")
print(model.predict(new_samples))

Проверка на новых данных: 5 -9
Предсказание нейронной сети: 
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[[ 15.0552  ]
 [-26.535477]]


Сравним веса до и после обучения:

In [19]:
# Read the trained parameters and print each one next to the value saved before training.
nw1, nw0 = model.get_weights()
for label, before, after in (('w1', w1, nw1), ('w0', w0, nw0)):
    print(f'{label} before', before, f'{label} after', after)

w1 before [[0.54572666]] w1 after [[2.9707627]]
w0 before [0.] w0 after [0.2013867]


Проверим на всех обучающих данных:

In [20]:
import pandas as pd

# Table of targets and model outputs for every training sample, both flattened to 1-D columns.
mul_pred = model.predict(X)
pd.DataFrame({
    'true': y.ravel(),
    'pred': mul_pred.ravel()
})

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step


Unnamed: 0,true,pred
0,3,3.172149
1,9,9.113675
2,6,6.142912
3,30,29.909014
4,12,12.084437
5,21,20.996727
6,24,23.967489


## Сеть для сложения чисел

In [21]:
# Draw both operands (50 integers each, from 1 to 9 inclusive) from a seeded generator so the
# data set is identical on every run.
rng = np.random.default_rng(42)
X1 = rng.integers(1, 10, size=50)
X2 = rng.integers(1, 10, size=50)

# Target: element-wise sum of the two operands.
y = X1 + X2

In [22]:
# Put the two operand vectors side by side: one row per sample, one column per operand.
X = np.column_stack((X1, X2))
X

array([[3, 8],
       [4, 1],
       [1, 8],
       [4, 6],
       [2, 3],
       [9, 9],
       [3, 5],
       [3, 6],
       [3, 3],
       [5, 2],
       [5, 2],
       [4, 9],
       [4, 3],
       [4, 8],
       [3, 4],
       [9, 3],
       [4, 3],
       [8, 4],
       [6, 1],
       [4, 1],
       [8, 9],
       [7, 3],
       [2, 5],
       [3, 8],
       [6, 9],
       [9, 2],
       [6, 1],
       [1, 3],
       [1, 5],
       [7, 9],
       [1, 6],
       [3, 6],
       [2, 5],
       [9, 3],
       [5, 4],
       [4, 7],
       [7, 9],
       [6, 5],
       [6, 9],
       [2, 4],
       [5, 4],
       [6, 1],
       [1, 3],
       [4, 2],
       [4, 8],
       [5, 9],
       [7, 4],
       [2, 2],
       [4, 3],
       [5, 5]])

In [23]:
# Turn the target into an (n_samples, 1) column. reshape(-1, 1) gives the same result when the
# cell is re-run, whereas `y[None]` followed by `.T` adds another axis on every execution.
y = y.reshape(-1, 1)
y

array([[11],
       [ 5],
       [ 9],
       [10],
       [ 5],
       [18],
       [ 8],
       [ 9],
       [ 6],
       [ 7],
       [ 7],
       [13],
       [ 7],
       [12],
       [ 7],
       [12],
       [ 7],
       [12],
       [ 7],
       [ 5],
       [17],
       [10],
       [ 7],
       [11],
       [15],
       [11],
       [ 7],
       [ 4],
       [ 6],
       [16],
       [ 7],
       [ 9],
       [ 7],
       [12],
       [ 9],
       [11],
       [16],
       [11],
       [15],
       [ 6],
       [ 9],
       [ 7],
       [ 4],
       [ 6],
       [12],
       [14],
       [11],
       [ 4],
       [ 7],
       [10]])

Лучше сделать масштабирование данных:

In [24]:
from sklearn.preprocessing import MinMaxScaler

In [25]:
# Fit a min-max scaler on the training inputs, then rescale those same inputs with it.
mms = MinMaxScaler().fit(X)
X_norm = mms.transform(X)

### Архитектура сети

In [26]:
import keras
from keras.layers import Dense, Input
from keras.models import Sequential

# Seed Python's `random`, NumPy and the backend together so the initial weights are
# reproducible; tf.random.set_seed alone does not pin the Keras weight initialisers.
keras.utils.set_random_seed(9)

# Two inputs -> hidden layer of 3 linear units -> one linear output. The input shape is given
# by an explicit Input object rather than the `input_shape=` argument that Keras 3 warns about.
model = Sequential([
    Input(shape=(2,)),
    Dense(3, activation='linear'),
    Dense(1, activation='linear')
])

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [27]:
# Initial parameters of both Dense layers, before training.
model.get_weights()

[array([[ 0.8895602 ,  0.7155596 ,  0.01073325],
        [ 0.4276173 , -0.01703227,  0.70407724]], dtype=float32),
 array([0., 0., 0.], dtype=float32),
 array([[-0.54548126],
        [-0.6874333 ],
        [-1.1379234 ]], dtype=float32),
 array([0.], dtype=float32)]

### Что оптимизируем?

Оптимизируем функцию потерь MSE

### Как оптимизируем?

In [28]:
# Same training setup as for the single neuron: SGD optimizer, MSE loss, MAE as a metric.
model.compile(optimizer='sgd', loss='mse', metrics=['mae'])

In [29]:
%%time
# Train for 200 epochs on the scaled inputs; the targets are left unscaled.
model.fit(X_norm, y, epochs=200)

Epoch 1/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 117.1505 - mae: 10.2563  
Epoch 2/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 87.9172 - mae: 8.7637  
Epoch 3/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 66.7625 - mae: 7.4987 
Epoch 4/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 45.3604 - mae: 5.9483 
Epoch 5/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 26.0879 - mae: 4.2198 
Epoch 6/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 13.9464 - mae: 2.9062 
Epoch 7/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 8.9077 - mae: 2.4278  
Epoch 8/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 7.2584 - mae: 2.2295 
Epoch 9/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - 

<keras.src.callbacks.history.History at 0x7f8b30fa7d90>

### Проверка модели на новых данных

In [30]:
# Two sample pairs, scaled with the already-fitted scaler before being passed to the network.
test_raw = [[4, 2], [6, 2]]
test_X = mms.transform(test_raw)
print("Предсказание нейронной сети: ")
print(model.predict(np.array(test_X)))

Предсказание нейронной сети: 
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[[6.001256]
 [7.999463]]


Проверим на всех обучающих данных:

In [31]:
import pandas as pd

In [32]:
# First ten training rows: raw operands, the true sum and the network's prediction.
sum_pred = model.predict(X_norm)
pd.DataFrame({
    'x1': X[:, 0],
    'x2': X[:, 1],
    'true': y.ravel(),
    'pred': sum_pred.ravel()
}).head(10)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


Unnamed: 0,x1,x2,true,pred
0,3,8,11,11.001144
1,4,1,5,5.001424
2,1,8,9,9.002936
3,4,6,10,10.000584
4,2,3,5,5.00288
5,9,9,18,17.995598
6,3,5,8,8.001648
7,3,6,9,9.001479
8,3,3,6,6.001984
9,5,2,7,7.00036
