# Введение в нейронные сети

## Урок 2. Keras

### Практическое задание

1) Попробуйте обучить нейронную сеть на Keras (рассмотренную на уроке) на датасете MNIST с другими параметрами. Напишите в комментарии к уроку:
- Какого результата вы добились от нейросети?
- Что помогло вам улучшить её точность?

2) Поработайте с документацией Keras. Попробуйте найти полезные команды Keras, не разобранные на уроке.

### Решение

Изучение документации TensorFlow показывает наличие множества функций активации в библиотеке <code>tensorflow.nn</code>.

Попробуем применить несколько различных функций активации и сравнить качество с исходной моделью с урока. Также изменим метод оптимизации и количество нейронов и слоёв.

#### Исходный код с урока

##### Импорт библиотек

In [1]:
import numpy as np 
import pandas as pd

import mnist

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

from sklearn.metrics import classification_report

##### Загрузка и подготовка данных

In [2]:
# Load the MNIST train/test splits through the `mnist` package.
# The first run may be slow: the package has to download and cache the data.
train_images, train_labels = mnist.train_images(), mnist.train_labels()
test_images, test_labels = mnist.test_images(), mnist.test_labels()

# Normalize: dividing by 255 and subtracting 0.5 maps 0..255 pixel values
# onto the range [-0.5, 0.5].
train_images = train_images / 255 - 0.5
test_images = test_images / 255 - 0.5

# Sanity check of the training set dimensions:
# expected (60000, 28, 28) for images and (60000,) for labels.
print(train_images.shape, train_labels.shape, sep="\n")

(60000, 28, 28)
(60000,)


In [3]:
# Collapse every image into a single 784-element row so it can be fed
# to a Dense layer; -1 lets NumPy infer the number of samples.
train_images = np.reshape(train_images, (-1, 784))
test_images = np.reshape(test_images, (-1, 784))

# Expected shapes after flattening: (60000, 784) and (10000, 784).
print(train_images.shape, test_images.shape, sep="\n")

(60000, 784)
(10000, 784)


##### Построение модели

In [4]:
# Baseline network, assembled layer by layer: two hidden ReLU layers
# of 64 units on a 784-feature input, followed by a 10-unit sigmoid output.
# NOTE(review): softmax is the conventional output activation for
# multi-class training with categorical_crossentropy; sigmoid is kept here
# as-is - confirm against the lesson material before changing it.
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(784,)))
model.add(Dense(64, activation='relu'))
model.add(Dense(10, activation='sigmoid'))

In [5]:
# Configure the Keras model for training: categorical cross-entropy loss,
# the Adam optimizer, and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [6]:
%%time

# Train for 5 epochs with mini-batches of 32 samples. The integer labels are
# converted with to_categorical to match the categorical cross-entropy loss.
# The History object returned by fit() is the cell's displayed result.
model.fit(
    x=train_images,
    y=to_categorical(train_labels),
    batch_size=32,
    epochs=5,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Wall time: 5.45 s


<keras.callbacks.History at 0x237a19a7910>

##### Валидация модели

In [7]:
# Score the trained baseline on the test split. With the single 'accuracy'
# metric configured at compile time the result is [loss, accuracy].
model.evaluate(x=test_images, y=to_categorical(test_labels))



[0.13357968628406525, 0.958899974822998]

In [8]:
# Comparison table: one row per experiment, columns 'time' and 'Accuracy'.
df_metrics = pd.DataFrame()

# evaluate() returns [loss, accuracy]; only the accuracy goes into the table.
baseline_scores = model.evaluate(test_images, to_categorical(test_labels))

# NOTE(review): the time is a hand-entered string, not a measurement taken
# here, so it can drift from what %%time printed for the training cell.
df_metrics.loc['BaseLine', 'time'] = '5.27 s'
df_metrics.loc['BaseLine', 'Accuracy'] = baseline_scores[1]

df_metrics



Unnamed: 0,time,Accuracy
BaseLine,5.27 s,0.9589


In [9]:
%%time

# Predicted class = index of the highest-scoring output unit for each image.
predictions = np.argmax(model.predict(test_images), axis=1)

Wall time: 227 ms


In [10]:
# Per-class precision / recall / F1 of the baseline predictions on the test set.
report = classification_report(y_true=test_labels, y_pred=predictions)
print(report)

              precision    recall  f1-score   support

           0       0.99      0.98      0.99       980
           1       0.99      0.98      0.99      1135
           2       0.97      0.96      0.97      1032
           3       0.95      0.95      0.95      1010
           4       0.99      0.89      0.94       982
           5       0.91      0.99      0.95       892
           6       0.98      0.96      0.97       958
           7       0.97      0.94      0.96      1028
           8       0.94      0.98      0.96       974
           9       0.90      0.96      0.93      1009

    accuracy                           0.96     10000
   macro avg       0.96      0.96      0.96     10000
weighted avg       0.96      0.96      0.96     10000



#### Функция активации "tanh"

In [11]:
from tensorflow.nn import tanh

In [12]:
%%time

# Experiment: same 64-64-10 architecture as the baseline, but with tanh
# as the activation of both hidden layers. Loss, optimizer, epochs and
# batch size are left untouched so that only the activation differs.
model_2 = Sequential()
model_2.add(Dense(64, activation=tanh, input_shape=(784,)))
model_2.add(Dense(64, activation=tanh))
model_2.add(Dense(10, activation='sigmoid'))

model_2.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# The History object returned by fit() is the cell's displayed result.
model_2.fit(
    x=train_images,
    y=to_categorical(train_labels),
    batch_size=32,
    epochs=5,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Wall time: 5.26 s


<keras.callbacks.History at 0x237a7e1dd60>

In [13]:
# Add the tanh experiment to the comparison table.
# evaluate() returns [loss, accuracy]; only the accuracy is stored.
tanh_scores = model_2.evaluate(test_images, to_categorical(test_labels))

# NOTE(review): the time is a hand-entered string, not measured in this cell.
df_metrics.loc['tanh', 'time'] = '5.32 s'
df_metrics.loc['tanh', 'Accuracy'] = tanh_scores[1]

df_metrics



Unnamed: 0,time,Accuracy
BaseLine,5.27 s,0.9589
tanh,5.32 s,0.961


In [14]:
# Class predictions of the tanh model (argmax over the output units),
# followed by the per-class precision / recall / F1 report.
predictions = np.argmax(model_2.predict(test_images), axis=1)
report = classification_report(y_true=test_labels, y_pred=predictions)
print(report)

              precision    recall  f1-score   support

           0       0.98      0.98      0.98       980
           1       0.99      0.98      0.99      1135
           2       0.98      0.92      0.95      1032
           3       0.93      0.98      0.95      1010
           4       0.96      0.97      0.96       982
           5       0.94      0.96      0.95       892
           6       0.97      0.98      0.97       958
           7       0.95      0.97      0.96      1028
           8       0.96      0.93      0.94       974
           9       0.95      0.95      0.95      1009

    accuracy                           0.96     10000
   macro avg       0.96      0.96      0.96     10000
weighted avg       0.96      0.96      0.96     10000



#### Функция активации "sigmoid"

In [15]:
from tensorflow.nn import sigmoid

In [16]:
%%time

# Experiment: same 64-64-10 architecture as the baseline, but with sigmoid
# as the activation of both hidden layers. Loss, optimizer, epochs and
# batch size are left untouched so that only the activation differs.
model_3 = Sequential()
model_3.add(Dense(64, activation=sigmoid, input_shape=(784,)))
model_3.add(Dense(64, activation=sigmoid))
model_3.add(Dense(10, activation='sigmoid'))

model_3.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# The History object returned by fit() is the cell's displayed result.
model_3.fit(
    x=train_images,
    y=to_categorical(train_labels),
    batch_size=32,
    epochs=5,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Wall time: 5.28 s


<keras.callbacks.History at 0x237a804c250>

In [17]:
# Add the sigmoid experiment to the comparison table.
# evaluate() returns [loss, accuracy]; only the accuracy is stored.
sigmoid_scores = model_3.evaluate(test_images, to_categorical(test_labels))

# NOTE(review): the time is a hand-entered string, not measured in this cell.
df_metrics.loc['sigmoid', 'time'] = '5.32 s'
df_metrics.loc['sigmoid', 'Accuracy'] = sigmoid_scores[1]

df_metrics



Unnamed: 0,time,Accuracy
BaseLine,5.27 s,0.9589
tanh,5.32 s,0.961
sigmoid,5.32 s,0.9629


In [18]:
# Class predictions of the sigmoid model (argmax over the output units),
# followed by the per-class precision / recall / F1 report.
predictions = np.argmax(model_3.predict(test_images), axis=1)
report = classification_report(y_true=test_labels, y_pred=predictions)
print(report)

              precision    recall  f1-score   support

           0       0.96      0.98      0.97       980
           1       0.97      0.99      0.98      1135
           2       0.98      0.95      0.96      1032
           3       0.97      0.96      0.96      1010
           4       0.97      0.94      0.96       982
           5       0.96      0.96      0.96       892
           6       0.96      0.97      0.96       958
           7       0.96      0.96      0.96      1028
           8       0.95      0.97      0.96       974
           9       0.94      0.96      0.95      1009

    accuracy                           0.96     10000
   macro avg       0.96      0.96      0.96     10000
weighted avg       0.96      0.96      0.96     10000



#### Оптимизатор "SGD"

In [19]:
%%time

# Experiment: the baseline 64-64-10 ReLU architecture trained with the
# 'SGD' optimizer instead of 'adam'. Everything else matches the baseline.
model_4 = Sequential()
model_4.add(Dense(64, activation='relu', input_shape=(784,)))
model_4.add(Dense(64, activation='relu'))
model_4.add(Dense(10, activation='sigmoid'))

model_4.compile(
    optimizer='SGD',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# The History object returned by fit() is the cell's displayed result.
model_4.fit(
    x=train_images,
    y=to_categorical(train_labels),
    batch_size=32,
    epochs=5,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Wall time: 5.06 s


<keras.callbacks.History at 0x237a80c6580>

In [20]:
# Add the SGD experiment to the comparison table.
# evaluate() returns [loss, accuracy]; only the accuracy is stored.
sgd_scores = model_4.evaluate(test_images, to_categorical(test_labels))

# NOTE(review): the time is a hand-entered string, not measured in this cell.
df_metrics.loc['SGD', 'time'] = '4.97 s'
df_metrics.loc['SGD', 'Accuracy'] = sgd_scores[1]

df_metrics



Unnamed: 0,time,Accuracy
BaseLine,5.27 s,0.9589
tanh,5.32 s,0.961
sigmoid,5.32 s,0.9629
SGD,4.97 s,0.9413


In [21]:
# Class predictions of the SGD-trained model (argmax over the output units),
# followed by the per-class precision / recall / F1 report.
predictions = np.argmax(model_4.predict(test_images), axis=1)
report = classification_report(y_true=test_labels, y_pred=predictions)
print(report)

              precision    recall  f1-score   support

           0       0.96      0.98      0.97       980
           1       0.96      0.98      0.97      1135
           2       0.94      0.93      0.94      1032
           3       0.94      0.93      0.94      1010
           4       0.93      0.95      0.94       982
           5       0.93      0.92      0.93       892
           6       0.93      0.96      0.95       958
           7       0.94      0.94      0.94      1028
           8       0.93      0.90      0.92       974
           9       0.93      0.91      0.92      1009

    accuracy                           0.94     10000
   macro avg       0.94      0.94      0.94     10000
weighted avg       0.94      0.94      0.94     10000



#### Сокращение нейронов и увеличение слоёв

In [22]:
%%time

# Experiment: a deeper, narrower network - eight hidden ReLU layers of
# 16 units each (8 * 16 = 128 hidden units, the same total as 2 * 64 in the
# baseline), followed by the 10-unit sigmoid output.
model_5 = Sequential([
    # The first hidden layer also declares the 784-feature input.
    Dense(16, activation='relu', input_shape=(784,)),
    # The remaining seven hidden layers are identical.
    *[Dense(16, activation='relu') for _ in range(7)],
    Dense(10, activation='sigmoid'),
])

model_5.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# The History object returned by fit() is the cell's displayed result.
model_5.fit(
    x=train_images,
    y=to_categorical(train_labels),
    batch_size=32,
    epochs=5,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Wall time: 6.12 s


<keras.callbacks.History at 0x237a8099040>

In [23]:
# Add the deeper/narrower network (row label 'reshape') to the comparison table.
# evaluate() returns [loss, accuracy]; only the accuracy is stored.
reshape_scores = model_5.evaluate(test_images, to_categorical(test_labels))

# NOTE(review): the time is a hand-entered string, not measured in this cell.
df_metrics.loc['reshape', 'time'] = '6.18 s'
df_metrics.loc['reshape', 'Accuracy'] = reshape_scores[1]

df_metrics



Unnamed: 0,time,Accuracy
BaseLine,5.27 s,0.9589
tanh,5.32 s,0.961
sigmoid,5.32 s,0.9629
SGD,4.97 s,0.9413
reshape,6.18 s,0.9078


In [24]:
# Class predictions of the 8x16 model (argmax over the output units),
# followed by the per-class precision / recall / F1 report.
predictions = np.argmax(model_5.predict(test_images), axis=1)
report = classification_report(y_true=test_labels, y_pred=predictions)
print(report)

              precision    recall  f1-score   support

           0       0.93      0.97      0.95       980
           1       0.90      0.98      0.94      1135
           2       0.95      0.88      0.91      1032
           3       0.96      0.84      0.90      1010
           4       0.92      0.94      0.93       982
           5       0.86      0.80      0.83       892
           6       0.92      0.96      0.94       958
           7       0.90      0.95      0.93      1028
           8       0.84      0.84      0.84       974
           9       0.90      0.89      0.90      1009

    accuracy                           0.91     10000
   macro avg       0.91      0.91      0.91     10000
weighted avg       0.91      0.91      0.91     10000



#### Выводы

In [25]:
# Display the final comparison table: one row per experiment with its
# recorded training time and test-set accuracy.
df_metrics

Unnamed: 0,time,Accuracy
BaseLine,5.27 s,0.9589
tanh,5.32 s,0.961
sigmoid,5.32 s,0.9629
SGD,4.97 s,0.9413
reshape,6.18 s,0.9078


Как показывают полученные метрики, изменение как функции активации, так и метода оптимизации оказывает влияние на скорость и качество модели. Более простой метод оптимизации <code>SGD</code> работает быстрее, но при этом уступает в качестве более сложным и медленным аналогам.

Также важна структура нейронной сети. Расположение одинакового количества нейронов в разных количествах слоёв даёт различные результаты. Два слоя по 64 нейрона справились с задачей лучше, чем восемь слоёв по 16 нейронов при одинаковом суммарном количестве нейронов в обоих случаях.