In [38]:
import tensorflow as tf
from tensorflow.keras import layers as L, models as M, backend as K
import pandas as pd
import numpy as np

In [59]:
def data_preporation(df):
    """чистим и делим данные"""
    df = df.dropna(how='any', axis=0) # удаляем строки с пустыми значениями
    x = df.drop(columns=['label', 'Id'])
    x = x.astype(np.float32) / 255.0 # нормализуем данные
    y = df['label']
    return x, y

In [60]:
X_train, y_train = data_preporation(pd.read_csv('data/fmnist_train.csv'))
X_train.shape

(17039, 784)

In [167]:
from keras import regularizers

In [None]:
# создание модели
K.clear_session()
model = M.Sequential([
    L.Reshape((28, 28, 1), input_shape=(784,)), # преобразуем вектор в изображение

    L.Conv2D(16, (3, 3), padding='same', activation='relu'), # сверточный слой
    L.BatchNormalization(), # см статью https://habr.com/ru/companies/mvideo/articles/782360/
    L.MaxPool2D(),

    L.Conv2D(32, (3, 3), padding='same', activation='relu'), # сверточный слой
    L.BatchNormalization(),
    L.MaxPool2D(),
    L.Dropout(0.25),

    L.Conv2D(64, (3, 3), padding='same', activation='relu'), # ещё один
    L.BatchNormalization(),
    L.MaxPool2D(),
    L.Dropout(0.25),

    L.Flatten(),
    L.Dense(256, activation='relu'),
    L.BatchNormalization(),
    L.Dropout(0.25),
    L.Dense(10, activation='softmax'), # 10 классов для классификатора
])
model.summary()

  super().__init__(**kwargs)


In [210]:
model.compile(
    loss='sparse_categorical_crossentropy',  # дабы не one-hot-ить данные
    optimizer='adam',
    metrics=['accuracy']  # выводим процент правильных ответов
)

In [211]:
history = model.fit(X_train, y_train, batch_size=128, epochs=20, validation_split=0.2, verbose=1)

Epoch 1/20
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.6358 - loss: 1.1053 - val_accuracy: 0.0951 - val_loss: 2.2449
Epoch 2/20
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8100 - loss: 0.5176 - val_accuracy: 0.3313 - val_loss: 2.2272
Epoch 3/20
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8403 - loss: 0.4313 - val_accuracy: 0.5138 - val_loss: 1.6771
Epoch 4/20
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8499 - loss: 0.4086 - val_accuracy: 0.6080 - val_loss: 1.1973
Epoch 5/20
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8638 - loss: 0.3628 - val_accuracy: 0.7999 - val_loss: 0.5489
Epoch 6/20
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8704 - loss: 0.3538 - val_accuracy: 0.8562 - val_loss: 0.3822
Epoch 7/20
[1m107/107[0m 

In [212]:
model.get_weights()

[array([[[[ 0.04200513, -0.20926356, -0.10827813, -0.06078159,
            0.05289129,  0.04841987,  0.10432784,  0.00056268,
           -0.22312224, -0.01229447,  0.07841818, -0.32395518,
           -0.08560474,  0.15947069,  0.14869222, -0.1287079 ]],
 
         [[-0.02623754,  0.05306683,  0.07316899, -0.00903538,
           -0.20800616,  0.10254064,  0.1825765 ,  0.11028069,
           -0.04721626,  0.01979554, -0.20501618, -0.09632609,
           -0.2377539 ,  0.11193553, -0.09141187, -0.19422305]],
 
         [[-0.02796357, -0.20012617,  0.05642617,  0.09631282,
           -0.24064918,  0.07273574, -0.06871526,  0.1710135 ,
            0.20194006, -0.04439515, -0.06379804, -0.02200256,
            0.03065098, -0.07178947, -0.01826782, -0.15703604]]],
 
 
        [[[ 0.15955096, -0.09797479, -0.15668806, -0.12965591,
            0.22088844,  0.01019499,  0.05767986,  0.01848581,
           -0.3364176 ,  0.00972977,  0.12380646, -0.23540835,
            0.00939365,  0.03703738,  0.

In [213]:
df = pd.read_csv('data/fmnist_test.csv')
X_test = df.drop(columns=['Id'])
X_test = X_test.astype(np.float32) / 255.0 # нормализуем данные
X_test

Unnamed: 0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,pixel10,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.035294,0.031373,0.000000,...,0.403922,0.341176,0.219608,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
1,0.0,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.133333,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
2,0.0,0.000000,0.000000,0.0,0.0,0.0,0.054902,0.207843,0.388235,0.066667,...,0.000000,0.000000,0.000000,0.000000,0.247059,0.207843,0.121569,0.000000,0.000000,0.0
3,0.0,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.631373,...,0.537255,0.494118,0.549020,0.000000,0.521569,0.878431,0.870588,0.219608,0.000000,0.0
4,0.0,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,0.0,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.145098,...,0.125490,0.090196,0.054902,0.078431,0.000000,0.000000,0.003922,0.000000,0.000000,0.0
9996,0.0,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.007843,0.203922,0.090196,0.109804,0.000000,0.000000,0.0
9997,0.0,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.686275,0.674510,0.674510,0.713726,0.780392,0.870588,0.164706,0.000000,0.003922,0.0
9998,0.0,0.003922,0.011765,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.003922,0.000000,0.000000,0.000000,0.0


In [214]:
predicted_classes = model.predict(X_test).argmax(axis=1)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


In [215]:
submission_df = pd.DataFrame({
    'Id': range(len(predicted_classes)),
    'label': predicted_classes
})
submission_df.to_csv('data/submission.csv', index=False)