In [45]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense, Dropout, MaxPool2D
from tensorflow.keras.utils import to_categorical

# Загрузка данных
train_df = pd.read_csv("D:\\notebooks\\sem2\\dl\\fmnist_train.csv")
test_df = pd.read_csv("D:\\notebooks\\sem2\\dl\\fmnist_test.csv")
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17040 entries, 0 to 17039
Columns: 786 entries, label to Id
dtypes: float64(424), int64(362)
memory usage: 102.2 MB


In [46]:
# Проверим есть ли пропуски в данных
train_df[train_df.isnull().any(axis=1)]

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784,Id
17039,5,0,0,0,0,0,0,0,0,0,...,,,,,,,,,,17039


In [47]:
test_df[test_df.isnull().any(axis=1)]

Unnamed: 0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,pixel10,...,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784,Id


In [48]:
# Так как только в одной строке есть пропуски, то удалим её
train_df = train_df.dropna()

In [None]:
# Отделим признаки и таргет
pixel_columns = [col for col in train_df.columns if col.startswith('pixel')]
X = train_df[pixel_columns].values / 255.0  # Нормализация
y = train_df['label'].values

# One-hot кодировка меток
y_cat = to_categorical(y, num_classes=10)

# Тестовая выборка и нормализация
X_test = test_df[pixel_columns].values / 255.0

# Преобразуем данные
X = X.reshape(-1, 28, 28, 1)
X_test = X_test.reshape(-1, 28, 28, 1)

# Разделение на train и val
X_train, X_val, y_train, y_val = train_test_split(X, y_cat, test_size=0.1, random_state=42)

# Создаём модель CNN
model = Sequential([
    Conv2D(32, kernel_size=3, padding='same', activation='relu', input_shape=(28, 28, 1)),
    MaxPool2D(),
    Conv2D(64, kernel_size=3, padding='same', activation='relu'),
    MaxPool2D(),
    Conv2D(128, kernel_size=3, padding='same', activation='relu'),
    MaxPool2D(),

    Flatten(),
    
    Dense(128, activation='relu'),
    Dropout(0.3),
    
    Dense(10, activation='softmax')
])

# Компиляция модели
model.compile(optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy']
)

# Обучение модели
model.fit(X_train,
        y_train,
        epochs=16,
        batch_size=128,
        validation_data=(X_val, y_val)
)

# Предикт на тестовых данных
predictions = model.predict(X_test)
predicted_labels = np.argmax(predictions, axis=1)

# Сохранение результатов
submission = pd.DataFrame({
    'Id': test_df['Id'],
    'label': predicted_labels
})

submission.to_csv('D:\\notebooks\\sem2\\dl\\submission.csv', index=False)


Epoch 1/16


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m120/120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 17ms/step - accuracy: 0.5146 - loss: 1.3036 - val_accuracy: 0.7793 - val_loss: 0.5801
Epoch 2/16
[1m120/120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.7819 - loss: 0.5646 - val_accuracy: 0.8386 - val_loss: 0.4518
Epoch 3/16
[1m120/120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 0.8286 - loss: 0.4677 - val_accuracy: 0.8486 - val_loss: 0.3975
Epoch 4/16
[1m120/120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.8512 - loss: 0.4061 - val_accuracy: 0.8609 - val_loss: 0.3618
Epoch 5/16
[1m120/120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.8680 - loss: 0.3526 - val_accuracy: 0.8685 - val_loss: 0.3464
Epoch 6/16
[1m120/120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 0.8645 - loss: 0.3559 - val_accuracy: 0.8709 - val_loss: 0.3358
Epoch 7/16
[1m120/120[0m [32m━