In [None]:
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd

In [None]:
# Load the EMNIST "byclass" archives.
# np.load on an .npz file returns a lazy NpzFile that keeps the underlying
# archive open; arrays are only read when indexed. Reading them inside a
# `with` block guarantees the file handles are released afterwards.
with np.load("emnist-byclass-train.npz") as train_data:
    # Split into images and labels
    training_images = train_data["training_images"]
    training_labels = train_data["training_labels"]

with np.load("emnist-byclass-test.npz") as test_data:
    testing_images = test_data["testing_images"]

# Convert to PyTorch tensors: insert a channel axis at dim 1 and divide by 255.
# NOTE(review): assumes images are (N, H, W) with pixel values in [0, 255] and
# labels are a flat (N,) integer array — confirm against the .npz contents.
X_train = torch.tensor(training_images, dtype=torch.float32).unsqueeze(1) / 255.0
y_train = torch.tensor(training_labels, dtype=torch.long)

X_test = torch.tensor(testing_images, dtype=torch.float32).unsqueeze(1) / 255.0

# DataLoaders: training batches are shuffled; test batches keep the original
# order. The test loader wraps the bare tensor, so it yields image batches
# without labels.
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=64, shuffle=True)
test_loader = DataLoader(X_test, batch_size=64, shuffle=False)


In [None]:
import torch.nn as nn
import torch.nn.functional as F

class EMNISTModel(nn.Module):
    """Small CNN classifier: three conv + ReLU + max-pool stages, then two linear layers.

    The first linear layer expects 128 * 3 * 3 features per sample, which is
    what three 2x2 poolings produce from a 28x28 single-channel image
    (28 -> 14 -> 7 -> 3).

    Args:
        num_classes: Number of output logits. Defaults to 62, the value that
            was previously hard-coded for EMNIST.
    """

    def __init__(self, num_classes=62):
        super().__init__()
        # 3x3 convolutions with padding=1 keep the spatial size unchanged.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        # Shared 2x2 max-pool (stateless), applied after every conv stage.
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 3 * 3, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes).

        Args:
            x: Input tensor of shape (batch, 1, H, W).

        Raises:
            RuntimeError: If the spatial size of ``x`` does not reduce to the
                128 * 3 * 3 features expected by ``fc1``.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        # Flatten per sample and keep the batch axis fixed. The previous
        # `view(-1, 128 * 3 * 3)` let the batch axis absorb any size mismatch,
        # silently returning a different number of rows for wrongly sized inputs.
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        # No softmax here: the output is unnormalised logits.
        return self.fc2(x)


In [None]:
import torch.optim as optim

# Build the network, then the loss and optimizer that will train it.
model = EMNISTModel()
criterion = nn.CrossEntropyLoss()  # consumes the raw logits produced by the model
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

def train_model(model, train_loader, criterion, optimizer, epochs=10):
    """Train ``model`` for ``epochs`` passes over ``train_loader``.

    After every epoch the mean per-batch loss and the training accuracy are
    printed.

    Args:
        model: Module that maps an image batch to class logits.
        train_loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer over ``model``'s parameters.
        epochs: Number of passes over ``train_loader``.

    Returns:
        list[dict]: One entry per epoch with keys ``"epoch"`` (1-based),
        ``"loss"`` and ``"accuracy"``. Loss and accuracy are NaN for an epoch
        in which the loader yielded no batches.
    """
    history = []

    for epoch in range(epochs):
        # Explicitly enter training mode: an earlier evaluation pass may have
        # left the model in eval mode, which would silently disable layers
        # such as dropout or batch-norm updates.
        model.train()

        running_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0  # counted manually so loaders without __len__ also work

        for images, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
            num_batches += 1

        # Guard against an empty loader instead of raising ZeroDivisionError.
        epoch_loss = running_loss / num_batches if num_batches else float("nan")
        train_accuracy = correct / total if total else float("nan")
        print(f"Epoch {epoch+1}, Loss: {epoch_loss}, Accuracy: {train_accuracy}")

        history.append({"epoch": epoch + 1, "loss": epoch_loss, "accuracy": train_accuracy})

    return history


In [None]:
def predict_and_save(model, test_loader, filename="predictions.csv"):
    """Run ``model`` over ``test_loader`` and write the predicted classes to a CSV.

    The CSV has two columns: ``Id`` (0-based position of the sample in
    iteration order) and ``Category`` (index of the largest logit).

    Args:
        model: Module that maps an image batch to class logits. It is switched
            to eval mode and left in that mode.
        test_loader: Iterable yielding either image batches directly or
            tuples/lists whose first element is the image batch.
        filename: Path of the CSV file to write.
    """
    model.eval()  # switch the model to evaluation mode
    batch_predictions = []

    with torch.no_grad():
        for batch in test_loader:
            # Accept bare tensors as well as (images, ...) tuples, such as the
            # batches a TensorDataset-backed loader produces.
            images = batch[0] if isinstance(batch, (list, tuple)) else batch
            batch_predictions.append(model(images).argmax(dim=1).cpu())

    # Convert all predictions at once instead of calling .item() per sample.
    categories = torch.cat(batch_predictions).tolist() if batch_predictions else []

    # Ids come from a running count rather than `batch_index * batch_size + j`,
    # so they stay correct for any batch layout. Naming the columns explicitly
    # also keeps the header row when there are no predictions at all.
    predictions_df = pd.DataFrame({"Id": range(len(categories)), "Category": categories})
    predictions_df.to_csv(filename, index=False)


In [None]:
# Train for 15 epochs, then write the test-set predictions to the default CSV path.
train_model(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=15,
)
predict_and_save(model=model, test_loader=test_loader)