<a href="https://colab.research.google.com/github/Slavex1809/Slavex1809/blob/main/%D0%9A%D0%BE%D0%BF%D0%B8%D1%8F_%D0%B1%D0%BB%D0%BE%D0%BA%D0%BD%D0%BE%D1%82%D0%B0_%22%D0%9F%D0%BE%D1%81%D1%82%D1%80%D0%BE%D0%B5%D0%BD%D0%B8%D0%B5_%D0%BA%D0%BB%D0%B0%D1%81%D1%81%D0%B8%D1%84%D0%B8%D0%BA%D0%B0%D1%82%D0%BE%D1%80%D0%B0_ipynb%22.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.optim.lr_scheduler import ReduceLROnPlateau
import matplotlib.pyplot as plt

In [None]:
# Загрузка данных
train_df = pd.read_csv('fmnist_train.csv')
test_df = pd.read_csv('fmnist_test.csv')

print("train_df.shape:", train_df.shape)
print("test_df.shape:", test_df.shape)

# Удаление лишних столбцов (например, Id, и т.д.)
def clean_dataframe(df):
    # Определяем ожидаемые пиксельные колонки
    expected_pixel_columns = [f'pixel{i}' for i in range(784)]

    # Если нет pixel0, но есть pixel1 — переименовываем
    if 'pixel0' not in df.columns and 'pixel1' in df.columns:
        df.rename(columns={f'pixel{i+1}': f'pixel{i}' for i in range(784)}, inplace=True)

    # Удаляем лишние столбцы
    extra_cols = df.columns.difference(expected_pixel_columns + (['label'] if 'label' in df.columns else []))
    if not extra_cols.empty:
        print(f"Удаляем лишние столбцы: {list(extra_cols)}")
        df.drop(columns=extra_cols, inplace=True)

    # Проверяем структуру
    if 'label' in df.columns:
        assert df.shape[1] == 785, f"Ошибка: {df.shape[1]} колонок вместо 785"
    else:
        assert df.shape[1] == 784, f"Ошибка: {df.shape[1]} колонок вместо 784"

    return df

# Чистка данных
train_df = clean_dataframe(train_df)
test_df = clean_dataframe(test_df)

train_df.shape: (17040, 786)
test_df.shape: (10000, 785)
Удаляем лишние столбцы: ['Id']
Удаляем лишние столбцы: ['Id']


In [None]:
# Разделение на признаки и метки
X_train = train_df.drop('label', axis=1).values
y_train = train_df['label'].values

# Рашейп и нормализация
X_train = X_train.reshape(-1, 28, 28).astype(np.float32) / 255.0
X_train = (X_train - 0.5) / 0.5
X_train = X_train.reshape(-1, 1, 28, 28)

# Разделение на train/val
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Конвертация в тензоры
train_tensor = TensorDataset(torch.tensor(X_train), torch.tensor(y_train))
val_tensor = TensorDataset(torch.tensor(X_val), torch.tensor(y_val))

batch_size = 128
train_loader = DataLoader(train_tensor, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_tensor, batch_size=batch_size)

In [None]:
class FashionCNN(nn.Module):
    def __init__(self):
        super(FashionCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Dropout2d(0.25)
        )

        self.classifier = nn.Sequential(
            nn.Linear(128 * 3 * 3, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 10)
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = FashionCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = ReduceLROnPlateau(optimizer, mode='max', patience=3, factor=0.5)

def calculate_accuracy(loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total

# Обучение
epochs = 20
best_acc = 0.0

for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    val_acc = calculate_accuracy(val_loader)
    scheduler.step(val_acc)

    print(f'Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}, Val Acc: {val_acc:.4f}')

    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), 'best_model.pth')

print(f'Лучшая точность на валидации: {best_acc:.4f}')

Epoch 1/20, Loss: nan, Val Acc: 0.0951
Epoch 2/20, Loss: nan, Val Acc: 0.0951
Epoch 3/20, Loss: nan, Val Acc: 0.0951
Epoch 4/20, Loss: nan, Val Acc: 0.0951
Epoch 5/20, Loss: nan, Val Acc: 0.0951
Epoch 6/20, Loss: nan, Val Acc: 0.0951
Epoch 7/20, Loss: nan, Val Acc: 0.0951
Epoch 8/20, Loss: nan, Val Acc: 0.0951
Epoch 9/20, Loss: nan, Val Acc: 0.0951
Epoch 10/20, Loss: nan, Val Acc: 0.0951
Epoch 11/20, Loss: nan, Val Acc: 0.0951
Epoch 12/20, Loss: nan, Val Acc: 0.0951
Epoch 13/20, Loss: nan, Val Acc: 0.0951
Epoch 14/20, Loss: nan, Val Acc: 0.0951
Epoch 15/20, Loss: nan, Val Acc: 0.0951
Epoch 16/20, Loss: nan, Val Acc: 0.0951
Epoch 17/20, Loss: nan, Val Acc: 0.0951
Epoch 18/20, Loss: nan, Val Acc: 0.0951
Epoch 19/20, Loss: nan, Val Acc: 0.0951
Epoch 20/20, Loss: nan, Val Acc: 0.0951
Лучшая точность на валидации: 0.0951


In [None]:
# Подготовка тестового набора
X_test = test_df.values

# Рашейп и нормализация
X_test = X_test.reshape(-1, 28, 28).astype(np.float32) / 255.0
X_test = (X_test - 0.5) / 0.5
X_test = X_test.reshape(-1, 1, 28, 28)

# Тензоры
test_tensor = TensorDataset(torch.tensor(X_test))
test_loader = DataLoader(test_tensor, batch_size=128)

# Загрузка лучшей модели
model.load_state_dict(torch.load('best_model.pth'))
model.eval()

all_predictions = []
with torch.no_grad():
    for images, in test_loader:
        images = images.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        all_predictions.extend(predicted.cpu().numpy())

# Сохранение submission
submission = pd.DataFrame({
    'Id': range(len(all_predictions)),
    'label': all_predictions
})
submission.to_csv('submission.csv', index=False)
print("✅ Файл submission.csv создан!")

✅ Файл submission.csv создан!
