In [None]:
import numpy as np
import pandas as pd

import torch
from torch import nn
from torch.nn import functional as F

from torchvision import transforms
from torchvision.datasets import FashionMNIST
from torch.utils.data import Dataset, DataLoader

from matplotlib import pyplot as plt
from IPython.display import clear_output



import pandas as pd
from torch.utils.data import Dataset, DataLoader
import torch

class FashionMNISTCSV(Dataset):
    def __init__(self, csv_file, transform=None, has_labels=True):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.has_labels = has_labels  

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        if self.has_labels:
            label = self.data.iloc[idx, 0]
            image = self.data.iloc[idx, 1:].values.astype('float32').reshape(28, 28)
        else:
            label = -1 
            image = self.data.iloc[idx, :].values.astype('float32').reshape(28, 28)
        
        if self.transform:
            image = self.transform(image)
        
        return image, label
    




In [None]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# train_fmnist_data = FashionMNIST(
#     ".", train=True, transform=torchvision.transforms.ToTensor(), download=True
# )
# test_fmnist_data = FashionMNIST(
#     ".", train=False, transform=torchvision.transforms.ToTensor(), download=True
# )


# train_fmnist_data = pd.read_csv('/mnt/sda1/MIPT Common/Глубокое обучение СФ/dl-mfti-2024-classificator/fmnist_train.csv')

# # train_fmnist_data = torch.tensor(train_fmnist_data.values)
# test_fmnist_data = pd.read_csv('/mnt/sda1/MIPT Common/Глубокое обучение СФ/dl-mfti-2024-classificator/fmnist_test.csv')

# train_fmnist_data.to_csv('fmnist_test_drop_id.csv')
# # test_fmnist_data = torch.tensor(test_fmnist_data.values)





# train_fmnist_data = train_fmnist_data.fillna(0)
# train_fmnist_data = train_fmnist_data.astype('float32')
# train_fmnist_data.dtypes

# df = pd.read_csv('/mnt/sda1/MIPT Common/Глубокое обучение СФ/fmnist_train_drop_id.csv')
# train_data = df.iloc[:][0:13000]
# test_data = df.iloc[:][13000:-1]
# train_data.to_csv('train_data.csv', index=False)
# test_data.to_csv('test_data.csv', index=False)


transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_fmnist_data = FashionMNISTCSV('/mnt/sda1/MIPT Common/Глубокое обучение СФ/train_data.csv', transform=transform)
test_fmnist_data = FashionMNISTCSV('/mnt/sda1/MIPT Common/Глубокое обучение СФ/test_data.csv', transform=transform)

train_data_loader = DataLoader(train_fmnist_data, batch_size=32, shuffle=True, num_workers=2)
test_data_loader = DataLoader(test_fmnist_data, batch_size=32, shuffle=False, num_workers=2)


In [None]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(64 * 14 * 14, 10)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = self.dropout(x)
        x = self.fc1(x)
        return x

model = CNN().to(device)

def predict(model, csv_file, batch_size=32, device='cpu'):
    
    dataset = FashionMNISTCSV(csv_file, transform=transform, has_labels=False)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    
    model.eval()
    predicted_labels = []
    
    with torch.no_grad():
        for images, _ in dataloader:
            images = images.to(device)
            outputs = model(images)
            _, batch_preds = torch.max(outputs, 1)
            predicted_labels.extend(batch_preds.cpu().numpy())
    
    return np.array(predicted_labels)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.00015)

num_epochs = 15
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for images, labels in train_data_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss:.4f}")

def evaluate(model, data_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total

train_acc = evaluate(model, train_data_loader)
test_acc = evaluate(model, test_data_loader)

print(f"Train accuracy: {train_acc:.4f}")
print(f"Test accuracy: {test_acc:.4f}")


Epoch 1/15, Loss: 251.6783
Epoch 2/15, Loss: 164.4157
Epoch 3/15, Loss: 143.4015
Epoch 4/15, Loss: 127.4557
Epoch 5/15, Loss: 115.1273
Epoch 6/15, Loss: 106.9556
Epoch 7/15, Loss: 99.5618
Epoch 8/15, Loss: 92.3631
Epoch 9/15, Loss: 86.1879
Epoch 10/15, Loss: 79.3259
Epoch 11/15, Loss: 73.4144
Epoch 12/15, Loss: 69.8943
Epoch 13/15, Loss: 65.4011
Epoch 14/15, Loss: 61.6941
Epoch 15/15, Loss: 59.5424
Train accuracy: 0.9714
Test accuracy: 0.9000


In [None]:

test_csv_path = '/mnt/sda1/MIPT Common/Глубокое обучение СФ/fmnist_test_drop_id.csv'

predictions = predict(model, test_csv_path, device=device)

display(type(predictions))

numpy.ndarray

In [15]:
df = pd.DataFrame(predictions)
display(df)
df.to_csv('submission.csv')

Unnamed: 0,0
0,0
1,1
2,2
3,2
4,3
...,...
9995,0
9996,6
9997,8
9998,8
