In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F


from torch.utils.data import Dataset, DataLoader
import numpy as np
import struct
import os
from torchvision import transforms 
from PIL import Image
import torch.optim as optim


In [2]:
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")


In [3]:

class obrazki(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.encoder = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),     
            nn.ReLU(),              
            nn.MaxPool2d(2),        # Zmniejsza obrazek 2x (z 28x28 na 14x14)
            
            # Blok 2
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2)         # Zmniejsza obrazek 2x (z 14x14 na 7x7)
        )
        
       
        self.flatten = nn.Flatten()
        
  
        self.classifier = nn.Sequential(
            nn.Linear(64 * 7 * 7, 512), 
            nn.ReLU(),
            nn.Dropout(0.5),            
            
            
            nn.Linear(512, 128),        
            nn.ReLU(),
            nn.Dropout(0.5),
            
            nn.Linear(128, 10)# Wyjście: 10 klas (cyfry MNIST)
        )

    def forward(self, x):
        # Krok 1: Wyciągnij cechy z obrazka (oczy sieci)
        x = self.encoder(x)
        
        # Krok 2: Spłaszcz obrazek 3D do wektora 1D (przygotowanie dla mózgu)
        x = self.flatten(x)
        
        # Krok 3: Podaj decyzję (mózg sieci)
        x = self.classifier(x)
        
        return x 

In [6]:
# odczytywanie danych 

class MojWlasnyMNIST(Dataset):
    def __init__(self, images_path, labels_path,transform=None):

        self.images = self.read_images(images_path)
        self.labels = self.read_labels(labels_path)
        self.transform = transform
      
        assert len(self.images) == len(self.labels) 

    def __len__(self):
       
        return len(self.labels)

    def __getitem__(self, idx):
      
        image = self.images[idx]
        label = self.labels[idx]
        
        image_pil = Image.fromarray(image.numpy(), mode='L')
        
        
        if self.transform:
            image = self.transform(image_pil)
        else:
          
            image = transforms.ToTensor()(image_pil)
        
        return image, label.long()

    
    def read_images(self, path):
        with open(path, 'rb') as f:
            magic, num, rows, cols = struct.unpack(">IIII", f.read(16))
            data = np.frombuffer(f.read(), dtype=np.uint8)
            data = data.reshape(num, rows, cols)
            return torch.from_numpy(data.copy())

    def read_labels(self, path):
        with open(path, 'rb') as f:
            magic, num = struct.unpack(">II", f.read(8))
            data = np.frombuffer(f.read(), dtype=np.uint8)
            return torch.from_numpy(data.copy())



In [7]:

train_img_path = './data/raw/train-images.idx3-ubyte'
train_lbl_path = './data/raw/train-labels.idx1-ubyte'

transform = transforms.Compose([
    transforms.RandomRotation(15),      # Obracaj losowo o +/- 15 stopni
    transforms.RandomAffine(            # Zniekształcaj:
        degrees=0, 
        translate=(0.1, 0.1),           # Przesuwaj o 10% w boki
        scale=(0.9, 1.1)                # Przybliżaj/oddalaj o 10%
    ),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

train_dataset = MojWlasnyMNIST(train_img_path, train_lbl_path,transform=transform)

print(f"Załadowano {len(train_dataset)} obrazków.")

train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)


Załadowano 60000 obrazków.


In [9]:

model = obrazki().to(device)


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


num_epochs = 3 

model.train() 

for epoch in range(num_epochs):
    running_loss = 0.0
    
    for i, (images, labels) in enumerate(train_loader):
        images,labels = images.to(device),labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        

        loss = criterion(outputs, labels)
        

        loss.backward()
        

        optimizer.step()
        

        running_loss += loss.item()
        

        if (i + 1) % 200 == 0:
            print(f'Strata (Loss): {running_loss/200:.4f}')
            running_loss = 0.0


#torch.save(model.state_dict(), 'moj_drugi_model.pth')
#print("Model zapisany jako 'moj_drugi_model.pth'")

  image_pil = Image.fromarray(image.numpy(), mode='L')


Strata (Loss): 1.0926
Strata (Loss): 0.4350
Strata (Loss): 0.3179
Strata (Loss): 0.2782
Strata (Loss): 0.2341
Strata (Loss): 0.2291
Strata (Loss): 0.2085
Strata (Loss): 0.1841
Strata (Loss): 0.1794
Strata (Loss): 0.1801
Strata (Loss): 0.1620
Strata (Loss): 0.1596


In [10]:
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

test_img_path = './data/raw/t10k-images.idx3-ubyte'
test_lbl_path = './data/raw/t10k-labels.idx1-ubyte'


test_dataset = MojWlasnyMNIST(test_img_path, test_lbl_path, transform=test_transform)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)


model.eval()  
poprawne = 0
wszystkie = 0

with torch.no_grad():
    for images, labels in test_loader:
        images,labels = images.to(device),labels.to(device)

        
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        
        wszystkie += labels.size(0)
        poprawne += (predicted == labels).sum().item()

acc = 100 * poprawne / wszystkie
print(f'Dokładność modelu na zbiorze testowym: {acc:.2f}%')

  image_pil = Image.fromarray(image.numpy(), mode='L')


Dokładność modelu na zbiorze testowym: 98.91%
