### Wczytanie danych z biblioteki TorchVision

In [None]:
from torchvision.datasets import FashionMNIST
import torchvision.transforms as transforms
import numpy as np
from torch.utils.data import DataLoader


# Pre-processing pipeline: convert each image to a tensor, then normalize its
# single channel with mean 0.286 and std 0.353.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.286,), std=(0.353,)),
])

# Mini-batch size shared by both data loaders
batch_size = 128

# FashionMNIST train / test splits stored under ./data (download=True)
train_dataset = FashionMNIST('./data', train=True, transform=transform, download=True)
test_dataset = FashionMNIST('./data', train=False, transform=transform, download=True)

# Data loaders: only the training split is shuffled
train_dataloader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

### EDA

In [None]:
from matplotlib import pyplot as plt
# Human-readable class names, keyed by the integer label (0-9)
LABELS = dict(enumerate([
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]))

# Preview grid: 5 rows x 3 columns, one training sample per cell
fig, axes = plt.subplots(nrows=5, ncols=3, figsize=(4, 5))
axes = axes.flatten()

# zip() stops as soon as the 15 axes are used up, so only the first
# 15 training samples are drawn.
for axis, (img, label) in zip(axes, train_dataset):
    axis.imshow(img[0], cmap="gray")  # channel 0 of the image tensor
    axis.axis("off")
    axis.set_title(LABELS[label])

plt.tight_layout()
plt.show()

### Definiowanie dwóch modeli konwolucyjnych

In [None]:
# WŁASNA SIEĆ NEURONOWA

import torch
import torch.nn as nn
import torch.nn.functional as F


class CustomCNN(nn.Module):
    """Small convolutional classifier for single-channel images.

    Architecture: two (3x3 conv -> ReLU -> 2x2 average pool) stages, one
    5x5 conv -> ReLU stage, then a 32 -> 128 -> 64 -> 10 fully connected head.
    The head expects exactly 32 flattened features, which is what the
    convolutional part produces for 28x28 inputs.
    """

    def __init__(self):
        super().__init__()
        # Convolutional stages 1 and 2
        self.conv1 = nn.Conv2d(1, 8, 3)
        self.conv2 = nn.Conv2d(8, 16, 3)
        # Parameter-free layers, reused across stages
        self.pool = nn.AvgPool2d(2, 2)
        self.relu = nn.ReLU()
        # Final 5x5 convolution that feeds the classifier head
        self.bottleneck = nn.Conv2d(16, 32, 5)
        # Classifier head
        self.fc1 = nn.Linear(32, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        """Return the 10 raw class scores (logits) for a batch of images."""
        # Conv blocks 1 and 2: convolution -> ReLU -> average pooling
        for conv in (self.conv1, self.conv2):
            x = self.pool(self.relu(conv(x)))

        # Conv block 3 (no pooling), then flatten everything except the batch axis
        features = torch.flatten(self.relu(self.bottleneck(x)), 1)

        # Two hidden linear layers with ReLU, followed by the output layer
        hidden = self.relu(self.fc1(features))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)


model_custom = CustomCNN()

In [None]:
# RESNET18
from torchvision.models import resnet18, ResNet18_Weights

# ResNet18 with ImageNet pre-trained weights
model_resnet = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)

# Swap the first convolution for a 1-channel version (GRAYSCALE images).
# Instead of throwing the pre-trained RGB filters away, initialise the new layer
# with those filters summed over the colour axis: this gives the same response
# the original layer would give to a grey image replicated on all three channels.
grayscale_conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
with torch.no_grad():
    grayscale_conv1.weight.copy_(model_resnet.conv1.weight.sum(dim=1, keepdim=True))
model_resnet.conv1 = grayscale_conv1

# NOTE(review): the classification head `fc` still has the 1000 ImageNet outputs
# although only 10 labels are used. Shrinking it needs the same change wherever
# the checkpoint is loaded back into a fresh resnet18().

In [None]:
# Number of parameters of the custom CNN that are updated during training
custom_trainable_params = sum(p.numel() for p in model_custom.parameters() if p.requires_grad)
print(f"Custom model: {custom_trainable_params} trenowalnych parametrów")

In [None]:
# Number of ResNet18 parameters that are updated during training
resnet_trainable_params = sum(p.numel() for p in model_resnet.parameters() if p.requires_grad)
print(f"ResNet18: {resnet_trainable_params} trenowalnych parametrów")

In [None]:
!pip install torchview
from torchview import draw_graph

In [None]:

# Build the torchview graph of the custom CNN; the bare expression on the last
# line is the cell's displayed value.
model_visualizer = draw_graph(model_custom, input_size=(batch_size, 1, 28, 28)) # input_size=(batch_size, 1, 28, 28) - input size (B, C, H, W)
model_visualizer.visual_graph

In [None]:
# Build the torchview graph of ResNet18; the bare expression on the last line
# is the cell's displayed value.
model_visualizer = draw_graph(model_resnet, input_size=(batch_size, 1, 28, 28)) # input_size=(batch_size, 1, 28, 28) - input size (B, C, H, W)
model_visualizer.visual_graph

### Trening modeli

In [None]:
def plot_loss(train_loss, val_loss, val_accuracy, save_name):
    """Plot the training curves and save the figure.

    The left panel shows validation and training loss per epoch, the right
    panel shows validation accuracy per epoch. Epochs are labelled from 1.

    Args:
        train_loss: Per-epoch training loss values.
        val_loss: Per-epoch validation loss values.
        val_accuracy: Per-epoch validation accuracy values.
        save_name: Output path passed to ``fig.savefig``.
    """
    fig, (loss_ax, acc_ax) = plt.subplots(ncols=2, figsize=(8, 4))

    # Curves (validation is drawn first, so it gets the first colour of the cycle)
    loss_ax.plot(val_loss, marker='.', label='Validation')
    loss_ax.plot(train_loss, marker='.', label='Train')
    acc_ax.plot(val_accuracy, marker='.', label='Validation')

    # Legend, axis labels and 1-based epoch ticks for both panels
    panels = (
        (loss_ax, "Loss", train_loss),
        (acc_ax, "Accuracy", val_accuracy),
    )
    for panel, y_label, series in panels:
        n_epochs = len(series)
        panel.legend()
        panel.set_xlabel("Epoch")
        panel.set_ylabel(y_label)
        panel.set_xticks(np.arange(n_epochs))
        panel.set_xticklabels(np.arange(1, n_epochs + 1))

    fig.tight_layout()
    fig.show()

    fig.savefig(save_name, dpi=300, bbox_inches='tight')

In [None]:
from torch import optim
from tqdm import tqdm

def train(model, save_name, epochs=5, learning_rate=1e-3):
    """Train ``model`` on ``train_dataloader`` and validate it on ``test_dataloader``.

    Uses cross-entropy loss and the Adam optimizer. After every epoch the model
    is evaluated on the module-level ``test_dataloader``. When all epochs are
    done, the model's ``state_dict`` is written to ``save_name``.

    Args:
        model: The ``nn.Module`` to train; it is moved to CUDA when available.
        save_name: Path the trained weights are saved to.
        epochs: Number of passes over the training data.
        learning_rate: Learning rate passed to Adam.

    Returns:
        Tuple ``(train_loss, val_loss, val_accuracy)`` of per-epoch lists.
        Losses are means of the per-batch losses; accuracy is the fraction of
        correctly classified validation samples (0-1).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    device_name = torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU"
    print(f"Device: {device_name}")

    model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    train_loss = []
    val_loss = []
    val_accuracy = []

    for epoch in range(epochs):

        # ---- training pass ----
        model.train()
        losses = []

        tqdm_train = tqdm(train_dataloader, desc=f"[{epoch + 1}/{epochs}] Training", total=len(train_dataloader))
        for inputs, labels in tqdm_train:
            # move inputs and labels to the device
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            losses.append(loss.item())

            tqdm_train.set_description(f"[{epoch + 1}/{epochs}] Training | Avg. Loss: {np.mean(losses):.4f}")

        train_loss.append(np.mean(losses))

        # ---- validation pass ----
        model.eval()
        losses = []
        correct_preds = 0
        seen_samples = 0

        tqdm_test = tqdm(test_dataloader, desc=f"[{epoch + 1}/{epochs}] Validation", total=len(test_dataloader))
        # No gradients are needed here; skipping the autograd graph saves memory and time.
        with torch.no_grad():
            for inputs, labels in tqdm_test:
                # move inputs and labels to the device
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                losses.append(loss.item())

                # Count correct samples instead of averaging per-batch accuracies,
                # so a smaller last batch is not over-weighted. Softmax is
                # monotonic, hence argmax on the raw logits is sufficient.
                correct_preds += (outputs.argmax(dim=1) == labels).sum().item()
                seen_samples += labels.size(0)

                tqdm_test.set_description(f"[{epoch + 1}/{epochs}] Validation | Avg. Loss: {np.mean(losses):.4f}")

        val_accuracy.append(correct_preds / seen_samples if seen_samples else float("nan"))
        val_loss.append(np.mean(losses))

        # print loss
        print(f'\ttrain loss: {train_loss[-1]:.4f} | val loss: {val_loss[-1]:.4f} | val acc.: {val_accuracy[-1]*100.0:.2f}\n')

    torch.save(model.state_dict(), save_name)

    return train_loss, val_loss, val_accuracy

In [None]:
# Train the custom CNN for 10 epochs and keep the per-epoch history for plotting
train_loss, val_loss, val_accuracy = train(model_custom, 'model_custom.pth', epochs=10, learning_rate=1e-3)

In [None]:
# Plot the custom CNN's training history and save the figure as model_custom.png
plot_loss(train_loss, val_loss, val_accuracy, 'model_custom.png')

In [None]:
# Train ResNet18 for 10 epochs; this overwrites the history variables of the previous run
train_loss, val_loss, val_accuracy = train(model_resnet, 'model_resnet.pth', epochs=10, learning_rate=1e-3)

In [None]:
# Plot the ResNet18 training history and save the figure as model_resnet.png
plot_loss(train_loss, val_loss, val_accuracy, 'model_resnet.png')

In [None]:
# Drop both in-memory models; they are rebuilt from the saved .pth files further down
del model_custom, model_resnet

"Jeszcze jedna epoka ..." - zagadnienie przetrenowania modeli

<img src="https://raw.githubusercontent.com/Kacper-Marciniak/Kurs-AI/main/Student/P5/model_overfitting.png" alt="overfitting" width="75%"/>

### Walidacja modelu na zbiorze testowym

In [None]:
# 1.6. Loading the models

# Re-create both architectures; the checkpoints hold weights only.
model_custom = CustomCNN()
model_resnet = resnet18()
model_resnet.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False) # Same 1-channel (GRAYSCALE) first convolution as used for training, so the checkpoint shapes match

# map_location="cpu": the checkpoints may have been saved from a CUDA model;
# mapping them to the CPU lets them load on machines without a GPU as well.
model_custom.load_state_dict(torch.load("model_custom.pth", map_location="cpu", weights_only=True))
model_resnet.load_state_dict(torch.load("model_resnet.pth", map_location="cpu", weights_only=True))

In [None]:
# Prosta ewaluacja - ACCURACY / DOKŁADNOŚĆ modelu (globalna)

def evaluate(model, dataloader=None):
    """Print the global accuracy of ``model`` over a data loader.

    Accuracy is the number of correctly classified samples divided by the
    number of all samples, so every sample has the same weight even when the
    last batch is smaller than the others.

    Args:
        model: Classifier returning one score per class for each sample.
        dataloader: Iterable of ``(images, labels)`` batches. Defaults to the
            module-level ``test_dataloader``.
    """
    loader = test_dataloader if dataloader is None else dataloader

    # Run on whatever device the model lives on (CPU for parameter-free models).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    correct_preds = 0
    total_samples = 0

    model.eval()
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            # Softmax is monotonic, so argmax over the raw scores is enough.
            predictions = model(images).argmax(dim=1)
            correct_preds += (predictions == labels).sum().item()
            total_samples += labels.size(0)

    accuracy = correct_preds * 100 / total_samples if total_samples else float("nan")

    print(f'Global accuracy on test set: {accuracy:.2f}%')

In [None]:
# Global accuracy of the custom CNN
print("Custom model:")
evaluate(model_custom)

In [None]:
# Global accuracy of ResNet18
print("ResNet18:")
evaluate(model_resnet)

In [None]:
# Confusion matrix / macierz pomyłek
# Jak model radzi sobie z poszczególnymi klasami

def confusion_matrix(model, n_classes=10, dataloader=None):
    """Build the confusion matrix of ``model`` over a data loader.

    Args:
        model: Classifier returning one score per class for each sample.
        n_classes: Number of classes; the result has shape
            ``(n_classes, n_classes)``.
        dataloader: Iterable of ``(images, labels)`` batches. Defaults to the
            module-level ``test_dataloader``.

    Returns:
        Integer NumPy array where ``matrix[p, l]`` is the number of samples
        with ground-truth label ``l`` that were predicted as class ``p``
        (rows: predicted, columns: ground truth).
    """
    loader = test_dataloader if dataloader is None else dataloader

    # Run on whatever device the model lives on (CPU for parameter-free models).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    matrix = np.zeros((n_classes, n_classes), dtype=int)

    model.eval()
    with torch.no_grad():
        for images, labels in loader:
            # Softmax is monotonic, so argmax over the raw scores is enough.
            predicted = model(images.to(device)).argmax(dim=1).cpu().numpy()
            # np.add.at accumulates correctly when the same (p, l) pair occurs
            # several times in one batch; plain fancy-index += would not.
            np.add.at(matrix, (predicted, labels.cpu().numpy()), 1)

    return matrix

In [None]:
# Confusion matrix of the custom CNN (rows: predicted class, columns: ground truth)
matrix_custom = confusion_matrix(model_custom)

In [None]:
# Confusion matrix of ResNet18 (rows: predicted class, columns: ground truth)
matrix_resnet = confusion_matrix(model_resnet)

In [None]:
def _draw_confusion_matrix(axis, matrix, title):
    """Draw one confusion matrix as an annotated heat map on ``axis``."""
    axis.imshow(matrix, cmap='Blues')
    axis.set_title(title)

    # One tick per class on both axes, labelled with the class names
    tick_positions = list(LABELS.keys())
    tick_names = list(LABELS.values())
    axis.set_xticks(tick_positions)
    axis.set_yticks(tick_positions)
    axis.set_xticklabels(tick_names, rotation=45, ha="right", fontsize=7)
    axis.set_yticklabels(tick_names, fontsize=7)

    axis.set_ylabel("Predicted")
    axis.set_xlabel("Ground Truth")

    # Write the count into every cell (x = column, y = row)
    for row in range(10):
        for col in range(10):
            axis.text(col, row, matrix[row, col], ha="center", va="center", color="black", fontsize=8)


# Both models side by side: custom CNN on the left, ResNet18 on the right
fig, ax = plt.subplots(1, 2, figsize=(10, 5))
_draw_confusion_matrix(ax[0], matrix_custom, "Custom CNN")
_draw_confusion_matrix(ax[1], matrix_resnet, "ResNet18")

plt.tight_layout()
plt.show()

In [None]:
# Analiza macierzy pomyłek
# Wyznaczenie metryk dla poszczególnych klas
# Wyznaczenie metryk globalnych - uśrednienie po klasach

def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is zero."""
    return float(numerator) / float(denominator) if denominator else 0.0


def analyze_matrix(matrix, labels=None):
    """Print per-class and macro-averaged metrics of a confusion matrix.

    For every class, accuracy, precision, recall and F1 are computed in a
    one-vs-rest fashion and printed; afterwards their unweighted means over
    the classes are printed. A metric whose denominator is zero (e.g. the
    precision of a class that was never predicted) is reported as 0 instead
    of NaN, so the macro averages stay finite.

    Args:
        matrix: Square NumPy array with predicted classes in rows and
            ground-truth classes in columns.
        labels: Mapping ``class index -> class name``. Defaults to the
            module-level ``LABELS``.
    """
    class_names = LABELS if labels is None else labels
    n_classes = matrix.shape[0]
    global_res = {
        metric: np.zeros(n_classes)
        for metric in ("accuracy", "precision", "recall", "f1")
    }
    n_samples = np.sum(matrix)

    for i, c in class_names.items():
        print(f'{c}:')
        tp = matrix[i, i]
        fp = np.sum(matrix[i, :]) - tp  # predicted as i, but belonging to another class
        fn = np.sum(matrix[:, i]) - tp  # belonging to i, but predicted as another class
        tn = n_samples - tp - fp - fn

        precision = _safe_ratio(tp, tp + fp)
        recall = _safe_ratio(tp, tp + fn)
        f1 = _safe_ratio(2 * precision * recall, precision + recall)
        accuracy = _safe_ratio(tp + tn, tp + fp + fn + tn)

        global_res["accuracy"][i] = accuracy
        global_res["precision"][i] = precision
        global_res["recall"][i] = recall
        global_res["f1"][i] = f1

        print(f'\taccuracy: {accuracy*100.0:.2f}, precision: {precision*100.0:.2f}, recall: {recall*100.0:.2f}, F1: {f1*100.0:.2f}')

    print(f'{"-"*70}\nGlobal (macro) results:')
    print(f'\tAccuracy: {np.mean(global_res["accuracy"])*100.0:.2f}, Precision: {np.mean(global_res["precision"])*100.0:.2f}, Recall: {np.mean(global_res["recall"])*100.0:.2f}, F1: {np.mean(global_res["f1"])*100.0:.2f}')




In [None]:
# Per-class and macro-averaged metrics of the custom CNN
print("Custom model:\n")
analyze_matrix(matrix_custom)

In [None]:
# Per-class and macro-averaged metrics of ResNet18
print("ResNet18:\n")
analyze_matrix(matrix_resnet)