In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import numpy as np
import cv2
import os
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split
from torch.utils.data import WeightedRandomSampler
from PIL import Image
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
import torchvision.models as models
from tqdm import tqdm
import copy


# Etap 2
Z ostatniego etapu otrzymaliśmy zdjęcia przygotowane do następnego etapu i zapisane w foldderze 'output'.
Budując i testując modele musimy zwrócić uwagę na brak zbalansowania zbioru. Będziemy się starać balansować zbiór, tak aby Accuracy wykorzystywane do stopnia wytrenowania modelu było adekwatną miarą oceny.

## Pierwsze podejście
Do tej grupy modelów balansowanie zbiorów uzyskaliśmy za pomocą obrotów i odbić i ponownego zapisu plików do folderu 'output_balanced'. Ze względu na konieczność przechowywania dużej liczby zdjęć nie jest to najlepsze wyjście, ale jest to nasz punkt startowy.

### Balansowanie - Oversampling

Funkcja balansująca klasy. Wynik folder 'output_balanced'

In [6]:

input_root = "output"
output_root = "output_balanced"

augmentation_times = {
    "Normal": ["h", "v"],  # 2 razy więcej 
    "AVM": ["h90", "h180", "h270", "v90", "v180", "v270"],  # 6 razy więcej
    "Ulcer": ["h", "h90", "h180", "h270", "v", "v90", "v180", "v270"],  # 8 razy więcej 
}

for root, _, files in os.walk(input_root):
    class_name = os.path.basename(root)  

    if class_name not in augmentation_times:
        continue 

    for file in files:
        if not file.endswith(".bmp"):
            continue 

        input_path = os.path.join(root, file)

        relative_path = os.path.relpath(root, input_root)
        output_dir = os.path.join(output_root, relative_path)
        os.makedirs(output_dir, exist_ok=True)

        base, ext = os.path.splitext(file)
        variants = augmentation_times[class_name]

        image = cv2.imread(input_path)

        transformed_images = {
            "base" : image,
            "h": cv2.flip(image, 1),
            "v": cv2.flip(image, 0),
            "90": cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE),
            "180": cv2.rotate(image, cv2.ROTATE_180),
            "270": cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE),
            "h90": cv2.flip(cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE), 1),
            "h180": cv2.flip(cv2.rotate(image, cv2.ROTATE_180), 1),
            "h270": cv2.flip(cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE), 1),
            "v90": cv2.flip(cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE), 0),
            "v180": cv2.flip(cv2.rotate(image, cv2.ROTATE_180), 0),
            "v270": cv2.flip(cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE), 0)
        }

        # Zapisujemy tylko te transformacje, które są w augmentation_config dla danej klasy
        for variant in variants:
            output_path = os.path.join(output_dir, f"{base}{variant}{ext}")
            cv2.imwrite(output_path, transformed_images[variant])


Sprawdzenie rozkładu klas po balansowaniu.

In [2]:
parent_folder = "output_balanced"
output_data = []  

for folder in os.listdir(parent_folder):
    folder_path = os.path.join(parent_folder, folder)
    if os.path.isdir(folder_path):
        count = sum(1 for f in os.listdir(folder_path) if f.endswith(".bmp"))
        output_data.append([folder, count])  
df_output = pd.DataFrame(output_data, columns=["Nazwa klasy", "Liczba zdjęć"])
df_output

Unnamed: 0,Nazwa klasy,Liczba zdjęć
0,AVM,4008
1,Normal,4304
2,Ulcer,3720


Procent zbalansowania: 

In [3]:
proc_balanced = df_output.loc[2, "Liczba zdjęć"] /df_output.loc[1, "Liczba zdjęć"] *100
print(f"Procent zbalansowania: {proc_balanced:.2f}%")

Procent zbalansowania: 86.43%


### Podział dataset

In [None]:
def save_to_list():
    output_root = "output_balanced"
    X = []
    y = []
    label_to_index = {
        label: idx for idx, label in enumerate({'AVM', 'Normal', 'Ulcer'})
    }
    print(label_to_index)
    for root, _, files in os.walk(output_root):
        for file in files:
            if file.endswith(".bmp"): 
                input_path = os.path.join(root, file)

                base, ext = os.path.splitext(file)
                new_filename = f"{base}{ext}"
                image = cv2.imread(input_path)

                X.append(image)
                y.append(label_to_index[new_filename.split("_")[0]])
    return X, y

X, y = save_to_list()

{'AVM': 0, 'Ulcer': 1, 'Normal': 2}


Stratify w celu zachowania równowagi pomiędzy zbiorami.

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=123, stratify=y)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.5, random_state=123, stratify=y_test)

### Dataset Preparation 1

In [5]:
# https://discuss.pytorch.org/t/balanced-sampling-between-classes-with-torchvision-dataloader/2703/2

class ImageDataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform 
    
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        image = Image.fromarray(self.images[idx])  
        label = torch.tensor(self.labels[idx], dtype=torch.long) 

        if self.transform:
            image = self.transform(image)
            
        
        return image, label

Dla zbioru treningowego dodajemy rotacje odpicia w celu większego urozmaicenia zdjęć (augumentacja). Dla zbioru testowego tylko zmiana rozmiaru i normalizacja.

In [None]:

train_transform = transforms.Compose([
    transforms.Resize((224, 224)),          
    transforms.RandomRotation((-30,30)),  
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),                  
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),                  
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])  


train_dataset = ImageDataset(X_train, y_train, transform=train_transform)
test_dataset = ImageDataset(X_test, y_test, transform=test_transform)

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, drop_last=True)

## Funckcje do treningu i ewaluacji

Funkcja trenująca i oceniająca za pomocą Accuracy

In [None]:
def train_and_evaluate(model, train_loader, test_loader, epochs=10, learning_rate=0.001, device='cuda' if torch.cuda.is_available() else 'cpu'):
    print(f"Using device: {device}") 
    model.to(device)
  
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
    print('PRZED EPOKAMI')
    for epoch in range(epochs):
        print(f"Epoch {epoch+1}/{epochs}")
        model.train()
        print('po model.train')
        running_loss = 0.0
        correct, total = 0, 0
        print('po zerowaniu poprawnych i całkowitych')
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        
        train_acc = 100 * correct / total
        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}, Train Accuracy: {train_acc:.2f}%")
    print('po epokach')
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    test_acc = 100 * correct / total
    print(f"Test Accuracy: {test_acc:.2f}%")
    return model


Funkcja trenująca i oceniająca za pomocą większej liczby metryk.

In [3]:
def train_and_evaluate_more_metrics(model, train_loader, test_loader, epochs=10, learning_rate=0.001, device='cuda' if torch.cuda.is_available() else 'cpu'):
    print(f"Using device: {device}") 
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=0.01)
    
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct, total = 0, 0
        all_labels = []
        all_predictions = []
        
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            all_labels.extend(labels.cpu().numpy())  
            all_predictions.extend(predicted.cpu().numpy())  
        
        train_acc = 100 * correct / total
        train_precision = precision_score(all_labels, all_predictions, average='weighted', zero_division=0)
        train_recall = recall_score(all_labels, all_predictions, average='weighted', zero_division=0)
        train_f1 = f1_score(all_labels, all_predictions, average='weighted', zero_division=0)

        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}, Train Acc: {train_acc:.2f}%, Precision: {train_precision:.4f}, Recall: {train_recall:.4f}, F1: {train_f1:.4f}")
    
    # Evaluate on test set
    model.eval()
    correct, total = 0, 0
    all_labels = []
    all_predictions = []
    
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

    test_acc = 100 * correct / total
    test_precision = precision_score(all_labels, all_predictions, average='weighted', zero_division=0)
    test_recall = recall_score(all_labels, all_predictions, average='weighted', zero_division=0)
    test_f1 = f1_score(all_labels, all_predictions, average='weighted', zero_division=0)
    test_conf_matrix = confusion_matrix(all_labels, all_predictions)

    print(f"Test Accuracy: {test_acc:.2f}%, Precision: {test_precision:.4f}, Recall: {test_recall:.4f}, F1: {test_f1:.4f}")
    print("Confusion Matrix:\n", test_conf_matrix)

    return model


## Model 1

nn.Droput(0.5) <- zapobiega overfittingowi

In [8]:
class CNNModel(nn.Module):
    def __init__(self, num_classes=3):
        super(CNNModel, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 28 * 28, 512),  # ewentualnie dodać jeszcze jedną warstwę pośrednią
            nn.ReLU(),
            nn.Dropout(0.5), # zapobieganie overfittingowi
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x


Z użyciem optimizer AdamW

In [13]:
model_1 = CNNModel(num_classes=3)
trained_model_1 = train_and_evaluate_more_metrics(model_1, train_loader, test_loader, epochs=15)

Using device: cuda
Epoch 1/15, Loss: 0.9449, Train Acc: 54.93%, Precision: 0.5637, Recall: 0.5493, F1: 0.5400
Epoch 2/15, Loss: 0.7370, Train Acc: 68.48%, Precision: 0.7006, Recall: 0.6848, F1: 0.6869
Epoch 3/15, Loss: 0.6324, Train Acc: 74.83%, Precision: 0.7580, Recall: 0.7483, F1: 0.7501
Epoch 4/15, Loss: 0.5342, Train Acc: 79.66%, Precision: 0.8017, Recall: 0.7966, F1: 0.7977
Epoch 5/15, Loss: 0.4403, Train Acc: 83.33%, Precision: 0.8374, Recall: 0.8333, F1: 0.8341
Epoch 6/15, Loss: 0.3745, Train Acc: 86.19%, Precision: 0.8657, Recall: 0.8619, F1: 0.8626
Epoch 7/15, Loss: 0.3275, Train Acc: 87.60%, Precision: 0.8791, Recall: 0.8760, F1: 0.8764
Epoch 8/15, Loss: 0.3099, Train Acc: 88.22%, Precision: 0.8855, Recall: 0.8822, F1: 0.8827
Epoch 9/15, Loss: 0.2623, Train Acc: 90.29%, Precision: 0.9057, Recall: 0.9029, F1: 0.9033
Epoch 10/15, Loss: 0.2396, Train Acc: 91.01%, Precision: 0.9120, Recall: 0.9101, F1: 0.9103
Epoch 11/15, Loss: 0.2186, Train Acc: 91.69%, Precision: 0.9187, Recal

In [14]:
torch.save(trained_model_1.state_dict(), "trained_model_1.pth")

Z użyceim optimizer Adam

In [16]:
model_1 = CNNModel(num_classes=3)
trained_model_1_2 = train_and_evaluate_more_metrics(model_1, train_loader, test_loader, epochs=15)

Using device: cuda
cuda:0
Epoch 1/15, Loss: 0.9190, Train Acc: 58.41%, Precision: 0.6029, Recall: 0.5841, F1: 0.5847
Epoch 2/15, Loss: 0.7864, Train Acc: 65.14%, Precision: 0.6687, Recall: 0.6514, F1: 0.6548
Epoch 3/15, Loss: 0.7366, Train Acc: 68.23%, Precision: 0.6987, Recall: 0.6823, F1: 0.6853
Epoch 4/15, Loss: 0.6682, Train Acc: 71.99%, Precision: 0.7316, Recall: 0.7199, F1: 0.7222
Epoch 5/15, Loss: 0.5827, Train Acc: 76.78%, Precision: 0.7792, Recall: 0.7678, F1: 0.7689
Epoch 6/15, Loss: 0.4708, Train Acc: 81.73%, Precision: 0.8237, Recall: 0.8173, F1: 0.8177
Epoch 7/15, Loss: 0.4285, Train Acc: 83.99%, Precision: 0.8436, Recall: 0.8399, F1: 0.8403
Epoch 8/15, Loss: 0.3707, Train Acc: 86.20%, Precision: 0.8660, Recall: 0.8620, F1: 0.8623
Epoch 9/15, Loss: 0.3201, Train Acc: 87.71%, Precision: 0.8799, Recall: 0.8771, F1: 0.8774
Epoch 10/15, Loss: 0.3233, Train Acc: 87.92%, Precision: 0.8821, Recall: 0.8792, F1: 0.8795
Epoch 11/15, Loss: 0.2817, Train Acc: 89.50%, Precision: 0.8976

In [17]:
torch.save(trained_model_1_2.state_dict(), "trained_model_1_2.pth")

Dodanie innych metryk nie wniosło nowych informacji poza tym, że dane są dobrze zbalansowane oraz model dobrze generalizuje.
Interpretacja ConfusionMatrix: Na przekątnej wartości dobrze zidentyfikowane.

## Model 2

Model zmienił funkcję aktywacji z ReLU na LeakyReLU, która przepuszcza wartośći ujemne w pewnym stopniu (ustawiony na 0.1).
ReLU : jeśli otrzyma dużą liczbę ujmenych wartości to gradient może zatrzymać się na 0 i przestać się uczyć.
LeakyReLU: jesli otrzyma dużą liczbę ujmenych wartość to gradient nie będzie 0 tylko mały i proces uczenia nie zatrzyma się.
Czy otrzymamy lepsze wyniki??

In [39]:
class CNNModel_gradient(nn.Module):
    def __init__(self, num_classes=3):
        super(CNNModel_gradient, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.LeakyReLU(0.1),  # LeakyReLU zamiast ReLU
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.LeakyReLU(0.1),  # LeakyReLU zamiast ReLU
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.LeakyReLU(0.1),  # LeakyReLU zamiast ReLU
            nn.MaxPool2d(2, 2)
        )

        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 28 * 28, 512),  # ewentualnie dodać jeszcze jedną warstwę pośrednią
            nn.ReLU(),
            nn.Dropout(0.5), # zapobieganie overfittingowi
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x


In [None]:
model2 = CNNModel_gradient(num_classes=3)
trained_model2 = train_and_evaluate_more_metrics(model2, train_loader, test_loader, epochs=15)

Using device: cuda
Epoch 1/15, Loss: 0.8916, Train Acc: 62.48%, Precision: 0.6338, Recall: 0.6248, F1: 0.6266
Epoch 2/15, Loss: 0.5250, Train Acc: 79.40%, Precision: 0.7961, Recall: 0.7940, F1: 0.7942
Epoch 3/15, Loss: 0.4462, Train Acc: 83.29%, Precision: 0.8360, Recall: 0.8329, F1: 0.8333
Epoch 4/15, Loss: 0.4056, Train Acc: 84.79%, Precision: 0.8508, Recall: 0.8479, F1: 0.8483
Epoch 5/15, Loss: 0.4107, Train Acc: 84.92%, Precision: 0.8517, Recall: 0.8492, F1: 0.8495
Epoch 6/15, Loss: 0.3276, Train Acc: 87.90%, Precision: 0.8813, Recall: 0.8790, F1: 0.8793
Epoch 7/15, Loss: 0.3290, Train Acc: 87.51%, Precision: 0.8774, Recall: 0.8751, F1: 0.8753
Epoch 8/15, Loss: 0.2905, Train Acc: 89.23%, Precision: 0.8942, Recall: 0.8923, F1: 0.8925
Epoch 9/15, Loss: 0.3133, Train Acc: 88.15%, Precision: 0.8837, Recall: 0.8815, F1: 0.8818
Epoch 10/15, Loss: 0.2855, Train Acc: 89.27%, Precision: 0.8942, Recall: 0.8927, F1: 0.8929
Epoch 11/15, Loss: 0.2814, Train Acc: 89.54%, Precision: 0.8975, Recal

In [None]:
torch.save(trained_model2.state_dict(), "model2.pth")

LeakyReLU może powodować, że gradienty są mniejsze dla wartości ujemnych, co może spowolnić uczenie się w pewnych przypadkach. -> widzimy spadek accuracy. 

## Model 3

Zmiana nn.Dropout na 0.2 -> sprawdzenie czy w przypadku 1 modelu nie zachodizło zjawisko underfittingu.

In [41]:
class CNNModel_lower_dropout(nn.Module):
    def __init__(self, num_classes=3):
        super(CNNModel_lower_dropout, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 28 * 28, 512),  # ewentualnie dodać jeszcze jedną warstwę pośrednią
            nn.ReLU(),
            nn.Dropout(0.2), # zapobieganie overfittingowi
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x


In [None]:
model3 = CNNModel_lower_dropout(num_classes=3)
trained_model3 = train_and_evaluate_more_metrics(model3, train_loader, test_loader, epochs=15)

Using device: cuda
Epoch 1/15, Loss: 0.9671, Train Acc: 56.30%, Precision: 0.5912, Recall: 0.5630, F1: 0.5621
Epoch 2/15, Loss: 0.7751, Train Acc: 64.70%, Precision: 0.6666, Recall: 0.6470, F1: 0.6504
Epoch 3/15, Loss: 0.6831, Train Acc: 71.00%, Precision: 0.7176, Recall: 0.7100, F1: 0.7121
Epoch 4/15, Loss: 0.6009, Train Acc: 75.11%, Precision: 0.7559, Recall: 0.7511, F1: 0.7525
Epoch 5/15, Loss: 0.5445, Train Acc: 77.95%, Precision: 0.7846, Recall: 0.7795, F1: 0.7806
Epoch 6/15, Loss: 0.4516, Train Acc: 82.06%, Precision: 0.8233, Recall: 0.8206, F1: 0.8210
Epoch 7/15, Loss: 0.3827, Train Acc: 85.14%, Precision: 0.8533, Recall: 0.8514, F1: 0.8515
Epoch 8/15, Loss: 0.3287, Train Acc: 87.37%, Precision: 0.8759, Recall: 0.8737, F1: 0.8739
Epoch 9/15, Loss: 0.2936, Train Acc: 88.78%, Precision: 0.8902, Recall: 0.8878, F1: 0.8881
Epoch 10/15, Loss: 0.2695, Train Acc: 89.56%, Precision: 0.8975, Recall: 0.8956, F1: 0.8958
Epoch 11/15, Loss: 0.2671, Train Acc: 89.98%, Precision: 0.9013, Recal

In [None]:
torch.save(trained_model3.state_dict(), "model3.pth")

## Model 4

zwiększenie ilości filtrów. dla małego zbiru danych może nie być najlepsze, ponieważ model może zacząć się uczyć na pamięć.

In [None]:
class CNNModel_more_filters(nn.Module):
    def __init__(self, num_classes=3):
        super(CNNModel_more_filters, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 28 * 28, 512),  # -> tu ustawiamy liczbę filtrów w ostatniej warstwie konwolucyjnej czyli Conv2d razy wymiary po wszystkich maxPoolach
            nn.ReLU(),
            nn.Dropout(0.5), # zapobieganie overfittingowi
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x


In [None]:
model4 = CNNModel_more_filters(num_classes=3)
trained_model4 = train_and_evaluate_more_metrics(model4, train_loader, test_loader, epochs=15)

Using device: cuda
Epoch 1/15, Loss: 1.1168, Train Acc: 48.37%, Precision: 0.4864, Recall: 0.4837, F1: 0.4509
Epoch 2/15, Loss: 0.9136, Train Acc: 54.41%, Precision: 0.6105, Recall: 0.5441, F1: 0.5113
Epoch 3/15, Loss: 0.8660, Train Acc: 58.14%, Precision: 0.6357, Recall: 0.5814, F1: 0.5715
Epoch 4/15, Loss: 0.8328, Train Acc: 61.44%, Precision: 0.6606, Recall: 0.6144, F1: 0.6111
Epoch 5/15, Loss: 0.8333, Train Acc: 61.82%, Precision: 0.6505, Recall: 0.6182, F1: 0.6189
Epoch 6/15, Loss: 0.7284, Train Acc: 68.06%, Precision: 0.7028, Recall: 0.6806, F1: 0.6836
Epoch 7/15, Loss: 0.6378, Train Acc: 73.10%, Precision: 0.7466, Recall: 0.7310, F1: 0.7333
Epoch 8/15, Loss: 0.5795, Train Acc: 76.79%, Precision: 0.7766, Recall: 0.7679, F1: 0.7700
Epoch 9/15, Loss: 0.5569, Train Acc: 77.74%, Precision: 0.7846, Recall: 0.7774, F1: 0.7793
Epoch 10/15, Loss: 0.5147, Train Acc: 79.92%, Precision: 0.8064, Recall: 0.7992, F1: 0.8009
Epoch 11/15, Loss: 0.4547, Train Acc: 82.78%, Precision: 0.8339, Recal

In [None]:
torch.save(trained_model4.state_dict(), "model4.pth")

## Model 5

Zwiększenie warstw o jeden.

In [None]:
class CNNModel_more_layers(nn.Module):
    def __init__(self, num_classes=3):
        super(CNNModel_more_layers, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),  
            nn.ReLU(),
            nn.MaxPool2d(2, 2)  # Nowa warstwa MaxPooling
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 14 * 14, 512),  
            nn.ReLU(),
            nn.Dropout(0.5), # zapobieganie overfittingowi
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x



In [15]:
model5 = CNNModel_more_layers(num_classes=3)
trained_model5 = train_and_evaluate_more_metrics(model5, train_loader, test_loader, epochs=15)

Using device: cuda
cuda:0
Epoch 1/15, Loss: 0.8792, Train Acc: 57.68%, Precision: 0.5844, Recall: 0.5768, F1: 0.5758
Epoch 2/15, Loss: 0.7073, Train Acc: 69.27%, Precision: 0.6999, Recall: 0.6927, F1: 0.6944
Epoch 3/15, Loss: 0.6039, Train Acc: 75.47%, Precision: 0.7595, Recall: 0.7547, F1: 0.7554
Epoch 4/15, Loss: 0.4811, Train Acc: 81.17%, Precision: 0.8141, Recall: 0.8117, F1: 0.8120
Epoch 5/15, Loss: 0.3813, Train Acc: 85.35%, Precision: 0.8555, Recall: 0.8535, F1: 0.8537
Epoch 6/15, Loss: 0.3366, Train Acc: 87.11%, Precision: 0.8736, Recall: 0.8711, F1: 0.8714
Epoch 7/15, Loss: 0.2993, Train Acc: 88.46%, Precision: 0.8874, Recall: 0.8846, F1: 0.8849
Epoch 8/15, Loss: 0.3043, Train Acc: 88.72%, Precision: 0.8889, Recall: 0.8872, F1: 0.8874
Epoch 9/15, Loss: 0.2644, Train Acc: 89.75%, Precision: 0.9004, Recall: 0.8975, F1: 0.8979
Epoch 10/15, Loss: 0.2349, Train Acc: 91.20%, Precision: 0.9138, Recall: 0.9120, F1: 0.9122
Epoch 11/15, Loss: 0.2581, Train Acc: 90.29%, Precision: 0.9056

In [None]:
torch.save(treined_model5.state_dict(), "model5.pth")

# Model 6
W stosunku do modelu 5 zmniejszono dropout do 0.3.

In [None]:
class CNNModel_more_layers_lower_dropout(nn.Module):
    def __init__(self, num_classes=3):
        super(CNNModel_more_layers_lower_dropout, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            
            nn.Conv2d(128, 256, kernel_size=3, padding=1), 
            nn.ReLU(),
            nn.MaxPool2d(2, 2) 
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 14 * 14, 512), 
            nn.ReLU(),
            nn.Dropout(0.3), 
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x



In [None]:
model6 = CNNModel_more_layers_lower_dropout(num_classes=3)
trained_model6 = train_and_evaluate_more_metrics(model6, train_loader, test_loader, epochs=15)

Using device: cuda
cuda:0
Epoch 1/15, Loss: 0.8678, Train Acc: 59.23%, Precision: 0.6008, Recall: 0.5923, F1: 0.5919
Epoch 2/15, Loss: 0.5794, Train Acc: 77.43%, Precision: 0.7770, Recall: 0.7743, F1: 0.7747
Epoch 3/15, Loss: 0.4188, Train Acc: 84.04%, Precision: 0.8425, Recall: 0.8404, F1: 0.8406
Epoch 4/15, Loss: 0.3466, Train Acc: 87.32%, Precision: 0.8754, Recall: 0.8732, F1: 0.8733
Epoch 5/15, Loss: 0.3056, Train Acc: 88.86%, Precision: 0.8909, Recall: 0.8886, F1: 0.8888
Epoch 6/15, Loss: 0.2469, Train Acc: 90.52%, Precision: 0.9069, Recall: 0.9052, F1: 0.9054
Epoch 7/15, Loss: 0.2515, Train Acc: 90.58%, Precision: 0.9075, Recall: 0.9058, F1: 0.9060
Epoch 8/15, Loss: 0.2198, Train Acc: 91.49%, Precision: 0.9159, Recall: 0.9149, F1: 0.9149
Epoch 9/15, Loss: 0.1777, Train Acc: 93.32%, Precision: 0.9338, Recall: 0.9332, F1: 0.9332
Epoch 10/15, Loss: 0.1618, Train Acc: 94.11%, Precision: 0.9415, Recall: 0.9411, F1: 0.9412
Epoch 11/15, Loss: 0.1476, Train Acc: 94.47%, Precision: 0.9452

In [None]:
torch.save(trained_model6.state_dict(), "model6.pth")

Zapis modeli

In [None]:
torch.save(model.state_dict(), "model.pth")
torch.save(model2.state_dict(), "model2.pth")
torch.save(model3.state_dict(), "model3.pth")
torch.save(trained_model.state_dict(), "trained_model.pth")

Odczyt modeli

In [None]:
import torch

# Załaduj model do tej samej architektury
model = CNNModel(num_classes=3)  # Musisz utworzyć model przed wczytaniem wag
model.load_state_dict(torch.load("model.pth"))
model.eval()  # Ustawienie modelu w tryb ewaluacji (nie treningu)


# Inne podejście do balansowania
Użycie weightedSampler aby dodać bias do częstości wybierania pocszczególnych elementów z różnych klas

In [9]:
def save_to_list():
    output_root = "..\\output"
    X = []
    y = []
    label_to_index = {
        label: idx for idx, label in enumerate({'AVM', 'Normal', 'Ulcer'})
    }
    print(label_to_index)
    for root, _, files in os.walk(output_root):
        for file in files:
            if file.endswith(".bmp"): 
                input_path = os.path.join(root, file)

                base, ext = os.path.splitext(file)
                new_filename = f"{base}{ext}"
                image = cv2.imread(input_path)

                X.append(image)
                y.append(label_to_index[new_filename.split("_")[0]])
    return X, y

X, y = save_to_list()

{'AVM': 0, 'Ulcer': 1, 'Normal': 2}


In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=123, stratify=y)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.5, random_state=123, stratify=y_test)

In [11]:
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),          
    transforms.RandomRotation((-180,180)),  
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),                  
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])  # do testów - bez zmian


train_dataset = ImageDataset(X_train, y_train, transform=train_transform)
test_dataset = ImageDataset(X_test, y_test, transform=test_transform)

class_counts = np.bincount(y)  
class_weights = 1.0 / class_counts 

sample_weights_tr = [class_weights[label] for label in y_train]
sample_weights_ts = [class_weights[label] for label in y_test]

samplertr = WeightedRandomSampler(sample_weights_tr, num_samples= len(sample_weights_tr), replacement=True)
samplerts = WeightedRandomSampler(sample_weights_ts, num_samples= len(sample_weights_ts), replacement=True)

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler = samplertr)
test_loader = DataLoader(test_dataset, batch_size=batch_size, sampler = samplerts)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [1]:
class CNNModel(nn.Module):
    def __init__(self, num_classes=3):
        super(CNNModel, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 28 * 28, 512), 
            nn.ReLU(),
            nn.Dropout(0.5), 
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x


NameError: name 'nn' is not defined

In [None]:
model_A = CNNModel(num_classes=3)
trained_model_A = train_and_evaluate_more_metrics(model_A, train_loader, test_loader, epochs=15)

Using device: cuda


In [None]:
torch.save(trained_model_A.state_dict(), "trained_model_A.pth")

## Model Transfer Learning
Czasami nie opaca się trenować modelu od zera. Transfer Learning polega na użyciu wytrenowanego modelu i wycięcie ostatnich warstw i zastąpienie ich nowymi. Jest to lepsze dla datasetów z względnie niedużą ilością zdjęć, dlatego użyjemy go wraz z drugim sposobem balansowania

Użyjemy modelu ResNet50 który jest jednym z lepszych modeli do wyboru dla naszych celów

In [None]:
class ResNet50Transfer(nn.Module):
    def __init__(self, num_classes=3):
        super(ResNet50Transfer, self).__init__()
        self.base_model = models.resnet50(pretrained=True)

        for param in self.base_model.parameters():
            param.requires_grad = False
        
        for name, param in self.base_model.named_parameters():
            if "layer4" in name or "layer3" in name: 
                param.requires_grad = True

        in_features = self.base_model.fc.in_features 
        self.base_model.fc = nn.Sequential(
            nn.Linear(in_features, 512),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(512, num_classes) 
        )

    def forward(self, x):
        return self.base_model(x)


In [15]:

model = ResNet50Transfer(num_classes=3).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3, factor=0.5)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

num_epochs = 15
best_val_loss = float('inf')
best_model_weights = copy.deepcopy(model.state_dict())

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    train_loss = running_loss / total
    train_acc = correct / total

    # Validation
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0
    all_labels = []
    all_predictions = []

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            val_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            val_correct += (predicted == labels).sum().item()
            val_total += labels.size(0)

            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())  # <-- fix here

    test_conf_matrix = confusion_matrix(all_labels, all_predictions)

    val_loss /= val_total
    val_acc = val_correct / val_total

    print(f"Train Loss: {train_loss:.4f}, Acc: {train_acc:.4f} | Val Loss: {val_loss:.4f}, Acc: {val_acc:.4f}")
    print("Confusion Matrix:\n", test_conf_matrix)
    scheduler.step(val_loss)

    # Save best model
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_model_weights = copy.deepcopy(model.state_dict())
        print("Best model updated")

# Load best model for testing or saving
model.load_state_dict(best_model_weights)
torch.save(model.state_dict(), "resnet50_model.pth")


Epoch 1/15: 100%|██████████| 72/72 [00:26<00:00,  2.68it/s]


Train Loss: 0.4798, Acc: 0.8124 | Val Loss: 1.2805, Acc: 0.6673
Confusion Matrix:
 [[166   0   0]
 [ 31 144   0]
 [125   8  19]]
Best model updated


Epoch 2/15: 100%|██████████| 72/72 [00:27<00:00,  2.66it/s]


Train Loss: 0.2909, Acc: 0.8916 | Val Loss: 0.1300, Acc: 0.9655
Confusion Matrix:
 [[156   1   8]
 [  0 173   1]
 [  1   6 147]]
Best model updated


Epoch 3/15: 100%|██████████| 72/72 [00:27<00:00,  2.63it/s]


Train Loss: 0.2418, Acc: 0.9138 | Val Loss: 0.1760, Acc: 0.9432
Confusion Matrix:
 [[156   6   6]
 [  0 181   0]
 [  7   9 128]]


Epoch 4/15: 100%|██████████| 72/72 [00:27<00:00,  2.67it/s]


Train Loss: 0.1871, Acc: 0.9295 | Val Loss: 0.1692, Acc: 0.9594
Confusion Matrix:
 [[150   5   6]
 [  0 158   0]
 [  5   4 165]]


Epoch 5/15: 100%|██████████| 72/72 [00:27<00:00,  2.62it/s]


Train Loss: 0.1527, Acc: 0.9539 | Val Loss: 0.1548, Acc: 0.9615
Confusion Matrix:
 [[151   0  11]
 [  0 167   8]
 [  0   0 156]]


Epoch 6/15: 100%|██████████| 72/72 [00:27<00:00,  2.65it/s]


Train Loss: 0.1382, Acc: 0.9539 | Val Loss: 0.1795, Acc: 0.9229
Confusion Matrix:
 [[158   0   0]
 [  8 158   7]
 [ 19   4 139]]


Epoch 7/15: 100%|██████████| 72/72 [00:27<00:00,  2.65it/s]


Train Loss: 0.1159, Acc: 0.9626 | Val Loss: 0.1221, Acc: 0.9533
Confusion Matrix:
 [[156   0  14]
 [  0 144   9]
 [  0   0 170]]
Best model updated


Epoch 8/15: 100%|██████████| 72/72 [00:27<00:00,  2.62it/s]


Train Loss: 0.1224, Acc: 0.9604 | Val Loss: 0.0746, Acc: 0.9716
Confusion Matrix:
 [[151   0   1]
 [  1 164   4]
 [  4   4 164]]
Best model updated


Epoch 9/15: 100%|██████████| 72/72 [00:27<00:00,  2.63it/s]


Train Loss: 0.0927, Acc: 0.9678 | Val Loss: 0.0669, Acc: 0.9777
Confusion Matrix:
 [[164   0   3]
 [  6 156   0]
 [  2   0 162]]
Best model updated


Epoch 10/15: 100%|██████████| 72/72 [00:27<00:00,  2.63it/s]


Train Loss: 0.0911, Acc: 0.9665 | Val Loss: 0.0932, Acc: 0.9594
Confusion Matrix:
 [[145   0   5]
 [  3 169  11]
 [  0   1 159]]


Epoch 11/15: 100%|██████████| 72/72 [00:27<00:00,  2.65it/s]


Train Loss: 0.0929, Acc: 0.9700 | Val Loss: 0.0228, Acc: 0.9959
Confusion Matrix:
 [[158   0   0]
 [  0 162   0]
 [  1   1 171]]
Best model updated


Epoch 12/15: 100%|██████████| 72/72 [00:27<00:00,  2.60it/s]


Train Loss: 0.0830, Acc: 0.9769 | Val Loss: 0.0489, Acc: 0.9797
Confusion Matrix:
 [[167   0   2]
 [  0 163   2]
 [  6   0 153]]


Epoch 13/15: 100%|██████████| 72/72 [00:27<00:00,  2.66it/s]


Train Loss: 0.0732, Acc: 0.9765 | Val Loss: 0.0516, Acc: 0.9838
Confusion Matrix:
 [[168   0   3]
 [  0 172   0]
 [  2   3 145]]


Epoch 14/15: 100%|██████████| 72/72 [00:27<00:00,  2.59it/s]


Train Loss: 0.0727, Acc: 0.9743 | Val Loss: 0.0380, Acc: 0.9959
Confusion Matrix:
 [[175   0   1]
 [  0 170   0]
 [  0   1 146]]


Epoch 15/15: 100%|██████████| 72/72 [00:27<00:00,  2.66it/s]


Train Loss: 0.0597, Acc: 0.9822 | Val Loss: 0.0226, Acc: 0.9959
Confusion Matrix:
 [[165   0   1]
 [  0 167   0]
 [  0   1 159]]
Best model updated




In [11]:

model = ResNet50Transfer(num_classes=3).to(device)
model.load_state_dict(torch.load("best_resnet50_model.pth"))
model.eval()




ResNet50Transfer(
  (base_model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential

## Wybór modelu

Jak widać najlepszym modelem okazał się być model transfer learningowy z dokładnością 99.5% i tylko jedną poważną pomyłką (sklasyfikowanie jednej z chorób jako brak choroby) także najprawdopodobniej będziemy kontynuować jego rozwój