In [1]:
import sys
print(sys.executable)


c:\Users\samue\AppData\Local\Programs\Python\Python39\python.exe


In [2]:
import tensorboard
print(tensorboard.__version__)
import pandas as pd
import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os
import albumentations as A
from albumentations.pytorch import ToTensorV2


2.19.0


In [3]:
import pandas as pd
import os

# 1. Učitaj metapodatke
df = pd.read_csv('dataset/HAM10000_metadata.csv')

# 2. Definiraj gdje tražimo slike
image_folders = ['dataset/HAM10000_images_part_1', 'dataset/HAM10000_images_part_2']

def find_image_path(image_id):
    for folder in image_folders:
        path = os.path.join(folder, f'{image_id}.jpg')
        if os.path.exists(path):
            return path
    return None  # Ako ne postoji

# 3. Dodaj stupac s punim putanjama do slika
df['path'] = df['image_id'].apply(find_image_path)

# 4. Kodiranje labela (dijagnoza)
label_mapping = {label: idx for idx, label in enumerate(df['dx'].unique())}
df['label'] = df['dx'].map(label_mapping)

# 5. Provjera ima li slika koje nisu pronađene
print("Nedostajuće slike:", df['path'].isnull().sum())


Nedostajuće slike: 0


In [4]:
from sklearn.model_selection import train_test_split

# Prvo odvojimo 80% za trening, a 20% za (validacija + test)
train_df, temp_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df['label'],
    random_state=42
)

# Sada 20% podijelimo po pola (10% validacija, 10% test)
val_df, test_df = train_test_split(
    temp_df,
    test_size=0.5,
    stratify=temp_df['label'],
    random_state=42
)


In [25]:
class SkinLesionDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)  # <-- ispravno ime: df
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_path = self.df.loc[idx, 'path']
        image = np.array(Image.open(img_path).convert('RGB'))
        label = self.df.loc[idx, 'label']
        if self.transform:
            transformed = self.transform(image=image)  # Albumentations expects image=image
            image = transformed['image']
        return image, label


In [26]:
from sklearn.model_selection import train_test_split

# Podjela: 80% train, 10% val, 10% test
train_df, temp_df = train_test_split(df, test_size=0.2, stratify=df['label'], random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, stratify=temp_df['label'], random_state=42)

# Transformacije
IMG_SIZE = 224
imagenet_mean = (0.485, 0.456, 0.406)
imagenet_std = (0.229, 0.224, 0.225)

train_transform = A.Compose([
    A.RandomResizedCrop(size=(IMG_SIZE, IMG_SIZE), scale=(0.8, 1.0), ratio=(0.9, 1.1), p=1.0),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.Rotate(limit=30, p=0.7),
    A.ColorJitter(brightness=0.25, contrast=0.25, saturation=0.2, hue=0.05, p=0.7),
    A.Affine(translate_percent={"x": 0.05, "y": 0.05}, scale=(0.9, 1.1), rotate=0, shear=0, p=0.5),
    A.GaussianBlur(blur_limit=(3, 7), p=0.3),
    A.Normalize(mean=imagenet_mean, std=imagenet_std),
    ToTensorV2(),
])

val_transform = A.Compose([
    A.Resize(width=IMG_SIZE, height=IMG_SIZE),
    A.Normalize(mean=imagenet_mean, std=imagenet_std),
    ToTensorV2(),
])
test_transform = val_transform

# Dataseti
train_dataset = SkinLesionDataset(train_df, transform=train_transform)
val_dataset = SkinLesionDataset(val_df, transform=val_transform)
test_dataset = SkinLesionDataset(test_df, transform=test_transform)

# DataLoaderi
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=0)


In [27]:
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter(log_dir='runs/skin_diagnosis_experiment3-augmented')


In [28]:
import torch

if torch.cuda.is_available():
    print("✅ GPU aktivan:", torch.cuda.get_device_name(0))
else:
    print("❌ GPU NIJE aktivan – koristiš CPU.")


✅ GPU aktivan: NVIDIA GeForce RTX 4060 Ti


In [29]:
import torch
import torch.nn as nn
import torchvision.models as models

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Učitaj pretrenirani EfficientNet-B0
model = models.efficientnet_b0(pretrained=True)

# Ukloni originalni klasifikacijski sloj i zamijeni vlastitim
num_features = model.classifier[1].in_features
model.classifier = nn.Sequential(
    nn.Dropout(p=0.5),
    nn.Linear(num_features, 7)  # 7 klasa za HAM10000
)

# Premjesti na GPU/CPU
model = model.to(device)




In [31]:
import torch.optim as optim
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

from sklearn.utils.class_weight import compute_class_weight
import numpy as np

# Izračunaj težine
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(df['label']),
    y=df['label']
)

# Pretvori u tensor i prebaci na GPU (ako treba)
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float).to(device)

# Dodaj u loss funkciju
criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)


In [32]:
from tqdm import tqdm

def evaluate_model(model, dataloader):
    model.eval()
    all_preds = []
    all_labels = []
    total_loss = 0.0

    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            total_loss += loss.item() * images.size(0)

            preds = torch.argmax(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    acc = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    precision = precision_score(all_labels, all_preds, average='macro')

    return total_loss / len(dataloader.dataset), acc, f1, recall, precision


In [33]:
print("Ukupno slika:", len(df))
print("Trening skup:", len(train_df))
print("Validacijski skup:", len(val_df))
print("Testni skup:", len(test_df))
print("\nDistribucija klasa (trening):")
print(train_df['label'].value_counts())
print("\nDistribucija klasa (validacija):")
print(val_df['label'].value_counts())


Ukupno slika: 10015
Trening skup: 8012
Validacijski skup: 1001
Testni skup: 1002

Distribucija klasa (trening):
label
1    5364
3     890
0     879
5     411
6     262
4     114
2      92
Name: count, dtype: int64

Distribucija klasa (validacija):
label
1    670
3    111
0    110
5     51
6     33
4     14
2     12
Name: count, dtype: int64


In [34]:
best_f1 = 0.0
patience = 3
counter = 0

for epoch in range(10):
    model.train()
    running_loss = 0.0
    all_preds = []
    all_labels = []

    for images, labels in tqdm(train_loader, desc=f'Epoch {epoch+1}/10'):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        preds = torch.argmax(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

    train_loss = running_loss / len(train_loader.dataset)
    train_acc = accuracy_score(all_labels, all_preds)
    train_f1 = f1_score(all_labels, all_preds, average='macro')
    train_recall = recall_score(all_labels, all_preds, average='macro')
    train_precision = precision_score(all_labels, all_preds, average='macro')

    val_loss, val_acc, val_f1, val_recall, val_precision = evaluate_model(model, val_loader)

    # TensorBoard logging
    writer.add_scalars('Loss', {'Train': train_loss, 'Val': val_loss}, epoch)
    writer.add_scalars('Accuracy', {'Train': train_acc, 'Val': val_acc}, epoch)
    writer.add_scalars('F1', {'Train': train_f1, 'Val': val_f1}, epoch)
    writer.add_scalars('Recall', {'Train': train_recall, 'Val': val_recall}, epoch)
    writer.add_scalars('Precision', {'Train': train_precision, 'Val': val_precision}, epoch)

    print(f"📊 Epoch {epoch+1}/10")
    print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")
    print(f"Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f}")
    print(f"Train F1: {train_f1:.4f} | Val F1: {val_f1:.4f}")

    if val_f1 > best_f1:
        best_f1 = val_f1
        counter = 0
        torch.save(model.state_dict(), 'best_efficient_net-new_pretprocesing.pth')
        print("✅ Saved new best model")
    else:
        counter += 1
        print(f"⚠️ No improvement in F1 for {counter} epoch(s)")
        if counter >= patience:
            print("⏹️ Early stopping triggered")
            break
##$$ OVO JE JOS BOLJEŠANO S OBZIROM DA JE DODAN EARLY STOPPING

Epoch 1/10: 100%|██████████| 251/251 [01:19<00:00,  3.15it/s]


📊 Epoch 1/10
Train Loss: 1.5545 | Val Loss: 1.1025
Train Acc: 0.4539 | Val Acc: 0.6244
Train F1: 0.2781 | Val F1: 0.5064
✅ Saved new best model


Epoch 2/10: 100%|██████████| 251/251 [01:17<00:00,  3.25it/s]


📊 Epoch 2/10
Train Loss: 0.9837 | Val Loss: 0.7541
Train Acc: 0.6460 | Val Acc: 0.7103
Train F1: 0.4967 | Val F1: 0.6415
✅ Saved new best model


Epoch 3/10: 100%|██████████| 251/251 [01:17<00:00,  3.25it/s]


📊 Epoch 3/10
Train Loss: 0.8098 | Val Loss: 0.6862
Train Acc: 0.6965 | Val Acc: 0.7193
Train F1: 0.5710 | Val F1: 0.6316
⚠️ No improvement in F1 for 1 epoch(s)


Epoch 4/10: 100%|██████████| 251/251 [01:18<00:00,  3.22it/s]


📊 Epoch 4/10
Train Loss: 0.7216 | Val Loss: 0.6401
Train Acc: 0.7146 | Val Acc: 0.7882
Train F1: 0.6058 | Val F1: 0.6934
✅ Saved new best model


Epoch 5/10: 100%|██████████| 251/251 [01:19<00:00,  3.17it/s]


📊 Epoch 5/10
Train Loss: 0.6601 | Val Loss: 0.6458
Train Acc: 0.7243 | Val Acc: 0.7622
Train F1: 0.6322 | Val F1: 0.6813
⚠️ No improvement in F1 for 1 epoch(s)


Epoch 6/10: 100%|██████████| 251/251 [01:17<00:00,  3.25it/s]


📊 Epoch 6/10
Train Loss: 0.5778 | Val Loss: 0.6415
Train Acc: 0.7413 | Val Acc: 0.7782
Train F1: 0.6622 | Val F1: 0.7080
✅ Saved new best model


Epoch 7/10: 100%|██████████| 251/251 [01:18<00:00,  3.21it/s]


📊 Epoch 7/10
Train Loss: 0.5833 | Val Loss: 0.6144
Train Acc: 0.7490 | Val Acc: 0.7912
Train F1: 0.6696 | Val F1: 0.7274
✅ Saved new best model


Epoch 8/10: 100%|██████████| 251/251 [01:15<00:00,  3.31it/s]


📊 Epoch 8/10
Train Loss: 0.4901 | Val Loss: 0.6293
Train Acc: 0.7675 | Val Acc: 0.8172
Train F1: 0.7118 | Val F1: 0.7556
✅ Saved new best model


Epoch 9/10: 100%|██████████| 251/251 [01:15<00:00,  3.32it/s]


📊 Epoch 9/10
Train Loss: 0.4586 | Val Loss: 0.5957
Train Acc: 0.7777 | Val Acc: 0.8022
Train F1: 0.7135 | Val F1: 0.7284
⚠️ No improvement in F1 for 1 epoch(s)


Epoch 10/10: 100%|██████████| 251/251 [01:22<00:00,  3.06it/s]


📊 Epoch 10/10
Train Loss: 0.4424 | Val Loss: 0.5761
Train Acc: 0.7837 | Val Acc: 0.8092
Train F1: 0.7290 | Val F1: 0.7467
⚠️ No improvement in F1 for 2 epoch(s)


In [35]:
model.load_state_dict(torch.load("best_efficient_net-new_pretprocesing.pth"))
model.eval()
test_loss, test_acc, test_f1, test_recall, test_precision = evaluate_model(model, test_loader)
print(f"Test Acc: {test_acc:.4f} | Test F1: {test_f1:.4f}")


  model.load_state_dict(torch.load("best_efficient_net-new_pretprocesing.pth"))


Test Acc: 0.8174 | Test F1: 0.6918
