In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

import matplotlib.pyplot as plt
import numpy as np

from transformers import ViTForImageClassification, ViTImageProcessor
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Dataset roots; both are read with ImageFolder further down.
train_dir = "dermnet/train"
val_dir = "dermnet/test"

# ViT: a pretrained base model that splits the image into 16x16 patches,
# expects 224x224 pixel inputs and was trained on the ImageNet-21k dataset.
image_processor = ViTImageProcessor.from_pretrained(
    "google/vit-base-patch16-224-in21k"
)

# Image preprocessing: resize to 224x224, convert to the tensor format PyTorch
# works with, then normalise the pixel values per channel (R, G, B).
#
# Normalisation subtracts the mean and divides by the standard deviation so
# that the inputs fall in the same range as the images the ViT checkpoint was
# trained on, which helps accuracy.
# Example: if image_processor.image_mean = [0.5, 0.5, 0.5] and
# image_processor.image_std = [0.5, 0.5, 0.5], a pixel value of 0.6 on the
# R channel becomes (0.6 - 0.5) / 0.5 = 0.2.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=image_processor.image_mean,
            std=image_processor.image_std,
        ),
    ]
)

# Load the datasets; the same preprocessing is applied to both splits.
train_dataset = datasets.ImageFolder(train_dir, transform=transform)
val_dataset = datasets.ImageFolder(val_dir, transform=transform)

# Only the training batches are shuffled; validation keeps dataset order.
train_loader = DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=16,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

# Class names as detected by ImageFolder on the training split.
class_names = train_dataset.classes
# Number of distinct classes; used to size the classification head.
num_classes = len(class_names)
# Show the detected classes.
print(f"Classes: {class_names}")

# Same checkpoint as the image processor, with one output label per class.
model = ViTForImageClassification.from_pretrained(
    "google/vit-base-patch16-224-in21k",
    num_labels=num_classes,
)
# Move the model to the CPU or GPU so that the model and all of its
# parameters live on the same device.
model.to(device)

criterion = nn.CrossEntropyLoss()
# AdamW is the optimisation algorithm: it aims to find good model weights
# efficiently and stably by reducing the error during training, and helps
# keep the model from overfitting the data. lr=2e-5 is the learning rate
# (0.00002), i.e. how quickly the optimiser adjusts the parameters.
optimizer = optim.AdamW(model.parameters(), lr=2e-5)


def _mean_or_nan(total, count):
    """Return ``total / count``, or NaN when ``count`` is zero."""
    if count == 0:
        return float("nan")
    return total / count


def train(model, train_loader, val_loader, epochs=10):
    """Train ``model`` and evaluate it on ``val_loader`` after every epoch.

    The function relies on the module-level ``device``, ``criterion`` and
    ``optimizer`` objects. Each loader must yield ``(images, labels)`` pairs
    and support ``len()``; ``model(images)`` must return an object exposing
    a ``logits`` attribute.

    Args:
        model: Network to optimise; switched between train and eval mode.
        train_loader: Iterable of training batches.
        val_loader: Iterable of validation batches.
        epochs: Number of passes over ``train_loader``.

    Returns:
        A tuple ``(train_losses, val_losses, train_accs, val_accs)`` of lists
        with one entry per epoch. Losses are averaged per batch, accuracies
        are the fraction of correctly classified samples. A metric is NaN
        when the corresponding loader produced no batches/samples.
    """
    train_losses, val_losses, train_accs, val_accs = [], [], [], []

    for epoch in range(epochs):
        # Training mode: lets special layers such as dropout and batch
        # normalisation behave as they should during training.
        model.train()
        running_loss, correct, total = 0.0, 0, 0

        for brojac, (images, labels) in enumerate(train_loader, start=1):
            images, labels = images.to(device), labels.to(device)
            print(f"Batch počinje... {brojac}")
            optimizer.zero_grad()
            outputs = model(images).logits
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # No torch.cuda.empty_cache() here: it does not give PyTorch more
            # memory and calling it every batch only slows the loop down.

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            # Correctly classified samples in this batch.
            correct += (predicted == labels).sum().item()
            # Samples processed in this batch.
            total += labels.size(0)

        # Average loss per batch and overall accuracy for the epoch.
        train_loss = _mean_or_nan(running_loss, len(train_loader))
        train_acc = _mean_or_nan(correct, total)

        # Evaluation mode for the validation pass.
        model.eval()
        val_loss, val_correct, val_total = 0.0, 0, 0

        # Gradients are not needed for validation, so skip tracking them to
        # avoid spending resources on unnecessary computation.
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)

                outputs = model(images).logits
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                val_correct += (predicted == labels).sum().item()
                val_total += labels.size(0)

        val_loss = _mean_or_nan(val_loss, len(val_loader))
        val_acc = _mean_or_nan(val_correct, val_total)

        # Record this epoch's losses and accuracies so they can be plotted
        # and analysed later.
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_accs.append(train_acc)
        val_accs.append(val_acc)

        print(f"Epoch [{epoch+1}/{epochs}] - Loss: {train_loss:.4f}, Accuracy: {train_acc:.4f}, Validation loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.4f}")

    return train_losses, val_losses, train_accs, val_accs

# Report whether CUDA is usable and which device the run will use.
print("CUDA dostupna:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("Trenutni uređaj:", torch.cuda.get_device_name(0))
else:
    print("Trenutni uređaj:", "CPU")

import time

# Take wall-clock timestamps around a two-epoch training run.
start_time = time.time()
train_losses, val_losses, train_accs, val_accs = train(
    model,
    train_loader,
    val_loader,
    epochs=2,
)
end_time = time.time()

# Duration of the training run in seconds.
elapsed = end_time - start_time




