# Treniranje i evaluiranje modela

### Importovanje potrebnih modula

In [None]:
import torch
from torch.utils.data import Dataset
import os
from torchvision import transforms, models
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import torch.nn as nn
from datetime import datetime
import numpy as np
import torch.optim as optim
from sklearn.metrics import roc_auc_score, classification_report, average_precision_score
import time
import copy

In [None]:
# Start of the run: wall-clock timestamp that the last cell subtracts from the
# end time to report the total execution time
start_time = time.time()

### Definisanje imena fajlova

In [None]:
# Parameters that determine how the run's output files are named
version = "third"
model_name = "resnet50"

# Timestamp (YYYYMMDD_HHMMSS) that makes the file names of every run unique
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

# torch.save, DataFrame.to_csv and open(..., 'w') do not create missing parent
# directories, so make sure the per-model output folder exists up front instead
# of failing only after training has already finished
os.makedirs(f"./{model_name}", exist_ok=True)

# Checkpoint, predictions CSV and text report share the same stem
model_save_name = f"./{model_name}/{model_name}_{version}_version_{timestamp}.pth"
predictions_file = f"./{model_name}/{model_name}_{version}_version_{timestamp}.csv"
text_output_file = f"./{model_name}/{model_name}_{version}_version_{timestamp}.txt"

In [None]:
# Root folder of the dataset; every split lives directly underneath it
root_directory = "./NEW/data_8_1_1"

# Image folders of the three splits
train_directory, valid_directory, test_directory = (
    os.path.join(root_directory, split_name)
    for split_name in ("train", "valid", "test")
)

# CSV index files of the three splits
train_csv, valid_csv, test_csv = (
    os.path.join(root_directory, f"{split_name}.csv")
    for split_name in ("train", "valid", "test")
)

### Racunanje weight-ova

In [None]:
def calculate_class_weights(csv_file, classes):
    """Compute one balancing weight per class from a multi-label CSV file.

    The second CSV column is read as a string of labels joined with '|'.
    Labels that are not listed in ``classes`` are ignored.

    Args:
        csv_file: Path of the CSV file to read.
        classes: Sequence of class names; it fixes the order of the result.

    Returns:
        A float32 tensor with one entry per class, equal to
        ``rows / (occurrences_of_class * len(classes))``, or 0 for a class
        that never occurs.
    """
    data = pd.read_csv(csv_file)
    total_count = len(data)
    # Derive the class count from the argument itself rather than from a
    # module-level variable, so the function works for any class list and
    # regardless of the order in which notebook cells were executed
    n_classes = len(classes)

    # Occurrence counter for every requested class
    class_counts = dict.fromkeys(classes, 0)
    for labels in data.iloc[:, 1]:
        for label in labels.split('|'):
            if label in class_counts:
                class_counts[label] += 1

    # Inverse-frequency weight; absent classes get 0 to avoid dividing by zero
    class_weights = [
        total_count / (class_counts[cls] * n_classes) if class_counts[cls] > 0 else 0
        for cls in classes
    ]
    return torch.tensor(class_weights, dtype=torch.float32)

### Definisanje klasa, weight-ova i batch-eva

In [None]:
# Class names; their order fixes the column order of the label vectors
disease_classes = [
    'Atelectasis',
    'Cardiomegaly',
    'Consolidation',
    'Edema',
    'Effusion',
    'Emphysema',
    'Fibrosis',
    'Hernia',
    'Infiltration',
    'Mass',
    'No Finding',
    'Nodule',
    'Pleural_Thickening',
    'Pneumonia',
    'Pneumothorax',
]

# Number of output classes of the network
num_classes = len(disease_classes)

In [None]:
# Per-class weights derived from the label distribution of the training CSV
class_weights = calculate_class_weights(csv_file=train_csv, classes=disease_classes)

# Batch sizes for the train / validation / test loaders
train_batch_size, valid_batch_size, test_batch_size = 16, 8, 8

### Ucitavanje podataka

In [None]:
class CustomDataset(Dataset):
    """Multi-label image dataset driven by a CSV index file.

    Column 0 of the CSV holds the image file name (relative to ``root_dir``)
    and column 1 holds the sample's labels joined with '|'.

    Args:
        csv_file: Path of the CSV index file.
        root_dir: Folder that contains the images.
        classes: Ordered class names; label vectors follow this order.
        transform: Optional callable applied to the loaded RGB image.
    """

    def __init__(self, csv_file, root_dir, classes, transform=None):
        self.data_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.classes = classes
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.data_frame)

    def _encode_labels(self, labels_str):
        """Turn a '|'-joined label string into a float32 multi-hot tensor."""
        present = set(labels_str.split('|'))
        # float32 because the loss function expects floating-point targets
        return torch.tensor(
            [1 if cls in present else 0 for cls in self.classes],
            dtype=torch.float32,
        )

    def __getitem__(self, idx):
        """Return ``{'image': ..., 'labels': ...}`` for the row at ``idx``."""
        if torch.is_tensor(idx):
            idx = idx.tolist()
        img_name = os.path.join(self.root_dir, self.data_frame.iloc[idx, 0])
        # Open inside a context manager so the file handle is released right
        # away instead of lingering until garbage collection; convert() returns
        # a new, fully loaded three-channel image
        with Image.open(img_name) as raw_image:
            image = raw_image.convert('RGB')
        # Column 1 of the CSV holds the string with the disease labels
        binary_labels = self._encode_labels(self.data_frame.iloc[idx, 1])
        sample = {'image': image, 'labels': binary_labels}
        if self.transform:
            sample['image'] = self.transform(sample['image'])
        return sample

In [None]:
# Settings shared by both pipelines: target size and the per-channel mean/std
# handed to Normalize (the standard ImageNet statistics)
_IMAGE_SIZE = (224, 224)
_NORMALIZE_MEAN = [0.485, 0.456, 0.406]
_NORMALIZE_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize, random flip/rotation augmentation, tensor, normalise
transform_train = transforms.Compose([
    transforms.Resize(_IMAGE_SIZE),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.ToTensor(),
    transforms.Normalize(_NORMALIZE_MEAN, _NORMALIZE_STD),
])
# Validation / test pipeline: same as above but without random augmentation
transform_valid_test = transforms.Compose([
    transforms.Resize(_IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(_NORMALIZE_MEAN, _NORMALIZE_STD),
])

# Only the training loader shuffles its samples
train_dataset = CustomDataset(train_csv, train_directory, disease_classes, transform=transform_train)
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=train_batch_size,
    shuffle=True,
)

valid_dataset = CustomDataset(valid_csv, valid_directory, disease_classes, transform=transform_valid_test)
valid_loader = torch.utils.data.DataLoader(
    valid_dataset,
    batch_size=valid_batch_size,
    shuffle=False,
)

test_dataset = CustomDataset(test_csv, test_directory, disease_classes, transform=transform_valid_test)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=test_batch_size,
    shuffle=False,
)

### Upisivanje parametara u text fajl

In [None]:
# Global string buffer that accumulates everything destined for the text report
str_output = ''

def upisi_parametre():
    """Reset the report buffer to a header followed by a dump of the run parameters.

    Reads the module-level ``model_name``, ``version`` and ``timestamp`` for the
    header, and looks every listed parameter up by name in the module-level
    ``local_vars`` mapping, which must be assigned before this is called.

    Raises:
        KeyError: If one of the listed names is missing from ``local_vars``.
    """
    global str_output
    variable_names = [
        "model",
        "lr",
        "criterion",
        "optimizer",
        "scheduler",
        "train_batch_size",
        "valid_batch_size",
        "test_batch_size",
        "num_epochs",
        "early_stoppage_patience",
        "transform_train",
        "transform_valid_test",
        "fc_layer",
        "model_save_name",
        "predictions_file",
        "text_output_file",
        "class_weights",
        "root_directory",
        "disease_classes"
    ]
    separator = "-------------------------------------------------------------------------------\n"
    parts = [
        f"TESTIRANJE MODELA: {model_name} , VERZIJA: {version} , TIMESTAMP: {timestamp}\n",
        separator,
        separator,
        separator,
        "\n\n",
        # The heading needs its own line break; without it the title is fused
        # with the separator line that follows
        "PARAMETRI\n",
        separator,
    ]
    # One "name: value" line per parameter
    parts.extend(f"{var_name}: {local_vars[var_name]}\n" for var_name in variable_names)
    parts.append(separator)
    parts.append("\n\n\n\n")
    str_output = "".join(parts)

# Print a message and keep a copy of it for the text report
def print_out(text):
    """Print ``text`` and append it, followed by a newline, to ``str_output``.

    Args:
        text: Message to emit. Non-string values are converted with ``str`` so
            that anything ``print`` accepts can also be stored in the report.
    """
    global str_output
    # Convert once so the printed and the stored text are guaranteed to match;
    # concatenating a non-string would otherwise fail after printing
    message = str(text)
    print(message)
    str_output += message + '\n'

### Treniranje

In [None]:
# Train the model for a single epoch
def train_one_epoch():
    """Run one pass over ``train_loader`` and return the mean batch loss.

    Uses the module-level ``model``, ``criterion``, ``optimizer``, ``device``,
    ``train_loader`` and ``print_out``. A running average is logged after every
    1000 batches.

    Returns:
        The average of the per-batch loss over the whole epoch, or 0.0 when the
        loader yields no batches.
    """
    running_loss = 0.
    total_running_loss = 0.
    num_batches = 0
    # Iterate over the batches of the training set
    for i, data in enumerate(train_loader):
        # Fetch images and labels and move them to the selected device
        inputs = data['image'].to(device)
        labels = data['labels'].clone().detach().to(device)
        # Clear the gradients left over from the previous step
        optimizer.zero_grad()
        # Forward pass
        outputs = model(inputs)
        # Compute the loss
        loss = criterion(outputs, labels)
        # Backward pass
        loss.backward()
        # Update the network parameters
        optimizer.step()
        batch_loss = loss.item()
        # running_loss feeds the periodic log and is reset every 1000 batches;
        # total_running_loss covers the whole epoch and is what gets returned
        running_loss += batch_loss
        total_running_loss += batch_loss
        num_batches += 1
        if i % 1000 == 999:
            last_loss = running_loss / 1000
            print_out('    batch {} loss: {}'.format(i + 1, last_loss))
            running_loss = 0.

    # Average training loss of the whole epoch (guard against an empty loader)
    if num_batches == 0:
        return 0.
    return total_running_loss / num_batches

#### Definisanje trening parametara

In [None]:
#from torchvision.models import VGG16_Weights
from torchvision.models import ResNet50_Weights
#from torchvision.models import DenseNet121_Weights

In [None]:
# Model initialisation (the commented lines are the alternative backbones)
#model = models.vgg16(weights=VGG16_Weights.IMAGENET1K_V1)
model = models.resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
#model = models.densenet121(weights=DenseNet121_Weights.IMAGENET1K_V1)

# Replace the last layer with a two-layer head that halves the feature width
# and then maps to one logit per class
_fc_in_features = model.fc.in_features
_fc_hidden_features = _fc_in_features // 2
fc_layer = nn.Sequential(
    nn.Linear(_fc_in_features, _fc_hidden_features),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(_fc_hidden_features, num_classes),
)
model.fc = fc_layer

# Learning rate
lr = 1e-5

# Loss function (the pos_weight=class_weights variant is currently disabled)
criterion = nn.BCEWithLogitsLoss()

# Use the first GPU when CUDA is available, otherwise the CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Move the model to the selected device
model.to(device)

# Optimiser
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)

# Learning-rate scheduler that reacts to a monitored value in 'min' mode
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')

# Number of epochs
num_epochs = 1

# Number of epochs without improvement tolerated before early stopping
early_stoppage_patience = 5

In [None]:
# Expose the module namespace under the name upisi_parametre reads from, then
# write the parameter section of the report
local_vars = globals()

upisi_parametre()

#### Trening petlja

In [None]:
# Move the loss function to the selected device
criterion = criterion.to(device)

# Early-stopping state: epochs without improvement and the best validation
# loss seen so far
early_stoppage_counter = 0
best_vloss = 1_000_000.

# Release cached GPU memory before training starts
torch.cuda.empty_cache()

# Train epoch by epoch
for epoch in range(num_epochs):
    print_out(f"EPOCH {epoch + 1}:")

    # Training phase
    model.train()
    avg_loss = train_one_epoch()

    # Validation phase: evaluation mode, no gradient tracking
    model.eval()
    batch_vlosses = []
    with torch.no_grad():
        for vdata in valid_loader:
            # Fetch images and labels and move them to the selected device
            vinputs = vdata['image'].to(device)
            vlabels = vdata['labels'].clone().detach().to(device)
            # Forward pass and loss of this batch
            voutputs = model(vinputs)
            batch_vlosses.append(criterion(voutputs, vlabels).item())

    # Average validation loss of the whole epoch
    avg_vloss = sum(batch_vlosses) / len(batch_vlosses)

    print_out(f"  LOSS train {avg_loss} valid {avg_vloss}")

    # Let the scheduler react to the validation loss
    scheduler.step(avg_vloss)
    print_out(f"  Scheduler generated lr: {scheduler.get_last_lr()}")

    if avg_vloss < best_vloss:
        # New best: remember it, reset the counter and save the model
        best_vloss = avg_vloss
        early_stoppage_counter = 0
        torch.save(model.state_dict(), model_save_name)
        continue

    # No improvement: count it and stop once the patience is used up
    early_stoppage_counter += 1
    if early_stoppage_counter >= early_stoppage_patience:
        print_out('Early stoppage')
        break
            

# Evaluacija

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Model initialisation
# weights=None: every parameter and buffer is overwritten by load_state_dict
# below, so loading the ImageNet weights first would be wasted work
model_test = models.resnet50(weights=None)
#model_test = models.vgg16(weights=VGG16_Weights.IMAGENET1K_V1)
#model_test = models.densenet121(weights=DenseNet121_Weights.IMAGENET1K_V1)

# Rebuild the same head that was trained so the saved state dict fits
model_test.fc = nn.Sequential(
    nn.Linear(model_test.fc.in_features, model_test.fc.in_features // 2),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(model_test.fc.in_features // 2, num_classes)
)

# Load the saved model; map_location puts the tensors on the evaluation device
# even if the checkpoint was written from a different one
model_test.load_state_dict(torch.load(model_save_name, map_location=device))

# Switch to evaluation mode and move the model to the selected device
model_test.eval()
model_test.to(device)

# Accumulators for the whole test set
test_probabilities = []
test_predictions = []
true_labels = []

# Decision threshold applied to the sigmoid probabilities
treshold = 0.7

# Iterate over the batches of the test set
with torch.no_grad():
    for i, data in enumerate(test_loader):
        # Fetch the images and move them to the selected device
        images = data['image']
        images = images.to(device)
        # Labels stay on the CPU; they are only compared with numpy arrays
        labels = data['labels']
        # Forward pass
        outputs = model_test(images)
        # Logits -> probabilities
        probabilities = torch.sigmoid(outputs)
        # Probabilities -> binary labels
        predicted = (probabilities > treshold).int()

        # Store the results of this batch
        test_probabilities.extend(probabilities.cpu().numpy())
        test_predictions.extend(predicted.cpu().numpy())
        true_labels.extend(labels.numpy())

# Sanity check on the accumulator sizes
assert len(true_labels) == len(test_predictions) == len(test_probabilities), "Lists have different lengths!"

# Convert to arrays of shape (samples, classes)
true_labels = np.array(true_labels)
test_predictions = np.array(test_predictions)
test_probabilities = np.array(test_probabilities)

# Classification report; target_names labels each row with the disease name
# instead of a bare column index
report = classification_report(
    true_labels,
    test_predictions,
    target_names=disease_classes,
    zero_division=0,
    digits=2,
)
print_out("Classification Report:")
print_out(report)

# Macro-averaged ROC AUC and PR AUC
auc = roc_auc_score(true_labels, test_probabilities, average='macro')
pr_auc = average_precision_score(true_labels, test_probabilities, average='macro')
# Overall results
overall_metrics = (
    f"Overall Metrics:\n"
    f"AUC Score: {auc:.4f}\n"
    f"PR_AUC Score: {pr_auc:.4f}\n"
)

print_out(overall_metrics)

# Per-class results
per_class_metrics = "\nPer-Class AUC Metrics:\n"
for i, class_name in enumerate(disease_classes):
    # Column i holds the values of this class
    auc_per_class = roc_auc_score(true_labels[:, i], test_probabilities[:, i])
    pr_auc_per_class = average_precision_score(true_labels[:, i], test_probabilities[:, i])
    per_class_metrics += (
        f"Class {class_name} AUC: {auc_per_class:.4f}, PR_AUC: {pr_auc_per_class:.4f}\n"
    )
print_out(per_class_metrics)

# Binary predictions and probabilities as comma-joined strings, one row per sample
map_predictions = [list(map(int, preds)) for preds in test_predictions]
map_probabilities = [list(map(float, probs)) for probs in test_probabilities]
map_results = pd.DataFrame({ 'True Labels': [','.join(map(str, true)) for true in true_labels], 
                                  'Predicted Labels': [','.join(map(str, pred)) for pred in map_predictions], 
                                  'Probabilities': [','.join(map(str, prob)) for prob in map_probabilities] })

# Save the CSV file (written without a header row or index column)
map_results.to_csv(predictions_file, index=False, header=False)

# Zavrsetak programa

In [None]:
# Total execution time in seconds
end_time = time.time()
execution_time = end_time - start_time

# Split the elapsed seconds into hours / minutes / seconds
hours, remainder = divmod(execution_time, 3600)
minutes, seconds = divmod(remainder, 60)

# Zero-padded HH:MM:SS
_clock = ":".join(f"{int(part):02}" for part in (hours, minutes, seconds))
print_out(f"Execution time: {_clock}")

# Write the complete collected output to the text report file
with open(text_output_file, 'w') as report_file:
    report_file.write(str_output)