# Tumour classification

In [None]:
import torch

%load_ext autoreload
%autoreload 2

# Pick the compute backend in order of preference: CUDA first, then Apple's
# MPS backend, and finally plain CPU when no accelerator is reported.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print(f"Using {device} device")


# Variable set up

In [None]:
import os

# Run configuration: everything a reader may want to tune lives in this cell.

# NOTE(review): presumably set to tolerate more than one OpenMP runtime being
# loaded in the same process -- confirm it is still needed on this machine.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

# Architecture name handed to BaseModel further down (a plain string, not a network).
model = 'maxvit_t'

BATCH_SIZE = 32
EPOCHS = 50
IMAGE_SIZE = 224
learning_rate = 1e-3
DATASET_DIR = 'kidney/KidneyTumor'

# Pre-trained weights identifier; None means no pre-trained weights are requested.
weights = None
# weights = 'IMAGENET1K_V1'

transfer_learning = False
# transfer_learning = True

dropout = 0.4

# Fractions of the full dataset used for each split; whatever is left is discarded.
train_percentage = (20) / 100
test_percentage = (5) / 100
validation_percentage = (5) / 100
discard_percentage = 1 - (train_percentage + test_percentage + validation_percentage)

image_channel = 3

# Raw string: the Windows path is full of backslashes (\O, \T, \p) that are not
# valid escape sequences. In a normal string they only work by accident and
# trigger a warning on current Python versions.
PROJECT_ROOT = r'G:\OneDrive\OneDrive - Universidad de Las Palmas de Gran Canaria\TFT\project'
dataset_root = f'{PROJECT_ROOT}\\{DATASET_DIR}'

print(f'Model: {model}')




## Dataset set up

In [None]:
import os
from collections import Counter
from torchvision import datasets
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor, Compose, Grayscale, Resize, RandomHorizontalFlip, RandomVerticalFlip
from torchvision.io import read_image
import matplotlib.pyplot as plt
from utils import *

import os
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image

# Fixed seed for the train/test/validation split, so that re-running the
# notebook evaluates on the same held-out samples every time.
SPLIT_SEED = 42

# Resize every image to IMAGE_SIZE x IMAGE_SIZE and convert it to a tensor.
transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
])

# MixDataset comes from the project's `utils` module (star import above).
dataset = MixDataset(dataset_dir=dataset_root, transform=transform)

# Fail here with a clear message instead of a NameError on `training` below.
if not isinstance(dataset, MixDataset):
    raise TypeError(
        f"Expected a MixDataset to split, got {type(dataset).__name__}"
    )

train_size = int(train_percentage * len(dataset))
test_size = int(test_percentage * len(dataset))
validation_size = int(validation_percentage * len(dataset))

# Despite its name, this is the remainder that is NOT used by any split.
# random_split needs the lengths to add up to len(dataset).
total_size = len(dataset) - train_size - test_size - validation_size

training, testing, validation, _ = torch.utils.data.random_split(
    dataset,
    [train_size, test_size, validation_size, total_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Only the training loader shuffles; evaluation loaders keep a stable order.
train_dataloader = DataLoader(training, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(testing, batch_size=BATCH_SIZE, shuffle=False)
validation_dataloader = DataLoader(validation, batch_size=BATCH_SIZE, shuffle=False)

# Pull a single batch as a sanity check of what the loader yields.
X, y = next(iter(train_dataloader))
print(f"Shape of X: {X.shape}")
print(f"Shape of y: {y.shape} {y.dtype}")

num_classes = dataset.num_classes
print('Classes:', dataset)
print('Num classes:', num_classes)
print('Training length total count:',len(train_dataloader.dataset))
print('Test length total count:', len(test_dataloader.dataset))
print('Validation length total count:', len(validation_dataloader.dataset))

# show_samples(X, y)
# draw_each_class(dataset=dataset)


## Train Vision Transformer

In [None]:

from torchinfo import summary
#from torchsummary import summary

from transformers_models import BaseModel

# Options forwarded to the project's BaseModel wrapper together with the
# architecture name chosen in the configuration cell.
kwargs = dict(
    weights=weights,
    transfer_learning=transfer_learning,
    num_classes=num_classes,
    dropout=dropout,
)

model_instance = BaseModel(model, **kwargs)

# Kept as the final expression so the notebook renders the torchinfo summary.
summary(model=model_instance.get_model())

# Memory Management

In [None]:
import gc
import torch

# Collect unreachable Python objects first, so that any tensors they still
# hold are released before the CUDA cache is emptied.
gc.collect()
# Release PyTorch's unused cached GPU memory.
torch.cuda.empty_cache()
# Kept from the original cell: make sure automatic garbage collection is on
# (it already is by default, so on its own this call frees nothing).
gc.enable()


# Load saved model state from a file path

In [None]:
# Manual switch: change to True to restore previously saved weights before training.
if False:
    # Raw string so the backslash in the Windows path is not read as an escape.
    dir_to_model = r'saved_models\maxvit_t_1706443129724.pth'
    # `model` is only the architecture name (a str), so the loader has to be
    # called on the wrapper instance instead.
    # NOTE(review): assumes BaseModel exposes `load_state_model` -- confirm.
    model_instance.load_state_model(dir_to_model)
    

# Training

In [None]:
from torch import optim, nn
import time
import gc
from utils import convert_seconds

# Loss function and optimizer over all parameters of the wrapped network.
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    model_instance.get_model().parameters(),
    lr=learning_rate,
)

# Metric histories. They are passed to the BaseModel helpers, which are
# presumably responsible for appending to them in place -- TODO confirm.
train_acc = []
train_loss = []
test_acc = []
test_loss = []
val_acc = []
val_loss = []

# Book-keeping for the best validation result seen so far.
best_validation_epoch = 0   # best validation accuracy reported by train()
best_epoch = 0              # zero-based index of the epoch that achieved it
best_timestamp_epoch = 0    # seconds since training start when it was reached

start_time = time.time()

# Extra keyword arguments forwarded to model_instance.train() on every epoch.
kwargs = {
    'validation_dataloader': validation_dataloader,
    'val_acc': val_acc,
    'val_loss': val_loss,
    'device': device,
}

for epoch in range(EPOCHS):
    epoch_start_time = time.time()
    print(f"Epoch {epoch+1}/{EPOCHS}\n-------------------------------")
    current_val_acc, current_val_loss, best_acc = model_instance.train(train_dataloader, loss_fn, optimizer, train_loss, train_acc, BATCH_SIZE, **kwargs)
    epoch_end_time = time.time()
    epoch_time = epoch_end_time - epoch_start_time
    current_time = epoch_end_time - start_time

    # Record when (and at which epoch) the best validation accuracy happened.
    if best_acc > best_validation_epoch:
        best_validation_epoch = best_acc
        best_epoch = epoch
        best_timestamp_epoch = current_time

    print(f"\nEpoch Time:                   {convert_seconds(epoch_time)}\nTraining time up to now:     {convert_seconds(current_time)} s")
    # Report the timestamp of the best epoch, not the elapsed time of the
    # current one, so this line agrees with the final summary cell.
    print(f"Best Epoch Time at epoch: {best_epoch} --- > --- > Validation Accuracy: {best_validation_epoch:>2f} % Timestamp: {convert_seconds(best_timestamp_epoch)} s \n-------------------------------")


end_time = time.time()
total_time = end_time - start_time

print("Done!", "Time of training: ", convert_seconds(total_time), "Best Training accuracy: ", max(train_acc), "Best Validation Avg accuracy: ", max(val_acc))


# Print settings

In [None]:

      
# Evaluate on the held-out test split, then print a recap of the whole run.
print(f"Evaluating Test Error Dataset with length: {len(test_dataloader.dataset)} \n")
test_eval_acc, test_eval_loss = model_instance.evaluate(
    test_dataloader,
    loss_fn,
    test_loss,
    test_acc,
    device,
)
print(f"Test Error with length: {len(test_dataloader.dataset)}: \n Accuracy: {test_eval_acc:>0.1f}%, Average loss: {test_eval_loss:>8f} \n")

# One-line headline of the run.
print(
    "Done!",
    "Time of training: ", convert_seconds(total_time),
    "Best Training accuracy: ", max(train_acc),
    "Test accuracy: ", max(test_acc),
    "Best Validation accuracy: ", max(val_acc),
)

# Settings and headline metrics, assembled first and printed in one go.
run_report = (
    f'Dataset: {DATASET_DIR}\n'
    f'Model: {model_instance.get_model_class()}\n'
    f'Batch size: {BATCH_SIZE}\n'
    f'Epochs: {EPOCHS}\n'
    f'Number of classes: {num_classes}\n'
    f'Image size and image channel: {image_channel}x{IMAGE_SIZE}x{IMAGE_SIZE}\n'
    f'Learning rate: {learning_rate}\n'
    f'Time execution: {convert_seconds(total_time)}\n'
    f'\nBest accuracy: {max(train_acc)}\n'
    f'Best accuracy in Test Dataset: {max(test_acc)}\n'
    f'Best accuracy in Validation Dataset: {max(val_acc)}\n'
)
print(run_report)

print(f"Best Epoch Time at epoch: {best_epoch}\nValidation Avg Accuracy: {best_validation_epoch:>2f}\nTimestamp: {convert_seconds(best_timestamp_epoch)} s ")


# The split breakdown is only reported for a MixDataset with configured fractions.
split_was_configured = train_percentage is not None and type(dataset) == MixDataset
if split_was_configured:
    split_report = (
        f'Usage of Dataset for training: {train_percentage*100}%\n'
        f'Usage of Dataset for testing: {test_percentage*100}%\n'
        f'Usage of Dataset for validation: {validation_percentage*100}%\n'
        f'Dataset discarded: {discard_percentage*100}%\n'
    )
    print(split_report)



In [None]:
from utils import *

max_length = 0

# For each history list, show its most recent value next to its length.
for history in (train_loss, train_acc, val_acc, val_loss):
    print(history[-1], len(history))

# Plot the recorded curves, then persist the model through the wrapper.
model_instance.draw_graphics(train_loss, train_acc, val_acc, val_loss)
model_instance.save_model()