In [9]:
!pip install flake8
!pip install pycodestyle pycodestyle_magic
import pycodestyle
import flake8
%load_ext pycodestyle_magic


The pycodestyle_magic extension is already loaded. To reload it, use:
  %reload_ext pycodestyle_magic


In [10]:
%%pycodestyle
import os
import numpy as np
import imageio
import pandas as pd
from sklearn.model_selection import train_test_split
from typing import Tuple, Dict, Union


def build_dataset(folder_path: str) -> pd.DataFrame:
    """
    Build a dataset from images stored in subdirectories.

    Args:
    - folder_path (str): Path to the folder containing
    subdirectories with images.

    Returns:
    - dataset (pd.DataFrame): DataFrame containing image
    details (name, path, label).
    """
    data = {'name': [], 'path': [], 'label': []}

    # Iterate through subdirectories (labels)
    for label in os.listdir(folder_path):
        label_path = os.path.join(folder_path, label)

        # Check if the item is a directory
        if os.path.isdir(label_path):
            # Iterate through image files in the subdirectory
            for image in os.listdir(label_path):
                image_path = os.path.join(label_path, image)

                # Check if the item is a file and is an image file
                if (os.path.isfile(image_path) and
                   image.lower().endswith(('.png', '.jpg', '.jpeg'))):
                    # Append image details to the dataset
                    data['name'].append(image)
                    data['path'].append(image_path)
                    data['label'].append(label)

    # Create a DataFrame from the collected data
    dataset = pd.DataFrame(data)
    return dataset


def make_dataset_by_folder(base_path: str,
                           label_column: str
                           ) -> Union[
                            pd.DataFrame,
                            pd.DataFrame,
                            pd.DataFrame]:
    """
    Create a dataset by splitting images into training, testing,
    and validation sets based on folders.

    Args:
    - base_path (str): Path to the base folder containing subdirectories
    with images.
    - label_column (str): Name of the column containing labels in
    the dataset.

    Returns:
    - Tuple of DataFrames: Three DataFrames representing the training,
    testing, and validation sets.
    """
    dataset = build_dataset(base_path)
    train_df, test_df, val_df = split_dataset_by_label(
        dataset,
        label_column=label_column,
        train_size=0.8,
        test_size=0.1,
        val_size=0.1)
    compare_label_counts(dataset,
                         train_df,
                         desired_proportion=0.8)
    compare_label_counts(dataset,
                         test_df,
                         desired_proportion=0.1)
    compare_label_counts(dataset,
                         val_df,
                         desired_proportion=0.1)

    return train_df, test_df, val_df


def make_dataset_by_df(paths_image: Tuple[str, str],
                       paths_df: Tuple[str, str],
                       label_column: str
                       ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Create a dataset by combining image paths with DataFrame paths.

    Args:
    - paths_image (Tuple[str, str]): Tuple containing paths
    to image folders for training and testing.
    - paths_df (Tuple[str, str]): Tuple containing paths
    to DataFrames for training and testing.
    - label_column (str): Name of the column containing
    labels in the dataset.

    Returns:
    - Tuple of DataFrames: Three DataFrames representing the training,
    testing, and validation sets.
    """
    train_df = pd.read_csv(paths_df[0])
    train_df = add_image_paths_to_dataframe(train_df,
                                            paths_image[0],
                                            column_name='image_id')

    test_df = pd.read_csv(paths_df[1])
    test_df = add_image_paths_to_dataframe(test_df,
                                           paths_image[1],
                                           column_name='image_id')

    train_df, val_df = split_dataset_by_label(train_df,
                                              label_column=label_column,
                                              train_size=0.9,
                                              test_size=0,
                                              val_size=0.1,
                                              return_test=False)

    return train_df, test_df, val_df


def split_dataset_by_label(dataframe: pd.DataFrame,
                           train_size: float,
                           test_size: float,
                           val_size: float,
                           label_column: str = "label",
                           return_test: bool = True
                           ) -> Union[
                               pd.DataFrame,
                               pd.DataFrame,
                               pd.DataFrame]:
    """
    Split the dataset into training, testing, and validation sets
    based on labels.

    Args:
    - dataframe (pd.DataFrame): DataFrame containing the dataset.
    - train_size (float): Proportion of the dataset to include
    in the training set.
    - test_size (float): Proportion of the dataset to include
    in the testing set.
    - val_size (float): Proportion of the dataset to include
    in the validation set.
    - label_column (str, optional): Name of the column containing
    labels in the dataset (default: "label").
    - return_test (bool, optional): Whether to return the testing
    set (default: True).

    Returns:
    - Union of DataFrames: Three DataFrames representing the training,
    testing, and validation sets.
    """
    # Shuffle the dataframe
    shuffled_data = (dataframe.sample(frac=1, random_state=42)
                     .reset_index(drop=True))

    # Calculate the counts of each label
    label_counts = shuffled_data[label_column].value_counts()

    train_data = pd.DataFrame()
    test_data = pd.DataFrame()
    val_data = pd.DataFrame()

    # Iterate through unique labels
    for label in label_counts.index:
        label_data = shuffled_data[shuffled_data[label_column] == label]
        train, test_val = train_test_split(label_data,
                                           test_size=(1 - train_size),
                                           random_state=42)

        if return_test:
            test, val = train_test_split(test_val,
                                         test_size=(val_size /
                                                    (val_size + test_size)),
                                         random_state=42)
            test_data = pd.concat([test_data, test])

            if val_size > 0:
                val_data = pd.concat([val_data, val])
        else:

            if val_size > 0:
                val_size_label = int(len(test_val) *
                                     (val_size / (val_size + test_size)))
                val_data = pd.concat([val_data,
                                      test_val.sample(n=val_size_label,
                                                      random_state=42)])

        train_data = pd.concat([train_data, train])

    if return_test and val_size > 0:
        return train_data, test_data, val_data

    return train_data, val_data


def compare_label_counts(original_df: pd.DataFrame,
                         train_df: pd.DataFrame,
                         desired_proportion: float,
                         label_column: str = 'label') -> None:
    """
    Compare label counts between original and training datasets.

    Args:
    - original_df (pd.DataFrame): Original DataFrame
    containing the dataset.
    - train_df (pd.DataFrame): DataFrame representing
    the training set.
    - desired_proportion (float): Desired proportion
    of each label in the training set.
    - label_column (str, optional): Name of the column
    containing labels in the dataset (default: 'label').

    Returns:
    - None
    """
    label_counts_original = original_df[label_column].value_counts()

    for label, _ in label_counts_original.items():
        expected_count = int(len(original_df.loc[
            original_df[label_column] == label]) * desired_proportion)
        actual_count = len(train_df.loc[train_df[label_column] == label])

        if expected_count == actual_count:
            print("Is equal")
        else:
            print(f"Label '{label}' is not equal. Expected"
                  f" - {expected_count}, Actual - {actual_count}")


def add_image_paths_to_dataframe(dataframe: pd.DataFrame,
                                 folder_path: str,
                                 column_name: str
                                 ) -> pd.DataFrame:
    """
    Add image paths to a DataFrame based on image IDs and a folder path.

    Args:
    - dataframe (pd.DataFrame): DataFrame containing image IDs.
    - folder_path (str): Path to the folder containing the images.
    - column_name (str): Name of the column containing image IDs
    in the DataFrame.

    Returns:
    - dataframe (pd.DataFrame): DataFrame with added 'path' column
    containing image paths.
    """
    image_paths = []

    # Get the list of image files in the folder
    image_files = os.listdir(folder_path)
    image_files = [f for f in image_files if f.endswith('.jpg')]

    # Create a dictionary mapping image_id to image file names in the folder
    image_id_to_file = {file.split('.')[0]: file for file in image_files}

    # Iterate through 'image_id' column in the DataFrame
    for image_id in dataframe[column_name]:
        # Check if the image_id exists in the dictionary mapping
        if image_id in image_id_to_file:
            image_file = os.path.join(folder_path, image_id_to_file[image_id])
            image_paths.append(image_file)
        else:
            image_paths.append(None)  # If image doesn't exist, insert None

    # Add a new column 'path' with the image paths to the DataFrame
    dataframe['path'] = image_paths

    return dataframe


def get_label_counts_and_print(dataframe: pd.DataFrame,
                               label_column: str
                               ) -> Dict[str, int]:
    """
    Get the counts of each label in the dataframe and print the results.

    Args:
    - dataframe (pd.DataFrame): DataFrame containing the dataset.
    - label_column (str): Name of the column containing labels in the dataset.

    Returns:
    - label_counts (Dict[str, int]): Dictionary containing label counts.
    """
    total_images = len(dataframe)  # Total number of images in the dataframe
    label_counts = dataframe[label_column].value_counts().to_dict()

    print(f"Total number of images: {total_images}")
    print(f"Number of unique labels: {len(label_counts)}")
    for label, count in label_counts.items():
        print(f"Label '{label}' has {count} images.")

    return label_counts


def calculate_image_statistics(dataframe: pd.DataFrame, path_column: str = 'path') -> Tuple[int, int, Dict[str, List[Tuple[float, float]]]]:
    """
    Calculate statistics for the images in the DataFrame.

    Args:
    - dataframe (pd.DataFrame): DataFrame containing image paths.
    - path_column (str, optional): Name of the column containing image paths (default: 'path').

    Returns:
    - smallest_pixel (int): Smallest pixel value found in the images.
    - largest_pixel (int): Largest pixel value found in the images.
    - channel_values (Dict[str, List[Tuple[float, float]]]): Channel-wise mean and standard deviation values.
    """
    smallest_pixel = float('inf')
    largest_pixel = 0
    total_images = 0
    channel_values = {'R': [], 'G': [], 'B': []}
    # Iterate through each image in the dataframe
    for _, row in dataframe.iterrows():
        image_path = row[path_column]
        if image_path.lower().endswith(('.png', '.jpg', '.jpeg')):
            # Read the image using imageio
            image = imageio.imread(image_path)

            # Check if the image is grayscale or RGB
            if len(image.shape) == 2:  # Grayscale image
                min_pixel = np.min(image)
                max_pixel = np.max(image)
                smallest_pixel = min(smallest_pixel, min_pixel)
                largest_pixel = max(largest_pixel, max_pixel)
                total_images += 1
                continue
   
            if len(image.shape) == 3 and image.shape[2] == 4:  # RGBA image
                image = image[:, :, :3]  # Remove alpha channel

            # Check for smallest and largest pixel value
            min_pixel = np.min(image)
            max_pixel = np.max(image)
            smallest_pixel = min(smallest_pixel, min_pixel)
            largest_pixel = max(largest_pixel, max_pixel)

            # Extract channel-wise values
            channels = np.dsplit(image, image.shape[-1])
            for i, channel in enumerate(channels):
                mean_val = np.mean(channel)
                std_val = np.std(channel)

                if i == 0:
                    channel_label = 'R'
                elif i == 1:
                    channel_label = 'G'
                else:
                    channel_label = 'B'

                channel_values[channel_label].append((mean_val, std_val))

            total_images += 1

    return smallest_pixel, largest_pixel, channel_values


def print_image_statistics(smallest_pixel: int,
                           largest_pixel: int,
                           total_images: int,
                           channel_values: Dict
                           )-> None:
    """
    Print the image statistics.

    Args:
    - smallest_pixel (int): Smallest pixel value found in the images.
    - largest_pixel (int): Largest pixel value found in the images.
    - total_images (int): Total number of images processed.
    - channel_values (Dict[str, List[Tuple[float, float]]]): Channel-wise
    mean and standard deviation values.
    """
    print(f"Smallest pixel value: {smallest_pixel}")
    print(f"Largest pixel value: {largest_pixel}")
    print(f"Total images processed: {total_images}")

    if not channel_values:
        print("No images were processed.")
    else:
        print("Channel Statistics:")
        for channel, values in channel_values.items():
            print(f"Channel '{channel}':")
            for i, (mean, std) in enumerate(values):
                print(f"  Image {i+1} - Mean: {mean},"
                      f" Standard Deviation: {std}")


def format_image_statistics(smallest_pixel: int,
                            largest_pixel: int,
                            total_images: int,
                            channel_values: Dict
                            ) -> Dict:
    """
    Format the image statistics into a dictionary.

    Args:
    - smallest_pixel (int): Smallest pixel value found in the images.
    - largest_pixel (int): Largest pixel value found in the images.
    - total_images (int): Total number of images processed.
    - channel_values (Dict): Channel-wise mean and standard deviation values.

    Returns:
    - image_stats (Dict): Dictionary containing image statistics.
    """
    is_gray = len(channel_values) == 1 and 'R' in channel_values
    if is_gray:
        return {
            'smallest_pixel_value': smallest_pixel,
            'largest_pixel_value': largest_pixel,
            'total_images': total_images,
            'channel_statistics': {},
            'channels': 1
        }

    # Calculate average and standard deviation per channel
    channel_stats = {}
    for channel, values in channel_values.items():
        avg = np.mean([val[0] for val in values])
        std_dev = np.mean([val[1] for val in values])
        channel_stats[channel] = {'average': avg, 'std_dev': std_dev}

    return {
        'smallest_pixel_value': smallest_pixel,
        'largest_pixel_value': largest_pixel,
        'total_images': total_images,
        'channel_statistics': channel_stats,
        'channels': 3
    }


def image_analysis(dataframe: pd.DataFrame,
                   path_column: str = 'path'
                   ) -> Dict:
    """
    Analyze the images in the DataFrame and extract image statistics.

    Args:
    - dataframe (pd.DataFrame): DataFrame containing image paths.
    - path_column (str, optional): Name of the column containing
    image paths (default: 'path').

    Returns:
    - image_stats (Dict): Dictionary containing image statistics.
    """
    smallest_pixel, largest_pixel, channel_values = (
        calculate_image_statistics(dataframe, path_column))
    print_image_statistics(smallest_pixel,
                           largest_pixel,
                           len(dataframe),
                           channel_values)
    image_stats = format_image_statistics(smallest_pixel,
                                          largest_pixel,
                                          len(dataframe),
                                          channel_values)
    return image_stats


def check_images_existence(dataframe: pd.DataFrame,
                           path_column: str = 'path'
                           ) -> pd.DataFrame:
    """
    Check the existence of image files in the specified folder path.

    Args:
    - dataframe (pd.DataFrame): DataFrame containing image paths.
    - path_column (str, optional): Name of the column containing
    image paths (default: 'path').

    Returns:
    - cleaned_dataframe (pd.DataFrame): DataFrame with removed
    rows containing non-existent image paths.
    """
    cleaned_dataframe = dataframe.copy()
    removed_images = []

    # Iterate through each row in the DataFrame
    for index, row in dataframe.iterrows():
        image_path = row[path_column]

        if image_path is None or not os.path.exists(image_path):
            removed_images.append(row)
            cleaned_dataframe.drop(index, inplace=True)
            print(f"Image not found in folder: {image_path}")

    # Print removed lines
    if removed_images:
        print("Removed lines:")
        for row in removed_images:
            print(row)

    return cleaned_dataframe


def analyze_image_shapes(dataframe: pd.DataFrame,
                         min_shape: Tuple[int, int],
                         path_column: str = 'path'
                         ) -> Dict[str, int]:
    """
    Analyze the shapes of images in the DataFrame.

    Args:
    - dataframe (pd.DataFrame): DataFrame containing image paths.
    - min_shape (Tuple[int, int]): Minimum shape required for images.
    - path_column (str, optional): Name of the column containing
    image paths (default: 'path').

    Returns:
    - image_stats (Dict[str, int]): Dictionary containing image
    shape statistics.
    """
    total_images = 0
    total_height = 0
    total_width = 0
    smaller_than_x_count = 0

    # Iterate through each row in the dataframe
    for _, row in dataframe.iterrows():
        image_path = row[path_column]
        if image_path.lower().endswith(('.png', '.jpg', '.jpeg')):
            # Read the image using imageio
            image = imageio.imread(image_path)
            if len(image.shape) == 2:  # Check if image is grayscale
                height, width = image.shape
            else:  # Color image (RGB)
                height, width, _ = image.shape

            total_images += 1
            total_height += height
            total_width += width

            # Check if the image shape is smaller than min_shape
            if height < min_shape[0] or width < min_shape[1]:
                smaller_than_x_count += 1

    # Calculate the average shape of the images
    average_height = total_height / total_images if total_images > 0 else 0
    average_width = total_width / total_images if total_images > 0 else 0

    # Print the results
    print(f"Average image shape - Height: "
          f"{average_height}, Width: {average_width}")

    print("Number of images with shape smaller "
          f"than {min_shape}: {smaller_than_x_count}")

    # Return results as a dictionary
    return {
        'average_height': average_height,
        'average_width': average_width,
        'smaller_than_x_count': smaller_than_x_count
    }


300:80: E501 line too long (140 > 79 characters)
306:80: E501 line too long (95 > 79 characters)
311:80: E501 line too long (109 > 79 characters)
332:1: W293 blank line contains whitespace
366:29: E225 missing whitespace around operator
556:1: W391 blank line at end of file


# Imports

In [3]:
import os  
import glob
import sklearn
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.metrics import multilabel_confusion_matrix


import PIL 
import random
import numpy as np
import matplotlib.pyplot as plt 

from tool_preprocessing import *

# Preprocessing

In this notebook, labels are initially considered as categorical.

## Manual Part

If the images are organized in the folders of each label, the following flag must be True

Grande Osmar, deixa a flag como True mesmo

In [4]:
flag_folder_sep = True

In [5]:
if flag_folder_sep:
    base_path = input("Qual é o caminho do dataset?")
    # base_path = 'C:/Users/lucas/OneDrive - unb.br/Documents/UnB/Semestres-ENE/TCC/COVID_Dataset_original'
    results_path = input("Qual será o caminho para os resultados?")
    # results_path = f"C:/Users/Lucas/medical_images_models/results_COVID"
else:
    # base_path = 'C:/Users/lucas/OneDrive - unb.br/Documents/UnB/Semestres-ENE/TCC/The HAM10000 dataset'
    # results_path = f"C:/Users/Lucas/medical_images_models/results_HAM"
    base_path = input("Qual é o caminho do dataset?")
    results_path = input("Qual será o caminho para os resultados?")


In [6]:
if flag_folder_sep :
    label_column = 'label'
    train_df, test_df, val_df  = make_dataset_by_folder(base_path=base_path, label_column=label_column)

else:
    
    path_train_df = f'{base_path}/HAM10000_metadata'
    path_test_df = f'{base_path}/test.csv'
    
    path_train = f"{base_path}/treino"
    path_test = f"{base_path}/test"
    
    paths_image = [path_train, path_test]
    paths_df = [path_train_df, path_test_df]
    label_column = 'dx'
    
    train_df, test_df, val_df = make_dataset_by_df(paths_image, paths_df, label_column=label_column)
    
    

nv - 671
mel - 112
bkl - 110
bcc - 52
akiec - 33
vasc - 15
df - 12


## Analysis

### Train

In [7]:
train_df = check_images_existence(train_df, path_column='path')

In [10]:
train_df.to_csv("teste.csv")

In [8]:
image_analysis_train = image_analysis(train_df)

KeyboardInterrupt: 

In [None]:
dict_train_qntd = get_label_counts_and_print(train_df, label_column=label_column)
shapes_train = analyze_image_shapes(train_df, min_shape=(800, 800), path_column='path')

Total number of images: 9010
Number of unique labels: 7
Label 'nv' has 6034 images.
Label 'mel' has 1001 images.
Label 'bkl' has 989 images.
Label 'bcc' has 462 images.
Label 'akiec' has 294 images.
Label 'vasc' has 127 images.
Label 'df' has 103 images.
Average image shape - Height: 450.0, Width: 600.0
Number of images with shape smaller than (800, 800): 9010


In [None]:
dict_train_qntd

{'nv': 6034,
 'mel': 1001,
 'bkl': 989,
 'bcc': 462,
 'akiec': 294,
 'vasc': 127,
 'df': 103}

### Test

In [None]:
test_df = check_images_existence(test_df, path_column='path')

In [None]:
image_analysis_test = image_analysis(test_df)

Smallest pixel value: 0
Largest pixel value: 255
Total images processed: 1511
Channel Statistics:
Channel 'R':
  - Average: 193.96235187636344
  - Standard Deviation: 24.606448726550262
Channel 'G':
  - Average: 141.7550379243572
  - Standard Deviation: 31.9625774054364
Channel 'B':
  - Average: 147.87214814814814
  - Standard Deviation: 35.78649271231254


In [None]:
dict_test_qntd = get_label_counts_and_print(test_df, label_column=label_column)
shapes_test = analyze_image_shapes(test_df, min_shape=(300, 300), path_column='path')

### Validation

In [None]:
val_df = check_images_existence(val_df, path_column='path')

In [None]:
image_analysis_val = image_analysis(val_df)

In [None]:
dict_val_qntd = get_label_counts_and_print(val_df, label_column=label_column)
shapes_val = analyze_image_shapes(val_df, min_shape=(461, 601), path_column='path')

## Model Preparation

In [None]:
from model_preprocessing import *

Passar de categorial para binário 

Pesos para a loss

### Categorial to number

In [None]:
labels_dict = labels2dict(train_df, label_column)
labels_dict

In [None]:
train_label, test_label, val_label = dflabel2number([train_df, test_df, val_df], labels_dict, label_column)

### Weights

In [None]:
if len(labels_dict) == 1:
    weights = calculate_weights(train_df, labels_dict, dict_train_qntd)
    weights = max(weights)
else:
    weights = calculate_weights(train_df, labels_dict, dict_train_qntd)
    print(weights)

# Model

In [None]:
from models import *
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import transforms

## Dataset Class

In [None]:
class CT_Dataset(Dataset):
    def __init__(self, img_path, img_labels, channels, img_transforms=None):
        self.img_path = img_path
        self.img_labels = torch.Tensor(img_labels)
        if channels == 1:
            self.transforms = transforms.Compose([transforms.Grayscale(),
                                                #   transforms.Resize((250, 250)),
                                                  transforms.ToTensor()])
        elif channels == 3:
            self.transforms = transforms.Compose([#transforms.Resize((250, 250)),
                                                  transforms.ToTensor()])
        else:
            self.transforms = img_transforms
    
    def __getitem__(self, index):
        # load image
        cur_path = self.img_path[index]
        cur_img = PIL.Image.open(cur_path).convert('RGB')
        cur_img = self.transforms(cur_img)

        return cur_img, self.img_labels[index]
    
    def __len__(self):
        return len(self.img_path)

## GPU

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device 

In [None]:
print("Current GPU memory usage:", torch.cuda.memory_allocated() / (1024 ** 2), "MB")
print("Max GPU memory usage:", torch.cuda.max_memory_allocated() / (1024 ** 2), "MB")

torch.cuda.empty_cache()

## Random Seed

In [None]:
random_seed = 124
np.random.seed(random_seed)

torch.manual_seed(random_seed)
torch.backends.cudnn.deterministic = True

## Training

In [None]:
try:
    mean_R = image_analysis_val['channel_statistics']['R']['average']
    mean_G = image_analysis_val['channel_statistics']['G']['average']
    mean_B = image_analysis_val['channel_statistics']['B']['average']
    channels = 1 if mean_R == mean_G == mean_B else 3
    
except KeyError:
    channels = image_analysis_val['channels']

In [None]:
train_dataset = CT_Dataset(img_path=np.array(train_df['path']), img_labels=np.array(train_label), channels=channels)
val_dataset = CT_Dataset(img_path=np.array(val_df['path']), img_labels=np.array(val_label), channels=channels)
test_dataset = CT_Dataset(img_path=np.array(test_df['path']), img_labels=np.array(test_label), channels=channels)

In [None]:
from trainer import *

In [None]:
batch_size = 4
Epochs = 20



# model_kernel = VGG16(num_classes=len(labels_dict), input_channels=channels)
# model_kernel = ResNet50(num_classes=len(labels_dict), input_channels=channels)
# model_kernel = ResNet101(num_classes=len(labels_dict), input_channels=channels)
# model_kernel = EfficientNetB0(num_classes=len(labels_dict), input_channels=channels)
# model_kernel = EfficientNetB4(num_classes=len(labels_dict), input_channels=channels)
model_kernel = EfficientNetB7(num_classes=len(labels_dict), input_channels=channels)



trainer = ModelTrainer(model_kernel, device, weights, labels_dict, train_dataset, val_dataset, test_dataset, batch_size= batch_size, epochs=Epochs)

In [None]:
trainer.loader()
trainer.loss_function()
trainer.optimizer_step()
print("Training Start:")
for epoch in range(Epochs):
    trainer.model.train()

    trainer.train_loss = 0
    trainer.train_acc = 0

    trainer.train()
    trainer.validate()
    history = trainer.loss_acc()


    print(f"Epoch:{epoch + 1} / {Epochs}, lr: {trainer.optimizer.param_groups[0]['lr']:.5f} train loss:{trainer.train_loss:.5f}, train acc: {trainer.train_acc:.5f}, valid loss:{trainer.val_loss:.5f}, valid acc:{trainer.val_acc:.5f}")
        
    # Update the best model if validation loss is the lowest so far
    if trainer.val_loss < trainer.best_val_loss:
        trainer.best_val_loss = trainer.val_loss
        trainer.best_model_state = trainer.model.state_dict()

    print(f'The best val loss is {trainer.best_val_loss}.\n')
    
    # Load the best model state
    if trainer.best_model_state is not None:
        trainer.model.load_state_dict(trainer.best_model_state)
    model = trainer.model
    
trainer.test()
metrics_df = trainer.metrics()

In [None]:
metrics_df = trainer.metrics()
metrics_df = metrics_df.applymap(lambda x: str(x).replace('.', ','))

# Metrics

In [None]:
from model_metrics import *                                                                         

In [None]:

# if flag_folder_sep:
#     results_path = f"C:/Users/Lucas/medical_images_models/results_COVID/Model_{model.get_name()}__Epoch_{Epochs}__Batch_{batch_size}__Accuracy_{metrics_df['Accuracy'][0]}"

# else:
#     results_path = f"C:/Users/Lucas/medical_images_models/results_HAM/Model_{model.get_name()}__Epoch_{Epochs}__Batch_{batch_size}__Accuracy_{metrics_df['Accuracy'][0]}"

In [None]:
metrics_df

In [None]:
metrics_df.to_csv(f"{results_path}/Model_{model.get_name()}__Epoch_{Epochs}__Batch_{batch_size}__Accuracy_{metrics_df['Accuracy'][0]}.csv", index=False)

In [None]:
plot_metrics(history, path=results_path)

## Plot Images - True Predicted

In [None]:
inverted_labels_dict = {value: key for key, value in labels_dict.items()}
inverted_labels_dict

In [None]:
plot_image_pred_true(model, test_dataset, device, inverted_labels_dict, num_images_to_plot=20, plot_images=True)

# Save Model

In [None]:
torch.save(model.state_dict(), f"{results_path}/Model_{model.get_name()}__Epoch_{Epochs}__Batch_{batch_size}__Accuracy_{metrics_df['Accuracy'][0]}.pth")