In [56]:

import torch
import torch.nn as nn
import torch.nn.functional as F

import pytorch_lightning
import torchvision.models as models
import torchvision.transforms as transforms
from pytorch_lightning.loggers import TensorBoardLogger

from torch.utils.data import DataLoader, Dataset, random_split
from torchvision.datasets import ImageFolder
from torchvision.utils import make_grid

import pytorch_lightning as pl
from efficientnet_pytorch import EfficientNet

from PIL import Image
import pandas as pd
import os

In [57]:

# Load CSV file and define paths
csv_file = '/home/ashiley/HAM10000_metadata_alterado.csv'
data_path_1 = '/home/ashiley/HAM10000_images_part_1'
data_path_2 = '/home/ashiley/HAM10000_images_part_2'

In [61]:
import os

class CustomDataset(Dataset):
    def __init__(self, csv_file, root_dir_1, root_dir_2, transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.root_dir_1 = root_dir_1
        self.root_dir_2 = root_dir_2
        self.transform = transform
        
        # Create a list of all images in both directories
        self.all_images = set(os.listdir(root_dir_1) + os.listdir(root_dir_2))

        # Call a method to remove images not present in either directory
        self.remove_unused_images()
    
    def __len__(self):
        print(len(self.annotations))
        return len(self.annotations)
    
    def __getitem__(self, idx):
        img_name = self.annotations.iloc[idx, 1] + '.jpg'
        img_path = os.path.join(self.root_dir_1 if img_name in self.all_images else self.root_dir_2, img_name)
        image = Image.open(img_path).convert('RGB')
        label = self.annotations.iloc[idx, 2]
        label = torch.tensor(0) if label == 0 else torch.tensor(1)
        if self.transform:
            image = self.transform(image)
        return image, label
    
    def remove_unused_images(self):
        print(len(self.all_images))
        # Remove images not present in either directory
        for img_name in self.all_images:
            img_name_shortened = img_name[:-4]  # Remove os últimos 4 caracteres ('.jpg')
            if img_name_shortened not in self.annotations['image_id'].values:
                for root_dir in [self.root_dir_1, self.root_dir_2]:
                    img_path = os.path.join(root_dir, img_name)

                    if os.path.exists(img_path):
                        os.remove(img_path)
                        print(f"Image {img_name} not found in specified CSV. Removed from directory.")
        print(len(self.all_images))

# Define transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])


# Create custom dataset instance
custom_dataset = CustomDataset(csv_file=csv_file, root_dir_1=data_path_1, root_dir_2=data_path_2, transform=transform)

# Cria um DataLoader para carregar os dados em lotes durante o treinamento.
data_loader = DataLoader(custom_dataset, batch_size=32, shuffle=True)

# Check if CUDA is available
cuda_available = torch.cuda.is_available()

if cuda_available:
    # Get the number of available CUDA devices
    num_cuda_devices = torch.cuda.device_count()
    print("CUDA is available and {} CUDA device(s) is(are) available.".format(num_cuda_devices))
else:
    print("CUDA is not available. You are running on CPU.")

# Move a tensor to the GPU if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Concatenate datasets
full_dataset = custom_dataset  # Assuming you have already defined custom_dataset

9873
9873
9873
9873
CUDA is available and 2 CUDA device(s) is(are) available.
