In [2]:
import matplotlib.pyplot as plt
import torch
import numpy as np
import torch.nn as nn
from torchvision import models
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import  Dataset,DataLoader,random_split
from sklearn.metrics import f1_score, precision_score, recall_score
import pandas as pd
import os
from PIL import Image

## DATA PREPARE

### IMPORT DATA

#### Datasets

 - Trainning dataset: https://cloud.ipb.pt/f/657d534db56645059905/?dl=1
 - Evaluate dataset: https://cloud.ipb.pt/f/27e4d3ac75d2405aa770/?dl=1

##### Dataset dispositions

In [3]:
train_transform = transforms.Compose([
    transforms.Resize((64,64)),
    # transforms.Grayscale(num_output_channels=3),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

In [4]:
dataset = datasets.ImageFolder('./train', transform=train_transform)

train_size = int(0.8 * len(dataset))
val_size = int(0.1 * len(dataset))
test_size = len(dataset) - (train_size+val_size)

train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

# DataLoaders for train and validation
train_loader = DataLoader(train_dataset,  batch_size=32,shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32,shuffle=False)

test_loader = DataLoader(test_dataset, batch_size=32,shuffle=False)

full_dataset = DataLoader(dataset, batch_size=32,shuffle=False)



## Define Models Architecture

#### Our CNN architecture to 64x64px image input

In [5]:
class ConvolutionNeuralNetwork(nn.Module):
    def __init__(self):
        super(ConvolutionNeuralNetwork, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=24, kernel_size=5, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(24)
        self.conv2 = nn.Conv2d(in_channels=24, out_channels=24, kernel_size=5, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(24)

        self.pool1 = nn.MaxPool2d(2,2)

        self.conv3 = nn.Conv2d(in_channels=24, out_channels=48, kernel_size=5, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(48)
        self.conv4 = nn.Conv2d(in_channels=48, out_channels=48, kernel_size=5, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(48)

        self.pool2 = nn.MaxPool2d(2,2)
        self.drop1=nn.Dropout(p=0.2)

        self.conv5 = nn.Conv2d(in_channels=48, out_channels=96, kernel_size=5, stride=1, padding=1)
        self.bn5 = nn.BatchNorm2d(96)
        self.conv6 = nn.Conv2d(in_channels=96, out_channels=96, kernel_size=4, stride=1, padding=1)
        self.bn6 = nn.BatchNorm2d(96)

        self.pool3 = nn.MaxPool2d(2,2)

        self.fc1 = nn.Linear(96*5*5, 192)
        self.drop2=nn.Dropout(p=0.2)

        self.fc2 = nn.Linear(192, 96)
        self.fc3 = nn.Linear(96, 1)


    def forward(self, input):
        output = F.relu(self.bn1(self.conv1(input)))      
        output = F.relu(self.bn2(self.conv2(output)))     
        output = self.pool1(output)  

        output = F.relu(self.bn3(self.conv3(output)))      
        output = F.relu(self.bn4(self.conv4(output)))     
        output = self.pool2(output)      
        output = self.drop1(output)  

        output = F.relu(self.bn5(self.conv5(output)))      
        output = F.relu(self.bn6(self.conv6(output)))  
        output = self.pool3(output)  

        output = output.reshape(output.size(0),-1)

        output = F.relu(self.fc1(output))
        output = self.drop2(output)
        output = F.relu(self.fc2(output))
        output = self.fc3(output)

        return output

### Define Model, loss function and optimizer

In [9]:
model = ConvolutionNeuralNetwork()

learning_rate = 0.0001

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

## Training and Validation functions

In [11]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        train_correct = 0
        total_train_samples = 0
        
        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device).float().unsqueeze(1)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            predictions = (torch.sigmoid(outputs) > 0.5).int()
            train_correct += (predictions == labels).sum().item()
            total_train_samples += labels.size(0)

        avg_train_acc = train_correct / total_train_samples
        avg_train_loss = running_loss / len(train_loader)

        print(f"\nEpoch {epoch+1}/{num_epochs}, Loss: {avg_train_loss:.4f}, AVG Training Acc: {avg_train_acc:.4f}")

        if val_loader is not None:
            # Validation
            model.eval()
            val_loss = 0.0
            val_correct = 0
            total_val_samples = 0
            with torch.no_grad():
                for images, labels in val_loader:
                    images = images.to(device)
                    labels = labels.to(device).float().unsqueeze(1)
                    outputs = model(images)
                    loss = criterion(outputs, labels)
                    val_loss += loss.item()

                    predictions = (torch.sigmoid(outputs) > 0.5).int()
                    val_correct += (predictions == labels).sum().item()
                    total_val_samples += labels.size(0)

            avg_val_acc = val_correct / total_val_samples
            avg_val_loss = val_loss / len(val_loader)

            print(f"Validation Loss: {avg_val_loss:.4f}, AVG Validation Acc: {avg_val_acc:.4f}")

    return model


## Training

In [12]:
trained_model = train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=15)
print('Finished Training')


Epoch 1/15, Loss: 0.1684, AVG Training Acc: 0.9800
Validation Loss: 1.2000, AVG Validation Acc: 0.4400

Epoch 2/15, Loss: 0.0883, AVG Training Acc: 0.9850
Validation Loss: 1.4771, AVG Validation Acc: 0.4400

Epoch 3/15, Loss: 0.0694, AVG Training Acc: 0.9700
Validation Loss: 1.0651, AVG Validation Acc: 0.4400

Epoch 4/15, Loss: 0.0616, AVG Training Acc: 0.9800
Validation Loss: 0.2196, AVG Validation Acc: 0.9200

Epoch 5/15, Loss: 0.0413, AVG Training Acc: 0.9850
Validation Loss: 0.0562, AVG Validation Acc: 1.0000

Epoch 6/15, Loss: 0.0390, AVG Training Acc: 0.9850
Validation Loss: 0.1595, AVG Validation Acc: 0.9600

Epoch 7/15, Loss: 0.0533, AVG Training Acc: 0.9800
Validation Loss: 0.0648, AVG Validation Acc: 0.9600

Epoch 8/15, Loss: 0.0323, AVG Training Acc: 0.9950
Validation Loss: 0.0894, AVG Validation Acc: 0.9200

Epoch 9/15, Loss: 0.0474, AVG Training Acc: 0.9800
Validation Loss: 0.1668, AVG Validation Acc: 0.9600

Epoch 10/15, Loss: 0.0217, AVG Training Acc: 0.9900
Validation 

# TEST

In [19]:
def evaluate_model(model, val_loader):
    model.eval()
    device = torch.device('cpu')
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.numpy()  # Convert labels to CPU numpy array
            outputs = torch.sigmoid(model(images))
            if outputs.size(1) > 1:  # Caso haja múltiplas classes
                preds = torch.argmax(outputs, dim=1)  # Seleciona a classe com maior probabilidade
            else:
                preds = (outputs > 0.5).int().numpy()

            all_preds.extend(preds)
            all_labels.extend(labels)

    print(all_preds)
    f1 = f1_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds)
    recall = recall_score(all_labels, all_preds)

    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")

    return f1

f1 = evaluate_model(trained_model, test_loader)
print(f"Final F1-Score: {f1:.4f}")

[array([1], dtype=int32), array([1], dtype=int32), array([0], dtype=int32), array([1], dtype=int32), array([1], dtype=int32), array([1], dtype=int32), array([0], dtype=int32), array([1], dtype=int32), array([0], dtype=int32), array([0], dtype=int32), array([1], dtype=int32), array([0], dtype=int32), array([0], dtype=int32), array([1], dtype=int32), array([0], dtype=int32), array([1], dtype=int32), array([0], dtype=int32), array([0], dtype=int32), array([1], dtype=int32), array([0], dtype=int32), array([0], dtype=int32), array([0], dtype=int32), array([1], dtype=int32), array([0], dtype=int32), array([1], dtype=int32), array([0], dtype=int32)]
Precision: 0.9167
Recall: 1.0000
F1-Score: 0.9565
Final F1-Score: 0.9565


## Final train with the entire dataset

In [78]:
trained_model = train_model(trained_model, full_dataset, None, criterion, optimizer, num_epochs=5)

Epoch 1/5, Loss: 0.0373, AVG Training Acc 0.9880
Epoch 2/5, Loss: 0.1307, AVG Training Acc 0.9681
Epoch 3/5, Loss: 0.3589, AVG Training Acc 0.9363
Epoch 4/5, Loss: 0.1765, AVG Training Acc 0.9681
Epoch 5/5, Loss: 0.1402, AVG Training Acc 0.9602


### Eval classification

In [21]:
transform = transforms.Compose([
    transforms.Resize((64, 64)),  # Ajuste o tamanho conforme necessário
    transforms.ToTensor(),          # Converte as imagens para tensores
])

class ImageFolderEval(Dataset):
    def __init__(self, directory, transform=None):
        self.directory = directory
        self.transform = transform
        # Listar todas as imagens no diretório
        self.images = [f for f in os.listdir(directory) if f.endswith(('jpg', 'jpeg', 'png'))]
    
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        img_path = os.path.join(self.directory, self.images[idx])
        image = Image.open(img_path).convert('RGB')  # Abrir a imagem
        if self.transform:
            image = self.transform(image)
        return image, self.images[idx]  

# Carregar as imagens do diretório './eval'
eval_dir = './eval'
eval_dataset = ImageFolderEval(eval_dir, transform=transform)
eval_loader = DataLoader(eval_dataset, batch_size=32, shuffle=False)

result_file = "result.txt"
trained_model.eval()
with open(result_file, 'w') as f:
    for i, (images, img_names) in enumerate(eval_loader):
        images = images.to(torch.device('cpu'))

        # Fazer a previsão com o modelo
        outputs = torch.sigmoid(trained_model(images))

        # Se o modelo for multi-classe ou multi-rótulo, use o índice da classe com maior probabilidade
        if outputs.size(1) > 1:  # Caso haja múltiplas classes
            preds = torch.argmax(outputs, dim=1)  # Seleciona a classe com maior probabilidade
        else:
            preds = (outputs > 0.5).int()  # Para um único valor de saída (classificação binária)

        # Iterando sobre cada previsão no batch
        for j, (pred, img_name) in enumerate(zip(preds, img_names)):
            f.write(f"{img_name} {pred.item()}\n")

print(f"Predictions saved to {result_file}")

Predictions saved to result.txt
