In [46]:
import matplotlib.pyplot as plt
import torch
import numpy as np
import torch.nn as nn
import torchvision
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import  Dataset,DataLoader,SubsetRandomSampler,random_split
from sklearn.metrics import f1_score, precision_score, recall_score
import pandas as pd
import os
from PIL import Image

## DATA PREPARE

### IMPORT DATA

#### Datasets

 - Trainning dataset: https://cloud.ipb.pt/f/657d534db56645059905/?dl=1
 - Evaluate dataset: https://cloud.ipb.pt/f/27e4d3ac75d2405aa770/?dl=1

##### Dataset dispositions

#### Transformation functions and hyperparams

Aqui optaremos por duas transformações de imagens de treinamento, em modelos como AlexNet, VGG que trabalham com imagens de escala maior usaremos *227x227 pixels* para treinamento do AlexNet.

Outra abordagem será utilizar imagens de menor escala, para um rede neural menor de desenvolvimento próprio, baseado em outros notebooks e estudos relacionados, para esse modelo será utilizado amostras de imagens com a escala de *64x64 pixels*.

In [49]:
classes = ("1","0")

train_transform = transforms.Compose([
    transforms.Resize((64,64)),
    # transforms.Resize((224,224)),
    transforms.Grayscale(num_output_channels=3),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

In [58]:
train_dataset = datasets.ImageFolder('./train', transform=train_transform)

train_size = int(0.9 * len(train_dataset))
test_size = len(train_dataset) - train_size
train_dataset, val_dataset = random_split(train_dataset, [train_size, test_size])

# DataLoaders for train and validation
train_loader = DataLoader(train_dataset,  shuffle=True)
val_loader = DataLoader(val_dataset, shuffle=True)



## Define Models Architecture

#### Our CNN architecture to 64x64px image input

In [51]:
class ConvolutionNeuralNetwork(nn.Module):
    def __init__(self,num_classes=2):
        super(ConvolutionNeuralNetwork, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=24, kernel_size=5, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(24)
        self.conv2 = nn.Conv2d(in_channels=24, out_channels=24, kernel_size=5, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(24)

        self.pool1 = nn.MaxPool2d(2,2)

        self.conv3 = nn.Conv2d(in_channels=24, out_channels=48, kernel_size=5, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(48)
        self.conv4 = nn.Conv2d(in_channels=48, out_channels=48, kernel_size=5, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(48)

        self.pool2 = nn.MaxPool2d(2,2)
        self.drop1=nn.Dropout(p=0.2)

        self.conv5 = nn.Conv2d(in_channels=48, out_channels=96, kernel_size=5, stride=1, padding=1)
        self.bn5 = nn.BatchNorm2d(96)
        self.conv6 = nn.Conv2d(in_channels=96, out_channels=96, kernel_size=4, stride=1, padding=1)
        self.bn6 = nn.BatchNorm2d(96)

        self.pool3 = nn.MaxPool2d(2,2)

        self.fc1 = nn.Linear(96*5*5, 192)
        self.drop2=nn.Dropout(p=0.2)

        self.fc2 = nn.Linear(192, 96)
        self.fc3 = nn.Linear(96, num_classes)


    def forward(self, input):
        output = F.relu(self.bn1(self.conv1(input)))      
        output = F.relu(self.bn2(self.conv2(output)))     
        output = self.pool1(output)  

        output = F.relu(self.bn3(self.conv3(output)))      
        output = F.relu(self.bn4(self.conv4(output)))     
        output = self.pool2(output)      
        output = self.drop1(output)  

        output = F.relu(self.bn5(self.conv5(output)))      
        output = F.relu(self.bn6(self.conv6(output)))  
        output = self.pool3(output)  

        output = output.reshape(output.size(0),-1)

        output = F.relu(self.fc1(output))
        output = self.drop2(output)
        output = F.relu(self.fc2(output))
        output = self.fc3(output)

        return output

#### Alex Net architecture to 277x277px image input

In [6]:
class AlexNet(nn.Module):
    def __init__(self,num_classes=2):
        super(AlexNet, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=96, kernel_size=10, stride=4, padding=1)
        self.bn1 = nn.BatchNorm2d(96)
        self.pool1= nn.MaxPool2d(3,2)

        self.conv2 = nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2)
        self.bn2 = nn.BatchNorm2d(256)
        self.pool2 = nn.MaxPool2d(3,2)

        self.conv3 = nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(384)

        self.conv4 = nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(384)

        self.conv5 = nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.bn5 = nn.BatchNorm2d(256)
        self.pool3 = nn.MaxPool2d(3,2)

        self.fc1 = nn.Linear(9216, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, num_classes)


    def forward(self, input):
        output = F.relu(self.bn1(self.conv1(input)))   
        output = self.pool1(output)     
        output = F.relu(self.bn2(self.conv2(output)))     
        output = self.pool2(output)    

        output = F.relu(self.bn3(self.conv3(output)))     
        output = F.relu(self.bn4(self.conv4(output)))   
        output = F.relu(self.bn5(self.conv5(output))) 
        output = self.pool3(output)   

        output = output.reshape(output.size(0),-1)

        output = F.relu(self.fc1(output))
        output = F.relu(self.fc2(output))
        output = self.fc3(output)

        return output

### Define Model, loss function and optimizer

In [59]:
model = ConvolutionNeuralNetwork()

learning_rate = 0.001

criterion = nn.CrossEntropyLoss()
# criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

### Model save

In [10]:
def saveModel():
    torch.save(model.state_dict(), "apurated_model_mycnn.pth")

## Training and Validation functions

In [62]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10):
    device = torch.device("cpu")
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        train_correct = 0
        for images, labels in train_loader:
            images,labels = images.to(device),labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            scores, predictions = torch.max(outputs.data, 1)
            train_correct += (predictions == labels).sum().item()

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader):.4f}, AVG Training Acc {train_correct/len(train_loader):.4f}")

        # Validation
        model.eval()
        val_loss = 0.0
        val_correct = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images,labels = images.to(device),labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                scores, predictions = torch.max(outputs.data,1)
                val_correct+=(predictions == labels).sum().item()

        print(f"Validation Loss: {val_loss/len(val_loader):.4f}, AVG Validation Acc {val_correct/len(val_loader):.4f}")

    return model

In [17]:
def train_epoch(model,device,dataloader):
    train_loss,train_correct=0.0,0
    model.train()
    for images, labels in dataloader:

        images,labels = images.to(device),labels.to(device)

        optimizer.zero_grad()
        output = model(images)
        loss = loss_fn(output,labels)

        loss.backward()
        # accelerator.backward(loss)

        optimizer.step()
        train_loss += loss.item() * images.size(0)
        scores, predictions = torch.max(output.data, 1)
        train_correct += (predictions == labels).sum().item()

    return train_loss,train_correct
  
def valid_epoch(model,device,dataloader):
    valid_loss, val_correct = 0.0, 0
    model.eval()
    with torch.no_grad():
        for images, labels in dataloader:

            images,labels = images.to(device),labels.to(device)
            output = model(images)
            loss=loss_fn(output,labels)
            
            valid_loss+=loss.item()*images.size(0)
            scores, predictions = torch.max(output.data,1)
            val_correct+=(predictions == labels).sum().item()

    return valid_loss,val_correct

In [18]:
def train(num_epochs):
    history = {'train_loss': [], 'test_loss': [],'train_acc':[],'test_acc':[]}
    best_accuracy = 0.0

    model.to(device)
    

    test_sampler = SubsetRandomSampler(val_idx)
    train_samples =  SubsetRandomSampler(train_idx)

    test_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=test_sampler)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=train_samples)

    for epoch in range(num_epochs):
        train_loss, train_correct=train_epoch(model,device,train_loader)
        test_loss, test_correct=valid_epoch(model,device,test_loader)

        train_loss = train_loss / len(train_loader.sampler)
        train_acc = train_correct / len(train_loader.sampler) * 100

        test_loss = test_loss / len(test_loader.sampler)
        test_acc = test_correct / len(test_loader.sampler) * 100

        print("Epoch:{}/{} AVG Training Loss:{:.3f} AVG Test Loss:{:.3f} AVG Training Acc {:.2f} % AVG Test Acc {:.2f} %".format(epoch + 1,
                                                                                                                num_epochs,
                                                                                                                train_loss,
                                                                                                                test_loss,
                                                                                                                train_acc,
                                                                                                                test_acc))
        if train_acc > best_accuracy:
            saveModel()
            best_accuracy = train_acc
            print("Best Accuracy:{} %".format(best_accuracy))

        history['train_loss'].append(train_loss)
        history['test_loss'].append(test_loss)
        history['train_acc'].append(train_acc)
        history['test_acc'].append(test_acc)   

    df_history = pd.DataFrame(data=history)
    df_history.to_csv("historic_mycnn.csv", encoding='utf-8', index=False)

## Training

In [63]:
trained_model = train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10)
print('Finished Training')

Epoch 1/10, Loss: 0.4534, AVG Training Acc 0.8133
Validation Loss: 0.7838, AVG Validation Acc 0.8846
Epoch 2/10, Loss: 0.4171, AVG Training Acc 0.8533
Validation Loss: 0.1333, AVG Validation Acc 0.9231
Epoch 3/10, Loss: 0.3279, AVG Training Acc 0.8578
Validation Loss: 0.1875, AVG Validation Acc 1.0000
Epoch 4/10, Loss: 0.2956, AVG Training Acc 0.8889
Validation Loss: 0.2856, AVG Validation Acc 0.8462
Epoch 5/10, Loss: 0.2633, AVG Training Acc 0.9244
Validation Loss: 0.1357, AVG Validation Acc 1.0000
Epoch 6/10, Loss: 0.1948, AVG Training Acc 0.9556
Validation Loss: 0.1481, AVG Validation Acc 0.9615
Epoch 7/10, Loss: 0.1681, AVG Training Acc 0.9422
Validation Loss: 0.0485, AVG Validation Acc 0.9615
Epoch 8/10, Loss: 0.1754, AVG Training Acc 0.9378
Validation Loss: 0.1314, AVG Validation Acc 1.0000
Epoch 9/10, Loss: 0.2263, AVG Training Acc 0.8978
Validation Loss: 0.1027, AVG Validation Acc 1.0000
Epoch 10/10, Loss: 0.1671, AVG Training Acc 0.9200
Validation Loss: 0.0806, AVG Validation 

# TEST

In [64]:
def evaluate_model(model, val_loader):
    model.eval()
    device = torch.device('cpu')
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.numpy()  # Convert labels to CPU numpy array
            outputs = torch.sigmoid(model(images))
            if outputs.size(1) > 1:  # Caso haja múltiplas classes
                preds = torch.argmax(outputs, dim=1)  # Seleciona a classe com maior probabilidade
            else:
                preds = (outputs > 0.5).int().numpy()

            all_preds.extend(preds)
            all_labels.extend(labels)

    print(all_preds)
    f1 = f1_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds)
    recall = recall_score(all_labels, all_preds)

    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")

    return f1

f1 = evaluate_model(trained_model, val_loader)
print(f"Final F1-Score: {f1:.4f}")

[tensor(0), tensor(1), tensor(0), tensor(0), tensor(1), tensor(1), tensor(1), tensor(1), tensor(1), tensor(0), tensor(1), tensor(1), tensor(1), tensor(0), tensor(0), tensor(0), tensor(0), tensor(0), tensor(0), tensor(1), tensor(1), tensor(0), tensor(0), tensor(1), tensor(0), tensor(0)]
Precision: 1.0000
Recall: 0.9231
F1-Score: 0.9600
Final F1-Score: 0.9600


### Teste para arquitetura AlexNet

In [65]:
transform = transforms.Compose([
    transforms.Resize((64, 64)),  # Ajuste o tamanho conforme necessário
    transforms.ToTensor(),          # Converte as imagens para tensores
])

class ImageFolderEval(Dataset):
    def __init__(self, directory, transform=None):
        self.directory = directory
        self.transform = transform
        # Listar todas as imagens no diretório
        self.images = [f for f in os.listdir(directory) if f.endswith(('jpg', 'jpeg', 'png'))]
    
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        img_path = os.path.join(self.directory, self.images[idx])
        image = Image.open(img_path).convert('RGB')  # Abrir a imagem
        if self.transform:
            image = self.transform(image)
        return image, self.images[idx]  

# Carregar as imagens do diretório './eval'
eval_dir = './eval'
eval_dataset = ImageFolderEval(eval_dir, transform=transform)
eval_loader = DataLoader(eval_dataset, batch_size=32, shuffle=False)

result_file = "result.txt"
trained_model.eval()
with open(result_file, 'w') as f:
    for i, (images, img_names) in enumerate(eval_loader):
        images = images.to(torch.device('cpu'))

        # Fazer a previsão com o modelo
        outputs = torch.sigmoid(trained_model(images))

        # Se o modelo for multi-classe ou multi-rótulo, use o índice da classe com maior probabilidade
        if outputs.size(1) > 1:  # Caso haja múltiplas classes
            preds = torch.argmax(outputs, dim=1)  # Seleciona a classe com maior probabilidade
        else:
            preds = (outputs > 0.5).int()  # Para um único valor de saída (classificação binária)

        # Iterando sobre cada previsão no batch
        for j, (pred, img_name) in enumerate(zip(preds, img_names)):
            f.write(f"{img_name} {pred.item()}\n")

print(f"Predictions saved to {result_file}")

Predictions saved to result.txt
