Setup Libraries

In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import f1_score
import pandas as pd
import matplotlib.pyplot as plt
from collections import defaultdict
from collections import Counter

Load and Split Train Dataset

In [17]:

# Define transformations (resize, normalize, etc.)
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize all images to 224x224
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load dataset
data_dir = 'C:/Users/CaioGabrielAdernedeM/OneDrive/IPB/ipb_sistemas_inteligentes/projeto_final/train'
dataset = datasets.ImageFolder(root=data_dir, transform=transform)

# Split dataset into training and validation sets
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Calculate split sizes
dataset_size = len(dataset)
train_size = int(0.8 * dataset_size)
val_size = int(0.1 * dataset_size)
test_size = dataset_size - train_size - val_size  # Ensure all samples are used

# Split the dataset
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Print dataset sizes
print(f"Total dataset size: {dataset_size}")
print(f"Training set size: {len(train_dataset)}")
print(f"Validation set size: {len(val_dataset)}")
print(f"Test set size: {len(test_dataset)}")

# Function to count labels in a dataset
def count_labels(subset):
    labels = [dataset.targets[idx] for idx in subset.indices]  # Get labels for the subset
    label_counts = Counter(labels)
    return {dataset.classes[label]: count for label, count in label_counts.items()}

# Print label counts for each subset
print("Training set label counts:", count_labels(train_dataset))
print("Validation set label counts:", count_labels(val_dataset))
print("Test set label counts:", count_labels(test_dataset))

# Create DataLoaders (optional, if needed for training/validation/testing)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


Total dataset size: 251
Training set size: 200
Validation set size: 25
Test set size: 26
Training set label counts: {'positive': 88, 'negative': 112}
Validation set label counts: {'positive': 13, 'negative': 12}
Test set label counts: {'positive': 10, 'negative': 16}


In [21]:
# Define a simple feedforward neural network
class SimpleNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.softmax = nn.Softmax(dim=1)
    
    def forward(self, x):
        x = x.view(x.size(0), -1)  # Flatten the input
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

# Define hyperparameters for each model
models_config = [
    {"hidden_size": 64, "learning_rate": 0.001},
    {"hidden_size": 128, "learning_rate": 0.001},
    {"hidden_size": 256, "learning_rate": 0.001},
    {"hidden_size": 64, "learning_rate": 0.0005},
    {"hidden_size": 128, "learning_rate": 0.0005},
    {"hidden_size": 256, "learning_rate": 0.0005},
    {"hidden_size": 64, "learning_rate": 0.0001},
    {"hidden_size": 128, "learning_rate": 0.0001},
    {"hidden_size": 256, "learning_rate": 0.0001}
]

# Function to train and validate a model
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=5):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        
        # Validation step
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        
        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {running_loss/len(train_loader):.4f}, "
              f"Val Loss: {val_loss/len(val_loader):.4f}, Val Accuracy: {100 * correct / total:.2f}%")

# DataFrame to store results
results = []

# Train and evaluate each model
input_size = 224 * 224 * 3  # Input size based on image dimensions
output_size = len(dataset.classes)  # Number of classes

for i, config in enumerate(models_config):
    print(f"\nTraining Model {i+1} with Hidden Size: {config['hidden_size']}, Learning Rate: {config['learning_rate']}")
    
    # Initialize model, loss function, and optimizer
    model = SimpleNN(input_size=input_size, hidden_size=config['hidden_size'], output_size=output_size)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])
    
    # Train the model
    train_model(model, train_loader, val_loader, criterion, optimizer, epochs=5)

    # Test the model
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to("cuda" if torch.cuda.is_available() else "cpu"), labels.to("cuda" if torch.cuda.is_available() else "cpu")
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    test_accuracy = 100 * correct / total
    print(f"Model {i+1} Test Accuracy: {test_accuracy:.2f}%")

    # Append results to DataFrame
    results.append({
        "Model Number": i+1,
        "Hidden Size": config['hidden_size'],
        "Learning Rate": config['learning_rate'],
        "Test Accuracy": test_accuracy
    })

# Create DataFrame and save results
results_df = pd.DataFrame(results)
print("\nFinal Results:")
print(results_df)

# Save results to CSV
results_df.to_csv("model_results.csv", index=False)


Training Model 1 with Hidden Size: 64, Learning Rate: 0.001
Epoch 1/5, Train Loss: 4.2276, Val Loss: 7.8351, Val Accuracy: 92.00%
Epoch 2/5, Train Loss: 2.0698, Val Loss: 6.9877, Val Accuracy: 92.00%
Epoch 3/5, Train Loss: 2.3440, Val Loss: 4.8703, Val Accuracy: 92.00%
Epoch 4/5, Train Loss: 1.4944, Val Loss: 0.0000, Val Accuracy: 100.00%
Epoch 5/5, Train Loss: 2.1741, Val Loss: 2.6308, Val Accuracy: 96.00%
Model 1 Test Accuracy: 92.31%

Training Model 2 with Hidden Size: 128, Learning Rate: 0.001
Epoch 1/5, Train Loss: 11.7614, Val Loss: 6.3868, Val Accuracy: 92.00%
Epoch 2/5, Train Loss: 5.4236, Val Loss: 13.9766, Val Accuracy: 92.00%
Epoch 3/5, Train Loss: 2.1285, Val Loss: 8.0812, Val Accuracy: 88.00%
Epoch 4/5, Train Loss: 1.8741, Val Loss: 13.5654, Val Accuracy: 92.00%
Epoch 5/5, Train Loss: 2.0365, Val Loss: 4.5869, Val Accuracy: 96.00%
Model 2 Test Accuracy: 92.31%

Training Model 3 with Hidden Size: 256, Learning Rate: 0.001
Epoch 1/5, Train Loss: 18.9463, Val Loss: 23.1944, 

In [7]:
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 2)  # Binary classification (COVID vs Non-COVID)
model = model.to('cuda' if torch.cuda.is_available() else 'cpu')

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\CaioGabrielAdernedeM/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 72.3MB/s]


In [9]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

In [10]:
num_epochs = 10
device = 'cuda' if torch.cuda.is_available() else 'cpu'

for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    
    print(f"Epoch {epoch+1}, Loss: {train_loss/len(train_loader):.4f}")


Epoch 1, Loss: 0.3136
Epoch 2, Loss: 0.0295
Epoch 3, Loss: 0.0055
Epoch 4, Loss: 0.0039
Epoch 5, Loss: 0.0077
Epoch 6, Loss: 0.0010
Epoch 7, Loss: 0.0036
Epoch 8, Loss: 0.0021
Epoch 9, Loss: 0.0007
Epoch 10, Loss: 0.0016


In [11]:
model.eval()
val_labels = []
val_preds = []

with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        val_labels.extend(labels.cpu().numpy())
        val_preds.extend(preds.cpu().numpy())

f1 = f1_score(val_labels, val_preds)
print(f"Validation F1 Score: {f1:.4f}")


Validation F1 Score: 1.0000


In [14]:
# Load dataset
eval_dir = 'C:/Users/CaioGabrielAdernedeM/OneDrive/IPB/ipb_sistemas_inteligentes/projeto_final/evaluation_set'
eval_dataset = datasets.ImageFolder(root=eval_dir, transform=transform)
eval_loader = DataLoader(eval_dataset, batch_size=1, shuffle=False)

FileNotFoundError: Couldn't find any class folder in C:/Users/CaioGabrielAdernedeM/OneDrive/IPB/ipb_sistemas_inteligentes/projeto_final/evaluation_set.