In [1]:
# # This Python 3 environment comes with many helpful analytics libraries installed
# # It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# # For example, here's several helpful packages to load

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# # Input data files are available in the read-only "../input/" directory
# # For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# # You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# # You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [19]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
from PIL import Image
from torchvision.models import vgg19_bn
from torchvision.models.vgg import VGG19_BN_Weights
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score
import os

In [4]:
os.listdir('/kaggle/input/covidct')

['CT_NonCOVID', 'COVID-CT-MetaInfo.xlsx', 'CT_COVID']

In [5]:
# 2. Get file paths and labels
positive_samples = [os.path.join('/kaggle/input/covidct/CT_COVID', fname) for fname in os.listdir('/kaggle/input/covidct/CT_COVID')]
negative_samples = [os.path.join('/kaggle/input/covidct/CT_NonCOVID', fname) for fname in os.listdir('/kaggle/input/covidct/CT_NonCOVID')]

all_samples = positive_samples + negative_samples
labels = [1] * len(positive_samples) + [0] * len(negative_samples)

# 3. Perform stratified sampling
train_samples, test_samples, train_labels, test_labels = train_test_split(all_samples, labels, test_size=0.2, stratify=labels)

# Now, train_samples and test_samples contain the file paths for the training and testing datasets, respectively.
# train_labels and test_labels contain the corresponding labels.


In [22]:
# 2. Define custom datasets for the stratified data
class CustomDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        image = Image.open(image_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = CustomDataset(train_samples, train_labels, transform=transform)
test_dataset = CustomDataset(test_samples, test_labels, transform=transform)

# 3. Define data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# 4. Load the VGG-19 model and modify for binary classification
# model = models.vgg19_bn(pretrained=True)
model = vgg19_bn(weights=VGG19_BN_Weights.IMAGENET1K_V1)
for param in model.parameters():
    param.requires_grad = False  # Freeze all layers

model.classifier[6] = nn.Linear(model.classifier[6].in_features, 1)  # Modify last layer for binary classification

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# 5. Define the loss function, optimizer, and training loop
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.classifier[6].parameters(), lr=0.001)

def train_model(model, criterion, optimizer, num_epochs=10):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item() * inputs.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)
        print(f"Epoch {epoch+1}/{num_epochs} Loss: {epoch_loss:.4f}")

# 6. Train the model
train_model(model, criterion, optimizer, num_epochs=50)

# 7. Evaluate the model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
        outputs = model(inputs)
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

predicted_np = predicted.cpu().numpy()
labels_np = labels.cpu().numpy()

# Calculate metrics
precision = precision_score(labels_np, predicted_np)
recall = recall_score(labels_np, predicted_np)
f1 = f1_score(labels_np, predicted_np)

print(f"Test Accuracy: {100 * correct / total:.2f}%")

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Epoch 1/50 Loss: 0.6390
Epoch 2/50 Loss: 0.5250
Epoch 3/50 Loss: 0.4748
Epoch 4/50 Loss: 0.4546
Epoch 5/50 Loss: 0.4272
Epoch 6/50 Loss: 0.4124
Epoch 7/50 Loss: 0.4051
Epoch 8/50 Loss: 0.3812
Epoch 9/50 Loss: 0.3616
Epoch 10/50 Loss: 0.4158
Epoch 11/50 Loss: 0.3801
Epoch 12/50 Loss: 0.3413
Epoch 13/50 Loss: 0.3492
Epoch 14/50 Loss: 0.3671
Epoch 15/50 Loss: 0.3247
Epoch 16/50 Loss: 0.3340
Epoch 17/50 Loss: 0.3407
Epoch 18/50 Loss: 0.3153
Epoch 19/50 Loss: 0.3410
Epoch 20/50 Loss: 0.3314
Epoch 21/50 Loss: 0.3636
Epoch 22/50 Loss: 0.3011
Epoch 23/50 Loss: 0.3181
Epoch 24/50 Loss: 0.3233
Epoch 25/50 Loss: 0.3337
Epoch 26/50 Loss: 0.3120
Epoch 27/50 Loss: 0.3137
Epoch 28/50 Loss: 0.3048
Epoch 29/50 Loss: 0.3090
Epoch 30/50 Loss: 0.3372
Epoch 31/50 Loss: 0.3080
Epoch 32/50 Loss: 0.2849
Epoch 33/50 Loss: 0.3007
Epoch 34/50 Loss: 0.2949
Epoch 35/50 Loss: 0.2735
Epoch 36/50 Loss: 0.2947
Epoch 37/50 Loss: 0.2831
Epoch 38/50 Loss: 0.3101
Epoch 39/50 Loss: 0.2876
Epoch 40/50 Loss: 0.2803
Epoch 41/

In [23]:
# 2. Define custom datasets for the stratified data
class CustomDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        image = Image.open(image_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = CustomDataset(train_samples, train_labels, transform=transform)
test_dataset = CustomDataset(test_samples, test_labels, transform=transform)

# 3. Define data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# 4. Load the VGG-19 model and modify for binary classification
# model = models.vgg19_bn(pretrained=True)
model = vgg19_bn(weights=VGG19_BN_Weights.IMAGENET1K_V1)
for param in model.parameters():
    param.requires_grad = False  # Freeze all layers

model.classifier[6] = nn.Linear(model.classifier[6].in_features, 1)  # Modify last layer for binary classification

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# 5. Define the loss function, optimizer, and training loop
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.classifier[6].parameters(), lr=0.003)

def train_model(model, criterion, optimizer, num_epochs=10):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item() * inputs.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)
        print(f"Epoch {epoch+1}/{num_epochs} Loss: {epoch_loss:.4f}")

# 6. Train the model
train_model(model, criterion, optimizer, num_epochs=50)

# 7. Evaluate the model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
        outputs = model(inputs)
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

predicted_np = predicted.cpu().numpy()
labels_np = labels.cpu().numpy()

# Calculate metrics
precision = precision_score(labels_np, predicted_np)
recall = recall_score(labels_np, predicted_np)
f1 = f1_score(labels_np, predicted_np)

print(f"Test Accuracy: {100 * correct / total:.2f}%")

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Epoch 1/50 Loss: 0.6270
Epoch 2/50 Loss: 0.5182
Epoch 3/50 Loss: 0.4408
Epoch 4/50 Loss: 0.4139
Epoch 5/50 Loss: 0.4469
Epoch 6/50 Loss: 0.3497
Epoch 7/50 Loss: 0.3612
Epoch 8/50 Loss: 0.3699
Epoch 9/50 Loss: 0.3792
Epoch 10/50 Loss: 0.3780
Epoch 11/50 Loss: 0.3925
Epoch 12/50 Loss: 0.3736
Epoch 13/50 Loss: 0.3682
Epoch 14/50 Loss: 0.3075
Epoch 15/50 Loss: 0.3252
Epoch 16/50 Loss: 0.3405
Epoch 17/50 Loss: 0.3037
Epoch 18/50 Loss: 0.3122
Epoch 19/50 Loss: 0.3445
Epoch 20/50 Loss: 0.3765
Epoch 21/50 Loss: 0.3238
Epoch 22/50 Loss: 0.2458
Epoch 23/50 Loss: 0.2648
Epoch 24/50 Loss: 0.3120
Epoch 25/50 Loss: 0.2949
Epoch 26/50 Loss: 0.3129
Epoch 27/50 Loss: 0.3129
Epoch 28/50 Loss: 0.2688
Epoch 29/50 Loss: 0.3106
Epoch 30/50 Loss: 0.3214
Epoch 31/50 Loss: 0.2870
Epoch 32/50 Loss: 0.2780
Epoch 33/50 Loss: 0.2764
Epoch 34/50 Loss: 0.2814
Epoch 35/50 Loss: 0.2656
Epoch 36/50 Loss: 0.2824
Epoch 37/50 Loss: 0.3193
Epoch 38/50 Loss: 0.2866
Epoch 39/50 Loss: 0.3488
Epoch 40/50 Loss: 0.3533
Epoch 41/

In [24]:

# 2. Define custom datasets for the stratified data
class CustomDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        image = Image.open(image_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = CustomDataset(train_samples, train_labels, transform=transform)
test_dataset = CustomDataset(test_samples, test_labels, transform=transform)

# 3. Define data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# 4. Load the VGG-19 model and modify for binary classification
# model = models.vgg19_bn(pretrained=True)
model = vgg19_bn(weights=VGG19_BN_Weights.IMAGENET1K_V1)
for param in model.parameters():
    param.requires_grad = True  # Freeze all layers

# model.classifier[6] = nn.Linear(model.classifier[6].in_features, 1)  # Modify last layer for binary classification

# Extract the original classifier layers of VGG-19
original_classifier = model.classifier

# Define new layers you want to add
new_layers = [
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(4096, 256),  # New dense layer with 256 neurons
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(256, 1)    # Final layer for binary classification
]

# Extend the original classifier with new layers
extended_classifier = nn.Sequential(
    *list(original_classifier.children())[:-1],  # All layers except the last one
    *new_layers                                 # Your new layers
)

# Replace the model's classifier with the new one
model.classifier = extended_classifier

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# 5. Define the loss function, optimizer, and training loop
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)

def train_model(model, criterion, optimizer, num_epochs=10):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item() * inputs.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)
        print(f"Epoch {epoch+1}/{num_epochs} Loss: {epoch_loss:.4f}")

# 6. Train the model
train_model(model, criterion, optimizer, num_epochs=10)

# 7. Evaluate the model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
        outputs = model(inputs)
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

predicted_np = predicted.cpu().numpy()
labels_np = labels.cpu().numpy()

# Calculate metrics
precision = precision_score(labels_np, predicted_np)
recall = recall_score(labels_np, predicted_np)
f1 = f1_score(labels_np, predicted_np)

print(f"Test Accuracy: {100 * correct / total:.2f}%")

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Epoch 1/10 Loss: 0.8309
Epoch 2/10 Loss: 0.5027
Epoch 3/10 Loss: 0.2348
Epoch 4/10 Loss: 0.2098
Epoch 5/10 Loss: 0.1364
Epoch 6/10 Loss: 0.1845
Epoch 7/10 Loss: 0.0337
Epoch 8/10 Loss: 0.1549
Epoch 9/10 Loss: 0.2425
Epoch 10/10 Loss: 0.0715
Test Accuracy: 84.67%
Precision: 0.8000
Recall: 0.9231
F1 Score: 0.8571


In [25]:

# 2. Define custom datasets for the stratified data
class CustomDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        image = Image.open(image_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = CustomDataset(train_samples, train_labels, transform=transform)
test_dataset = CustomDataset(test_samples, test_labels, transform=transform)

# 3. Define data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# 4. Load the VGG-19 model and modify for binary classification
# model = models.vgg19_bn(pretrained=True)
model = vgg19_bn(weights=VGG19_BN_Weights.IMAGENET1K_V1)
for param in model.parameters():
    param.requires_grad = True  # Freeze all layers

# model.classifier[6] = nn.Linear(model.classifier[6].in_features, 1)  # Modify last layer for binary classification

# Extract the original classifier layers of VGG-19
original_classifier = model.classifier

# Define new layers you want to add
new_layers = [
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(4096, 256),  # New dense layer with 256 neurons
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(256, 1)    # Final layer for binary classification
]

# Extend the original classifier with new layers
extended_classifier = nn.Sequential(
    *list(original_classifier.children())[:-1],  # All layers except the last one
    *new_layers                                 # Your new layers
)

# Replace the model's classifier with the new one
model.classifier = extended_classifier

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# 5. Define the loss function, optimizer, and training loop
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)

def train_model(model, criterion, optimizer, num_epochs=10):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item() * inputs.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)
        print(f"Epoch {epoch+1}/{num_epochs} Loss: {epoch_loss:.4f}")

# 6. Train the model
train_model(model, criterion, optimizer, num_epochs=30)

# 7. Evaluate the model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
        outputs = model(inputs)
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

predicted_np = predicted.cpu().numpy()
labels_np = labels.cpu().numpy()

# Calculate metrics
precision = precision_score(labels_np, predicted_np)
recall = recall_score(labels_np, predicted_np)
f1 = f1_score(labels_np, predicted_np)

print(f"Test Accuracy: {100 * correct / total:.2f}%")

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Epoch 1/30 Loss: 0.9032
Epoch 2/30 Loss: 0.5331
Epoch 3/30 Loss: 0.3038
Epoch 4/30 Loss: 0.1201
Epoch 5/30 Loss: 0.1134
Epoch 6/30 Loss: 0.1310
Epoch 7/30 Loss: 0.2438
Epoch 8/30 Loss: 0.0685
Epoch 9/30 Loss: 0.1409
Epoch 10/30 Loss: 0.1485
Epoch 11/30 Loss: 0.0647
Epoch 12/30 Loss: 0.0189
Epoch 13/30 Loss: 0.0221
Epoch 14/30 Loss: 0.0711
Epoch 15/30 Loss: 0.0912
Epoch 16/30 Loss: 0.2015
Epoch 17/30 Loss: 0.0348
Epoch 18/30 Loss: 0.0502
Epoch 19/30 Loss: 0.0636
Epoch 20/30 Loss: 0.0750
Epoch 21/30 Loss: 0.0518
Epoch 22/30 Loss: 0.0768
Epoch 23/30 Loss: 0.0540
Epoch 24/30 Loss: 0.0176
Epoch 25/30 Loss: 0.0553
Epoch 26/30 Loss: 0.1029
Epoch 27/30 Loss: 0.0212
Epoch 28/30 Loss: 0.1344
Epoch 29/30 Loss: 0.0768
Epoch 30/30 Loss: 0.0155
Test Accuracy: 84.67%
Precision: 1.0000
Recall: 0.6923
F1 Score: 0.8182


In [26]:

# 2. Define custom datasets for the stratified data
class CustomDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        image = Image.open(image_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = CustomDataset(train_samples, train_labels, transform=transform)
test_dataset = CustomDataset(test_samples, test_labels, transform=transform)

# 3. Define data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# 4. Load the VGG-19 model and modify for binary classification
# model = models.vgg19_bn(pretrained=True)
model = vgg19_bn(weights=VGG19_BN_Weights.IMAGENET1K_V1)
for param in model.parameters():
    param.requires_grad = True  # Freeze all layers

# model.classifier[6] = nn.Linear(model.classifier[6].in_features, 1)  # Modify last layer for binary classification

# Extract the original classifier layers of VGG-19
original_classifier = model.classifier

# Define new layers you want to add
new_layers = [
    nn.Linear(4096, 512),
    nn.BatchNorm1d(512),
    nn.ReLU(),
    nn.Dropout(0.5),
    
    nn.Linear(512, 256),
    nn.BatchNorm1d(256),
    nn.LeakyReLU(0.1),
    nn.Dropout(0.4),
    
    nn.Linear(256, 128),
    nn.BatchNorm1d(128),
    nn.ReLU(),
    nn.Dropout(0.3),
    
    nn.Linear(128, 1)
]

# Extend the original classifier with new layers
extended_classifier = nn.Sequential(
    *list(original_classifier.children())[:-1],  # All layers except the last one
    *new_layers                                 # Your new layers
)

# Replace the model's classifier with the new one
model.classifier = extended_classifier

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# 5. Define the loss function, optimizer, and training loop
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)

def train_model(model, criterion, optimizer, num_epochs=10):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item() * inputs.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)
        print(f"Epoch {epoch+1}/{num_epochs} Loss: {epoch_loss:.4f}")

# 6. Train the model
train_model(model, criterion, optimizer, num_epochs=30)

# 7. Evaluate the model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
        outputs = model(inputs)
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
        
predicted_np = predicted.cpu().numpy()
labels_np = labels.cpu().numpy()

# Calculate metrics
precision = precision_score(labels_np, predicted_np)
recall = recall_score(labels_np, predicted_np)
f1 = f1_score(labels_np, predicted_np)

print(f"Test Accuracy: {100 * correct / total:.2f}%")

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Epoch 1/30 Loss: 0.6730
Epoch 2/30 Loss: 0.5109
Epoch 3/30 Loss: 0.2933
Epoch 4/30 Loss: 0.1664
Epoch 5/30 Loss: 0.0822
Epoch 6/30 Loss: 0.0404
Epoch 7/30 Loss: 0.0274
Epoch 8/30 Loss: 0.0354
Epoch 9/30 Loss: 0.0248
Epoch 10/30 Loss: 0.0674
Epoch 11/30 Loss: 0.0798
Epoch 12/30 Loss: 0.0773
Epoch 13/30 Loss: 0.0497
Epoch 14/30 Loss: 0.0523
Epoch 15/30 Loss: 0.0217
Epoch 16/30 Loss: 0.0232
Epoch 17/30 Loss: 0.0228
Epoch 18/30 Loss: 0.0407
Epoch 19/30 Loss: 0.0377
Epoch 20/30 Loss: 0.0216
Epoch 21/30 Loss: 0.0201
Epoch 22/30 Loss: 0.0434
Epoch 23/30 Loss: 0.0283
Epoch 24/30 Loss: 0.0092
Epoch 25/30 Loss: 0.0087
Epoch 26/30 Loss: 0.0171
Epoch 27/30 Loss: 0.0123
Epoch 28/30 Loss: 0.0276
Epoch 29/30 Loss: 0.0151
Epoch 30/30 Loss: 0.0198
Test Accuracy: 88.67%
Precision: 0.9167
Recall: 0.8462
F1 Score: 0.8800


In [27]:

# 2. Define custom datasets for the stratified data
class CustomDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        image = Image.open(image_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = CustomDataset(train_samples, train_labels, transform=transform)
test_dataset = CustomDataset(test_samples, test_labels, transform=transform)

# 3. Define data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# 4. Load the VGG-19 model and modify for binary classification
# model = models.vgg19_bn(pretrained=True)
model = vgg19_bn(weights=VGG19_BN_Weights.IMAGENET1K_V1)
for param in model.parameters():
    param.requires_grad = True  # Freeze all layers

# model.classifier[6] = nn.Linear(model.classifier[6].in_features, 1)  # Modify last layer for binary classification

# Extract the original classifier layers of VGG-19
original_classifier = model.classifier

# Define new layers you want to add
new_layers = [
    nn.Linear(4096, 512),
    nn.BatchNorm1d(512),
    nn.ReLU(),
    nn.Dropout(0.5),
    
    nn.Linear(512, 256),
    nn.BatchNorm1d(256),
    nn.LeakyReLU(0.1),
    nn.Dropout(0.4),
    
    nn.Linear(256, 128),
    nn.BatchNorm1d(128),
    nn.ReLU(),
    nn.Dropout(0.3),
    
    nn.Linear(128, 1)
]

# Extend the original classifier with new layers
extended_classifier = nn.Sequential(
    *list(original_classifier.children())[:-1],  # All layers except the last one
    *new_layers                                 # Your new layers
)

# Replace the model's classifier with the new one
model.classifier = extended_classifier

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# 5. Define the loss function, optimizer, and training loop
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)

# Early stopping parameters
n_epochs_stop = 5  # Number of epochs with no improvement after which training will be stopped
min_val_loss = float('inf')
epochs_no_improve = 0

def train_model(model, criterion, optimizer, num_epochs=10):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item() * inputs.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)
        print(f"Epoch {epoch+1}/{num_epochs} Loss: {epoch_loss:.4f}")
        
        # Validation loss for the epoch
        val_loss = 0
        model.eval()
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)

        val_loss /= len(val_loader.dataset)

        # Check if the validation loss has improved
        if val_loss < min_val_loss:
            min_val_loss = val_loss
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            if epochs_no_improve == n_epochs_stop:
                print(f"Early stopping! Stopped at epoch {epoch}")
                break


# 6. Train the model
train_model(model, criterion, optimizer, num_epochs=30)

# 7. Evaluate the model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
        outputs = model(inputs)
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
        
predicted_np = predicted.cpu().numpy()
labels_np = labels.cpu().numpy()

# Calculate metrics
precision = precision_score(labels_np, predicted_np)
recall = recall_score(labels_np, predicted_np)
f1 = f1_score(labels_np, predicted_np)

print(f"Test Accuracy: {100 * correct / total:.2f}%")

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Epoch 1/30 Loss: 0.6544


NameError: name 'val_loader' is not defined