In [2]:
import os
from sklearn.model_selection import train_test_split

import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import VGG19
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [11]:
# 2. Get file paths and labels
positive_samples = [os.path.join('CT_COVID', fname) for fname in os.listdir('CT_COVID')]
negative_samples = [os.path.join('CT_NonCOVID', fname) for fname in os.listdir('CT_NonCOVID')]

all_samples = positive_samples + negative_samples
labels = [1] * len(positive_samples) + [0] * len(negative_samples)

# 3. Perform stratified sampling
train_samples, test_samples, train_labels, test_labels = train_test_split(all_samples, labels, test_size=0.2, stratify=labels)

# Now, train_samples and test_samples contain the file paths for the training and testing datasets, respectively.
# train_labels and test_labels contain the corresponding labels.


In [6]:
train_samples[1]

'CT_COVID\\2020.02.22.20024927-p18-66%2.png'

In [None]:
# 1. Load VGG-19 model
base_model = VGG19(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# 2. Add custom layers
x = base_model.output
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Freeze all layers in the VGG-19 model
for layer in base_model.layers:
    layer.trainable = False

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 3. Prepare your dataset
# Assuming you have organized your CT scan images in directories like this:
# /dataset/train/covid_positive/
# /dataset/train/covid_negative/
# /dataset/validation/covid_positive/
# /dataset/validation/covid_negative/
# /dataset/test/covid_positive/
# /dataset/test/covid_negative/

train_datagen = ImageDataGenerator(rescale=1./255)
validation_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory('/dataset/train/', target_size=(224, 224), batch_size=32, class_mode='binary')
validation_generator = validation_datagen.flow_from_directory('/dataset/validation/', target_size=(224, 224), batch_size=32, class_mode='binary')
test_generator = test_datagen.flow_from_directory('/dataset/test/', target_size=(224, 224), batch_size=32, class_mode='binary')

# 4. Fine-tune the model
model.fit(train_generator, epochs=10, validation_data=validation_generator)

# 5. Evaluate the model's accuracy
loss, accuracy = model.evaluate(test_generator)
print(f'Test accuracy: {accuracy * 100:.2f}%')

# 6. Experiment with different architectures and parameters
# You can change the architecture of the custom layers added on top of VGG-19, experiment with different optimizers, learning rates, etc. to improve the model's accuracy.


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
from PIL import Image

# 2. Define custom datasets for the stratified data
class CustomDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        image = Image.open(image_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = CustomDataset(train_samples, train_labels, transform=transform)
test_dataset = CustomDataset(test_samples, test_labels, transform=transform)

# 3. Define data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# 4. Load the VGG-19 model and modify for binary classification
model = models.vgg19_bn(pretrained=True)
for param in model.parameters():
    param.requires_grad = False  # Freeze all layers

model.classifier[6] = nn.Linear(model.classifier[6].in_features, 1)  # Modify last layer for binary classification

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# 5. Define the loss function, optimizer, and training loop
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.classifier[6].parameters(), lr=0.001)

def train_model(model, criterion, optimizer, num_epochs=10):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item() * inputs.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)
        print(f"Epoch {epoch+1}/{num_epochs} Loss: {epoch_loss:.4f}")

# 6. Train the model
train_model(model, criterion, optimizer, num_epochs=10)

# 7. Evaluate the model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
        outputs = model(inputs)
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")


In [13]:
from torchvision.models import vgg19_bn
from torchvision.models.vgg import VGG19_BN_Weights

In [14]:

# 2. Define custom datasets for the stratified data
class CustomDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        image = Image.open(image_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = CustomDataset(train_samples, train_labels, transform=transform)
test_dataset = CustomDataset(test_samples, test_labels, transform=transform)

# 3. Define data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# 4. Load the VGG-19 model and modify for binary classification
# model = models.vgg19_bn(pretrained=True)
model = vgg19_bn(weights=VGG19_BN_Weights.IMAGENET1K_V1)
for param in model.parameters():
    param.requires_grad = False  # Freeze all layers

model.classifier[6] = nn.Linear(model.classifier[6].in_features, 1)  # Modify last layer for binary classification

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# 5. Define the loss function, optimizer, and training loop
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.classifier[6].parameters(), lr=0.001)

def train_model(model, criterion, optimizer, num_epochs=10):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item() * inputs.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)
        print(f"Epoch {epoch+1}/{num_epochs} Loss: {epoch_loss:.4f}")

# 6. Train the model
train_model(model, criterion, optimizer, num_epochs=10)

# 7. Evaluate the model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
        outputs = model(inputs)
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")


Epoch 1/10 Loss: 0.6141
Epoch 2/10 Loss: 0.5153
Epoch 3/10 Loss: 0.4543
Epoch 4/10 Loss: 0.4441
Epoch 5/10 Loss: 0.4360
Epoch 6/10 Loss: 0.4221
Epoch 7/10 Loss: 0.3711
Epoch 8/10 Loss: 0.3744
Epoch 9/10 Loss: 0.3700
Epoch 10/10 Loss: 0.3775
Test Accuracy: 76.67%
