In [None]:
# Import libraries

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from torchsummary import summary
from tqdm import tqdm
import os
import shutil
from sklearn.model_selection import train_test_split
import seaborn as sns
from torchvision.datasets import ImageFolder
from sklearn.metrics import confusion_matrix

In [None]:
# Create a baseline VGG16 model using pretrained weights

In [None]:
# Dataset roots. Each root is expected to hold one sub-directory per class,
# since the roots are later passed to ImageFolder.
# NOTE(review): these are machine-specific absolute paths; adjust them before
# running the notebook on another machine.
train_dir = 'C:/Users/Kranti/Desktop/EE541_project/archive_new/Training'
val_dir = 'C:/Users/Kranti/Desktop/EE541_project/archive_new/Validation'
test_dir = 'C:/Users/Kranti/Desktop/EE541_project/archive_new/Testing/testing'


def print_class_counts(header, root_dir):
    """Print how many entries each class sub-directory of ``root_dir`` holds.

    Args:
        header: Line printed before the per-class counts.
        root_dir: Directory that contains one sub-directory per class.

    Entries of ``root_dir`` that are not directories (for example a stray
    ``Thumbs.db`` or ``.DS_Store``) are skipped; calling ``os.listdir`` on
    them would raise ``NotADirectoryError``. Class names are listed in sorted
    order so the output is deterministic.
    """
    print(header)
    for class_name in sorted(os.listdir(root_dir)):
        class_dir = os.path.join(root_dir, class_name)
        if not os.path.isdir(class_dir):
            continue
        print(class_name + ':', len(os.listdir(class_dir)))


# Print the number of images in each category for the training and validation sets
print_class_counts('Training set:', train_dir)
print_class_counts('Validation set:', val_dir)

# Training hyper-parameters
batch_size = 32
epochs = 10

# Create an instance of the VGG16 model with pretrained weights.
# NOTE(review): `pretrained=True` is deprecated in recent torchvision releases
# in favour of the `weights=` argument; kept as-is for compatibility with the
# torchvision version this notebook was written against -- confirm.
vgg = models.vgg16(pretrained=True)

# Binary classifier: the VGG16 convolutional feature extractor followed by a
# small fully connected head that outputs a single probability.
model = nn.Sequential(
    vgg.features,
    nn.Flatten(),
    # 25088 = 512 * 7 * 7, the flattened size of vgg.features' output for a
    # 224x224 input (the crop size used by the data transforms).
    nn.Linear(25088, 256),
    nn.ReLU(),
    nn.Linear(256, 1),
    nn.Sigmoid()
)

# Move the model to the GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

# Define the loss function, optimizer, and scheduler.
# BCELoss expects probabilities, which the final Sigmoid layer provides.
criterion = nn.BCELoss()
# All parameters are optimized, including the VGG16 convolutional layers
# (nothing is frozen).
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Multiplies the learning rate by 0.1 every 5 calls to scheduler.step().
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

# Channel-wise mean / std shared by all three pipelines (the values commonly
# used with ImageNet-pretrained torchvision models).
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Training pipeline: random crop + horizontal flip as data augmentation.
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])
# Validation pipeline: deterministic resize followed by a centre crop.
val_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])
# Test pipeline: direct resize to 224x224 (no crop).
test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize,
])

# Create datasets for the training, validation and test data
train_dataset = datasets.ImageFolder(train_dir, transform=train_transforms)
val_dataset = datasets.ImageFolder(val_dir, transform=val_transforms)
test_dataset = datasets.ImageFolder(test_dir, transform=test_transforms)

# Create data loaders. Only the training loader shuffles: evaluation loaders
# must yield samples in a fixed order so that labels and predictions gathered
# in separate passes over the same loader stay aligned.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Per-epoch history of accuracy and loss, used for plotting afterwards
train_accs = []
val_accs = []
train_losses = []
val_losses = []

# Train the model
for epoch in range(epochs):
    # ---- Training pass ----
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        # The criterion is called with float targets reshaped to (batch, 1).
        targets = labels.float().unsqueeze(1)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        # Weight the batch loss by the batch size so the epoch value is a
        # per-sample average even when the last batch is smaller.
        running_loss += loss.item() * inputs.size(0)
        predicted = outputs > 0.5
        total += labels.size(0)
        correct += (predicted == targets).sum().item()
    train_loss = running_loss / len(train_dataset)
    train_acc = correct / total
    train_losses.append(train_loss)
    train_accs.append(train_acc)

    # ---- Validation pass (no gradient tracking) ----
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            targets = labels.float().unsqueeze(1)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            val_loss += loss.item() * inputs.size(0)
            predicted = outputs > 0.5
            total += labels.size(0)
            correct += (predicted == targets).sum().item()
    val_loss = val_loss / len(val_dataset)
    val_acc = correct / total
    val_losses.append(val_loss)
    val_accs.append(val_acc)

    # Advance the learning-rate schedule once per epoch; without this call the
    # scheduler never changes the learning rate.
    scheduler.step()

    print(f'Epoch {epoch+1}/{epochs} -- Training Loss: {train_loss:.4f} -- Training Accuracy: {train_acc:.4f} -- Validation Loss: {val_loss:.4f} -- Validation Accuracy: {val_acc:.4f}')

# Draw one figure per metric (accuracy first, then loss), each overlaying the
# training and validation curves against the 1-based epoch number.
epoch_axis = range(1, epochs+1)
curve_specs = [
    (
        'Accuracy',
        'Training and Validation Accuracy',
        [(train_accs, 'Training Accuracy'), (val_accs, 'Validation Accuracy')],
    ),
    (
        'Loss',
        'Training and Validation Loss',
        [(train_losses, 'Training Loss'), (val_losses, 'Validation Loss')],
    ),
]

for y_label, figure_title, curves in curve_specs:
    plt.figure()
    for history, legend_label in curves:
        plt.plot(epoch_axis, history, label=legend_label)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.title(figure_title)
    plt.legend()
    plt.show()

# Evaluate the model on the test set.
# True labels and predictions are collected in the SAME pass over the loader:
# if the loader shuffles, a second pass would visit the samples in a different
# order and the confusion matrix would pair labels with the wrong predictions.
model.eval()
test_loss = 0.0
correct = 0
total = 0
true_labels = []       # Ground-truth class index of every test sample
predicted_labels = []  # Thresholded (0/1) prediction for the same samples, same order
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        # The criterion is called with float targets reshaped to (batch, 1).
        targets = labels.float().unsqueeze(1)
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        # Weight by batch size so the final value is a per-sample average.
        test_loss += loss.item() * inputs.size(0)
        predicted = outputs > 0.5
        total += labels.size(0)
        correct += (predicted == targets).sum().item()
        true_labels.extend(labels.cpu().numpy().flatten())
        # Cast the boolean predictions to ints so both lists share one label type.
        predicted_labels.extend(predicted.cpu().numpy().flatten().astype(int))

test_loss = test_loss / len(test_dataset)
test_acc = correct / total
print(f'Test Loss: {test_loss:.4f} -- Test Accuracy: {test_acc:.4f}')

# Create the confusion matrix (rows: true labels, columns: predicted labels)
cm = confusion_matrix(true_labels, predicted_labels)

# Plot the confusion matrix using Seaborn
plt.figure()
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix')
plt.show()