In [1]:

import torch
import os
import random
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader

# Set the seed for reproducibility
SEED = 42


def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU and CUDA), then puts cuDNN into deterministic mode with
    auto-tuning (benchmark) switched off.

    Args:
        seed (int): Value used to seed each generator.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # manual_seed_all covers every GPU, whereas manual_seed only seeds the
    # current device; it is silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


set_seed(SEED)  # Apply the seed


In [None]:

BATCH_SIZE = 32  # batch size passed to every DataLoader in this notebook
EPOCHS = 100  # maximum number of epochs handed to train_model (early stopping may end sooner)
INPUT_SIZE = (256, 256)  # target size for transforms.Resize, applied before the 224x224 center crop

def count_files_in_directory(directory):
    """Return how many files live under ``directory``, including all sub-folders.

    Only the entries ``os.walk`` reports as files are counted; the folders
    themselves do not contribute to the total.
    """
    return sum(len(filenames) for _dirpath, _dirnames, filenames in os.walk(directory))

# Root folder that holds the train/val/test splits. The original location stays
# the default; set the SPLITDATA_DIR environment variable to run the notebook
# against data stored elsewhere without editing this cell.
main_data_dir = os.environ.get("SPLITDATA_DIR", r"C:\Users\Josh\Desktop\CUDA\splitdata")
train_dir = os.path.join(main_data_dir, "train")
val_dir = os.path.join(main_data_dir, "val")
test_dir = os.path.join(main_data_dir, "test")

# Count the files of each split (recursively, across all class sub-folders).
# Note: os.walk yields nothing for a missing folder, so a wrong path shows up
# here as a count of 0 rather than as an error.
train_files = count_files_in_directory(train_dir)
val_files = count_files_in_directory(val_dir)
test_files = count_files_in_directory(test_dir)

print(f"Training Dataset: {train_files}")
print(f"Validation Dataset: {val_files}")
print(f"Test Dataset: {test_files}")

In [None]:

# Open the training split without any transform, only to discover the class
# labels that ImageFolder assigns to the sub-folders
train_dataset = datasets.ImageFolder(root=train_dir)
class_to_label = train_dataset.class_to_idx
class_n = list(class_to_label)  # class names, in the mapping's key order
print("Class to label mapping:", class_to_label)

In [4]:

# from sklearn.cluster import KMeans
# from PIL import Image

# class KMeansSegmentation:
#     def __init__(self, n_clusters=10, p=0.3):
#         """
#         Args:
#             n_clusters (int): Number of clusters for segmentation.
#             p (float): Probability of applying K-means segmentation.
#         """
#         self.n_clusters = n_clusters
#         self.p = p

#     def __call__(self, img):
#         # Check if K-means segmentation should be applied
#         if np.random.rand() > self.p:
#             return img  # Skip segmentation with probability `1 - p`

#         # Convert image to numpy array and reshape for K-means
#         img_np = np.array(img)
#         h, w, c = img_np.shape
#         img_np = img_np.reshape(-1, c)

#         # Apply K-means clustering
#         kmeans = KMeans(n_clusters=self.n_clusters, random_state=0)
#         kmeans.fit(img_np)
#         segmented_img = kmeans.cluster_centers_[kmeans.labels_].reshape(h, w, c).astype(np.uint8)

#         # Convert back to PIL image
#         return Image.fromarray(segmented_img)


In [6]:
# Per-channel mean/std handed to transforms.Normalize in both pipelines and
# reused by unnormalize() for display. With 0.5/0.5, inputs in [0, 1] map to [-1, 1].
MEAN = (0.5, 0.5, 0.5)
STD = (0.5, 0.5, 0.5)

# Training-time preprocessing and augmentation pipeline
transform_train = transforms.Compose([
    # Resize to standardize input dimensions, then keep the central 224x224 region
    transforms.Resize(INPUT_SIZE),
    transforms.CenterCrop((224, 224)),

    # Apply segmentation after resizing
    # KMeansSegmentation(n_clusters=10, p=0.3),

    # Slight rotations; each of the four is applied independently with p=0.25.
    # No `fill` is passed, so the corners exposed by a rotation get the
    # default fill value (0, i.e. black).
    transforms.RandomApply(
        [transforms.RandomRotation(degrees=(0, 20))], p=0.25
    ),
    transforms.RandomApply(
        [transforms.RandomRotation(degrees=(0, 10))], p=0.25
    ),
    transforms.RandomApply(
        [transforms.RandomRotation(degrees=(-20, 0))], p=0.25
    ),
    transforms.RandomApply(
        [transforms.RandomRotation(degrees=(-10, 0))], p=0.25
    ),


    # # Subtle affine transformations
    # transforms.RandomApply([
    #     transforms.RandomAffine(degrees=5, translate=(0.02, 0.02), shear=2)
    # ], p=0.3),

    # # Perspective distortion
    # transforms.RandomApply([
    #     transforms.RandomPerspective(distortion_scale=0.05, p=0.2)
    # ], p=0.2),

    # Minor Gaussian blur
    transforms.RandomApply([
        transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5))
    ], p=0.4),

    # Color and grayscale variations
    transforms.RandomApply([
        transforms.ColorJitter(brightness=0.2, contrast=0.2)
    ], p=0.7),

    transforms.RandomApply([
        transforms.RandomGrayscale(p=1.0)
    ], p=0.2),

    # Horizontal flip with probability 0.5.
    # RandomHorizontalFlip draws its own coin (default p=0.5); wrapping it in
    # RandomApply(p=0.5) gated it twice, so only about 25% of images were flipped.
    transforms.RandomHorizontalFlip(p=0.5),

    # Adjust sharpness slightly with probability 0.3.
    # Same double gating as above: the transform's own p defaults to 0.5, so
    # RandomApply(p=0.3) around it sharpened only about 15% of images.
    transforms.RandomAdjustSharpness(sharpness_factor=1.5, p=0.3),

    # Convert to tensor and normalize
    transforms.ToTensor(),
    transforms.Normalize(mean=MEAN, std=STD)
])



# Deterministic preprocessing for validation and test images: resize, center
# crop, tensor conversion and normalization, with no random augmentation
_eval_steps = [
    transforms.Resize(INPUT_SIZE),
    transforms.CenterCrop((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=MEAN, std=STD),
]
transform_val_test = transforms.Compose(_eval_steps)

# One ImageFolder per split; only the training split uses the augmenting transform
train_dataset = datasets.ImageFolder(train_dir, transform=transform_train)
val_dataset = datasets.ImageFolder(val_dir, transform=transform_val_test)
test_dataset = datasets.ImageFolder(test_dir, transform=transform_val_test)


# Batch each split; training batches are reshuffled, val/test keep dataset order
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=BATCH_SIZE)


In [None]:
# Sanity check: pull a single training batch and show the shape of its image tensor
batch_iter = iter(train_loader)
images, labels = next(batch_iter)
print(images.shape)

In [None]:

# Function to unnormalize the image for visualization
def unnormalize(image, mean, std):
    """Undo a ``Normalize(mean, std)`` step on one image and return a displayable array.

    Args:
        image (torch.Tensor): A single image tensor. Its axes are reordered
            with ``transpose((1, 2, 0))``, so a channels-first (C, H, W)
            layout is expected.
        mean: Per-channel means that were subtracted during normalization.
        std: Per-channel standard deviations that were divided out.

    Returns:
        numpy.ndarray: The image in (H, W, C) layout with values clipped to [0, 1].
    """
    # detach()/cpu() let this accept tensors that track gradients or live on
    # the GPU; a bare .numpy() raises for both.
    pixels = image.detach().cpu().numpy().transpose((1, 2, 0))

    # Reverse the normalization; the per-channel values broadcast over the
    # trailing channel axis.
    pixels = pixels * np.asarray(std) + np.asarray(mean)

    # Clip values to be between 0 and 1 for valid image display
    return np.clip(pixels, 0, 1)


# Visualize a batch of images from the train_loader
def visualize_loader(loader, mean, std, class_names, num_images=6):
    """Plot the first ``num_images`` images of one batch drawn from ``loader``.

    Args:
        loader: Iterable yielding ``(images, labels)`` batches.
        mean: Per-channel means forwarded to ``unnormalize``.
        std: Per-channel standard deviations forwarded to ``unnormalize``.
        class_names: Sequence indexed by label value to obtain each title.
        num_images (int): Maximum number of images to show. If the batch holds
            fewer images, only those are shown.
    """
    # Get a batch of images
    images, labels = next(iter(loader))

    # Never index past the end of the batch
    num_images = min(num_images, len(images))

    # Three columns as before. The row count was fixed at 2, which made
    # plt.subplot raise for more than six images; it now grows as needed while
    # keeping the original 2x3 layout for up to six.
    n_cols = 3
    n_rows = max(2, -(-num_images // n_cols))  # ceiling division

    # Plot the images (4 inches of height per row matches the original 12x8 figure)
    plt.figure(figsize=(12, 4 * n_rows))
    for i in range(num_images):
        plt.subplot(n_rows, n_cols, i + 1)
        plt.imshow(unnormalize(images[i], mean, std))
        plt.title(f"Class: {class_names[int(labels[i])]}")
        plt.axis('off')

    plt.tight_layout()
    plt.show()

# Preview one batch of (augmented) training images together with their class names
visualize_loader(train_loader, MEAN, STD, class_n)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models

# Define the function to build AlexNet with a classifier head sized for this task
def get_alexnet_model(num_classes=7, pretrained=False):
    """Create a torchvision AlexNet whose final layer outputs ``num_classes`` logits.

    Args:
        num_classes (int): Number of output units of the replaced last layer.
        pretrained (bool): When True, start from the ImageNet weights shipped
            with torchvision; when False (the default), the network is
            randomly initialized.

    Returns:
        torch.nn.Module: The AlexNet model with ``classifier[6]`` replaced.
    """
    if hasattr(models, "AlexNet_Weights"):
        # Newer torchvision deprecates `pretrained=` in favour of `weights=`;
        # IMAGENET1K_V1 is what `pretrained=True` resolved to.
        weights = models.AlexNet_Weights.IMAGENET1K_V1 if pretrained else None
        model = models.alexnet(weights=weights)
    else:
        # Older torchvision without the weights enum
        model = models.alexnet(pretrained=pretrained)

    # Swap the last classifier layer for one with the requested output size,
    # reading the input width from the layer being replaced rather than
    # hard-coding it.
    old_head = model.classifier[6]
    model.classifier[6] = nn.Linear(in_features=old_head.in_features, out_features=num_classes)

    return model

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the network with one output per class and place it on that device
# NOTE(review): num_classes is hard-coded; confirm it equals len(class_n)
num_classes = 7
alexnet = get_alexnet_model(num_classes=num_classes)
alexnet.to(device)

# Cross-entropy loss over the class logits
criterion = nn.CrossEntropyLoss()

# Adam over all model parameters
optimizer = optim.Adam(params=alexnet.parameters(), lr=1e-4)

In [None]:
from tqdm import tqdm
import torch.optim.lr_scheduler as lr_scheduler

# Shared patience: used by the LR scheduler here and passed to train_model
# for early stopping
PATIENCE = 10

# Multiply the learning rate by 0.1 once the monitored value (train_model
# steps this with the validation loss) has stopped decreasing.
# NOTE(review): the scheduler and early stopping use the same patience, so
# training may stop before the LR is ever reduced — confirm this is intended.
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=PATIENCE, factor=0.1)

# Training loop with per-epoch validation, LR scheduling and early stopping
def train_model(model, train_loader, val_loader, epochs, criterion, optimizer, device, scheduler, patience,
                save_path='best_model.pth', restore_best_weights=True):
    """Train ``model`` and validate it after every epoch, with early stopping.

    Each epoch runs one pass over ``train_loader`` followed by one pass over
    ``val_loader``. The scheduler is stepped with the epoch's validation loss.
    Whenever the validation accuracy beats the best value so far, the model's
    state dict is written to ``save_path``. Training stops early after
    ``patience`` consecutive epochs without a new best validation accuracy.

    Args:
        model: Network to train (already placed on ``device``).
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        epochs (int): Maximum number of epochs.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer updating the model's parameters.
        device: Device the batches are moved to.
        scheduler: Scheduler whose ``step`` accepts the validation loss.
        patience (int): Epochs without improvement tolerated before stopping.
        save_path (str): File that receives the best model's state dict.
        restore_best_weights (bool): When True (default) and a checkpoint was
            saved, reload it into ``model`` before returning, so the caller
            continues with the best-scoring weights rather than the weights
            of the last epoch.

    Returns:
        tuple: ``(train_losses, val_losses, train_accuracies, val_accuracies)``,
        four lists with one entry per completed epoch. Losses are the mean of
        the per-batch loss values; accuracies are percentages.

    Raises:
        ZeroDivisionError: If a loader yields no batches.
    """
    train_losses = []
    val_losses = []
    train_accuracies = []
    val_accuracies = []

    best_val_accuracy = 0.0
    best_epoch = 0
    patience_counter = 0

    for epoch in range(epochs):
        # ---- Training pass ----
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        with tqdm(train_loader, unit="batch") as tepoch:
            tepoch.set_description(f"Epoch {epoch+1}/{epochs}")
            # Count batches explicitly: tqdm does not update `tepoch.n` on
            # every iteration (only when it refreshes the bar), so dividing by
            # it can misreport the running mean loss.
            for batch_idx, (inputs, labels) in enumerate(tepoch, start=1):
                inputs, labels = inputs.to(device), labels.to(device)

                optimizer.zero_grad()  # Zero gradients
                outputs = model(inputs)  # Forward pass
                loss = criterion(outputs, labels)  # Compute loss

                loss.backward()  # Backward pass
                optimizer.step()  # Update weights

                running_loss += loss.item()  # Accumulate loss

                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                tepoch.set_postfix(loss=running_loss / batch_idx, accuracy=100 * correct / total)

        epoch_train_loss = running_loss / len(train_loader)
        train_accuracy = 100 * correct / total
        train_losses.append(epoch_train_loss)
        train_accuracies.append(train_accuracy)

        # ---- Validation pass (no gradients, eval mode) ----
        val_loss = 0.0
        correct = 0
        total = 0
        model.eval()
        with torch.no_grad():
            with tqdm(val_loader, unit="batch", leave=False) as vepoch:
                vepoch.set_description(f"Validation Epoch {epoch+1}/{epochs}")
                for batch_idx, (inputs, labels) in enumerate(vepoch, start=1):
                    inputs, labels = inputs.to(device), labels.to(device)
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    val_loss += loss.item()

                    _, predicted = torch.max(outputs, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()

                    vepoch.set_postfix(val_loss=val_loss / batch_idx, val_accuracy=100 * correct / total)

        epoch_val_loss = val_loss / len(val_loader)
        val_accuracy = 100 * correct / total
        val_losses.append(epoch_val_loss)
        val_accuracies.append(val_accuracy)

        print(f'\nEpoch [{epoch+1}/{epochs}] - Train Loss: {epoch_train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, '
              f'Validation Loss: {epoch_val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

        # The scheduler monitors validation loss ...
        scheduler.step(epoch_val_loss)

        # ... while checkpointing and early stopping monitor validation accuracy
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            best_epoch = epoch + 1
            patience_counter = 0
            print(f"New best validation accuracy: {val_accuracy:.2f}% at epoch {best_epoch}. Saving model...\n")
            torch.save(model.state_dict(), save_path)
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch + 1}")
                break

    print(f"\nTraining complete. Best validation accuracy: {best_val_accuracy:.2f}% at epoch {best_epoch}")

    # Without this the model keeps the weights of the last epoch run, which can
    # be up to `patience` epochs past the best checkpoint. best_epoch stays 0
    # when nothing was ever saved, in which case there is nothing to reload.
    if restore_best_weights and best_epoch > 0:
        model.load_state_dict(torch.load(save_path, map_location=device))

    return train_losses, val_losses, train_accuracies, val_accuracies

# Run training; the best-scoring weights are checkpointed to alexnet_best_model.pth
train_losses, val_losses, train_accuracies, val_accuracies = train_model(
    model=alexnet,
    train_loader=train_loader,
    val_loader=val_loader,
    epochs=EPOCHS,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    scheduler=scheduler,
    patience=PATIENCE,
    save_path='alexnet_best_model.pth',
)


In [None]:
import matplotlib.pyplot as plt

def plot_losses(train_losses, val_losses):
    """Draw the training and validation loss curves on a single figure.

    Args:
        train_losses: Sequence of per-epoch training losses.
        val_losses: Sequence of per-epoch validation losses.
    """
    _, ax = plt.subplots(figsize=(10, 6))
    for curve, label in ((train_losses, 'Training Loss'), (val_losses, 'Validation Loss')):
        ax.plot(curve, label=label)
    ax.set_xlabel('Epochs')
    ax.set_ylabel('Loss')
    ax.set_title('Training and Validation Loss Over Epochs')
    ax.legend()
    ax.grid(True)
    plt.show()


# Plot the loss history returned by train_model
plot_losses(train_losses=train_losses, val_losses=val_losses)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import torch

def evaluate_model_on_validation(model, val_loader, device, class_names):
    """Print a classification report and plot a confusion matrix for ``val_loader``.

    Args:
        model: Trained network; it is switched to evaluation mode.
        val_loader: Iterable of ``(inputs, labels)`` batches to score.
        device: Device the batches are moved to before the forward pass.
        class_names: Class names where ``class_names[i]`` names label ``i``.
    """
    model.eval()  # Set model to evaluation mode
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)  # Forward pass
            _, predicted = torch.max(outputs, 1)  # Get predicted classes

            # Append true labels and predictions
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())

    # Pin the label set to every known class index. Without it scikit-learn
    # infers the labels from the data, so a class missing from both y_true and
    # y_pred makes the report reject target_names and shrinks the confusion
    # matrix out of step with its tick labels.
    label_ids = list(range(len(class_names)))

    # Generate classification report
    print("Validation Classification Report:\n")
    print(classification_report(y_true, y_pred, labels=label_ids, target_names=class_names))

    # Generate confusion matrix
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)

    # Plot confusion matrix using seaborn
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.title('Validation Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.show()

# Class names in label order, as discovered from the training folders
class_names = class_n

# Report metrics and the confusion matrix for the validation split
evaluate_model_on_validation(model=alexnet, val_loader=val_loader, device=device, class_names=class_names)


In [None]:
def evaluate_model(model, test_loader, device, class_names):
    """Print a classification report and plot a confusion matrix for ``test_loader``.

    Args:
        model: Trained network; it is switched to evaluation mode.
        test_loader: Iterable of ``(inputs, labels)`` batches to score.
        device: Device the batches are moved to before the forward pass.
        class_names: Class names where ``class_names[i]`` names label ``i``.
    """
    model.eval()
    y_true = []
    y_pred = []

    # The previous version also appended every input batch to a list that was
    # never read, keeping the whole test set in memory for nothing; only
    # labels and predictions are collected now.
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)

            # Append true labels and predictions
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())

    # Pin the label set to every known class index, so a class absent from
    # both y_true and y_pred cannot break the report's target_names or
    # misalign the confusion matrix with its tick labels.
    label_ids = list(range(len(class_names)))

    # Generate classification report
    print("Classification Report:\n")
    print(classification_report(y_true, y_pred, labels=label_ids, target_names=class_names))

    # Generate confusion matrix
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)

    # Plot confusion matrix using seaborn
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.show()

# Class names in label order, as discovered from the training folders
class_names = class_n

# Report metrics and the confusion matrix for the held-out test split
evaluate_model(model=alexnet, test_loader=test_loader, device=device, class_names=class_names)