## Imports

In [None]:
# For OS interaction and system-specific parameters
import os
import sys

# PyTorch libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split, Dataset, WeightedRandomSampler, Subset
from torch.optim.lr_scheduler import StepLR
import cv2

# Torchvision
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
import torchvision.datasets as datasets


# Albumentations for Data Augmentation
import albumentations as A
from albumentations.pytorch import ToTensorV2

# PIL for image operations
from PIL import Image

# Matplotlib for plotting and visualizations
import matplotlib.pyplot as plt

# Import numpy
import numpy as np

# TensorBoard for PyTorch
from tensorboardX import SummaryWriter

# CodeCarbon for tracking our carbon emissions
from codecarbon import EmissionsTracker

# tqdm for showing progress bars
from tqdm.notebook import tqdm

# Import Netron for visualizing our model
import netron

# Add scripts to directory
sys.path.append('/Users/jacob/OneDrive/Desktop/SyntheticEye/Development/src/utils')
# Import custom helper functions from scripts directory
import helper_functions

## Gain Insights on Dataset
Inspecting the dataset helps us understand our data and decide which fixed image size to use.

In [None]:
# Import necessary function from helper_functions.py
from helper_functions import plot_image_dimensions_bar_graph
from helper_functions import plot_total_image_dimensions_bar_graph
from helper_functions import plot_class_distribution
from helper_functions import check_accuracy_aletheia4

### Plot Image Dimensions

In [None]:
# Plot the image dimensions found across the whole dataset (all class folders)
img_dir = "/Users/jacob/OneDrive/Desktop/Aletheia4Dataset/"
plot_total_image_dimensions_bar_graph(img_dir, heading='Aletheia Dataset Image Dimensions')

In [None]:
# Plot the image dimensions of the computer-generated (CG) images,
# which are stored in the "AI" class folder
img_dir = "/Users/jacob/OneDrive/Desktop/Aletheia4Dataset/AI/"
plot_image_dimensions_bar_graph(img_dir, heading='CG Image Dimensions')

In [None]:
# Plot the image dimensions of the images in the "GAN" class folder
img_dir = "/Users/jacob/OneDrive/Desktop/Aletheia4Dataset/GAN/"
plot_image_dimensions_bar_graph(img_dir, heading='GAN Image Dimensions')

In [None]:
# Plot the image dimensions of the images in the "REAL" class folder
img_dir = "/Users/jacob/OneDrive/Desktop/Aletheia4Dataset/REAL/"
plot_image_dimensions_bar_graph(img_dir, heading='Real Image Dimensions')

### Plot Class Distribution

In [None]:
# Plot the class distribution for the dataset root (helper from helper_functions.py)
plot_class_distribution('/Users/jacob/OneDrive/Desktop/Aletheia4Dataset/')

## Prepare Data

In [None]:
from helper_functions import show_img

In [None]:
class CustomImageDataset(Dataset):
    """Image-classification dataset backed by a ``root/<class_name>/<image>`` tree.

    Every sub-directory of ``img_directory`` is treated as one class. Class ids
    are assigned by enumerating the sub-directory names in sorted order, and the
    images of each class are listed in sorted order too, so the sample order is
    identical every time the dataset is built. That matters because a subset is
    selected through integer positions (``indices``) into this listing.

    Args:
        img_directory: Root folder containing one sub-directory per class.
        indices: Optional iterable of integer positions; when given, only those
            samples are kept, in the order supplied.
        transforms: Optional callable invoked as ``transforms(image=array)``
            (the array is the RGB image as a NumPy array) and returning a
            mapping with an ``'image'`` entry.

    Attributes are documented inline in ``__init__``.
    """

    # File extensions (lower-case) that are recognised as images
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.webp')

    def __init__(self, img_directory, indices=None, transforms=None):
        self.img_directory = img_directory
        self.transforms = transforms
        # Class id of every sample, parallel to img_names
        self.img_labels = []
        # Path of every sample relative to img_directory: "<class_name>/<file>"
        self.img_names = []

        # Only directories are classes; stray files in the root (e.g. OS
        # metadata files) must neither crash the scan nor consume a class id.
        class_names = sorted(
            entry for entry in os.listdir(img_directory)
            if os.path.isdir(os.path.join(img_directory, entry))
        )

        for class_id, class_name in enumerate(class_names):
            class_dir = os.path.join(img_directory, class_name)
            # Sorted so that position -> sample is stable across constructions
            for img in sorted(os.listdir(class_dir)):
                if img.lower().endswith(self.IMAGE_EXTENSIONS):
                    self.img_names.append(os.path.join(class_name, img))
                    self.img_labels.append(class_id)

        # Subset handling
        if indices is not None:
            # Materialise once: `indices` is consumed twice below and may be a
            # one-shot iterable.
            selected = list(indices)
            self.img_names = [self.img_names[i] for i in selected]
            self.img_labels = [self.img_labels[i] for i in selected]

    def __len__(self):
        """Return the number of samples kept in this dataset."""
        return len(self.img_names)

    def __getitem__(self, index):
        """Load sample ``index`` and return ``(image, label)``.

        Without transforms the image is a PIL image in RGB mode; with
        transforms it is whatever ``transforms(image=...)['image']`` yields.
        """
        img_path = os.path.join(self.img_directory, self.img_names[index])
        # The context manager closes the underlying file handle right away;
        # convert() returns an independent, fully loaded image.
        with Image.open(img_path) as handle:
            image = handle.convert('RGB')

        # Apply transforms
        if self.transforms:
            image = np.array(image)
            image = self.transforms(image=image)['image']

        label = self.img_labels[index]
        return image, label

In [None]:
from helper_functions import get_image_mean_std

# Root folder of the dataset
dataset_path = "/Users/jacob/OneDrive/Desktop/Aletheia4Dataset/"

# Compute the image statistics of the dataset and print them.
# NOTE(review): the mean/std literals passed to A.Normalize(...) in the
# augmentation cell are presumably copied from this output — confirm and keep
# them in sync if the dataset changes.
mean, std = get_image_mean_std(dataset_path)
print(mean)
print(std)

### Apply Data Augmentation
We augment the images in our dataset to make sure our model is robust and to prevent overfitting.

In [None]:
# Training pipeline: deterministic resize + crop to 256x256, then random
# augmentations, then normalisation and conversion to a tensor.
train_transforms = A.Compose([
    A.SmallestMaxSize(max_size=304),  # resize step applied before the crop
    A.CenterCrop(256, 256),  # fixed 256x256 crop
    A.HorizontalFlip(p=0.5),
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.03, rotate_limit=7, p=0.5),
    A.PixelDropout(dropout_prob=0.01, p=0.35),
    # Same mean/std literals as in test_transforms below — keep both in sync
    A.Normalize(mean=[0.4953, 0.4166, 0.3759], std=[0.2432, 0.2228, 0.2194]),
    ToTensorV2()
])

# Evaluation pipeline: same resize, crop and normalisation as training but
# without any random augmentation.
test_transforms = A.Compose([
    A.SmallestMaxSize(max_size=304), 
    A.CenterCrop(256, 256), 
    A.Normalize(mean=[0.4953, 0.4166, 0.3759], std=[0.2432, 0.2228, 0.2194]),
    ToTensorV2()
])

In [None]:
def show_images(dataset, images=12):
    """Display the first ``images`` samples of ``dataset`` in a single row.

    Args:
        dataset: Indexable collection. Each item is either an image or an
            ``(image, label)`` pair such as ``CustomImageDataset.__getitem__``
            returns; for a pair only the image is shown.
        images: Number of samples to show, starting at index 0.
    """
    # squeeze=False keeps `axes` two-dimensional, so indexing also works when
    # only one image is requested.
    fig, axes = plt.subplots(1, images, figsize=(images * 3, 3), squeeze=False)

    for i in range(images):
        # Get a sample from the dataset and drop the label if there is one
        sample = dataset[i]
        image = sample[0] if isinstance(sample, (tuple, list)) else sample

        # Convert tensors to a NumPy array; the transpose moves axis 0 to the
        # end (assumes a channel-first tensor, as ToTensorV2 produces).
        if torch.is_tensor(image):
            image = image.detach().cpu().numpy().transpose((1, 2, 0))

        # Display the image
        axes[0, i].imshow(image)
        axes[0, i].axis('off')

    plt.show()

## Create Dataset

In [None]:
# Create Dataset
dataset = CustomImageDataset("C:\\Users\\jacob\\OneDrive\\Desktop\\Aletheia4Dataset")

In [None]:
# Total number of images found across all class folders
num_samples = len(dataset)
print(f"Number of samples in the dataset: {num_samples}")

### Define Weights for Classes in Dataset

In [None]:
classes = 3


def define_class_weights(labels, classes):
    """Return one sampling weight per entry of ``labels``.

    A class that occurs ``k`` times among ``len(labels)`` samples gets the
    weight ``len(labels) / k``, so rare classes receive larger weights. Classes
    that never occur get weight 0 (which no sample can pick up).

    Args:
        labels: Sequence of integer class ids in ``range(classes)``.
        classes: Total number of classes.

    Returns:
        A list with the weight of each sample, in the order of ``labels``.
    """
    # Occurrences of every class id
    frequencies = [0] * classes
    for lbl in labels:
        frequencies[lbl] += 1

    # Total number of samples, as float so the division below is a true division
    total = float(sum(frequencies))

    # Inverse-frequency weight per class
    per_class = [total / float(n) if n != 0 else 0 for n in frequencies]

    # Expand the per-class weights to one weight per sample
    return [per_class[lbl] for lbl in labels]

# Per-sample weights over the complete dataset, wrapped in a tensor, and a
# sampler that draws len(dataset) samples according to them.
weights = torch.DoubleTensor(define_class_weights(dataset.img_labels, classes))
sampler = WeightedRandomSampler(weights, len(dataset))

In [None]:
# Seed the RNG so that the random index split below is reproducible
torch.manual_seed(42)

# 85 % training, 5 % validation, the remainder for testing
total_samples = len(dataset)
train_size = int(0.85 * total_samples)
val_size = int(0.05 * total_samples)
test_size = total_samples - train_size - val_size
train_dataset_i, val_dataset_i, test_dataset_i = random_split(
    range(total_samples), [train_size, val_size, test_size]
)

# One dataset instance per split: the split is a set of positions into the
# folder listing; only the training split gets the augmenting transforms.
dataset_root = "C:\\Users\\jacob\\OneDrive\\Desktop\\Aletheia4Dataset"
transformed_train_dataset = CustomImageDataset(dataset_root, indices=train_dataset_i, transforms=train_transforms)
transformed_val_dataset = CustomImageDataset(dataset_root, indices=val_dataset_i, transforms=test_transforms)
transformed_test_dataset = CustomImageDataset(dataset_root, indices=test_dataset_i, transforms=test_transforms)

# Labels of the training split, in sample order
train_labels = list(transformed_train_dataset.img_labels)

# Inverse-frequency weight for every training sample
weights = torch.DoubleTensor(define_class_weights(train_labels, classes))

# Sampler that draws as many samples per epoch as the training split holds,
# favouring under-represented classes
sampler = WeightedRandomSampler(weights, len(weights))

# DataLoaders: the training loader delegates ordering to the sampler, the
# validation and test loaders iterate in a fixed order
train_loader = DataLoader(transformed_train_dataset, batch_size=32, sampler=sampler, shuffle=False)
val_loader = DataLoader(transformed_val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(transformed_test_dataset, batch_size=64, shuffle=False)

In [None]:
# Sizes of the training, validation and test splits
print(len(transformed_train_dataset))
print(len(transformed_val_dataset))
print(len(transformed_test_dataset))

### Create Sample Image With Transformations

In [None]:
# Demo-only pipeline: every augmentation is applied with probability 1 and there
# is no Normalize/ToTensorV2 step, so the result can be shown directly.
# It deliberately uses its own name: rebinding `train_transforms` here would make
# a later re-run of the dataset cell train on this demo pipeline.
demo_transforms = A.Compose([
    A.SmallestMaxSize(max_size=304),
    A.CenterCrop(256, 256),
    A.HorizontalFlip(p=1),
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=10, p=1),
    A.PixelDropout(dropout_prob=0.015, p=1),
])

sample_image_path = "C:\\Users\\jacob\\OneDrive\\Desktop\\SyntheticEye\\SampleData\\AI\\030CHMXYM3.jpg"
img = cv2.imread(sample_image_path)
# cv2.imread signals an unreadable/missing file by returning None instead of raising
if img is None:
    raise FileNotFoundError(f"Could not read sample image: {sample_image_path}")
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; matplotlib expects RGB

# Apply transformations
transformed = demo_transforms(image=img)
transformed_img = transformed["image"]

# Plot original and transformed image side by side
plt.figure(figsize=(24, 12))

plt.subplot(1, 2, 1)
plt.title('Original Image')
plt.imshow(img)
plt.axis('off')

plt.subplot(1, 2, 2)
plt.title('Transformed Image')
plt.imshow(transformed_img)
plt.axis('off')

plt.show()

## Create Neural Networks

In [None]:
class ResBlock(nn.Module):
    """Two-branch convolutional block with a skip connection.

    The input is processed by a 3x3 and a 5x5 convolution branch in parallel
    (each: Conv2d -> BatchNorm2d -> LeakyReLU -> Dropout, stride 1, padding
    chosen so the spatial size is kept). Each branch produces
    ``out_channels // 2`` channels; the two results are concatenated along the
    channel axis and added to the skip path.

    Args:
        in_channels: Number of channels of the input.
        out_channels: Number of channels of the output.
        dropout_prob: Dropout probability used at the end of each branch.
    """

    def __init__(self, in_channels, out_channels, dropout_prob=0.1):
        super().__init__()

        half = out_channels // 2

        def make_branch(kernel_size, padding):
            # Conv (stride 1) -> BatchNorm -> LeakyReLU -> Dropout
            return nn.Sequential(
                nn.Conv2d(in_channels, half, kernel_size, 1, padding),
                nn.BatchNorm2d(half),
                nn.LeakyReLU(),
                nn.Dropout(dropout_prob),
            )

        # Branches are created in this order so that seeded initialisation
        # stays the same: 3x3 first, then 5x5, then the skip projection.
        self.branch3x3 = make_branch(3, 1)
        self.branch5x5 = make_branch(5, 2)

        # Skip path: identity when the channel count is unchanged, otherwise a
        # 1x1 convolution that projects the input to out_channels
        if in_channels == out_channels:
            self.residual = nn.Identity()
        else:
            self.residual = nn.Conv2d(in_channels, out_channels, 1)

    def forward(self, x):
        """Return the concatenated branch outputs plus the skip path of ``x``."""
        merged = torch.cat((self.branch3x3(x), self.branch5x5(x)), dim=1)
        return merged + self.residual(x)

class Aletheia4Net(nn.Module):
    """Convolutional classifier built from ``ResBlock`` stages.

    Five ``ResBlock`` stages (3 -> 16 -> 32 -> 64 -> 128 -> 256 channels) with a
    2x2 max-pooling between consecutive stages, followed by global average
    pooling and a three-layer fully connected head.

    Args:
        dropout_prob: Dropout probability used in the fully connected head.
        num_classes: Number of output logits. Defaults to 3, the value that was
            previously hard-coded.
    """

    def __init__(self, dropout_prob=0.35, num_classes=3):
        super().__init__()

        # Convolutional layers with residual blocks and max-pooling
        self.conv_layers = nn.Sequential(
            ResBlock(3, 16),
            nn.MaxPool2d(2),
            ResBlock(16, 32),
            nn.MaxPool2d(2),
            ResBlock(32, 64),
            nn.MaxPool2d(2),
            ResBlock(64, 128),
            nn.MaxPool2d(2),
            ResBlock(128, 256)
        )

        # Global Average Pooling: collapses each of the 256 feature maps to one value
        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))

        # Fully connected layers
        self.fc_layers = nn.Sequential(
            nn.Linear(256, 512),
            nn.LeakyReLU(),
            nn.Dropout(dropout_prob),
            nn.Linear(512, 256),
            nn.LeakyReLU(),
            nn.Dropout(dropout_prob),
            nn.Linear(256, num_classes)
        )

    def feature_size(self):
        """Return the flattened size of the conv output for a 1x3x256x256 input.

        The probe tensor is created on the same device as the model's
        parameters, and the pass runs in eval mode without gradients so it does
        not touch the BatchNorm running statistics. The previous train/eval
        mode is restored afterwards.
        """
        device = next(self.parameters()).device
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                probe = torch.zeros(1, 3, 256, 256, device=device)
                return self.conv_layers(probe).view(1, -1).size(1)
        finally:
            self.train(was_training)

    def forward(self, x):
        """Map a batch of images ``(N, 3, H, W)`` to logits ``(N, num_classes)``."""
        x = self.conv_layers(x)
        x = self.global_avg_pool(x)
        # (N, 256, 1, 1) -> (N, 256)
        x = x.view(x.size(0), -1)
        x = self.fc_layers(x)
        return x

## Set Up Device Agnostic Code

In [None]:
# Set device to GPU if available, else use the CPU
# Use the GPU ('cuda') when PyTorch reports one as available, otherwise the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")

## Hyperparameters

In [None]:
# Define Hyperparameters
# Number of target classes. NOTE: this value is not passed to the model or to
# define_class_weights in this notebook (the latter uses the separate `classes`
# variable) — keep them in sync manually.
num_classes = 3
# Initial learning rate handed to the optimizer
learning_rate = 0.0003
# Batch size used by the StyleGANEval loader; the train/val/test DataLoaders
# set their batch sizes (32 / 64 / 64) as literals where they are created.
batch_size = 32
# Number of passes over the training loader in the training loop
num_epochs = 16

## Training

### Prepare Training 

In [None]:
# Set random seed for reproducibility and initialize model
# Seed the RNG right before construction so the initial weights are
# reproducible, then build the model and move it to the selected device
torch.manual_seed(42)
model = Aletheia4Net().to(device)

In [None]:
# Use CrossEntropyLoss for classification problem
# Use CrossEntropyLoss for classification problem
loss_function = torch.nn.CrossEntropyLoss()
# Use NAdam (a variant of the Adam optimizer) as our optimizer.
# NOTE: the optimizer holds references to the parameters of the `model` object
# that exists when this cell runs; if `model` is later rebound to a new
# instance, this cell must be re-run or the new instance will not be updated.
optimizer = optim.NAdam(model.parameters(), lr=learning_rate, weight_decay=1e-4)
# Define learning rate scheduler to automatically adjust our learning rate:
# with step_size=1 and gamma=0.9 every scheduler.step() multiplies it by 0.9
scheduler = StepLR(optimizer, step_size=1, gamma=0.9)

In [None]:
# Initialize TensorBoard
# TensorBoard event writer for this run; `step` starts at zero
writer = SummaryWriter('runs/Aletheia4_3')
step = 0

In [None]:
from helper_functions import check_accuracy

### Train Model

In [None]:
# Resume training from a saved checkpoint.
#
# The weights are loaded INTO THE EXISTING `model` object instead of a freshly
# constructed network: the optimizer and scheduler created in the preparation
# cells hold references to this object's parameters. Rebinding `model` to a new
# instance here would leave the optimizer updating the old, unused network
# while the training loop runs forward/backward on the new one.

# Specify path to the trained model weights
model_path = "C:\\Users\\jacob\\OneDrive\\Desktop\\SyntheticEye\\Development\\Aletheia4_3_epoch_16.pth"

# Load trained weights into the model; map_location allows a checkpoint that
# was saved on a GPU to be restored on a CPU-only machine (and vice versa)
model.load_state_dict(torch.load(model_path, map_location=device))

model = model.to(device)

In [None]:
# Initialize EmissionsTracker to track carbon emissions using the CodeCarbon library
carbon_tracker = EmissionsTracker(project_name="Aletheia4_3", log_level="critical")
carbon_tracker.start()

torch.manual_seed(42)

model = model.to(device)

# Number of training batches between two TensorBoard/console reports
metrics_interval = 100

emissions = None
try:
    # Start training
    for epoch in range(num_epochs):
        model.train()
        # Running statistics since the last report (reset every metrics_interval batches)
        running_loss = 0.0
        correct = 0
        total = 0

        batches_per_epoch = len(train_loader)
        train_progress = tqdm(enumerate(train_loader), total=batches_per_epoch)
        for batch_idx, (inputs, labels) in train_progress:
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass
            outputs = model(inputs)
            loss = loss_function(outputs, labels)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            if (batch_idx + 1) % metrics_interval == 0:
                global_step = epoch * batches_per_epoch + batch_idx
                writer.add_scalar('Training Loss', running_loss / metrics_interval, global_step)
                writer.add_scalar('Training Accuracy', 100 * correct / total, global_step)
                # Print training results
                print(f'Epoch {epoch}, Training Accuracy: {100 * correct / total:.2f}%', flush=True)
                # Reset values
                running_loss = 0.0
                correct = 0
                total = 0

        # Validation
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = loss_function(outputs, labels)
                val_loss += loss.item()
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # Guard against an empty validation loader instead of dividing by zero
        val_loss /= max(len(val_loader), 1)
        val_accuracy = 100 * correct / total if total else 0.0
        writer.add_scalar('Validation Loss', val_loss, epoch)
        writer.add_scalar('Validation Accuracy', val_accuracy, epoch)

        # Print validation results
        print(f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

        # Scheduler step (once per epoch)
        scheduler.step()

        # Save model checkpoint.
        # NOTE(review): with the offset 16 the first epoch of this run writes
        # "..._epoch_16.pth", the same name as the checkpoint that is loaded
        # for resuming — confirm whether overwriting it is intended.
        torch.save(model.state_dict(), f'Aletheia4_3_epoch_{16 + epoch}.pth')
finally:
    # Always release the tracker and the TensorBoard writer, even when training
    # is interrupted or fails part-way through.
    try:
        # Finalize carbon tracking
        emissions = carbon_tracker.stop()
    finally:
        # Close TensorBoard writer
        writer.close()

# stop() may not return a measurement; only format a real number
if emissions is not None:
    print(f"Emissions: {emissions:.5f} kgCO2eq")
else:
    print("Emissions: not available")

## Evaluate Model

### Accuracy

In [None]:
# Build a fresh network for evaluation. No warm-up forward pass is needed:
# every layer of Aletheia4Net is created with explicit sizes in __init__.
model = Aletheia4Net().to(device)

# Specify path to the trained model weights
model_path = "C:\\Users\\jacob\\OneDrive\\Desktop\\SyntheticEye\\Development\\Aletheia4_3_epoch_16.pth"
# map_location allows a checkpoint saved on a GPU to be restored on a CPU-only machine
model.load_state_dict(torch.load(model_path, map_location=device))

# Inference mode: dropout is disabled and BatchNorm uses its running statistics
model.eval()

In [None]:
# Check accuracy of trained model on the held-out test data.
# (The training loader must not be used here: it applies random augmentation
# and class-weighted resampling.)
model.eval()
check_accuracy_aletheia4(test_loader, model, device)

In [None]:
# Root folder of the additional evaluation set
new_root_directory = "C:\\Users\\jacob\\OneDrive\\Desktop\\StyleGANEval"

# Same deterministic preprocessing as for the validation/test splits.
# NOTE(review): CustomImageDataset derives class ids from the sub-folders of
# the root it is given — verify that this folder yields the same id-to-class
# mapping as the training dataset.
new_dataset = CustomImageDataset(new_root_directory, transforms=test_transforms)

new_test_loader = DataLoader(new_dataset, shuffle=False, batch_size=batch_size)

# Evaluate accuracy on new dataset
check_accuracy_aletheia4(new_test_loader, model, device)

### Precision

In [None]:
from sklearn.metrics import precision_score

In [None]:
# Build the network and restore the checkpoint to evaluate.
# NOTE(review): this cell loads the "epoch_0" checkpoint while the accuracy
# cells load "epoch_16" — confirm which one is meant to be reported.
model = Aletheia4Net().to(device)

# Load model; map_location allows restoring a GPU checkpoint on a CPU-only machine
model_path = "./Aletheia4_3_epoch_0.pth"
model.load_state_dict(torch.load(model_path, map_location=device))

# Ensure model is in evaluation mode
model.eval()

# Store predictions and labels
all_preds = []
all_true_labels = []

with torch.no_grad():
    for inputs, labels in test_loader:
        # Only the inputs have to live on the model's device
        inputs = inputs.to(device)

        predicted = model(inputs).argmax(dim=1)

        # Collect predictions and true labels
        all_preds.extend(predicted.cpu().tolist())
        all_true_labels.extend(labels.tolist())

# Calculate precision for each class. Passing `labels` explicitly fixes the
# length and order of the result: without it, a class that appears in neither
# list is dropped and the positions below would no longer equal the class ids.
class_ids = list(range(num_classes))
precision = precision_score(all_true_labels, all_preds, labels=class_ids, average=None)

# Print precision for each class
for i, class_precision in zip(class_ids, precision):
    print(f"Precision for class {i}: {class_precision}")

## Visualize Model

In [None]:
# Launch Netron on the saved checkpoint.
# NOTE(review): the training loop writes its checkpoints with
# torch.save(model.state_dict(), ...), i.e. weights only — confirm that Netron
# can render the architecture from such a file.
netron.start("C:\\Users\\jacob\\OneDrive\\Desktop\\SyntheticEye\\Development\\Aletheia4_3_epoch_16.pth")