# Explore output of best current CNN model (performance & number of parameters)

In [None]:
%pip install torch torchvision
%pip install numpy
%pip install matplotlib
%pip install tqdm

In [None]:
# Load root project path to sys.path
import sys
import os

# Treat the parent of the current working directory as the project root
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))

# Register the root on sys.path only once, so that project modules
# (e.g. the `src` package) can be imported from this notebook
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

In [None]:
# Import third-party libraries and the project's model definition
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from torchvision.datasets import ImageFolder
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from tqdm import tqdm
import os
import pickle
from src.models.efficient_net import H0_EfficientNetB0

In [None]:
# Dataset root (machine-specific absolute path); the commented line is the
# equivalent location inside the project tree.
# dataver0_path = os.path.join(project_root, 'data', 'processed', 'dataver0')
dataver0_path = "/home/haipn/data/dataver0"

# The train and validation splits live in sibling sub-directories of the root
train_path, valid_path = (
    os.path.join(dataver0_path, split_name) for split_name in ('train', 'valid')
)

# Trained checkpoint to analyse; the commented line is the in-project location.
# model_path = os.path.join(project_root, 'results', 'experiment_43', 'experiment_43.pth')
model_path = "/home/haipn/backup/experiment_43/experiment_43.pth"

In [None]:
# 1. Read images from train and valid set
class CustomDataset(Dataset):
    """Thin ``Dataset`` wrapper that delegates to a torchvision ``ImageFolder``.

    Args:
        root_dir: Directory handed to ``ImageFolder`` as its root.
        transform: Optional transform forwarded to ``ImageFolder``.
    """

    def __init__(self, root_dir, transform=None):
        # All indexing and length queries are forwarded to this inner dataset
        self.dataset = ImageFolder(root_dir, transform=transform)

    def __len__(self):
        """Return the number of samples in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        sample_image, sample_label = self.dataset[idx]
        return sample_image, sample_label

# Preprocessing: resize every image to 224x224, then convert it to a tensor
transform = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])

# Mini-batch size shared by both loaders
batch_size = 120

# Training split; shuffle=False keeps the samples in dataset order
train_dataset = CustomDataset(train_path, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=False, batch_size=batch_size)

# Validation split, configured the same way
valid_dataset = CustomDataset(valid_path, transform=transform)
valid_loader = DataLoader(valid_dataset, shuffle=False, batch_size=batch_size)

In [None]:
# 2. Inference
# Run on the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = H0_EfficientNetB0().to(device)
# map_location remaps the checkpoint's tensors onto `device`, so a checkpoint
# saved from a GPU run can still be loaded on a CPU-only machine
model.load_state_dict(torch.load(model_path, map_location=device))
# Put the model in evaluation mode before running inference
model.eval()

def inference(dataloader):
    """Run the module-level ``model`` over every batch of ``dataloader``.

    Gradient tracking is disabled for the whole pass. Each batch of images is
    moved to the module-level ``device`` before the forward pass.

    Args:
        dataloader: Iterable yielding ``(images, labels)`` batches of tensors.

    Returns:
        A ``(outputs, labels)`` tuple of NumPy arrays, each built by
        concatenating the per-batch arrays along the first axis.
    """
    collected_outputs, collected_labels = [], []
    with torch.no_grad():
        for batch_images, batch_targets in tqdm(dataloader):
            predictions = model(batch_images.to(device))
            # Bring results back to the CPU so they can be converted to NumPy
            collected_outputs.append(predictions.cpu().numpy())
            collected_labels.append(batch_targets.numpy())
    stacked_outputs = np.concatenate(collected_outputs)
    stacked_labels = np.concatenate(collected_labels)
    return stacked_outputs, stacked_labels

# 3. Get outputs and labels
# Run the model over each split and keep the stacked outputs and labels
print("Processing training set...")
train_outputs, train_labels = inference(dataloader=train_loader)
print("Processing validation set...")
valid_outputs, valid_labels = inference(dataloader=valid_loader)

In [None]:
# 4. Save to one file
# Bundle the outputs and labels of both splits under descriptive keys
data = dict(
    train_outputs=train_outputs,
    train_labels=train_labels,
    valid_outputs=valid_outputs,
    valid_labels=valid_labels,
)

# Persist the bundle as one pickle file in the current working directory
with open('cnn_outputs.pkl', 'wb') as f:
    pickle.dump(data, f)

# 5. Visualize x and y of train and valid in 3D
def visualize_3d(outputs, labels, title):
    """Draw a 3D scatter plot of the first three output columns, coloured by label.

    The figure is saved as ``<title>.png`` in the current working directory
    and then shown.

    Args:
        outputs: 2-D array indexable as ``outputs[:, k]``; columns 0, 1 and 2
            are used as the x, y and z coordinates.
        labels: Per-point values mapped to colours through the 'viridis'
            colormap.
        title: Plot title; also the stem of the saved PNG file name.
    """
    fig = plt.figure(figsize=(10, 8))
    ax = fig.add_subplot(111, projection='3d')

    xs, ys, zs = outputs[:, 0], outputs[:, 1], outputs[:, 2]
    points = ax.scatter(xs, ys, zs, c=labels, cmap='viridis')

    ax.set(xlabel='Output 1', ylabel='Output 2', zlabel='Output 3', title=title)
    # The colour bar shows how label values map onto the colormap
    fig.colorbar(points)

    plt.savefig(title + '.png')
    plt.show()

In [None]:
# Plot the raw training-set outputs, coloured by label
visualize_3d(outputs=train_outputs, labels=train_labels, title='Training Set')

In [None]:
# Plot the raw validation-set outputs, coloured by label
visualize_3d(outputs=valid_outputs, labels=valid_labels, title='Validation Set')

In [None]:
# 6. Read the file to load x and y again
# Reload the bundle written earlier by this notebook
with open('cnn_outputs.pkl', 'rb') as f:
    loaded_data = pickle.load(f)

# Restore the four arrays under their original variable names
train_outputs, train_labels, valid_outputs, valid_labels = (
    loaded_data[key]
    for key in ('train_outputs', 'train_labels', 'valid_outputs', 'valid_labels')
)

In [None]:
# 7. Define activation functions (ReLU, sigmoid, SigReLU) to apply to x before visualizing again
def relu(x):
    """Rectified linear unit: replace negative entries of ``x`` with zero, element-wise."""
    floor = 0
    return np.maximum(floor, x)

def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``, element-wise.

    Only ``exp(-|x|)`` is ever evaluated, so large-magnitude negative inputs
    do not overflow inside ``np.exp`` (the naive formula computes ``exp(-x)``,
    which overflows and emits a RuntimeWarning for such inputs).

    Args:
        x: A NumPy array or a numeric scalar.

    Returns:
        An array with the same shape as ``x`` (a NumPy scalar for scalar
        input) holding values in ``[0, 1]``.
    """
    decay = np.exp(-np.abs(x))  # exponent is never positive, so this stays in [0, 1]
    for_non_negative = 1 / (1 + decay)  # equals 1 / (1 + exp(-x)) when x >= 0
    for_negative = decay / (1 + decay)  # equals exp(x) / (1 + exp(x)) when x < 0
    result = np.where(np.asarray(x) >= 0, for_non_negative, for_negative)
    # Indexing with () turns a 0-d result back into a NumPy scalar and leaves
    # higher-dimensional arrays unchanged
    return result[()]

def sigrelu(x):
    """Apply ``relu`` to ``x`` and then ``sigmoid`` to the rectified values."""
    rectified = relu(x)
    return sigmoid(rectified)

In [None]:
# Apply ReLU
# Rectify the outputs of both splits
train_outputs_relu, valid_outputs_relu = map(relu, (train_outputs, valid_outputs))

In [None]:
# Plot the rectified training-set outputs
visualize_3d(outputs=train_outputs_relu, labels=train_labels, title='Training Set (ReLU)')

In [None]:
# Plot the rectified validation-set outputs
visualize_3d(outputs=valid_outputs_relu, labels=valid_labels, title='Validation Set (ReLU)')

In [None]:
# Apply SigReLU
# Pass the outputs of both splits through ReLU followed by sigmoid
train_outputs_sigrelu, valid_outputs_sigrelu = map(sigrelu, (train_outputs, valid_outputs))

In [None]:
# Plot the SigReLU-transformed training-set outputs
visualize_3d(outputs=train_outputs_sigrelu, labels=train_labels, title='Training Set (SigReLU)')

In [None]:
# Plot the SigReLU-transformed validation-set outputs
visualize_3d(outputs=valid_outputs_sigrelu, labels=valid_labels, title='Validation Set (SigReLU)')

In [None]:
# Apply sigmoid
# Squash the outputs of both splits with the sigmoid
train_outputs_sigmoid, valid_outputs_sigmoid = map(sigmoid, (train_outputs, valid_outputs))

In [None]:
# Plot the sigmoid-transformed training-set outputs
visualize_3d(outputs=train_outputs_sigmoid, labels=train_labels, title='Training Set (Sigmoid)')

In [None]:
# Plot the sigmoid-transformed validation-set outputs
visualize_3d(outputs=valid_outputs_sigmoid, labels=valid_labels, title='Validation Set (Sigmoid)')