## CNN Architecture

In [5]:
import torch.nn as nn
import torch.optim as optim
import torchvision 
import torch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader,Dataset, random_split
# Other necessary imports for your specific dataset and preprocessing
import os, time, glob
import numpy as np
import cv2
import matplotlib.pyplot as plt
from PIL import Image
import torch
from torchvision import transforms
from skimage import io
from skimage.transform import resize
import matplotlib.pyplot as plt
import random
import matplotlib.patches as patches
from torchvision import ops
from torch.nn.utils.rnn import pad_sequence
import os
from sklearn.model_selection import train_test_split

data_folder = '/home/m3-learning/Documents/myML/preprocessed_data/'
file_list = os.listdir(data_folder)

file_pairs = {}
for file in file_list:
    if file.endswith('_img.npz'):
        identifier = file.split('_')[0]
        if identifier not in file_pairs:
            file_pairs[identifier] = {'img': file, 'box': None}
        else:
            file_pairs[identifier]['img'] = file
    elif file.endswith('_box.npz'):
        identifier = file.split('_')[0]
        if identifier not in file_pairs:
            file_pairs[identifier] = {'img': None, 'box': file}
        else:
            file_pairs[identifier]['box'] = file

# Load image and bounding box data
images = []
bounding_boxes = []

for identifier, files in file_pairs.items():
    img_file = files['img']
    box_file = files['box']
    if img_file and box_file:
        img_data = np.load(os.path.join(data_folder, img_file))['arr_0']
        box_data = np.load(os.path.join(data_folder, box_file))['arr_0']
        images.append(img_data)
        bounding_boxes.append(box_data)

# Convert to NumPy arrays
images = np.array(images)
bounding_boxes = np.array(bounding_boxes)

# Resize images and bounding boxes
resized_images = []
resized_bounding_boxes = []

target_height = 100  # Set your desired height
target_width = 100   # Set your desired width


for image, bounding_box in zip(images, bounding_boxes):
    # Resize the image
    resized_image = cv2.resize(image, (target_width, target_height))  # Using OpenCV for resizing
    resized_images.append(resized_image)
    
    # Calculate the scaling factor for bounding boxes (assuming resizing is proportional)
    height_scale = target_height / image.shape[0]
    width_scale = target_width / image.shape[1]
    
    # Scale bounding box coordinates accordingly
    scaled_bounding_box = bounding_box * np.array([width_scale, height_scale, width_scale, height_scale])
    
    # Ensure the bounding box has the same number of elements (i.e., a consistent shape)
    if scaled_bounding_box.shape == (4,):
        resized_bounding_boxes.append(scaled_bounding_box)
    else:
        print(f"Ignoring bounding box: {scaled_bounding_box} with shape: {scaled_bounding_box.shape}")


# Convert the resized images and bounding boxes back to numpy arrays
resized_images = np.array(resized_images)
resized_bounding_boxes = np.array(resized_bounding_boxes)

# Continue with the rest of your code using resized_images and resized_bounding_boxes

# Define the model
class FCNN(nn.Module):
    def __init__(self, input_shape,num_boxes = 272, num_classes = 4):
        super(FCNN, self).__init__()

        self.conv_layers = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 8 * 8, 2048),
            nn.ReLU(),
            nn.Linear(2048, 512),
            nn.ReLU(),
            nn.Linear(512, (num_boxes * num_classes))  # Output layer for multiple boxes
        )

        self.num_boxes = num_boxes
        self.num_classes = num_classes

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x.view(-1, self.num_boxes, self.num_classes)

input_channels = 1
input_height = 64
input_width = 64

# Define the input size based on the input image shape
input_shape = input_channels * input_height * input_width

# Define your custom dataset
class BoundingBoxDataset(Dataset):
    def __init__(self, images, bounding_boxes, transform):
        self.images = images
        self.bounding_boxes = bounding_boxes
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        bounding_box = self.bounding_boxes[idx]
        
        

        return image, bounding_box

# Split data into train and test sets
train_images, test_images, train_bounding_boxes, test_bounding_boxes = train_test_split(
    resized_images, resized_bounding_boxes, test_size=0.5, random_state=42)

# Convert NumPy arrays to PyTorch tensors
train_images_tensor = torch.from_numpy(train_images).float()
train_bounding_boxes_tensor = torch.from_numpy(train_bounding_boxes).float()

test_images_tensor = torch.from_numpy(test_images).float()
test_bounding_boxes_tensor = torch.from_numpy(test_bounding_boxes).float()

# Add a channel dimension to grayscale images
train_images_tensor = train_images_tensor.unsqueeze(1)  # Adds a channel dimension at index 1
test_images_tensor = test_images_tensor.unsqueeze(1)


transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

# Create instances of your custom dataset
train_dataset = BoundingBoxDataset(train_images_tensor, train_bounding_boxes_tensor,transform=transform)
test_dataset = BoundingBoxDataset(test_images_tensor, test_bounding_boxes_tensor,transform=transform)


# Create DataLoader instances
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)


# Define the model
fcnn_model = FCNN(input_shape, num_classes=4)



class MinMaxScaler:
    def __init__(self):
        self.min = None
        self.max = None
    
    def fit(self, data):
        self.min = torch.min(data, dim=0)[0]
        self.max = torch.max(data, dim=0)[0]
    
    def transform(self, data):
        return (data - self.min) / (self.max - self.min)
    
    def fit_transform(self, data):
        self.fit(data)
        return self.transform(data)

# Define your loss function and optimizer and Scaler
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(fcnn_model.parameters(), lr=0.001)
scaler = MinMaxScaler()

num_epochs = 60



# Training loop
for epoch in range(num_epochs):
    fcnn_model.train()
    running_loss = 0.0
    
    for images, bounding_boxes in train_loader:
        optimizer.zero_grad()
        
        # No need to flatten the input, keep it as 4D tensor [batch_size, channels, height, width]
        outputs = fcnn_model(images)
        
        loss = criterion(outputs, bounding_boxes)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
        # Print predicted bounding box parameters for each batch during training
        print(f"Predicted Bounding Box Parameters (Training):\n{outputs[0]}")  # Modify this according to your output structure

    # Print training loss for each epoch
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(train_loader)}")

# Evaluation
fcnn_model.eval()
test_loss = 0.0

with torch.no_grad():
    for images, bounding_boxes in test_loader:
        outputs = fcnn_model(images)
        test_loss += criterion(outputs, bounding_boxes).item()
        
        # Print predicted bounding box parameters for each batch during testing
        print(f"Predicted Bounding Box Parameters (Testing):\n{outputs[0]}")  # Modify this according to your output structure

print(f"Test Loss: {test_loss / len(test_loader)}") 

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (51,) + inhomogeneous part.

## Visualizing Predicted Bounding Boxes
Blue is ground truth,
Red is predicted Bounding Box

In [None]:
# Evaluation with visualization
fcnn_model.eval()

# Get a batch of test data
images, bounding_boxes = next(iter(test_loader))

# Perform prediction
with torch.no_grad():
    outputs = fcnn_model(images)

# Plot predicted and ground truth bounding boxes on images
num_samples = 1  # Number of samples to visualize
fig, axes = plt.subplots(nrows=num_samples, ncols=1, figsize=(8, 12))

for i in range(num_samples):
    # Convert tensors to numpy arrays
    img_np = images[i].squeeze().numpy()
    pred_box = outputs[i].numpy()
    true_box = bounding_boxes[i].squeeze().numpy()

    # Display the image
    ax = axes if num_samples == 1 else axes[i]
    ax.imshow(img_np, cmap='gray')
    ax.set_title(f'Sample {i + 1}')

    # Plot predicted box in red
    pred_rect = patches.Rectangle((pred_box[0], pred_box[1]), pred_box[2], pred_box[3], linewidth=2, edgecolor='r', facecolor='none')
    ax.add_patch(pred_rect)

    # Plot ground truth box in green
    true_rect = patches.Rectangle((true_box[0], true_box[1]), true_box[2], true_box[3], linewidth=2, edgecolor='g', facecolor='none')
    ax.add_patch(true_rect)

plt.tight_layout()
plt.show()
