# 1. Environment & Workspace Setup:

### Import Necessary Libraries:
- Import standard and advanced libraries such as NumPy, PyTorch, and Matplotlib.
- Prepare ShapeNet Dataset in Colab

In [None]:
# Standard Libraries
import os

os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'

import numpy as np
import matplotlib.pyplot as plt
import json
from tqdm import tqdm
import datetime
import time
import seaborn as sns
import plotly
import plotly.express as px
import pandas as pd
import random
import statistics
import shutil
import glob
import faiss
import pickle
import sklearn
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.manifold import TSNE
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import accuracy_score
from collections import defaultdict
from mpl_toolkits.mplot3d import Axes3D

# PyTorch and related modules
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.utils.data import random_split
import torch.nn.functional as F

# bug check
import faulthandler
faulthandler.enable()

# Monitoring and summary
from torch.utils.tensorboard import SummaryWriter

# Set random seeds for reproducibility
# The same seed is handed to PyTorch (CPU and CUDA), NumPy and Python's `random`,
# i.e. to every random number generator imported above.
seed = 42
torch.manual_seed(seed)
# NOTE(review): presumably this seeds only the current CUDA device; if several GPUs are
# used, torch.cuda.manual_seed_all may be needed — confirm against the PyTorch docs.
torch.cuda.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

# Ensure deterministic behavior
# NOTE(review): CUBLAS_WORKSPACE_CONFIG is exported at the top of this cell, presumably
# because deterministic cuBLAS kernels require it — confirm for the CUDA version in use.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch.use_deterministic_algorithms(True)  # Enforce the use of deterministic algorithms

# Check if CUDA (GPU support) is available
# `device` is a module-level name; it falls back to the CPU when no GPU is visible.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Log the library version and GPU availability next to the results
print(f"PyTorch Version: {torch.__version__}")
print(f"CUDA Available: {torch.cuda.is_available()}")


In [None]:
def gather_statistics(root_dir):
    """
    Compute per-class and overall point-count statistics for a dataset and plot them.

    Every sub-directory of ``root_dir`` is treated as one class. For each ``.txt`` file
    in a class directory the first three space-separated columns are loaded and the
    number of rows (points) is recorded.

    Args:
        root_dir (str): Directory whose sub-directories each hold the ``.txt`` point
            cloud files of one class.

    Returns:
        tuple: ``(df, overall_stats)``. ``df`` is a pandas DataFrame with one row per
        class (file count, max/min/avg/median/std-dev of the point counts and the
        names of the files holding the max and min). ``overall_stats`` is a dict of
        aggregates computed from the per-class rows; note that the overall average and
        std-dev are unweighted means of the per-class values.

    Raises:
        ValueError: If no class directory under ``root_dir`` contains a ``.txt`` file.
    """
    data = []

    # Go across each class directory (sorted so the table and plot order are reproducible)
    for class_name in sorted(os.listdir(root_dir)):
        class_dir = os.path.join(root_dir, class_name)

        # Check if it's a directory or not
        if not os.path.isdir(class_dir):
            continue

        # File names are stored next to their counts so that the max/min lookups below
        # can never drift out of alignment when the directory holds non-.txt entries.
        file_names = []
        point_counts = []

        # Scan through each point cloud file in the class directory
        for filename in sorted(os.listdir(class_dir)):
            if not filename.endswith('.txt'):
                continue
            file_path = os.path.join(class_dir, filename)
            # ndmin=2 keeps a one-point file as shape (1, 3) instead of (3,),
            # which would otherwise be counted as three points.
            points = np.loadtxt(file_path, delimiter=' ', usecols=(0, 1, 2), ndmin=2)
            file_names.append(filename)
            point_counts.append(points.shape[0])

        num_files = len(point_counts)
        if num_files == 0:
            # max()/min()/mean() are undefined for an empty class; report and move on.
            print(f"Skipping class '{class_name}': no .txt point cloud files found.")
            continue

        max_points = max(point_counts)
        min_points = min(point_counts)
        avg_points = int(statistics.mean(point_counts))
        median_points = int(statistics.median(point_counts))
        # The sample standard deviation needs at least two values.
        std_dev_points = int(statistics.stdev(point_counts)) if num_files > 1 else 0

        # First file (in sorted order) that reaches the extreme value
        max_points_file = file_names[point_counts.index(max_points)]
        min_points_file = file_names[point_counts.index(min_points)]

        data.append([class_name, num_files, max_points, max_points_file, min_points, min_points_file, avg_points, median_points, std_dev_points])

    if not data:
        raise ValueError(f"No point cloud classes with .txt files found under {root_dir!r}.")

    # Convert data to pandas DataFrame
    df = pd.DataFrame(data, columns=["Class", "Num of Files", "Max Points", "Max Points File", "Min Points", "Min Points File", "Avg Points", "Median Points", "Std Dev Points"])

    # Overall statistics
    overall_stats = {
        "Total Files": df["Num of Files"].sum(),
        "Average Points (Overall)": int(df["Avg Points"].mean()),
        "Median Points (Overall)": int(df["Median Points"].median()),
        "Std Dev Points (Overall)": int(df["Std Dev Points"].mean()),
        "Max Points (Overall)": df["Max Points"].max(),
        "Min Points (Overall)": df["Min Points"].min()
    }

    # Visualization can give more insights about the dataset.
    df.set_index("Class")[["Max Points", "Min Points", "Avg Points", "Median Points"]].plot(kind='bar', figsize=(15, 7))
    plt.title("Point Cloud Statistics per Class")
    plt.ylabel("Number of Points")
    plt.grid(True, which='both', linestyle='--', linewidth=0.5)
    plt.show()

    return df, overall_stats

# Directory paths for the datasets
original_dir= "/home/ph517705/jupyterlab/ShapeNet/ShapeNet"
train_dir = "/home/ph517705/jupyterlab/ShapeNet60"
val_dir = "/home/ph517705/jupyterlab/ShapeNet_Validation20"
test_dir = "/home/ph517705/jupyterlab/ShapeNet_Test20"

# Gather statistics for each dataset (each call also shows a per-class bar chart)
original_stats_df, original_overall_stats = gather_statistics(original_dir)
train_stats_df, train_overall_stats = gather_statistics(train_dir)
val_stats_df, val_overall_stats = gather_statistics(val_dir)
test_stats_df, test_overall_stats = gather_statistics(test_dir)

# Print the statistics
print("original Set Statistics:\n", original_stats_df)
# This line used to be labelled "Training", which mislabelled the original-set numbers.
print("\nOverall Original Set Statistics:\n", original_overall_stats)


print("Training Set Statistics:\n", train_stats_df)
print("\nOverall Training Set Statistics:\n", train_overall_stats)

print("\nValidation Set Statistics:\n", val_stats_df)
print("\nOverall Validation Set Statistics:\n", val_overall_stats)

print("\nTest Set Statistics:\n", test_stats_df)
print("\nOverall Test Set Statistics:\n", test_overall_stats)

***
## 2. Data Loading & Dataset Preprocessing:
##### When dealing with point cloud data and deep learning models, preprocessing is an essential step to ensure that the data is in a format suitable for training. For PointNet, and 3D point cloud data in general, there are several preprocessing steps that can help improve model performance.

In [None]:
class ImprovedSpatialDataInterface:
    """
    File-level access to a directory tree of ``.txt`` point cloud files.

    The name of the directory that directly contains a file is used as its class
    label. Files and labels are both sorted, so the index -> file mapping and the
    label -> integer mapping are identical on every run and for every dataset split
    that contains the same class names. (Enumerating an unsorted set of strings, as
    done previously, yields a different numbering from one Python process to the next.)
    """

    def __init__(self, root_dir):
        """
        Initialize the interface with the root directory containing the point cloud data files.
        Arguments:
        - root_dir (str): Path to the root directory containing the point cloud data files.
        """
        self.root_dir = root_dir

        # Use glob to get all .txt files; glob gives no ordering guarantee, so sort.
        # NOTE: on-disk caches keyed by index that were written with the unsorted
        # order should be refreshed.
        self.all_files = sorted(glob.glob(os.path.join(root_dir, '**/*.txt'), recursive=True))

        # Deterministic label mapping: alphabetical class name -> consecutive integer
        labels = sorted({self.get_label(file_path) for file_path in self.all_files})
        self.label_mapping = {label: i for i, label in enumerate(labels)}
        self.inverse_label_mapping = {i: label for label, i in self.label_mapping.items()}  # Inverse mapping

    def get_label(self, file_path):
        """Return the name of the directory that directly contains ``file_path``."""
        # os.path handles the platform's separator, unlike splitting on '/'.
        return os.path.basename(os.path.dirname(file_path))

    def __len__(self):
        """Return the number of point cloud files found under the root directory."""
        return len(self.all_files)

    def get_file(self, index):
        """
        Return the spatial data from the file at the given index.
        Arguments:
        - index (int): index of the file to access.
        Returns:
        - dict: Dictionary containing the point cloud (float32 array of shape (N, 3),
          taken from the first three columns) and the numerical label.
        """
        file_path = self.all_files[index]
        # ndmin=2 keeps a file holding a single point as shape (1, 3) instead of (3,)
        point_cloud = np.loadtxt(file_path, delimiter=' ', usecols=(0, 1, 2), ndmin=2)
        point_cloud = point_cloud.astype(np.float32)  # Convert to float32
        label = self.get_label(file_path)
        numerical_label = self.label_mapping[label]
        return {"point_cloud": point_cloud, "label": numerical_label}

    def get_all_files(self):
        """Return all file paths in the interface."""
        return self.all_files


In [None]:
# Create the data interface at module level so that the cells below can reuse it.

# Initialize the interface with the dataset root directory

# Local paths for JupyterLab
# NOTE(review): this root (".../jupyterlab/Autoencoder/ShapeNet/ShapeNet") is not the same
# directory as `original_dir` or `train_dir` used for the statistics earlier in the file,
# even though the variable is named `train_data_interface` — confirm which data is intended.
train_data_interface = ImprovedSpatialDataInterface('/home/ph517705/jupyterlab/Autoencoder/ShapeNet/ShapeNet')



In [None]:
# This function should take care of the normalization step.
# Once the data has been loaded through the interface module > dataset class > dataloader, this function can be applied to each point cloud to ensure the dataset is normalized.

def normalize_point_cloud(point_cloud: np.ndarray) -> np.ndarray:
    """
    Normalize the given point cloud such that it's centered around the origin
    and scaled to fit within a unit sphere.

    The array is modified in place and also returned. A degenerate cloud whose points
    all coincide (including a single-point cloud) has zero radius; it is only centered,
    because scaling would divide by zero.

    Args:
    - point_cloud (np.ndarray): The input point cloud of shape (N, 3), dtype float32.

    Returns:
    - np.ndarray: The normalized point cloud of shape (N, 3) (the same array object).

    Raises:
    - AssertionError: If the dtype is not float32, the shape is not Nx3, or the
      scaled cloud does not end up with a maximum radius of 1.
    - ValueError: If the point cloud contains no points.
    """
    # Data Going In: Check if the data type is float32 for compatibility with GPUs
    assert point_cloud.dtype == np.float32, "Data In: Data type mismatch. Expected float32, but got {}.".format(point_cloud.dtype)

    # Check the shape before touching the data, so a bad input is never half-modified
    assert point_cloud.ndim == 2 and point_cloud.shape[1] == 3, "Shape mismatch. Expected Nx3 format, but got shape {}.".format(point_cloud.shape)

    if point_cloud.shape[0] == 0:
        raise ValueError("Cannot normalize an empty point cloud.")

    # Centering
    centroid = np.mean(point_cloud, axis=0)
    point_cloud -= centroid

    # Scaling
    furthest_distance = np.max(np.sqrt(np.sum(point_cloud**2, axis=1)))
    if furthest_distance == 0:
        # All points coincide: the centered cloud already lies inside the unit sphere.
        return point_cloud
    point_cloud /= furthest_distance

    # Check if the data is normalized to fit within the unit sphere
    # (a NaN radius, e.g. from NaN coordinates, also fails this check)
    max_distance = np.max(np.sqrt(np.sum(point_cloud**2, axis=1)))
    assert np.isclose(max_distance, 1.0, atol=1e-5), "Data normalization failed. Expected data to fit within unit sphere, but got max distance of {}.".format(max_distance)

    # Data Going Out: Check if the data type is float32 for compatibility with GPUs
    assert point_cloud.dtype == np.float32, "Data Out: Data type mismatch. Expected float32, but got {}.".format(point_cloud.dtype)

    return point_cloud


In [None]:
def jitter_point_cloud(point_cloud: np.ndarray, sigma: float = 0.05, clip: float = 0.5) -> np.ndarray:
    """
    Jitter the points in the point cloud with additional checks.

    Independent Gaussian noise is drawn for every coordinate from NumPy's global
    random generator, clipped, and added to the array in place.

    Args:
        point_cloud (np.ndarray): The input point cloud data of shape Nx3, dtype float32.
        sigma (float): Standard deviation of the Gaussian noise. Default is 0.05.
            ``sigma=0`` disables the jitter and leaves the points unchanged.
        clip (float): Values of noise are clipped to lie within the range [-clip, clip]. Default is 0.5.

    Returns:
        np.ndarray: The jittered point cloud (the same array object that was passed in).

    Raises:
        AssertionError: If the dtype is not float32, the shape is not Nx3, or the
            generated noise violates the clip limits.
    """
    # Check if the data type is float32
    assert point_cloud.dtype == np.float32, "Data In: type mismatch before jittering. Expected float32."

    # Check if the shape is Nx3
    assert point_cloud.ndim == 2 and point_cloud.shape[1] == 3, "Shape mismatch. Input should be in Nx3 format."

    jittered_data = np.clip(sigma * np.random.randn(*point_cloud.shape), -1 * clip, clip)

    if jittered_data.size:
        # All-zero noise is only suspicious when jitter was actually requested.
        if sigma != 0 and clip != 0:
            assert np.any(jittered_data != 0), "The jittered data contains only zeros."
        assert np.max(jittered_data) <= clip, "Some jitter values exceed the positive clip limit."
        assert np.min(jittered_data) >= -clip, "Some jitter values exceed the negative clip limit."

    # In-place add: the float64 noise is cast down to the cloud's float32 storage
    point_cloud += jittered_data

    # Post-jittering checks:

    # Check if the data type is still float32
    assert point_cloud.dtype == np.float32, "Data Out: type mismatch after jittering. Expected float32."

    # Check if the shape remains Nx3
    assert point_cloud.shape[1] == 3, "Shape mismatch after jittering. Expected Nx3 format."

    return point_cloud


In [None]:
def random_rotation(point_cloud: np.ndarray) -> np.ndarray:
    """
    Apply one random rotation about the z-axis to every point of the cloud.

    The angle is drawn uniformly from [0, 2*pi) using NumPy's global random
    generator, and the rotated coordinates are written back into the input array.

    Args:
        point_cloud (np.ndarray): Point cloud of shape Nx3 with dtype float32.

    Returns:
        np.ndarray: The same array object, now holding the rotated points.
    """
    # Input validation: float32 data in Nx3 layout
    assert point_cloud.dtype == np.float32, "Data In: type mismatch before rotation. Expected float32."
    assert point_cloud.shape[1] == 3, "Shape mismatch. Input should be in Nx3 format."

    # Draw the rotation angle and precompute its sine and cosine
    angle = np.random.uniform(0, 2*np.pi)
    cos_a = np.cos(angle)
    sin_a = np.sin(angle)

    # z-axis rotation matrix, kept in float32 so the product stays float32
    z_rotation = np.array(
        [
            [cos_a, -sin_a, 0],
            [sin_a, cos_a, 0],
            [0, 0, 1],
        ],
        dtype=np.float32,
    )

    # Points are row vectors, so they are multiplied from the left; write back in place
    rotated = np.dot(point_cloud, z_rotation)
    point_cloud[:] = rotated

    # Output validation: dtype and layout must be unchanged
    assert point_cloud.dtype == np.float32, "Data Out: type mismatch after rotation. Expected float32."
    assert point_cloud.shape[1] == 3, "Shape mismatch after rotation. Expected Nx3 format."

    return point_cloud


In [None]:
class PointCloudDataset(Dataset):
    """
    Map-style dataset that returns fixed-size point clouds and their labels.

    Each item is loaded through ``data_interface``, passed through the preprocessing
    functions in order, resized to exactly ``n_points`` points and returned as a
    float32 tensor together with its label.

    Caching notes:
        * Cached items are stored after preprocessing and resizing, so any random
          augmentation in ``preprocess_funcs`` is frozen until the cache is refreshed.
        * Cache files are named after the sample index only; use a separate
          ``cache_dir`` (or refresh the cache) when ``n_points``, the sampling method
          or the preprocessing functions change.
        * Cache files are read with ``pickle``; only use cache directories you trust.
    """

    def __init__(self, data_interface, preprocess_funcs=None, n_points=1024, indices=None, sampling_method='random_duplication', use_gpu=False, cache_dir=None):
        """
        Initialize the dataset for point cloud data.

        Args:
            data_interface: Interface to load the point cloud data. It must expose
                ``all_files`` and ``get_file(index)`` returning a dict with the keys
                ``"point_cloud"`` and ``"label"``.
            preprocess_funcs: List of preprocessing functions to apply on the point clouds.
            n_points: Number of points in each point cloud.
            indices: Indices (list, NumPy array, ...) to use for accessing the dataset.
                ``None`` selects every file; an empty sequence gives an empty dataset.
            sampling_method: Method for sampling or adjusting point clouds ('random_duplication' or 'fps_knn').
            use_gpu: Whether to move point clouds to GPU.
            cache_dir: Directory to cache preprocessed point clouds.
        """
        self.data_interface = data_interface
        self.preprocess_funcs = preprocess_funcs or []
        self.n_points = n_points
        # `indices or ...` raised on NumPy arrays (ambiguous truth value) and silently
        # replaced an empty list by the full dataset, so test for None explicitly.
        if indices is None:
            self.indices = list(range(len(self.data_interface.all_files)))
        else:
            # Plain ints keep cache file names stable whatever container was passed in
            self.indices = [int(i) for i in indices]
        self.sampling_method = sampling_method
        self.use_gpu = use_gpu
        self.cache_dir = cache_dir

        if cache_dir:
            os.makedirs(cache_dir, exist_ok=True)

    def __len__(self):
        """Return the number of selected samples."""
        return len(self.indices)

    def _farthest_point_sample(self, point_cloud, num_samples):
        """Sample num_samples points from point_cloud using farthest point sampling (FPS)."""
        # Distances are accumulated in float64 (NumPy's default for np.zeros)
        farthest_pts = np.zeros((num_samples, 3))
        farthest_pts[0] = point_cloud[np.random.choice(len(point_cloud))]
        distances = np.linalg.norm(point_cloud - farthest_pts[0], axis=1)

        for i in range(1, num_samples):
            # The next sample is the point farthest from everything chosen so far
            farthest_pts[i] = point_cloud[np.argmax(distances)]
            distances = np.minimum(distances, np.linalg.norm(point_cloud - farthest_pts[i], axis=1))

        # The samples are copies of input points, so casting back to the input dtype is exact
        return farthest_pts.astype(point_cloud.dtype, copy=False)

    def _random_sample(self, point_cloud):
        """Randomly sample n_points points, duplicating points only when there are too few."""
        num_points = point_cloud.shape[0]
        # A cloud that already has exactly n_points keeps every point (shuffled)
        # instead of being resampled with replacement.
        duplicate = num_points < self.n_points
        sampled_indices = np.random.choice(num_points, self.n_points, replace=duplicate)
        return point_cloud[sampled_indices]

    def _knn_augment(self, point_cloud):
        """
        Pad the point cloud up to n_points with copies of nearest-neighbour points.

        Returns the input unchanged when it already has at least n_points points.
        """
        num_points = point_cloud.shape[0]
        missing = self.n_points - num_points
        if missing <= 0:
            return point_cloud

        # scikit-learn refuses n_neighbors larger than the number of fitted samples
        k = min(missing, num_points)
        neighbors = NearestNeighbors(n_neighbors=k).fit(point_cloud)
        knn_indices = neighbors.kneighbors(point_cloud, return_distance=False).flatten()
        if len(knn_indices) < missing:
            # Very small clouds: cycle through the neighbour list until enough points exist
            knn_indices = np.resize(knn_indices, missing)
        additional_points = point_cloud[knn_indices[:missing]]
        return np.vstack((point_cloud, additional_points))

    def _adjust_size(self, point_cloud):
        """
        Adjust the size of the point cloud based on the sampling method.

        Raises:
            ValueError: If the point cloud is empty or the sampling method is unknown.
        """
        if len(point_cloud) == 0:
            raise ValueError("Cannot resize an empty point cloud.")

        if self.sampling_method == 'random_duplication':
            return self._random_sample(point_cloud)
        elif self.sampling_method == 'fps_knn':
            if len(point_cloud) > self.n_points:
                return self._farthest_point_sample(point_cloud, self.n_points)
            else:
                return self._knn_augment(point_cloud)[:self.n_points]
        else:
            raise ValueError(f"Unknown sampling method: {self.sampling_method}")

    def _save_cached_data(self, cache_path, data):
        """Save data to the cache."""
        with open(cache_path, 'wb') as f:
            pickle.dump(data, f)

    def _load_cached_data(self, cache_path):
        """Load data from the cache."""
        # NOTE: unpickling can execute arbitrary code; cache_dir must be trusted.
        with open(cache_path, 'rb') as f:
            return pickle.load(f)

    def _load_and_preprocess(self, actual_idx):
        """Load one sample, run the preprocessing functions and resize it to n_points."""
        data = self.data_interface.get_file(actual_idx)
        point_cloud = data["point_cloud"]
        label = data["label"]

        for func in self.preprocess_funcs:
            point_cloud = func(point_cloud)

        return self._adjust_size(point_cloud), label

    def __getitem__(self, idx):
        """
        Return ``(point_cloud, label)`` for position ``idx`` of the selected indices.

        ``point_cloud`` is a float32 tensor; it is read from the cache when a cache
        file exists, otherwise it is computed (and cached when caching is enabled).
        """
        actual_idx = self.indices[idx]
        cache_path = os.path.join(self.cache_dir, f'{actual_idx}.pkl') if self.cache_dir else None

        if cache_path and os.path.exists(cache_path):
            point_cloud, label = self._load_cached_data(cache_path)
        else:
            point_cloud, label = self._load_and_preprocess(actual_idx)

            if cache_path:
                self._save_cached_data(cache_path, (point_cloud, label))

        point_cloud = torch.tensor(point_cloud, dtype=torch.float32)
        if self.use_gpu:
            # NOTE(review): moving tensors to CUDA here presumably conflicts with
            # DataLoader worker processes (num_workers > 0) — confirm before enabling.
            point_cloud = point_cloud.cuda()

        return point_cloud, label

    def refresh_cache(self):
        """Refresh the cached data by reapplying preprocessing functions."""
        if self.cache_dir:
            total_files = len(self.indices)
            for idx, index in enumerate(self.indices):
                cache_path = os.path.join(self.cache_dir, f'{index}.pkl')
                self._save_cached_data(cache_path, self._load_and_preprocess(index))

                percent_complete = (idx + 1) / total_files * 100
                print(f"Refreshing Cache: {percent_complete:.2f}%", end='\r')

            print("Refreshing Cache: 100.00% - Completed")

    def delete_cache(self):
        """Delete all cached data."""
        if self.cache_dir:
            pbar = tqdm(total=len(self.indices), desc="Deleting Cache", leave=True)
            for idx in self.indices:
                cache_path = os.path.join(self.cache_dir, f'{idx}.pkl')
                if os.path.exists(cache_path):
                    os.remove(cache_path)
                pbar.update(1)
            pbar.close()


In [None]:
class PointCloudDataLoader:
    """Thin wrapper around ``torch.utils.data.DataLoader`` for point cloud datasets."""

    def __init__(self, dataset, batch_size=100, shuffle=True, num_workers=4, drop_last=True, track_indices=True):
        """
        Build the wrapped PyTorch DataLoader.

        Args:
            dataset: Dataset that provides the samples.
            batch_size: Number of samples in each batch.
            shuffle: If True, the sample order is reshuffled every epoch.
            num_workers: Number of worker subprocesses used for loading.
            drop_last: If True, a trailing incomplete batch is discarded.
            track_indices: If True, ``get_batches`` records the running batch number.
        """
        loader_options = {
            "batch_size": batch_size,
            "shuffle": shuffle,
            "num_workers": num_workers,
            "drop_last": drop_last,
        }

        self.dataset = dataset
        self.batch_size = batch_size
        self.track_indices = track_indices
        # Batch numbers seen by get_batches (only filled when tracking is enabled)
        self.batch_indices = []

        # All batching work is delegated to the standard PyTorch DataLoader
        self.data_loader = DataLoader(self.dataset, **loader_options)

    def __iter__(self):
        """Iterate directly over the wrapped DataLoader (no index tracking)."""
        return iter(self.data_loader)

    def __len__(self):
        """
        Number of batches produced per epoch.

        Returns:
            int: Length of the wrapped DataLoader.
        """
        return len(self.data_loader)

    def get_batches(self):
        """
        Yield the batches of one epoch, optionally recording their batch numbers.

        Yields:
            tuple: ``(point_clouds, labels)`` for each batch.
        """
        for batch_number, batch in enumerate(self.data_loader):
            clouds, targets = batch
            if self.track_indices:
                self.batch_indices.append(batch_number)
            yield clouds, targets

    def delete_cache(self):
        """Forward the request to the dataset's own ``delete_cache`` method."""
        self.dataset.delete_cache()


***
***
**<span style="color:red">[Coding Part]</span>**
### 3.2. Input Transformation Network

The input transformation network aims to learn an affine transformation matrix that aligns the input point cloud to a canonical space. This transformation makes the model more robust to different orientations of the input. The network is a mini-PointNet which outputs a \(3 \times 3\) transformation matrix.

This network has shared MLP layers (64, 128, 1024) followed by global max pooling and then two dense layers with output dimensions 512 and 256. A final dense layer produces the 9 values of the \(3 \times 3\) transformation matrix.



Below is a summary of the operations:

1. **Convolutional Layers**: The point cloud data is passed through three consecutive 1D convolutional layers with 64, 128, and 1024 filters, respectively. Each filter has a kernel size of 1.
2. **Batch Normalization**: After each convolution, batch normalization is applied to normalize the activations.
3. **ReLU Activation**: The ReLU activation function is used after each batch normalization.
4. **Max Pooling**: For each of the 1024 channels, the maximum value is taken across all N points, reducing the tensor from (batch_size, 1024, N) to (batch_size, 1024).
5. **Fully Connected Layers**: Three fully connected layers are used to map the 1024-dimensional vector to a 3x3 matrix.
6. **Identity Matrix Addition**: An identity matrix is added to the 3x3 matrix, ensuring that the transformation is close to an identity transformation at the beginning of training.

In [None]:
class InputTransformationNetwork(nn.Module):
    """
    Input T-Net: predicts a 3x3 matrix per point cloud and applies it to the points.

    Pipeline: shared per-point layers (Conv1d -> BatchNorm -> ReLU) with 64, 128 and
    1024 channels, max pooling over the points, two fully connected layers (512, 256)
    and a final linear layer that outputs the 9 matrix entries, to which an identity
    matrix is added.

    Differences from the previous revision (parameter names are unchanged, but
    existing checkpoints will produce different outputs):
        * No ReLU on the final layer. Clamping the predicted offsets to >= 0 made
          negative matrix entries, and therefore rotations, impossible.
        * Batch normalization is applied before the ReLU, matching the layer order
          used by the encoder's shared MLPs.
        * The final layer starts at zero, so a freshly built network applies exactly
          the identity transformation.
    """

    def __init__(self, n_points):
        """
        Args:
            n_points: Number of points per cloud. Not used by the layers (they are
                independent of N); kept for interface compatibility.
        """
        super(InputTransformationNetwork, self).__init__()

        # Shared per-point MLP, implemented as 1x1 convolutions
        self.conv1 = nn.Conv1d(3, 64, 1)
        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 1024, 1)

        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)

        # Regression head on the pooled global feature
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 9)

        self.bn_fc1 = nn.BatchNorm1d(512)
        self.bn_fc2 = nn.BatchNorm1d(256)

        # Learnable base matrix of shape (1, 3, 3), initialised to the identity
        self.transform = nn.Parameter(torch.eye(3).float().unsqueeze(0).repeat(1, 1, 1), requires_grad=True)

        # Start from a zero offset so the initial transformation is exactly the identity
        nn.init.zeros_(self.fc3.weight)
        nn.init.zeros_(self.fc3.bias)

    def forward(self, points):
        """
        Args:
            points: Tensor of shape (B, N, 3).

        Returns:
            tuple: ``(transformed_points, transform)`` with shapes (B, N, 3) and (B, 3, 3).
        """
        # Conv1d expects channels first: (B, 3, N). transpose() does not modify `points`.
        features = points.transpose(1, 2).contiguous()

        # Shared MLP: convolution -> batch normalization -> ReLU
        features = torch.relu(self.bn1(self.conv1(features)))
        features = torch.relu(self.bn2(self.conv2(features)))
        features = torch.relu(self.bn3(self.conv3(features)))

        # Max pooling across the N dimension (points) -> (B, 1024)
        features = torch.max(features, 2, keepdim=True)[0]
        features = features.reshape(-1, 1024)

        # Fully connected layers with batch normalization and ReLU activation
        features = torch.relu(self.bn_fc1(self.fc1(features)))
        features = torch.relu(self.bn_fc2(self.fc2(features)))
        # No activation here: the matrix offsets must be able to take either sign
        features = self.fc3(features)

        # Reshape the output to form the transformation matrix and add the identity matrix
        transform = features.reshape(-1, 3, 3) + self.transform

        # Apply the transformation to the original input: (B, 3, 3) x (B, 3, N)
        transformed_points = torch.bmm(transform, points.transpose(1, 2))
        transformed_points = torch.transpose(transformed_points, 1, 2)

        return transformed_points, transform


---

### 3.3. Feature Transformation Network

The Feature Transform Network is conceptually similar to the Input Transform Network. However, while the Input Transform Network outputs a transformation matrix for the input point cloud (ensuring spatial invariance), the Feature Transform Network outputs a transformation matrix for the features (ensuring the network learns more discriminative features).


#### Feature Transform Network Overview:

- **Objective**: To learn an affine transformation for the feature space.
- **Architecture**:
    - A series of shared MLPs similar to the Input Transform Network.
    - Max-pooling layer.
    - Fully connected layers.
    - The output is a transformation matrix. However, unlike the Input Transform Network, which outputs a \(3 \times 3\) matrix, the Feature Transform Network outputs a larger matrix whose size matches the number of feature channels, in our case \(64 \times 64\).

    This module operates in a similar manner to the input T-net, nothing special.
  


In [None]:
class FeatureTransformationNetwork(nn.Module):
    """
    Feature T-Net: predicts a 64x64 matrix per sample and applies it to the
    64-channel per-point features.

    Pipeline: shared per-point layers (Conv1d -> BatchNorm -> ReLU) with 128 and 1024
    channels, max pooling over the points, two fully connected layers (512, 256) and
    a final linear layer that outputs the 4096 matrix entries, to which an identity
    matrix is added.

    Differences from the previous revision (parameter names are unchanged, but
    existing checkpoints will produce different outputs):
        * No ReLU on the final layer, so the predicted offsets may be negative.
        * Batch normalization is applied before the ReLU, matching the layer order
          used by the encoder's shared MLPs.
        * The final layer starts at zero, so a freshly built network applies exactly
          the identity transformation.
    """

    def __init__(self, n_points):
        """
        Args:
            n_points: Number of points per cloud. Not used by the layers (they are
                independent of N); kept for interface compatibility.
        """
        super(FeatureTransformationNetwork, self).__init__()

        # Shared per-point MLP, implemented as 1x1 convolutions
        self.conv1 = nn.Conv1d(64, 128, 1)
        self.conv2 = nn.Conv1d(128, 1024, 1)

        self.bn1 = nn.BatchNorm1d(128)
        self.bn2 = nn.BatchNorm1d(1024)

        # Regression head on the pooled global feature
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 4096)  # Output size will be 64 * 64

        self.bn_fc1 = nn.BatchNorm1d(512)
        self.bn_fc2 = nn.BatchNorm1d(256)

        # Learnable base matrix of shape (1, 64, 64), initialised to the identity
        self.transform = nn.Parameter(torch.eye(64).float().unsqueeze(0).repeat(1, 1, 1), requires_grad=True)

        # Start from a zero offset so the initial transformation is exactly the identity
        nn.init.zeros_(self.fc3.weight)
        nn.init.zeros_(self.fc3.bias)

    def forward(self, features):
        """
        Args:
            features: Tensor of shape (B, N, 64).

        Returns:
            tuple: ``(transformed_features, transform)`` with shapes (B, N, 64) and (B, 64, 64).
        """
        # Transpose the tensor to match the expected shape for Conv1d (B, 64, N)
        features_transposed = features.transpose(1, 2)

        # Shared MLP: convolution -> batch normalization -> ReLU
        features = torch.relu(self.bn1(self.conv1(features_transposed)))
        features = torch.relu(self.bn2(self.conv2(features)))

        # Max pooling across the N dimension (points) -> (B, 1024)
        features = torch.max(features, 2, keepdim=True)[0]
        features = features.reshape(-1, 1024)

        # Fully connected layers with batch normalization and ReLU activation
        features = torch.relu(self.bn_fc1(self.fc1(features)))
        features = torch.relu(self.bn_fc2(self.fc2(features)))
        # No activation here: the matrix offsets must be able to take either sign
        features = self.fc3(features)

        # Reshape the output to form the transformation matrix and add the identity matrix
        transform = features.reshape(-1, 64, 64) + self.transform

        # Apply the transformation to the original features: (B, 64, 64) x (B, 64, N)
        transformed_features = torch.bmm(transform, features_transposed)
        transformed_features = torch.transpose(transformed_features, 1, 2)

        return transformed_features, transform


---

### 3.4. PointNet Encoder Network

The PointNet Encoder Network is responsible for extracting high-level global features from the input point cloud. It consists of multiple shared Multi-Layer Perceptrons (MLPs) that progressively increase the feature dimensionality, followed by a max-pooling operation to obtain a global feature vector. This global feature is then used for downstream tasks such as classification or segmentation.

#### PointNet Encoder Network Overview:

- **Objective**: To extract a global feature vector from the input point cloud that captures the overall shape information.
- **Architecture**:
    - **Shared MLPs**: The network uses two sequential shared MLP blocks.
        - **First Shared MLP**: This block consists of two Conv1d layers that take the input point cloud and map it to a higher-dimensional feature space, typically \(3 \rightarrow 64 \rightarrow 64\).
        - **Second Shared MLP**: This block further processes the features through three Conv1d layers, mapping from the feature space to an even higher-dimensional space, typically \(64 \rightarrow 64 \rightarrow 128 \rightarrow 1024\).
    - **Feature Transformation (Optional)**: If the T-Net is enabled, a feature transformation network is applied after the first shared MLP to learn a transformation matrix for the feature space.
    - **Max-Pooling**: After the second shared MLP, a max-pooling layer is applied across all points to obtain the global feature vector, which is typically of size 1024.

    The PointNet Encoder is a crucial component of the architecture, ensuring that the network learns to capture global geometric information from the input point cloud.

---


In [None]:
class PointNetEncoder(nn.Module):
    """
    PointNet encoder: maps a point cloud of shape (B, N, 3) to a 1024-dimensional
    global feature, optionally passing through the input and feature T-Nets.
    """

    def __init__(self, n_points, use_t_net=False):
        """
        Args:
            n_points: Number of points per cloud; forwarded to the T-Nets.
            use_t_net: If True, the input and feature transformation networks are
                created and used in ``forward``.
        """
        super().__init__()
        self.use_transform_networks = use_t_net

        def conv_bn_relu(in_channels, out_channels):
            # One shared-MLP stage: pointwise convolution, batch norm, ReLU
            return [
                nn.Conv1d(in_channels, out_channels, 1),
                nn.BatchNorm1d(out_channels),
                nn.ReLU(),
            ]

        # Sub-modules are created in pipeline order:
        # input T-Net -> first shared MLP -> feature T-Net -> second shared MLP
        if use_t_net:
            self.input_transform = InputTransformationNetwork(n_points)

        # First shared MLP: 3 -> 64 -> 64
        self.shared_mlp1 = nn.Sequential(
            *conv_bn_relu(3, 64),
            *conv_bn_relu(64, 64),
        )

        if use_t_net:
            self.feature_transform = FeatureTransformationNetwork(n_points)

        # Second shared MLP: 64 -> 64 -> 128 -> 1024
        self.shared_mlp2 = nn.Sequential(
            *conv_bn_relu(64, 64),
            *conv_bn_relu(64, 128),
            *conv_bn_relu(128, 1024),
        )

    @staticmethod
    def _apply_pointwise(mlp, per_point):
        """Run a Conv1d stack on a (B, N, C) tensor and return a (B, N, C') tensor."""
        channels_first = per_point.transpose(1, 2).contiguous()
        return mlp(channels_first).transpose(1, 2).contiguous()

    def forward(self, points):
        """
        Args:
            points: Tensor of shape (B, N, 3).

        Returns:
            tuple: ``(global_feature, (input_trans, feature_trans))``. The global
            feature has shape (B, 1024); each transform is ``None`` when the T-Nets
            are disabled.
        """
        input_trans = None
        feature_trans = None

        # Optional alignment of the raw coordinates
        if self.use_transform_networks:
            points, input_trans = self.input_transform(points)

        # Per-point features, kept as (B, N, 64) between stages
        features = self._apply_pointwise(self.shared_mlp1, points)

        # Optional alignment in feature space
        if self.use_transform_networks:
            features, feature_trans = self.feature_transform(features)

        # Per-point features of shape (B, N, 1024)
        features = self._apply_pointwise(self.shared_mlp2, features)

        # Max over the point axis gives the global feature
        global_feature = features.max(dim=1)[0]

        return global_feature, (input_trans, feature_trans)


### 3.4. PointNet Decoder

The `PointNetDecoder` is the final part of the PointNet-based autoencoder architecture. It takes the global feature vector generated by the encoder and reconstructs the original point cloud. This step is crucial in ensuring that the encoded features retain enough information to accurately reconstruct the original input.

#### PointNet Decoder Overview:

- **Objective**: To reconstruct the original point cloud from the global feature vector.
- **Architecture**:
    - **Fully Connected Layers**: 
        - The decoder consists of three fully connected layers. The first two layers use ReLU activations and Dropout for regularization.
        - The first layer (`fc1_layer`) takes the 1024-dimensional global feature vector (matching the encoder's output) and reduces it to 512 dimensions.
        - The second layer (`fc2_layer`) further reduces the dimensionality to 256.
        - The final layer (`fc3_layer`) expands the 256-dimensional vector back to the original number of points, each with 3 coordinates (x, y, z).
    - **Reshaping**: The output is reshaped to match the original point cloud dimensions (batch size, number of points, 3).

In [None]:
class PointNetDecoder(nn.Module):
    """
    Fully connected decoder that turns a global feature vector into a point cloud.

    The network is three linear layers (latent_dim -> 512 -> 256 -> num_points * 3).
    The first two layers are followed by ReLU and Dropout; the last layer is linear.

    Args:
        num_points (int): Number of points to generate per cloud.
        latent_dim (int): Size of the incoming global feature vector. Defaults to
            1024, the width this decoder was originally hard-wired to.
        dropout_p (float): Dropout probability after each hidden layer (default 0.3).
    """

    def __init__(self, num_points, latent_dim=1024, dropout_p=0.3):
        super().__init__()
        self.num_points = num_points
        self.latent_dim = latent_dim

        # Attribute names and the Sequential layout are kept unchanged so that
        # state_dict keys (fc1_layer.0.*, fc2_layer.0.*, fc3_layer.*) still load.
        self.fc1_layer = nn.Sequential(
            nn.Linear(latent_dim, 512),
            nn.ReLU(),
            nn.Dropout(p=dropout_p),
        )
        self.fc2_layer = nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(p=dropout_p),
        )
        self.fc3_layer = nn.Linear(256, num_points * 3)

    def forward(self, global_feature):
        """
        Decode a batch of global features.

        Args:
            global_feature (torch.Tensor): Tensor whose last dimension is latent_dim.

        Returns:
            torch.Tensor: Tensor viewed as (-1, num_points, 3).
        """
        x = self.fc1_layer(global_feature)
        x = self.fc2_layer(x)
        x = self.fc3_layer(x)
        # Split the flat num_points * 3 output into one (x, y, z) triple per point
        return x.view(-1, self.num_points, 3)


### 3.5. Chamfer Loss Function

The Chamfer loss is a widely used metric in point cloud reconstruction tasks. It measures the dissimilarity between two point clouds: for every point in one cloud it takes the distance to the nearest point in the other cloud, averages those distances, and does the same in the opposite direction.

#### Chamfer Loss Overview:

- **Objective**: To quantify how close the predicted point cloud is to the target (original) point cloud by evaluating the nearest point distances between the two sets.
- **Procedure**:
  - **Pairwise Distance Calculation**: The loss function computes the L2 (Euclidean) distance between each point in the predicted point cloud and each point in the target point cloud.
  - **Nearest Point Search**:
    - For each point in the predicted cloud, the nearest point in the target cloud is identified by finding the minimum distance.
    - Similarly, for each point in the target cloud, the nearest point in the predicted cloud is identified.
  - **Loss Calculation**: The Chamfer loss is the mean of the prediction-to-target minimum distances plus the mean of the target-to-prediction minimum distances. It ensures that every point in the predicted cloud is close to some point in the target cloud and vice versa.

#### Key Features:

- **Symmetry**: The Chamfer loss treats the two point clouds symmetrically, penalizing both missed points in the prediction and extraneous points not present in the target.
- **Applicability**: This loss is particularly useful in tasks involving point cloud generation and reconstruction, as it directly measures the spatial discrepancy between two sets of points.

This loss function helps the model learn to reconstruct point clouds that closely resemble the original input, making it a crucial component in training 3D autoencoders and generative models.


In [None]:
def chamfer_loss(pred, target):
    """
    Symmetric Chamfer distance between two batches of point clouds.

    Args:
    - pred (torch.Tensor): Predicted clouds, shape (batch_size, N, 3)
    - target (torch.Tensor): Reference clouds, shape (batch_size, M, 3)

    Returns:
    - loss (torch.Tensor): Scalar equal to the mean nearest-neighbour distance
      from pred to target plus the mean nearest-neighbour distance from target
      to pred (a sum of two means, both taken over the whole batch).
    """
    # Broadcasting (B, N, 1, 3) against (B, 1, M, 3) yields every pred/target offset
    offsets = pred[:, :, None, :] - target[:, None, :, :]

    # Euclidean length of each offset -> (B, N, M) distance table
    pairwise = offsets.norm(dim=3)

    # Row-wise minimum: closest target point for each predicted point
    pred_to_target = pairwise.min(dim=2).values

    # Column-wise minimum: closest predicted point for each target point
    target_to_pred = pairwise.min(dim=1).values

    return pred_to_target.mean() + target_to_pred.mean()


### 3.6. PointNet Autoencoder

The PointNetAutoencoder is a neural network architecture designed to learn a compressed representation of 3D point clouds through unsupervised learning. It consists of an encoder and a decoder, where the encoder maps the input point cloud to a global feature vector, and the decoder reconstructs the original point cloud from this feature vector.

#### PointNetAutoencoder Overview:

- **Objective**: To reconstruct a 3D point cloud from a compressed feature representation, enabling the model to learn meaningful and compact embeddings of point cloud data.
- **Architecture**:
  - **Encoder**: 
    - Uses the `PointNetEncoder` module, which processes the input point cloud and outputs a global feature vector along with optional transformation matrices (if T-Net is used).
    - The encoder captures the essential features of the point cloud and encodes them into a fixed-size vector.
  - **Decoder**:
    - Uses the `PointNetDecoder` module, which takes the global feature vector and reconstructs the original point cloud.
    - The decoder learns to reverse the encoding process, transforming the compressed feature vector back into a spatial representation of the point cloud.
- **Functionality**:
  - **Forward Pass**: The input point cloud is first passed through the encoder, producing a global feature and transformation matrices. The global feature is then fed into the decoder, which outputs the reconstructed point cloud.
  - **Loss Function**: The reconstruction loss, typically the Chamfer loss, is used to measure the difference between the original and reconstructed point clouds, guiding the model's learning.

#### Key Features:

- **Compression**: The model learns a compressed, latent representation of the input point cloud, which can be used for various downstream tasks like classification, segmentation, or generation.
- **Transformations**: Optionally, T-Nets can be used to apply spatial transformations to the input and feature space, helping the model learn more invariant and discriminative representations.

This autoencoder architecture is a powerful tool for learning meaningful representations of 3D point clouds in an unsupervised manner, enabling tasks such as dimensionality reduction, feature extraction, and point cloud generation.


In [None]:
class PointNetAutoencoder(nn.Module):
    """
    Point cloud autoencoder: a PointNetEncoder followed by a PointNetDecoder.

    Args:
        num_points (int): Number of points in each point cloud (default 1024).
        use_t_net (bool): Forwarded to the encoder to switch its T-Nets on or off.
    """

    def __init__(self, num_points=1024, use_t_net=False):
        super().__init__()
        # Encoder is built before the decoder, as in the original definition order
        self.encoder = PointNetEncoder(n_points=num_points, use_t_net=use_t_net)
        self.decoder = PointNetDecoder(num_points=num_points)

    def forward(self, points):
        """
        Encode the input clouds and decode them again.

        Args:
            points (torch.Tensor): Input point clouds, shape (batch_size, num_points, 3).

        Returns:
            tuple: (reconstruction, transformations) where reconstruction comes from
            the decoder and transformations is the second value returned by the encoder.
        """
        latent, transformations = self.encoder(points)
        return self.decoder(latent), transformations


### Autoencoder Training Loop

The `CustomPointNetAutoencoderTrainer` class is responsible for managing the training process of the PointNet Autoencoder model. This trainer handles the core aspects of training, including forward and backward passes, loss calculation, model updates, and logging.

#### Key Components of the Trainer:

1. **Initialization (`__init__` method):**
   - **Model Instance**: The autoencoder model to be trained.
   - **Optimizer**: The optimizer instance used to update model weights based on the computed gradients.
   - **Learning Rate Scheduler**: Dynamically adjusts the learning rate during training.
   - **Computation Device**: Specifies whether training will be done on a GPU (`cuda`) or CPU.
   - **Training Loader**: DataLoader for the training dataset, providing batches of point clouds.
   - **Cache Management**: The `refresh_cache` and `refresh_interval` options are stored on the trainer; the `train_model` loop shown below does not act on them.
   - **Run Directory**: Directory to store model checkpoints, logs, and other outputs.
   - **Seed**: Optional random seed for reproducibility.

2. **Training Loop (`train_model` method):**
   - **Epochs**: The training process iterates over a specified number of epochs.
   - **Data Preparation**: Each batch of point clouds is moved to the computation device.
   - **Forward Pass**: The model encodes and decodes the input point clouds.
   - **Loss Calculation**: The Chamfer loss is computed between the predicted and target point clouds.
   - **Backward Pass**: Gradients are calculated, and the optimizer updates the model parameters.
   - **Learning Rate Adjustment**: The learning rate scheduler adjusts the learning rate at the end of each epoch.
   - **Logging and Saving**:
     - **Epoch Time**: The time taken for each epoch is recorded.
     - **Training Loss**: The average training loss for each epoch is calculated and logged.
     - **Best Model Saving**: The model state is saved whenever a new lowest training loss is observed.

3. **Completion**:
   - **Total Training Time**: The total time taken for the training process is logged.

This training loop is essential for iteratively refining the model, ensuring that it learns to accurately reconstruct point clouds from their encoded representations.


In [None]:
class CustomPointNetAutoencoderTrainer:
    """
    Training driver for the PointNet autoencoder using the Chamfer loss.

    The best model (lowest average training loss so far) is checkpointed as
    'best_autoencoder_model.pth' inside `run_dir` when a run directory is given,
    and in the current working directory otherwise.
    """

    def __init__(
        self, model_instance, optim_instance, lr_scheduler, computation_device, 
        training_loader, refresh_cache=False, 
        refresh_interval=10, run_dir=None, seed=None
    ):
        """
        Initialize the trainer with all necessary components.

        Args:
            model_instance: The autoencoder model to be trained.
            optim_instance: The optimizer to be used for training.
            lr_scheduler: Learning rate scheduler, stepped once per epoch.
            computation_device: The device to perform training on (e.g., 'cuda').
            training_loader: DataLoader yielding (point_clouds, label) batches.
            refresh_cache: Stored only; not read by train_model.
            refresh_interval: Stored only; not read by train_model.
            run_dir: Directory that receives the best-model checkpoint.
            seed: Stored only; not read by train_model.
        """
        self.model_instance = model_instance
        self.training_loader = training_loader
        self.optim_instance = optim_instance
        self.lr_scheduler = lr_scheduler
        self.computation_device = computation_device
        self.refresh_cache = refresh_cache
        self.refresh_interval = refresh_interval
        self.run_dir = run_dir  # Directory to save outputs for this run
        self.seed = seed
        
        # Track the best training loss for saving the best model
        self.best_train_loss = float('inf')
        self.epoch_times = []

    def _best_model_path(self):
        """Return the path the best-model checkpoint is written to."""
        filename = 'best_autoencoder_model.pth'
        if not self.run_dir:
            # No run directory configured: keep writing to the working directory
            return filename
        return os.path.join(self.run_dir, filename)

    def train_model(self, epoch_count):
        """
        Train the autoencoder model for a specified number of epochs.

        Args:
            epoch_count: Number of epochs to train the model.

        Raises:
            ValueError: If the training loader yields no batches.
        """
        num_batches = len(self.training_loader)
        if num_batches == 0:
            # Without this guard the per-epoch average would divide by zero
            raise ValueError("training_loader yields no batches; nothing to train on.")

        checkpoint_path = self._best_model_path()
        if self.run_dir:
            # Make sure the checkpoint destination exists before the first save
            os.makedirs(self.run_dir, exist_ok=True)

        total_start_time = time.time()
        
        for epoch in tqdm(range(epoch_count), desc="Epochs"):
            epoch_start_time = time.time()
            self.model_instance.train()  # Set the model to training mode
            
            running_loss = 0.0
            for point_clouds, _ in self.training_loader:
                # Move data to the computation device (GPU or CPU)
                point_clouds = point_clouds.to(self.computation_device)
                
                # Zero the parameter gradients
                self.optim_instance.zero_grad()
                
                # Forward pass: the T-Net transformations are not used by the loss
                reconstructed_points, _ = self.model_instance(point_clouds)
                
                # Reconstruction target is the input itself
                loss = chamfer_loss(reconstructed_points, point_clouds)
                
                # Backward pass: Compute gradients and update parameters
                loss.backward()
                self.optim_instance.step()
                
                running_loss += loss.item()
        
            # Adjust learning rate based on the scheduler (once per epoch)
            self.lr_scheduler.step()
            
            # Average of the per-batch losses for this epoch
            avg_train_loss = running_loss / num_batches
            
            # Record how long the epoch took
            epoch_end_time = time.time()
            self.epoch_times.append(epoch_end_time - epoch_start_time)
            
            print(f"Epoch [{epoch + 1}/{epoch_count}], Train Loss: {avg_train_loss:.4f}")
            
            # Checkpoint whenever the epoch average improves on the best seen so far
            if avg_train_loss < self.best_train_loss:
                self.best_train_loss = avg_train_loss
                torch.save(self.model_instance.state_dict(), checkpoint_path)
        
        total_end_time = time.time()
        print(f"Training completed in {(total_end_time - total_start_time) / 60:.2f} minutes.")


### Autoencoder Training Initialization and Execution

The function `init_and_run_autoencoder_training` is designed to initialize and execute the training process for the PointNet Autoencoder model. This function streamlines the setup of the model, optimizer, scheduler, and training loop, and then launches the training process.

#### Key Components:

1. **Computation Device Setup:**
   - **Device Selection**: The function checks for the availability of a GPU (`cuda`) and sets the computation device accordingly. If no GPU is available, it defaults to the CPU.

2. **Random Seed Setup:**
   - **Seed Initialization**: If a random seed is provided, it ensures that the seed is set for PyTorch and CUDA. This is crucial for achieving reproducibility in experiments by ensuring that the model's training process behaves consistently across runs.

3. **Model Initialization:**
   - **PointNetAutoencoder**: The autoencoder model is initialized with the specified number of points and whether to use T-Net modules.

4. **Optimizer and Scheduler Setup:**
   - **Optimizer**: Adam optimizer is used to update the model's parameters during training. It is initialized with the specified learning rate.
   - **Learning Rate Scheduler**: The scheduler is responsible for adjusting the learning rate at specified intervals (`step_sz`) by the decay factor. This helps in controlling the learning rate dynamically, allowing for more effective training.

5. **DataLoader Initialization:**
   - **Training DataLoader**: The DataLoader is set up to handle the batch processing of the training dataset. It ensures that the data is efficiently loaded and shuffled for each epoch.

6. **Trainer Initialization:**
   - **CustomPointNetAutoencoderTrainer**: The trainer is initialized with the model, optimizer, scheduler, device, DataLoader, and other necessary configurations. This trainer handles the training process, logging, and model saving.

7. **Training Execution:**
   - **Training Process**: The trainer's `train_model` method is called to start the training process. It runs for the specified number of epochs, continuously improving the autoencoder model.

8. **Return Value:**
   - **Trainer Instance**: The function returns the trainer instance, allowing for further inspection or usage after the training process is complete.

This function encapsulates the entire training process, making it easy to set up and run experiments with the PointNet Autoencoder model.


In [None]:
def init_and_run_autoencoder_training(
    train_dataset, 
    batch_sz=100, 
    epochs=50, 
    points=2048, 
    learning_rate=0.001, 
    step_sz=20, 
    decay_factor=0.7, 
    compute_device="cuda", 
    use_t_net=False, 
    run_dir=None, 
    seed=None
):
    """
    Initialize and run the autoencoder training process.

    Args:
        train_dataset: Dataset for training the autoencoder.
        batch_sz: Batch size for training.
        epochs: Number of epochs to train the model.
        points: Number of points in each point cloud.
        learning_rate: Learning rate for the Adam optimizer.
        step_sz: Number of epochs between StepLR decays.
        decay_factor: StepLR gamma (multiplier applied at each decay).
        compute_device: Device to run the training on (e.g., 'cuda'); falls back
            to 'cpu' when CUDA is not available.
        use_t_net: Whether T-Nets are used in the model.
        run_dir: Directory to save outputs and logs.
        seed: Random seed for reproducibility. When given, the Python, NumPy and
            PyTorch (CPU and CUDA) generators are all seeded.

    Returns:
        trainer: The trainer instance used for training.
    """
    # Use the requested device only if CUDA is actually present
    device = torch.device(compute_device if torch.cuda.is_available() else "cpu")

    # Set random seed if provided
    if seed is not None:
        # Seed the Python and NumPy generators as well as torch, so that any
        # randomness outside torch is repeatable too.
        # NOTE(review): presumably the dataset's jitter/rotation preprocessing
        # draws from NumPy or `random` — confirm against those functions.
        random.seed(seed)
        np.random.seed(seed % (2 ** 32))  # NumPy only accepts seeds in [0, 2**32 - 1]
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    # Initialize the PointNetAutoencoder model
    autoencoder_model = PointNetAutoencoder(num_points=points, use_t_net=use_t_net)
    autoencoder_model.to(device)

    # Initialize the optimizer
    optimizer = torch.optim.Adam(autoencoder_model.parameters(), lr=learning_rate)

    # Multiply the learning rate by decay_factor every step_sz epochs
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_sz, gamma=decay_factor)

    # Shuffle so that batches differ from epoch to epoch
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_sz, shuffle=True)

    # The trainer only receives a training loader (no validation or test data)
    trainer = CustomPointNetAutoencoderTrainer(
        model_instance=autoencoder_model, 
        optim_instance=optimizer, 
        lr_scheduler=scheduler, 
        computation_device=device, 
        training_loader=train_dataloader, 
        run_dir=run_dir,
        seed=seed
    )

    # Run the training
    trainer.train_model(epochs)

    # Return the trainer so callers can read the model, best loss and epoch times
    return trainer


### Autoencoder Hyperparameter Testing Framework

The `AutoencoderHyperparameterTester` class is designed to facilitate a structured and organized hyperparameter search for training autoencoder models. It provides a systematic way to explore different configurations by managing directories, saving models and metrics, and running training processes for each hyperparameter set.

#### Key Components:

1. **Initialization of the Tester:**
   - **Base Output Directory**: The class is initialized with a base output directory where all results, logs, and model states for each run will be stored. This ensures that each hyperparameter configuration has its results neatly organized in a separate directory.

2. **Run Directory Creation:**
   - **Distinct Run Directories**: For each hyperparameter configuration, a new run directory is created. These directories are sequentially named (`autoencoder_run_1`, `autoencoder_run_2`, etc.), ensuring that the results from different runs do not mix and are easily traceable.

3. **Saving Metrics and Model States:**
   - **Metrics Storage**: The tester saves metrics, such as the best training loss and epoch times, in a JSON file within the run directory. This makes it easier to analyze the performance of different configurations after the experiments.
   - **Model State Saving**: The class also saves both the complete model state dictionary and the encoder's state dictionary separately. This allows for later retrieval of the trained model and the encoder, which can be used in other experiments or for further fine-tuning.

4. **Running Hyperparameter Search:**
   - **Hyperparameter Exploration**: The `run_hyperparameter_search` method iterates through a list of hyperparameter configurations, running a training process for each. The method extracts the relevant hyperparameters, sets up the training environment, and initializes the autoencoder model.
   - **Training Execution**: For each configuration, the method calls the `init_and_run_autoencoder_training` function, which manages the entire training process. After training, it stores the resulting model states and metrics in the respective run directory.

This framework enables efficient and organized experimentation with various hyperparameter configurations, ensuring that each run's results are properly recorded and accessible for analysis.


In [None]:
class AutoencoderHyperparameterTester:
    """
    Runs one autoencoder training per hyperparameter configuration and stores
    each run's weights and metrics in its own numbered sub-directory.
    """

    def __init__(self, base_output_dir):
        """
        Args:
            base_output_dir: Parent directory for all per-run sub-directories.
        """
        self.run_counter = 0
        self.base_output_dir = base_output_dir

    def create_run_directory(self):
        """
        Advance the run counter and create 'autoencoder_run_<counter>'.

        Returns:
            run_dir: Path of the directory for the new run.
        """
        self.run_counter += 1
        run_name = f"autoencoder_run_{self.run_counter}"
        run_dir = os.path.join(self.base_output_dir, run_name)
        os.makedirs(run_dir, exist_ok=True)
        return run_dir

    def save_metrics(self, run_dir, metrics):
        """
        Write the metrics dictionary to 'autoencoder_metrics.json' in run_dir.

        Args:
            run_dir: The directory of the current run.
            metrics: JSON-serializable dictionary of metrics.
        """
        destination = os.path.join(run_dir, 'autoencoder_metrics.json')
        with open(destination, 'w') as handle:
            json.dump(metrics, handle)

    def save_model_state_dict(self, run_dir, model_state_dict, encoder_state_dict):
        """
        Persist the full-model and encoder-only state dictionaries in run_dir.

        Args:
            run_dir: The directory of the current run.
            model_state_dict: The state dictionary of the entire model.
            encoder_state_dict: The state dictionary of the encoder.
        """
        # Full model first, then the encoder on its own
        artifacts = (
            ('autoencoder_model_state_dict.pth', model_state_dict),
            ('encoder_state_dict.pth', encoder_state_dict),
        )
        for filename, state in artifacts:
            torch.save(state, os.path.join(run_dir, filename))

    def run_hyperparameter_search(self, hyperparams_list, train_dataset, points=2048):
        """
        Train one autoencoder per configuration and save its outputs.

        Args:
            hyperparams_list: List of dictionaries, one per configuration. Keys that
                are missing fall back to the defaults listed below; other keys are ignored.
            train_dataset: The dataset for training.
            points: Number of points in each point cloud (default: 2048).
        """
        # Recognised hyperparameters and the value used when a config omits them
        fallback_values = {
            'batch_sz': 100,
            'epochs': 50,
            'learning_rate': 0.001,
            'step_sz': 20,
            'decay_factor': 0.7,
            'compute_device': "cuda",
            'use_t_net': False,
            'seed': None,
        }

        for hyperparams in hyperparams_list:
            run_dir = self.create_run_directory()

            settings = {
                key: hyperparams.get(key, default)
                for key, default in fallback_values.items()
            }

            trainer = init_and_run_autoencoder_training(
                train_dataset=train_dataset,
                points=points,
                run_dir=run_dir,
                **settings
            )

            # Weights as they are when training ends, plus the run's summary metrics
            trained_model = trainer.model_instance
            self.save_model_state_dict(
                run_dir,
                trained_model.state_dict(),
                trained_model.encoder.state_dict()
            )
            self.save_metrics(
                run_dir,
                {
                    'best_train_loss': trainer.best_train_loss,
                    'epoch_times': trainer.epoch_times
                }
            )


# Running the Training and Testing Process

### Step 1: Initialize the Hyperparameter Tester
Begin by setting up the `AutoencoderHyperparameterTester` with a specified directory to store the output of each run. This is where all the models, metrics, and logs will be saved.


In [None]:
# Initialize the hyperparameter tester for the autoencoder.
# Each run's outputs go into a numbered "autoencoder_run_<k>" sub-directory of this base directory.
autoencoder_tester = AutoencoderHyperparameterTester(base_output_dir="./autoencoder_hyperparam_runs")


### Step 2: Define the Hyperparameter Configurations
Prepare a set of hyperparameter configurations that you want to test.

In [None]:
autoencoder_hyperparams_list = [

    # The single configuration currently in the search: large batch, long schedule
    {
        'learning_rate': 0.0005,  # Adam learning rate (the search falls back to 0.001 if omitted)
        'batch_sz': 150,  # Batch size (fallback: 100)
        'epochs': 600,  # Number of training epochs (fallback: 50)
        'step_sz': 50,  # StepLR step size: decay the learning rate every 50 epochs
        'decay_factor': 0.7,  # StepLR gamma: multiply the learning rate by 0.7 at each decay
        'seed': 42,  # Random seed forwarded to the training function
        'delete_cache': True  # NOTE(review): run_hyperparameter_search never reads this key — confirm whether cache deletion is still intended
    },
]


### Step 3: Create the PointCloudDataset Instance

In this step, you initialize the `PointCloudDataset`, which is essential for feeding your training data into the autoencoder. The `PointCloudDataset` is designed to handle point cloud data effectively, applying necessary preprocessing steps and managing data caching.

#### Key Components:

- **Data Interface**: The dataset is initialized using a `data_interface` that provides access to your point cloud data.

- **Number of Points**: The `n_points` parameter specifies the number of points to sample from each point cloud. Here, it is set to 2048 points.

- **Cache Directory**: The `cache_dir` parameter defines where to store the preprocessed data. This cache speeds up training by avoiding repeated preprocessing of the same data.

- **Preprocessing Functions**: The `preprocess_funcs` parameter lists the preprocessing operations to apply to the point clouds. These include:
  - **Normalization**: Ensures that the point clouds are scaled uniformly.
  - **Jittering**: Adds small perturbations to the points to make the model more robust to noise.
  - **Random Rotation**: Applies random rotations to the point clouds to make the model invariant to orientation.

- **Sampling Method**: The `sampling_method` parameter specifies how points are sampled from the dataset. In this case, `'random_duplication'` is used, which might duplicate some points in the process, useful for handling uneven point distribution.

This step is crucial as it prepares the dataset for the training process, ensuring that the data fed into the autoencoder is consistent, well-preprocessed, and efficiently managed.


In [None]:
# Step 3: Create the PointCloudDataset instance used for training
train_dataset = PointCloudDataset(
    data_interface=train_data_interface, 
    n_points=2048,  # Number of points in each point cloud (same value as the search's default `points`)
    # NOTE(review): machine-specific absolute path — adjust when running on another system
    cache_dir='/home/ph517705/jupyterlab/Autoencoder/Cache',  # Specify the cache directory
    preprocess_funcs= [normalize_point_cloud, jitter_point_cloud, random_rotation],
    sampling_method='random_duplication'
)


### Step 4: Execute the Hyperparameter Tests
Run the training process using the defined hyperparameters. The `AutoencoderHyperparameterTester` will handle the execution of each configuration, save the results, and help compare the performance across different setups.

In [None]:
# Run the hyperparameter tests for the autoencoder.
# `points` is left at its default of 2048, which matches n_points of train_dataset.
autoencoder_tester.run_hyperparameter_search(autoencoder_hyperparams_list, train_dataset)
