In [1]:
# Install packages - python kernel must be version 3.12 or below !
# a lot of these packages are not needed for this notebook, but are taken from the original notebook

# ML packages

# torch
import torch # type: ignore
import torch.optim as optim # type: ignore
import torch.nn.functional as F # type: ignore
from torch import nn # type: ignore
from torch.utils.data import Dataset, DataLoader # type: ignore
from torch.utils.data import random_split  # type: ignore
from torchvision.transforms import ToTensor # type: ignore
from torch.utils.data import TensorDataset, DataLoader # type: ignore
import torchvision # type: ignore
import torchvision.transforms as transforms # type: ignore
from torchmetrics.regression import MeanAbsolutePercentageError  # type: ignore
from torchmetrics.regression import MeanSquaredError  # type: ignore
from torchmetrics.regression import MeanAbsoluteError  # type: ignore
from torchinfo import summary #type:ignore

# tuning
import ray # type: ignore
from ray import tune  # type: ignore
from ray import train  # type: ignore
from ray.tune import CLIReporter  # type: ignore
from ray.tune.schedulers import ASHAScheduler  # type: ignore
from ray.train import Checkpoint, get_checkpoint  # type: ignore
from ray.tune.schedulers import ASHAScheduler  # type: ignore
import ray.cloudpickle as pickle  # type: ignore


# linear algebra, array manip and data analysis
import numpy as np # type: ignore
import pandas as pd # type: ignore
import matplotlib.pyplot as plt  # type: ignore
from mpl_toolkits.mplot3d import Axes3D # type: ignore
from sklearn.model_selection import train_test_split # type: ignore
from sklearn.preprocessing import StandardScaler # type: ignore
from tabulate import tabulate # type: ignore

# misc
from functools import partial
import os
import tempfile
from pathlib import Path
import time


In [2]:
# import the data 
data = pd.read_csv('All_data.csv')
data.head()

Unnamed: 0,p1,p2,p3,x,y,z,roll,pitch,yaw
0,1170,732,1609,-0.444512,-0.326699,-0.119629,125.507812,66.115723,-20.544434
1,2099,1990,2400,-0.303887,-0.247598,-0.849121,114.916992,78.112793,-23.005371
2,65,549,795,-0.044609,0.121543,-0.080078,74.86084,85.759277,-71.696777
3,2493,403,56,0.535469,-1.214395,0.394531,-159.65332,32.036133,45.021973
4,967,2464,523,1.50666,0.723594,0.284668,-68.686523,19.445801,137.834473


In [3]:
# extrap data and create arrays
I = data[['x','y','z']].values
C = data[['p1','p2','p3']].values
# split the matrices into random train and test subjects

# splits the data into 80% training and 20% testing for both I and C
I_train,I_test,C_train,C_test=train_test_split(I,C,test_size=0.2,random_state=30)
# arrays (I,C) , split size (20%) , int in random state allows for same result each time func is called 

# splits the data again , out of the 80 (training) take another 20 for validation 
I_train,I_val,C_train,C_val=train_test_split(I_train,C_train,test_size=0.25,random_state=30)
# arrays (I and C train) , 25% , rand. int

# print the shape of the data to confirm the splits
print(f"I_train: {I_train.shape}, C_train: {C_train.shape}")
print(f"I_test: {I_test.shape}, C_test: {C_test.shape}")
print(f"I_val: {I_val.shape}, C_val: {C_val.shape}")

I_train: (10842, 3), C_train: (10842, 3)
I_test: (3615, 3), C_test: (3615, 3)
I_val: (3615, 3), C_val: (3615, 3)


In [4]:
# Normalise and conv into compatible data types

# Normalise the input features and target values
scaler_input = StandardScaler()
scaler_output = StandardScaler()


# Fit the scalers on the training data and transform all splits
I_train = scaler_input.fit_transform(I_train)
I_val = scaler_input.transform(I_val)
I_test = scaler_input.transform(I_test)

C_train = scaler_output.fit_transform(C_train)
C_val = scaler_output.transform(C_val)
C_test = scaler_output.transform(C_test)

# Convert to PyTorch tensors
I_train = torch.tensor(I_train, dtype=torch.float32)
I_val = torch.tensor(I_val, dtype=torch.float32)
I_test = torch.tensor(I_test, dtype=torch.float32)

C_train = torch.tensor(C_train, dtype=torch.float32)
C_val = torch.tensor(C_val, dtype=torch.float32)
C_test = torch.tensor(C_test, dtype=torch.float32)

In [5]:
# Define the MLP Regressor model

class MLPRegressor(nn.Module):
    def __init__(self, l1=120, l2=84, l3=10, activation='Tanh', dropout_prob=0.2): # set defaults here 
        super(MLPRegressor, self).__init__()

        # Dynamically choose the activation function based on config
        self.activation = getattr(nn, activation.capitalize(), nn.Tanh)() 

        # Define hidden layer params 
        self.hidden_layers = nn.Sequential(
            nn.Linear(3, l1), # input layer -> hidden
            self.activation, # acitviation function configured in search space
            nn.Dropout(p=dropout_prob), # drop out layer - probability config in search space
            nn.Linear(l1, l2), # hidden 1 -> 2
            self.activation,
            nn.Dropout(p=dropout_prob),
            nn.Linear(l2, l3), # hidden 2 -> 3
            self.activation,
            nn.Dropout(p=dropout_prob)
        )
        
        # Define output layer with 3 outputs
        self.output_layer = nn.Linear(l3, 3)  # hidden 3 -> output
        
    def forward(self, x):
        x = self.hidden_layers(x)
        return self.output_layer(x)

In [6]:
# Instantiate the models
model8 = torch.load("PR_RT_NR_T26.pickle",weights_only=False,map_location=torch.device('cpu'))

In [7]:
# Evaluation function
def eval(model, save_path=None):
    # Create the testing data loader
    test_loader = DataLoader(
        TensorDataset(I_test, C_test),
        batch_size=64,  # Adjust batch size if necessary
        shuffle=False
    )

    device = "cuda:0" if torch.cuda.is_available() else "cpu"  # Determine the device to use (GPU if available, else CPU)
    # Check for parallel computing ability
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)  # Use multiple GPUs if available
    # Assign the model to the chosen device
    model.to(device)  # Move the model to the chosen device

    # Put the model in evaluation mode
    model.eval()  # Set the model to evaluation mode

    # Initialize metrics
    metrics = {
        "MAPE": MeanAbsolutePercentageError().to(device),  # Mean Absolute Percentage Error
        "MSE": MeanSquaredError().to(device),  # Mean Squared Error
        "MAE": MeanAbsoluteError().to(device)  # Mean Absolute Error
    }

    # Initialize total metrics
    metrics_total = {name: 0.0 for name in metrics}  # Dictionary to store total metrics

    # Measure time for a single point
    single_point_time = 0.0  # Variable to store the average time per point

    # List to store predictions, targets, and inputs
    all_predictions = []  # List to store all predictions
    all_targets = []  # List to store all targets
    all_inputs = []  # List to store all processed inputs
    all_inputs_unprocessed = []  # List to store all unprocessed inputs

    # Evaluate on test data
    with torch.no_grad():  # Disable gradient calculation for evaluation
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)  # Move inputs and targets to the chosen device
            start_time = time.time()  # Start time
            outputs = model(inputs)  # Get model predictions
            end_time = time.time()  # End time
            single_point_time += (end_time - start_time) / len(inputs)  # Calculate average time per point
            for name, metric in metrics.items():
                metrics_total[name] += metric(outputs, targets).item()  # Update total metrics

            # Store predictions, targets, and inputs
            all_predictions.append(outputs.cpu().numpy())  # Store predictions
            all_targets.append(targets.cpu().numpy())  # Store targets
            all_inputs.append(inputs.cpu().numpy())  # Store processed inputs
            all_inputs_unprocessed.append(scaler_input.inverse_transform(inputs.cpu().numpy()))  # Store unprocessed inputs

    # Calculate average metrics
    metrics_avg = {name: total / len(test_loader) for name, total in metrics_total.items()}  # Calculate average metrics
    metrics_avg['Single Point Time (s)'] = single_point_time / len(test_loader)  # Add time metric

    # Save predictions, targets, and inputs to CSV if save_path is provided
    if save_path:
        predictions = np.concatenate(all_predictions, axis=0)  # Concatenate all predictions
        targets = np.concatenate(all_targets, axis=0)  # Concatenate all targets
        inputs_processed = np.concatenate(all_inputs, axis=0)  # Concatenate all processed inputs
        inputs_unprocessed = np.concatenate(all_inputs_unprocessed, axis=0)  # Concatenate all unprocessed inputs
        df = pd.DataFrame(np.hstack((predictions, targets, inputs_processed, inputs_unprocessed)),
                         columns=['pred_p1','pred_p2', 'pred_p3',
                                  'true_p1', 'true_p2', 'true_p3',
                                  'proc_x', 'proc_y', 'proc_z',
                                  'unproc_x', 'unproc_y', 'unproc_z'])  # Create DataFrame with all data
        df.to_csv(save_path, index=False)  # Save DataFrame to CSV

    return metrics_avg  # Return metrics for table display


In [8]:
# Table Printing for Multiple Models
models = {
          "Model 8": model8
          }

# Evaluate all models and save outputs
results = []  # List to store results
for name, model in models.items():  # Loop through all models
    metrics_avg = eval(model, save_path=f"{name}_predictions.csv")  # Get metrics for the model and save predictions
    results.append([name] + [f"{metrics_avg[metric]:.6f}" for metric in metrics_avg])  # Append to results list

# Header for the table
header = ["Model"] + list(metrics_avg.keys())

# Print the table
print(tabulate(results, headers=header, tablefmt="grid"))


+---------+---------+----------+----------+-------------------------+
| Model   |    MAPE |      MSE |      MAE |   Single Point Time (s) |
| Model 8 | 1.09312 | 0.163972 | 0.327286 |                 5.8e-05 |
+---------+---------+----------+----------+-------------------------+


In [9]:
# Choose a model to print information about
model_to_inspect = model8

# Print model architecture
print("Model Architecture:")
print(model_to_inspect)

# Print optimizer configuration
optimizer = optim.Adam(model_to_inspect.parameters())
print("\nOptimizer Configuration:")
print(optimizer)

# Print model summary
print("\nModel Summary:")
print(summary(model_to_inspect))


Model Architecture:
MLPRegressor(
  (activation): Tanh()
  (hidden_layers): Sequential(
    (0): Linear(in_features=3, out_features=2048, bias=True)
    (1): Tanh()
    (2): Dropout(p=0.2834286389916907, inplace=False)
    (3): Linear(in_features=2048, out_features=128, bias=True)
    (4): Tanh()
    (5): Dropout(p=0.2834286389916907, inplace=False)
    (6): Linear(in_features=128, out_features=4, bias=True)
    (7): Tanh()
    (8): Dropout(p=0.2834286389916907, inplace=False)
  )
  (output_layer): Linear(in_features=4, out_features=3, bias=True)
)

Optimizer Configuration:
Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 0
)

Model Summary:
Layer (type:depth-idx)                   Param #
MLPRegressor                             --
├─Tanh: 1-1                              --
├─Sequential: 1-2                        --
│

activation': 'Hardtanh', 'alpha': 0.006372252701961378, 'batch_size': 256, 'l1': 256, 'l2': 256, 'l3': 64, 'learning_rate_init': 0.001423761300766936, 'max_iter': 20000, 'tol': 0.0008054526187556726, 'momentum': 0.8856250076974486, 'validation_fraction': 0.17332561677021732, 'dropout_prob': 0.10063715392199685, 'optimiser': 'NAdam'

In [10]:
for name, model in models.items():
    print(f"Summary of {name}:")
    print(summary(model))
    print("\n" + "\033[91m" + "="*80 + "\033[0m" + "\n")  # Red color bar


Summary of Model 8:
Layer (type:depth-idx)                   Param #
MLPRegressor                             --
├─Tanh: 1-1                              --
├─Sequential: 1-2                        --
│    └─Linear: 2-1                       8,192
│    └─Tanh: 2-2                         --
│    └─Dropout: 2-3                      --
│    └─Linear: 2-4                       262,272
│    └─Tanh: 2-5                         --
│    └─Dropout: 2-6                      --
│    └─Linear: 2-7                       516
│    └─Tanh: 2-8                         --
│    └─Dropout: 2-9                      --
├─Linear: 1-3                            15
Total params: 270,995
Trainable params: 270,995
Non-trainable params: 0


