necessary imports, selecting the compute device, setting the seed, data loading and visualization

In [25]:
# necessary imports
import numpy as np 
import matplotlib.pyplot as plt 
import os 
import random 
import torch 
import pandas as pd  

##  user-tunable settings ##
# --- names and directories ---
dataset_dir_name = "datasets"  # folder the dataset is read from; the dataset plots are saved beneath it
dataset_filename = "amprs_10hz_36k.csv"  # csv file holding the dataset
data_plot_dir_name = "amprs_10hz_36k_plots"  # plot folder created inside dataset_dir_name
network_data_dir_name = "network_data"  # folder for the scalers and the network weights and biases

# --- column positions used to reference the dataset array ---
input_1_index, input_2_index, input_3_index = 0, 1, 2
midsection_x_index, midsection_y_index, midsection_z_index = 3, 4, 5
ee_x_index, ee_y_index, ee_z_index = 6, 7, 8
# start/stop pairs are used as slice bounds, so the stop column itself is excluded
input_start_index, input_stop_index = 0, 3
state_start_index, state_stop_index = 3, 9

# --- train, validation and testing fractions ---
train_percent, valid_percent, test_percent = 0.7, 0.15, 0.15

# --- number of past inputs / past states fed to the model ---
lag_input, lag_state = 0, 1

# --- nn configuration ---
num_hidden_layers = 0
hidden_units = 30
learning_rate_autoregressive = 0.0001


# --- compute device: prefer the GPU whenever pytorch can see one ---
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"using compute device: {device}")
if device.type == "cuda":
    print(f"GPU: {torch.cuda.get_device_name(0)}")

# setting the seeds 
def set_all_seeds(seed: int = 42):
    """
    seeds every random number generator used here: python's random module, numpy and pytorch (CPU & GPU)

    seed: integer handed unchanged to each generator
    """
    # python random module, numpy and pytorch (CPU), in that order
    for seeding_function in (random.seed, np.random.seed, torch.manual_seed):
        seeding_function(seed)

    # pytorch (GPU) - only touched when a CUDA device is visible
    gpu_visible = torch.cuda.is_available()
    if gpu_visible:
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        # ask cuDNN for deterministic kernels and switch off its auto-tuner
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    print(f"Seeds set to {seed}")

# base directory: the folder of this file when __file__ is defined, otherwise the current working directory
if "__file__" in globals():
    script_directory = os.path.dirname(os.path.abspath(__file__))
else:
    script_directory = os.getcwd()
# the dataset folder lives directly beneath the base directory
dataset_path = os.path.join(script_directory, dataset_dir_name)
def load_data(file_path: str):
    """
    Loads a csv file and hands back its contents as a numpy array
    THE 'time' COLUMN BECOMES THE INDEX, so it is not one of the returned columns
    """
    frame = pd.read_csv(file_path)
    time_indexed_frame = frame.set_index('time')
    return time_indexed_frame.to_numpy()
# reading the dataset csv into a numpy array
dataset_file_path = os.path.join(dataset_path, dataset_filename)
dataset = load_data(dataset_file_path)
print("shape of the dataset: ", dataset.shape)

# making sure the directory that receives the dataset plots exists
data_plot_path = os.path.join(dataset_path, data_plot_dir_name)
os.makedirs(data_plot_path, exist_ok=True)

# plotting
def _save_and_close(fig, plot_name):
    """applies a tight layout, saves fig as plot_name inside data_plot_path and closes it"""
    fig.tight_layout()
    fig.savefig(os.path.join(data_plot_path, plot_name), dpi=300, bbox_inches="tight")
    # close this specific figure so open figures do not accumulate in memory
    plt.close(fig)


def _plot_column(column_index, plot_name, ylabel, title):
    """line plot of one dataset column against the sample number, saved as plot_name"""
    fig, ax = plt.subplots()
    ax.plot(dataset[:, column_index])
    ax.set_xlabel('Sample')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    _save_and_close(fig, plot_name)


# 3d trajectory plot (end effector and midsection in the same axes)
plot_name = "x_y_z plot.png"
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(dataset[:, ee_x_index], dataset[:, ee_y_index], dataset[:, ee_z_index], s=5, label='EE')
ax.scatter(dataset[:, midsection_x_index], dataset[:, midsection_y_index], dataset[:, midsection_z_index], s=5, label='Midsection')
ax.set_zlim(-0.4, 0)
# both point sets share these axes, so the labels/title are not EE-specific and a legend tells them apart
ax.set_xlabel('X[m]')
ax.set_ylabel('Y[m]')
ax.set_zlabel('Z[m]')
ax.set_title('3D Midsection and EE Trajectory')
ax.legend()
_save_and_close(fig, plot_name)

# 3d ee trajectory plot
plot_name = "ee_x_y_z plot.png"
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(dataset[:, ee_x_index], dataset[:, ee_y_index], dataset[:, ee_z_index], s=5)
ax.set_zlim(-0.4, 0)
ax.set_xlabel('EE_X[m]')
ax.set_ylabel('EE_Y[m]')
ax.set_zlabel('EE_Z[m]')
ax.set_title('3D EE Trajectory')
_save_and_close(fig, plot_name)

# 2d ee trajectory plot
plot_name = "ee_x_y_plot.png"
fig, ax = plt.subplots()
ax.scatter(dataset[:, ee_x_index], dataset[:, ee_y_index], s=5)
ax.set_xlabel('EE_x[m]')
ax.set_ylabel('EE_y[m]')
ax.set_title('2D EE Trajectory')
_save_and_close(fig, plot_name)

# actuator inputs plots
# the range starts at input_start_index (not a hard-coded 0) so it follows the configured input columns,
# the same way the state loop below follows state_start_index
for i in range(input_start_index, input_stop_index) :
    plot_name = f"u{i+1}_plot.png"
    _plot_column(i, plot_name, ylabel=f"Actuator {i+1} Value", title=f"Actuator {i+1} Inputs")

# state plots
# NOTE(review): the number in the file name / labels is the 1-based dataset column (i+1),
# so the first state column is reported as "State 4" with the current indices - confirm this is intended
for i in range(state_start_index, state_stop_index) :
    plot_name = f"x_{i+1}_plot.png"
    _plot_column(i, plot_name, ylabel=f"State {i+1} Value", title=f"State {i+1} Values")

using compute device: cpu
shape of the dataset:  (36380, 9)


separating the inputs and states, then splitting into training, validation and testing sets

In [26]:
# separating the dataset columns into the actuator inputs (U) and the states (X)
input_columns = slice(input_start_index, input_stop_index)
state_columns = slice(state_start_index, state_stop_index)
U = dataset[:, input_columns]
X = dataset[:, state_columns]
print("inputs shape: ", U.shape)
print("states shape: ", X.shape)

# train, validation and testing split 
def train_valid_test_split(dataset: np.ndarray, train_percent: float, valid_percent: float) -> tuple:
    """
    splits a dataset into contiguous training, validation and testing sets (rows keep their order, no shuffling)

    dataset: sequence/array that supports len() and slicing along its first axis
    train_percent: fraction (0 to 1) of the rows that go to the training set
    valid_percent: fraction (0 to 1) of the rows that go to the validation set
    returns: (train_data, valid_data, test_data); the testing set receives every row left over
    raises: ValueError when a fraction lies outside [0, 1] or the two fractions add up to more than 1
    """
    for name, fraction in (("train_percent", train_percent), ("valid_percent", valid_percent)) :
        if not 0.0 <= fraction <= 1.0 :
            raise ValueError(f"{name} must lie in [0, 1], got {fraction}")
    # small tolerance so float round-off in a sum such as 0.7 + 0.3 is not rejected
    if train_percent + valid_percent > 1.0 + 1e-9 :
        raise ValueError(
            f"train_percent + valid_percent must not exceed 1, got {train_percent + valid_percent}"
        )

    len_data = len(dataset)
    # int() truncates, so rows lost to rounding end up in the testing set
    train_size = int(train_percent*len_data)
    valid_size = int(valid_percent*len_data)
    valid_stop = train_size + valid_size

    train_data = dataset[:train_size]
    valid_data = dataset[train_size:valid_stop]
    test_data = dataset[valid_stop:]
    return train_data, valid_data, test_data
# the same chronological split is applied to the inputs first and to the states second
(U_train, U_valid, U_test), (X_train, X_valid, X_test) = (
    train_valid_test_split(dataset=signal, train_percent=train_percent, valid_percent=valid_percent)
    for signal in (U, X)
)
print("U and X train split shape: ", U_train.shape, X_train.shape)
print("U and X valid split shape: ", U_valid.shape, X_valid.shape)
print("U and X test split shape: ", U_test.shape, X_test.shape)


inputs shape:  (36380, 3)
states shape:  (36380, 6)
U and X train split shape:  (25466, 3) (25466, 6)
U and X valid split shape:  (5457, 3) (5457, 6)
U and X test split shape:  (5457, 3) (5457, 6)


scaling the datasets and saving the scalers

In [27]:
from sklearn.preprocessing import MinMaxScaler
import pickle

# one scaler for the inputs and one for the states, both mapping to the range (0,1)
input_scaler = MinMaxScaler(feature_range=(0,1))
state_scaler = MinMaxScaler(feature_range=(0,1))

# the scalers are fitted on the training split only and then reused unchanged for validation and testing
U_train_scaled = input_scaler.fit_transform(U_train)
X_train_scaled = state_scaler.fit_transform(X_train)
U_valid_scaled, U_test_scaled = input_scaler.transform(U_valid), input_scaler.transform(U_test)
X_valid_scaled, X_test_scaled = state_scaler.transform(X_valid), state_scaler.transform(X_test)

# saving the scalers
network_data_dir_path = os.path.join(script_directory, network_data_dir_name)
os.makedirs(network_data_dir_path, exist_ok=True)
input_scaler_filename = "input_scaler_lines.pkl"
state_scaler_filename = "state_scaler_lines.pkl"
for scaler, scaler_filename in ((input_scaler, input_scaler_filename), (state_scaler, state_scaler_filename)) :
    with open(os.path.join(network_data_dir_path, scaler_filename), "wb") as file :
        pickle.dump(scaler, file=file)

creating features and labels 

In [28]:
def prepare_dataset(U: np.array, X: np.array, lag_input: int, lag_state: int) :
    """
    builds the (features, labels) pairs of a one-step-ahead forward dynamical model
    x_k+1 = f(x_k, x_k-1:nx, u_k, u_k-1:nu)

    every feature row is laid out as [x_k, lagged states, u_k, lagged inputs], where a lagged group is
    left out when it is empty; the matching label row is x_k+1
    """
    longest_lag = max(lag_input, lag_state)
    feature_rows, label_rows = [], []

    # start at the longest lag so the history exists; stop one row early so x_k+1 exists
    for k in range(longest_lag, len(U)-1) :
        # lagged rows are flattened row by row; a lag of 0 gives an empty slice
        state_history = X[k:k] if lag_state == 0 else X[k-lag_state:k,:].flatten('C')
        input_history = U[k:k] if lag_input == 0 else U[k-lag_input:k,:].flatten('C')

        pieces = [X[k,:]]
        if state_history.size != 0 :
            pieces.append(state_history)
        pieces.append(U[k,:])
        if input_history.size != 0 :
            pieces.append(input_history)

        feature_rows.append(np.concatenate(pieces, axis=0))
        label_rows.append(X[k+1])

    return np.array(feature_rows), np.array(label_rows)

# the same lag configuration is used for every split
lag_settings = dict(lag_input=lag_input, lag_state=lag_state)

train_features, train_labels = prepare_dataset(U=U_train_scaled, X=X_train_scaled, **lag_settings)
print("shape of the training features and labels: ", train_features.shape, train_labels.shape)

valid_features, valid_labels = prepare_dataset(U=U_valid_scaled, X=X_valid_scaled, **lag_settings)
print("shape of the validation features and labels: ", valid_features.shape, valid_labels.shape)

test_features, test_labels = prepare_dataset(U=U_test_scaled, X=X_test_scaled, **lag_settings)
print("shape of the testing features and labels: ", test_features.shape, test_labels.shape)

shape of the training features and labels:  (25464, 15) (25464, 6)
shape of the validation features and labels:  (5455, 15) (5455, 6)
shape of the testing features and labels:  (5455, 15) (5455, 6)


defining and creating the model

In [29]:
class MLP_model(torch.nn.Module):
    """
    fully connected network: Linear -> ReLU, then num_hidden_layers x (Linear -> ReLU), then a Linear output layer

    with num_hidden_layers = 0 the backbone is empty, which still leaves the input layer of width hidden_units
    in front of the output layer
    """
    def __init__(self, input_flat_size:int, hidden_units:int, output_size:int, num_hidden_layers:int) :
        """
        input_flat_size: length of the flat feature vector fed to the network
        hidden_units: width of the input layer and of every backbone layer
        output_size: number of values produced by the output layer
        num_hidden_layers: number of (Linear, ReLU) pairs stacked in the backbone
        """
        super().__init__()
        self.input_flat_size = input_flat_size
        self.hidden_units = hidden_units
        self.output_size = output_size
        self.num_hidden_layers = num_hidden_layers

        # features -> hidden width
        self.input_layer = torch.nn.Linear(in_features=input_flat_size, out_features=hidden_units)

        # hidden width -> hidden width, repeated num_hidden_layers times, each followed by a ReLU
        backbone_modules = []
        for _ in range(num_hidden_layers) :
            backbone_modules += [
                torch.nn.Linear(in_features=hidden_units, out_features=hidden_units),
                torch.nn.ReLU(),
            ]
        self.backbone = torch.nn.Sequential(*backbone_modules)

        # hidden width -> outputs (no activation afterwards)
        self.output_layer = torch.nn.Linear(in_features=hidden_units, out_features=output_size)

        # activation applied right after the input layer
        self.relu = torch.nn.ReLU()

    def forward(self,x):
        """runs x through the input layer + ReLU, the backbone and the output layer"""
        hidden = self.relu(self.input_layer(x))
        return self.output_layer(self.backbone(hidden))
    


# the network dimensions follow from the prepared training arrays
input_flat_size, output_size = train_features.shape[1], train_labels.shape[1]

forward_model = MLP_model(
    input_flat_size=input_flat_size,
    hidden_units=hidden_units,
    output_size=output_size,
    num_hidden_layers=num_hidden_layers,
)

from torchinfo import summary
# layer-by-layer overview for a single sample (batch size 1)
model_summary = summary(model=forward_model, input_size=(1,input_flat_size))
print(model_summary)

optimizer_autoregressive = torch.optim.Adam(forward_model.parameters(), lr=learning_rate_autoregressive)

Layer (type:depth-idx)                   Output Shape              Param #
MLP_model                                [1, 6]                    --
├─Linear: 1-1                            [1, 30]                   480
├─ReLU: 1-2                              [1, 30]                   --
├─Sequential: 1-3                        [1, 30]                   --
├─Linear: 1-4                            [1, 6]                    186
Total params: 666
Trainable params: 666
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00


Defining the autoregressive training function

In [30]:
def _assemble_features(current_state, state_history, input_history, index, lag_input, lag_state):
    """
    builds the flat feature vector [x_k, lagged states, u_k, lagged inputs] for k = index

    current_state: 1-D tensor used as x_k (ground truth or a fed-back prediction)
    state_history: 2-D tensor; rows index-lag_state .. index-1 provide the lagged states
    input_history: 2-D tensor; row index is u_k and rows index-lag_input .. index-1 the lagged inputs
    a lagged group is left out entirely when its lag is not positive
    """
    parts = [current_state]
    if lag_state > 0 :
        parts.append(torch.flatten(state_history[index-lag_state:index,:]))
    parts.append(input_history[index,:])
    if lag_input > 0 :
        parts.append(torch.flatten(input_history[index-lag_input:index,:]))
    return torch.cat(parts, dim=0)


def train_autoregressive(model: torch.nn.Module, U_scaled_train: np.ndarray, X_scaled_train: np.ndarray,
                         U_scaled_valid: np.ndarray, X_scaled_valid: np.ndarray,
                         lag_input: int, lag_state: int,
                         epochs: int, optimizer: torch.optim.Optimizer, loss_fn: torch.nn.Module,
                         model_name: str, chunk_length: int, early_stopping_call = None) -> dict:
    """
    trains model in closed loop: each prediction is fed back as the next state, and the weights are
    updated every chunk_length steps (the graph is cut after each update, so gradients never flow
    across chunk boundaries)

    model: maps a (1, n_features) tensor to a (1, n_states) tensor
    U_scaled_train, X_scaled_train: training inputs / states as numpy arrays (rows are time steps)
    U_scaled_valid, X_scaled_valid: validation inputs / states as numpy arrays (rows are time steps)
    lag_input, lag_state: number of past inputs / past states appended to the features (must be >= 0)
    epochs: maximum number of passes over the training series
    optimizer: optimizer holding the parameters of model
    loss_fn: called as loss_fn(predictions, targets) on stacked (steps, n_states) tensors
    model_name: only used in the progress print
    chunk_length: number of rollout steps between two optimizer updates
    early_stopping_call: optional callable invoked with the validation loss after every epoch;
                         training stops once its early_stop attribute is truthy

    returns: {"train_loss": [...], "valid_loss": [...]} with one entry per completed epoch, where the
             train loss is the mean of the chunk losses and the valid loss covers the whole rollout
    raises: ValueError for a negative lag or a series too short to yield a single prediction
    """

    results = {"train_loss": [], "valid_loss": []}

    if lag_input < 0 or lag_state < 0 :
        raise ValueError("lag_input and lag_state must be non-negative")

    U_train = torch.from_numpy(U_scaled_train).type(torch.float32)
    X_train = torch.from_numpy(X_scaled_train).type(torch.float32)
    U_valid = torch.from_numpy(U_scaled_valid).type(torch.float32)
    X_valid = torch.from_numpy(X_scaled_valid).type(torch.float32)

    max_lag = max(lag_input, lag_state)
    n_train = len(U_train)
    n_valid = len(U_valid)

    # the first prediction targets row max_lag+1, so shorter series give no step at all
    # (this would otherwise surface as a division by zero / an empty torch.stack)
    if n_train <= max_lag + 1 or n_valid <= max_lag + 1 :
        raise ValueError(
            f"training and validation series need more than {max_lag + 1} rows, "
            f"got {n_train} and {n_valid}"
        )

    for epoch in range(epochs):

        # predictions are written into copies so the ground truth stays intact between epochs
        X_train_buffer = X_train.clone()
        X_valid_buffer = X_valid.clone()

        model.train()
        optimizer.zero_grad()

        preds = []
        labels = []

        total_loss = 0.0
        step = 0
        counter = 0

        # the rollout starts from ground truth
        joined_features = _assemble_features(X_train[max_lag,:], X_train, U_train, max_lag, lag_input, lag_state)

        for i in range(max_lag+1, n_train) :

            pred = model(joined_features.unsqueeze(0)).squeeze(0)

            step += 1
            preds.append(pred)
            labels.append(X_train[i])

            # later steps read their lagged states from the buffer, i.e. from earlier predictions
            X_train_buffer[i,:] = pred

            is_last_step = i == n_train - 1
            if not is_last_step :
                joined_features = _assemble_features(pred, X_train_buffer, U_train, i, lag_input, lag_state)

            if step == chunk_length or is_last_step :

                loss = loss_fn(torch.stack(preds, dim=0), torch.stack(labels, dim=0))

                loss.backward()
                optimizer.step()
                optimizer.zero_grad()

                # each chunk loss is added exactly once, so train_loss is the true mean chunk loss
                total_loss += loss.item()
                counter += 1

                # cut the graph so the next chunk does not backpropagate into this one
                X_train_buffer = X_train_buffer.detach()
                joined_features = joined_features.detach()

                step = 0
                preds = []
                labels = []

        train_loss = total_loss/counter

        # validation: one closed-loop rollout over the whole validation series
        model.eval()

        preds = []

        # the rollout starts from ground truth
        joined_features = _assemble_features(X_valid[max_lag,:], X_valid, U_valid, max_lag, lag_input, lag_state)

        with torch.inference_mode() :

            for i in range(max_lag+1, n_valid) :

                pred = model(joined_features.unsqueeze(0)).squeeze(0)

                preds.append(pred)

                X_valid_buffer[i,:] = pred

                if i < n_valid - 1 :
                    joined_features = _assemble_features(pred, X_valid_buffer, U_valid, i, lag_input, lag_state)

        preds_tensor = torch.stack(preds, dim=0)

        targets = X_valid[max_lag+1:]

        loss = loss_fn(preds_tensor, targets)

        valid_loss = loss.item()

        print(
        f"Model: {model_name} |"
        f"Epoch: {epoch + 1} | "
        f"Train Loss: {train_loss:.6f} | " 
        f"Validation_Loss: {valid_loss:.6f} | " 
        )

        results["train_loss"].append(train_loss)
        results["valid_loss"].append(valid_loss)

        if early_stopping_call is not None:
            early_stopping_call(valid_loss)
            if early_stopping_call.early_stop :
                print("Early Stopping Triggered")
                break

    return results

defining the early stopping class

In [31]:
class EarlyStopping:
    """
    keeps track of the best validation loss and raises the early_stop flag once `patience`
    consecutive calls have passed without a sufficient improvement
    """
    def __init__(self, patience=6, min_delta=0.0):
        """
        patience: how many calls without improvement are tolerated before stopping
        min_delta: amount by which the loss must undercut the best loss to count as an improvement
        """
        self.patience, self.min_delta = patience, min_delta
        self.best_loss = np.inf
        self.counter = 0
        self.early_stop = False

    def __call__(self, val_loss):
        """records val_loss and updates counter / early_stop accordingly"""
        improved = val_loss < self.best_loss - self.min_delta
        if improved:
            # new best: remember it and restart the patience count
            self.best_loss = val_loss
            self.counter = 0
            return

        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True

training 