In [None]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
import pandas as pd
import numpy as np
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import os
from math import *

# Report where the kernel was started.
print("The current directory is: ")
print(os.getcwd())

# When not already at the project root, climb two directory levels
# (assumes this notebook sits two folders below the root - TODO confirm).
if not os.getcwd().endswith("Abaqus-Hardening-Seq-2-Seq-Project"):
    for _level in range(2):
        path_parent = os.path.dirname(os.getcwd())
        os.chdir(path_parent)

# Report the directory every later relative path is resolved against.
print("The current directory is: ")
print(os.getcwd())

The current directory is: 
/scratch/project_2008630/Abaqus-Hardening-Seq-2-Seq-Project
The current directory is: 
/scratch/project_2008630/Abaqus-Hardening-Seq-2-Seq-Project


In [5]:
from configs.chosen_project import *
from src.stage1_global_configs import *

# JSON file describing the project this notebook works on.
chosen_project_path = "configs/global_config_CP1000_RD_20C.json"

# Stage 1: build the global configuration (this call also prints the summary table).
global_configs = main_global_configs(chosen_project_path)

# Keep the two entries used by the rest of the notebook as top-level names.
all_paths, objectives = global_configs['all_paths'], global_configs['objectives']


= Stage 1: Loading configs and all paths =

Welcome to Abaqus Seq2Seq flow curve calibration project

The configurations you have chosen: 

+--------------------------+-------------------------------------------------------------+
|      Global Configs      |                         User choice                         |
+--------------------------+-------------------------------------------------------------+
|         PROJECT          |                        CP1000_RD_20C                        |
|        OBJECTIVES        |      CHD2, CHD4, NDBR2p5, NDBR6, NDBR15, NDBR40, SH115      |
|       PROJECT_PATH       | /scratch/project_2008630/Abaqus-Hardening-Seq-2-Seq-Project |
|    TRAINING_DATA_PATH    |                 training_data/CP1000_RD_20C                 |
|         LOG_PATH         |                    log/CP1000_RD_20C.txt                    |
|       MODELS_PATH        |                     models/CP1000_RD_20C                    |
|  RESULTS_INIT_DATA_PATH  |            

# Loading the training data

In [7]:
import torch

training_data_path = all_paths['training_data_path']
models_path = all_paths['models_path']

# The pre-split tensors are read from a fixed "divided_index_0" folder, which is
# not the configured training_data_path.
# NOTE(review): confirm whether this folder should be derived from the config instead.
divided_data_dir = "training_data/CP1000_RD_20C_divided_index_0"


def _load_float32(file_name):
    """Load the object saved as `file_name` inside `divided_data_dir` and cast it to float32."""
    return torch.load(f"{divided_data_dir}/{file_name}").float()


initial_train_source_sequence_diff = _load_float32("initial_train_source_diff_all.pt")
initial_train_target_sequence_diff = _load_float32("initial_train_target_diff_last.pt")

initial_test_source_sequence_diff = _load_float32("initial_test_source_diff_all.pt")
initial_test_target_sequence_diff = _load_float32("initial_test_target_diff_last.pt")

print(f"Shape of the training source sequence: {initial_train_source_sequence_diff.shape}")
print(f"Shape of the training target sequence: {initial_train_target_sequence_diff.shape}")
print(f"Shape of the testing source sequence: {initial_test_source_sequence_diff.shape}")
print(f"Shape of the testing target sequence: {initial_test_target_sequence_diff.shape}")

# Check if any of them has NaN or infinite values.
# torch.isnan / torch.isinf are used (rather than the NumPy equivalents) so the checks
# operate on the tensors directly and do not depend on an implicit NumPy conversion.
_checked_sequences = (
    ("train_source_sequence", initial_train_source_sequence_diff),
    ("train_target_sequence", initial_train_target_sequence_diff),
    ("test_source_sequence", initial_test_source_sequence_diff),
    ("test_target_sequence", initial_test_target_sequence_diff),
)

for _label, _sequence in _checked_sequences:
    print(f"Number of NaN values in {_label}: {torch.isnan(_sequence).sum()}")

for _label, _sequence in _checked_sequences:
    print(f"Number of infinite values in {_label}: {torch.isinf(_sequence).sum()}")

# Count negative entries in the targets (the targets are expected to be non-negative)
print(f"Number of negative values in train_target_sequence: {(initial_train_target_sequence_diff < 0).sum()}")
print(f"Number of negative values in test_target_sequence: {(initial_test_target_sequence_diff < 0).sum()}")

Shape of the training source sequence: torch.Size([192, 99, 7])
Shape of the training target sequence: torch.Size([192, 99, 1])
Shape of the testing source sequence: torch.Size([64, 99, 7])
Shape of the testing target sequence: torch.Size([64, 99, 1])
Number of NaN values in train_source_sequence: 0
Number of NaN values in train_target_sequence: 0
Number of NaN values in test_source_sequence: 0
Number of NaN values in test_target_sequence: 0
Number of infinite values in train_source_sequence: 0
Number of infinite values in train_target_sequence: 0
Number of infinite values in test_source_sequence: 0
Number of infinite values in test_target_sequence: 0
Number of negative values in train_target_sequence: 0
Number of negative values in test_target_sequence: 0


In [8]:
# Eyeball the magnitude of the inputs: first timestep of the first training source sequence
print(initial_train_source_sequence_diff[0, 0])

tensor([0.0443, 0.0462, 0.0313, 0.0378, 0.0508, 0.0566, 0.0325])


In [9]:
# Eyeball the magnitude of the labels: first timestep of the first training target sequence
print(initial_train_target_sequence_diff[0, 0])

tensor([1.4783])


In [10]:
# Define the device

training_env = "CSC"  # Choose between "local" and "CSC"
if training_env == "local":
    # Use a torch.device here as well, so `device` has the same type in both branches.
    device = torch.device("cpu")
else:
    # Any other value: use the GPU when one is visible, otherwise fall back to the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [11]:
# Helper comparing the average first and last timestep of a batch of sequences
def process_sequence(tensor):
    """Return the ratio between the batch-averaged first and last timestep.

    The tensor is averaged over dimension 0 and dimension 1 of the result is
    squeezed. The shape of the averaged values, their first entry and their
    last entry are printed before the ratio (first / last) is returned.
    """
    per_step_mean = tensor.mean(dim=0)
    per_step_mean = per_step_mean.squeeze(1)
    print(per_step_mean.shape)

    first_step = per_step_mean[0]
    last_step = per_step_mean[-1]
    print(first_step)
    print(last_step)

    return first_step / last_step

# Compute the first-to-last timestep ratio for the train and test targets
train_ratio = process_sequence(initial_train_target_sequence_diff)
test_ratio = process_sequence(initial_test_target_sequence_diff)

# Print both ratios under their own heading
for _heading, _ratio in (("Training Data:", train_ratio), ("\nTest Data:", test_ratio)):
    print(_heading)
    print("Ratio First to Last Timestep:", _ratio)

torch.Size([99])
tensor(1.2323)
tensor(0.7763)
torch.Size([99])
tensor(1.2348)
tensor(0.7847)
Training Data:
Ratio First to Last Timestep: tensor(1.5873)

Test Data:
Ratio First to Last Timestep: tensor(1.5735)


We now train the model using an RMSE loss with linearly decreasing weights, where max_ratio_differ is 1.

Mispredicting the first incremental changes of the flow curve is much more costly than mispredicting the last ones: errors in the early increments accumulate and shift the entire flow curve. To prioritize the accuracy of the first points, set max_ratio_differ to a value higher than 1.

# Start training the model

In [12]:
from optimization.LSTM_helper import *
from optimization.LSTM import *

def train_LSTM(previous_model, current_model, previous_best_test_loss, 
               dropout, num_epochs, start_lr, end_lr, start_tf, end_tf, 
               weight_decay, max_ratio_differ):
    """Train an LSTMModel on the notebook-level train/test tensors and save its checkpoints.

    The function reads the module-level tensors `initial_train_*_sequence_diff` /
    `initial_test_*_sequence_diff`, as well as `models_path` and `device`.

    Args:
        previous_model: Suffix of a `best_model_<suffix>.pth` checkpoint to start from,
            or None to start from freshly initialised weights.
        current_model: Suffix used in the names of every file written by this run.
        previous_best_test_loss: Test loss a new checkpoint has to beat when resuming
            from `previous_model`. None means "no threshold" (any first result is kept).
            Ignored when `previous_model` is None.
        dropout: Forwarded to `LSTMModel`.
        num_epochs: Number of passes over the training set.
        start_lr, end_lr: Forwarded to `linear_lr_scheduler` together with the epoch index.
        start_tf, end_tf: Forwarded to `linear_teacher_forcing_scheduler` together with
            the epoch index; the result is passed to the model during training.
        weight_decay: Forwarded to the Adam optimizer.
        max_ratio_differ: Forwarded to `RMSELoss`.

    Side effects (all inside `<models_path>/LSTM/initial`):
        * `best_model_<current_model>.pth` - written each time the test loss improves.
        * `last_model_<current_model>.pth` - weights after the final epoch.
        * `train_losses_<current_model>.npy`, `test_losses_<current_model>.npy` -
          per-epoch loss histories.

    Returns:
        None. Progress is reported through `print`.
    """
    # Every artefact of this run goes into one folder; create it up front so that the
    # first save cannot fail on a missing directory.
    checkpoint_dir = f"{models_path}/LSTM/initial"
    os.makedirs(checkpoint_dir, exist_ok=True)

    train_dataset = TensorDataset(initial_train_source_sequence_diff, initial_train_target_sequence_diff)
    test_dataset = TensorDataset(initial_test_source_sequence_diff, initial_test_target_sequence_diff)

    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

    # Parameters
    _, source_len, feature_size = initial_train_source_sequence_diff.shape
    _, target_len, label_size = initial_train_target_sequence_diff.shape

    hidden_size = 256
    num_layers = 3

    bidirectional = True  # Set this flag to True or False as needed
    use_attention = True  # Set this flag to True or False to enable/disable attention

    # Initialize model, loss function, and optimizer
    model = LSTMModel(feature_size, label_size,
                      source_len, target_len,
                      hidden_size, num_layers,
                      dropout=dropout,
                      bidirectional=bidirectional,
                      use_attention=use_attention).to(device)

    criterion = RMSELoss(linear_weight=True, max_ratio_differ=max_ratio_differ)  # Use the custom RMSE loss
    optimizer = optim.Adam(model.parameters(), lr=start_lr, weight_decay=weight_decay)  # Adding L2 regularization

    best_test_loss = float('inf')
    if previous_model is not None:
        # Resume from the best weights of the previous run. map_location lets a
        # checkpoint written on one device (e.g. GPU) be restored on another (e.g. CPU).
        previous_state = torch.load(f"{checkpoint_dir}/best_model_{previous_model}.pth",
                                    map_location=device)
        model.load_state_dict(previous_state)
        # Without a known previous loss there is nothing to beat, so keep infinity
        # instead of comparing a float against None later on.
        if previous_best_test_loss is not None:
            best_test_loss = previous_best_test_loss

    # Count the number of parameters
    total_params = sum(p.numel() for p in model.parameters())
    print(f'The model has {total_params} parameters.')

    # Track the best model
    # NOTE(review): when resuming, this file is only created once an epoch beats
    # `previous_best_test_loss`; if none does, no best_model file exists for this run.
    best_model_path = f"{checkpoint_dir}/best_model_{current_model}.pth"

    # Lists to track train and test losses
    train_losses = []
    test_losses = []

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0

        # Update learning rate
        current_lr = linear_lr_scheduler(optimizer, epoch, start_lr, end_lr, num_epochs)

        # Get the current teacher forcing probability from the scheduler
        # (log_teacher_forcing_scheduler takes the same arguments and can be swapped in).
        teacher_forcing_prob = linear_teacher_forcing_scheduler(epoch, start_tf, end_tf, num_epochs)

        for source_batch, target_batch in train_loader:
            source_batch, target_batch = source_batch.to(device), target_batch.to(device)

            optimizer.zero_grad()

            # Forward pass
            outputs = model(source_batch, target_batch, teacher_forcing_prob)
            loss = criterion(outputs, target_batch)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        # Average of the per-batch losses
        train_loss /= len(train_loader)
        train_losses.append(train_loss)

        # Evaluate on test set (the model is called with the source only, no targets)
        model.eval()
        test_loss = 0.0
        with torch.no_grad():
            for source_batch, target_batch in test_loader:
                source_batch, target_batch = source_batch.to(device), target_batch.to(device)

                # Forward pass
                outputs = model(source_batch)
                loss = criterion(outputs, target_batch)

                test_loss += loss.item()

        test_loss /= len(test_loader)
        test_losses.append(test_loss)

        # Save the best model (the weights on disk are the snapshot of the best epoch)
        if test_loss < best_test_loss:
            print(f"New best test loss found: {test_loss}")
            best_test_loss = test_loss
            torch.save(model.state_dict(), best_model_path)

        # Print progress
        if (epoch+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.6f}, Test Loss: {test_loss:.6f}, LR: {current_lr:.9f}, TF: {teacher_forcing_prob:.9f}')

    # Save the train and test loss lists as .npy files
    np.save(f'{checkpoint_dir}/train_losses_{current_model}.npy', np.array(train_losses))
    np.save(f'{checkpoint_dir}/test_losses_{current_model}.npy', np.array(test_losses))

    torch.save(model.state_dict(), f"{checkpoint_dir}/last_model_{current_model}.pth")

    print('Training complete')
    print(f'Best model saved with test loss: {best_test_loss:.4f}')

In [13]:
# First training run: start from scratch, teacher forcing scheduled from 1.0 to 0.0
train_LSTM(
    previous_model=None,
    current_model="1_train_diff",
    previous_best_test_loss=None,
    dropout=0.01,
    num_epochs=1000,
    start_lr=0.0005,
    end_lr=0.0001,
    start_tf=1.0,
    end_tf=0.0,
    weight_decay=1e-3,
    max_ratio_differ=1,
)

The model has 8955393 parameters.
New best test loss found: 5.5944013595581055
New best test loss found: 5.592984199523926
New best test loss found: 5.58970832824707
New best test loss found: 5.586631774902344
New best test loss found: 5.584264278411865
New best test loss found: 5.583493709564209
New best test loss found: 5.581272125244141
New best test loss found: 5.58078670501709
New best test loss found: 5.577939987182617
New best test loss found: 5.576466083526611
New best test loss found: 5.569815158843994
New best test loss found: 5.5618977546691895
New best test loss found: 5.537265300750732
New best test loss found: 5.362679481506348
New best test loss found: 5.159878730773926
New best test loss found: 4.84882116317749
New best test loss found: 4.590609550476074
New best test loss found: 4.537997722625732
New best test loss found: 4.468242645263672
New best test loss found: 4.419610023498535
New best test loss found: 4.319255828857422
New best test loss found: 4.238533496856689

In [18]:
# Second training run: resume from the best checkpoint of "1_train_diff" with lower learning rates.
# NOTE(review): previous_best_test_loss is presumably the best test loss of run 1 - verify.
train_LSTM(
    previous_model="1_train_diff",
    current_model="2_train_diff",
    previous_best_test_loss=0.4221002757549286,
    dropout=0.01,
    num_epochs=1000,
    start_lr=0.0001,
    end_lr=0.00001,
    start_tf=1.0,
    end_tf=0.0,
    weight_decay=1e-3,
    max_ratio_differ=1,
)

The model has 8955393 parameters.
New best test loss found: 0.420843243598938
New best test loss found: 0.4193941652774811
New best test loss found: 0.4193079471588135
Epoch [100/1000], Train Loss: 0.382713, Test Loss: 0.419308, LR: 0.000091090, TF: 0.901000000
New best test loss found: 0.4159759283065796
New best test loss found: 0.4133967459201813
Epoch [200/1000], Train Loss: 0.387355, Test Loss: 0.423981, LR: 0.000082090, TF: 0.801000000
New best test loss found: 0.4114418029785156
New best test loss found: 0.4112423360347748
New best test loss found: 0.41071128845214844
New best test loss found: 0.40998151898384094
New best test loss found: 0.40996912121772766
New best test loss found: 0.40948426723480225
New best test loss found: 0.4079590439796448
Epoch [300/1000], Train Loss: 0.373489, Test Loss: 0.421219, LR: 0.000073090, TF: 0.701000000
New best test loss found: 0.40697377920150757
New best test loss found: 0.40667974948883057
New best test loss found: 0.40556126832962036
New