In [18]:
# Import necessary libraries
import torch  
import torch.nn as nn  
import torch.optim as optim  
import numpy as np  
from sklearn.model_selection import train_test_split
import torch.utils.data as Data
import pandas as pd
from itertools import product  

# Set a fixed random seed to ensure reproducibility
torch.manual_seed(seed=1)                       
torch.cuda.manual_seed(seed=1)                  
np.random.seed(seed=1)                         
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Use GPU to accelerate computation
device=torch.device('cuda')

In [19]:
# Load dataset
df = pd.read_csv('LSTMDataset.csv',header=None)
X = df.drop(columns=[0, 1]).values
y = df[[0, 1]].values.astype("float32")

# Data processing
# 1-Feature normalization
X_mean = X.mean(axis=0).reshape(1000,1)
X_std = X.std(axis=0).reshape(1000,1)
for i in range(X.shape[1]):
    X[:, i] = (X[:, i] - X_mean[i]) / X_std[i]
X_normalized = (X - X_mean.T) / X_std.T   
# 2-Remove outliers 
indices_to_remove = np.any(np.abs(X_normalized) > 3, axis=1)   
indices_to_remove = np.where(indices_to_remove)[0]  
X = np.delete(X, indices_to_remove, axis=0)  
y = np.delete(y, indices_to_remove, axis=0)
# 3-Label normalization 
y_max = y.max(axis=0)
y[:,0] = y[:,0] / y_max[0]
y[:,1] = y[:,1] / y_max[1]

In [20]:
# Dataset splitting: 90% training set, 5% validation set, 5% test set
X_train, X_nontrain, Y_train, Y_nontrain = train_test_split(X, y, test_size=0.1, random_state=42)
X_val, X_test, Y_val, Y_test = train_test_split(X_nontrain, Y_nontrain, test_size=0.5, random_state=42)
X_train = X_train.T
X_val = X_val.T
X_test = X_test.T
Y_train = Y_train.T
Y_val = Y_val.T
Y_test = Y_test.T

In [21]:
# To generate sequence data with 25 time steps
n_x = 400
(T_x, m) = X_train.shape   
reshaped_X_train = np.zeros((m, int(13*(T_x//n_x)-1), n_x))
(T_x, m) = X_val.shape   
reshaped_X_val = np.zeros((m, int(13*(T_x//n_x)-1), n_x))
(T_x, m) = X_test.shape   
reshaped_X_test = np.zeros((m, int(13*(T_x//n_x)-1), n_x))
for i in range(int(13*(T_x//n_x)-1)):
    for j in range(n_x):
        reshaped_X_train[:, i, j] = X_train[int(n_x/16*i+j), :]
        reshaped_X_val[:, i, j] = X_val[int(n_x/16*i+j), :]
        reshaped_X_test[:, i, j] = X_test[int(n_x/16*i+j), :]

In [22]:
# Definition of the LSTM model  
class LSTMModel(nn.Module):  
    def __init__(self, input_size, hidden_size, output_size, dropout_coeff):  
        super(LSTMModel, self).__init__()  
        self.hidden_size = hidden_size  
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)  
        self.fc = nn.Sequential(  
            nn.Linear(hidden_size, output_size),  
            nn.Sigmoid()  
        )
        self.dropout = nn.Dropout(p=dropout_coeff)
        
    def forward(self, x):  
        h0 = torch.zeros(1, x.size(0), self.hidden_size).to(x.device) # hidden state  
        c0 = torch.zeros(1, x.size(0), self.hidden_size).to(x.device) # cell state
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])                                  # last step  
        return out  

In [23]:
def recover_predictions(test_predictions, Y_test, y_max):

    # recover label
    test_predictions_recovered = np.zeros_like(test_predictions)
    Y_test_recovered = np.zeros_like(Y_test)

    test_predictions_recovered[0] = test_predictions[0] * y_max[0]
    test_predictions_recovered[1] = test_predictions[1] * y_max[1]
    Y_test_recovered[0] = Y_test[0] * y_max[0]
    Y_test_recovered[1] = Y_test[1] * y_max[1]

    return test_predictions_recovered, Y_test_recovered

In [24]:
# percentage loss
def criterion(y_pred, y):
    Loss = torch.mean(torch.div(torch.abs(torch.sub(y_pred, y)), y) * 100) 
    return Loss

In [25]:
# Train  
def train_lstm_model(X_train, Y_train, X_val, Y_val, BATCH_SIZE, input_size, hidden_size, output_size,  
                      learning_rate, iterations, dropout_coeff):
    
    # Set hyperparameters
    model = LSTMModel(input_size, hidden_size, output_size, dropout_coeff)
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)  
    
    # Use GPU to accelerate computation
    X_train = torch.tensor(X_train, dtype=torch.float32).to(device)  
    Y_train = torch.tensor(Y_train, dtype=torch.float32).to(device)  
    X_val = torch.tensor(X_val, dtype=torch.float32).to(device)  
    Y_val = torch.tensor(Y_val, dtype=torch.float32).to(device)
    torch_dataset = Data.TensorDataset(X_train, Y_train.T)
    loader = Data.DataLoader(
        dataset = torch_dataset,     
        batch_size = BATCH_SIZE,     
        shuffle = False,             
        num_workers = 12,             
    )

    # training  
    errors = []  
    for iteration in range(iterations):  
        model.train()  
        optimizer.zero_grad()    
        outputs = model(X_train)
        loss = criterion(outputs, Y_train.T) 
        loss.backward()  
        optimizer.step()  
        
    # evaluating 
    with torch.no_grad():  
        model.eval()  
        outputs = model(X_val)
        val_loss = criterion(outputs, Y_val.T) 
           
    return model, val_loss  

In [26]:
# Hyperparameter range 
input_size =  400                                 # Feature dimension per time step
hidden_sizes = [128, 256, 512]                    # Dimension of LSTM hidden state                    
output_size = 2                                   # Dimension of Labels  
learning_rates = [0.0001, 0.000095, 0.00015] 
iterations = 15000
batch_size = 512
dropout_coeffs = [0.25, 0.3, 0.35]

# Find the optimal hyperparameters  
best_val_loss = float('inf')        
best_params = None                 
best_model = None                  

for hidden_size, learning_rate, dropout_coeff in product(hidden_sizes, learning_rates, dropout_coeffs):  
    
    BATCH_SIZE = batch_size  
    model, val_loss = train_lstm_model(reshaped_X_train, Y_train, reshaped_X_val, Y_val, batch_size, 
                                         input_size, hidden_size, output_size,  
                                          learning_rate, iterations, dropout_coeff)  
    if val_loss < best_val_loss:  
        best_val_loss = val_loss
        best_model = model
        best_params = {  
            'hidden_size': hidden_size,  
            'learning_rate': learning_rate,  
            'dropout_coeff': dropout_coeff,  
        }         

print("Best hyperparameter:", best_params) 
   

Best hyperparameter: {'hidden_size': 512, 'learning_rate': 0.00015, 'dropout_coeff': 0.35}


In [27]:
# Model performance on the test set

reshaped_X_test = torch.tensor(reshaped_X_test, dtype=torch.float32).to(device)
with torch.no_grad():  
    best_model.eval()    
    outputs = best_model(reshaped_X_test)
test_predictions = outputs.cpu().T
test_predictions_recovered, Y_test_recovered = recover_predictions(test_predictions, Y_test, y_max)
test_loss = np.mean(abs(test_predictions_recovered - Y_test_recovered) * 100 / Y_test_recovered, axis=1)

print(f'Percentage error of inductance on the test set: {test_loss[0]}%')
print(f'Percentage error of arc length on the test set: {test_loss[1]}%')
print(f'Average Percentage error on the test set: {(test_loss[0]+test_loss[1])/2}%')
np.set_printoptions(precision=3, suppress=True)

# Compare with labels
print("Comparison:")
print(" Voltage    Predict-L Actual-L   Predict-Arc Actual-Arc")
comparison = np.concatenate((
    (X_test[0, :] * X_std[0] + X_mean[0]).T.reshape(-1, 1),  
    test_predictions_recovered[0].reshape(-1, 1),      
    Y_test_recovered[0].reshape(-1, 1),                
    test_predictions_recovered[1].reshape(-1, 1),      
    Y_test_recovered[1].reshape(-1, 1)                 
), axis=1)
print(comparison)

test_loss = np.mean(abs(test_predictions_recovered - Y_test_recovered) * 100 / Y_test_recovered, axis=0)
top_10_indices = np.argsort(test_loss)[-10:]  

print("Indices and error values of the top 10 samples with the largest errors:")  
for idx in top_10_indices:  
    print(f'Index: {idx}, \tError rate: {test_loss[idx]:.3f}%') 
  
print("\nPredicted and actual values of the top 10 samples with the largest errors:")  
print(" Voltage    Predict-L Actual-L   Predict-Arc Actual-Arc")  

for idx in top_10_indices: 
    
    voltage = X_test[0, idx] * X_std[0][0] + X_mean[0][0]

    pred_L = test_predictions_recovered[0, idx]  
    actual_L = Y_test_recovered[0, idx]  

    pred_Arc = test_predictions_recovered[1, idx]  
    actual_Arc = Y_test_recovered[1, idx]  
    pred_Arc_rounded = np.round(pred_Arc, 3)  
    actual_Arc_rounded = np.round(actual_Arc, 3)  

    print(f"{voltage:.0f}\t{pred_L:.0f}\t{actual_L:.0f}\t{pred_Arc_rounded:.3f}\t{actual_Arc_rounded:.3f}")

Percentage error of inductance on the test set: 0.947745680809021%
Percentage error of arc length on the test set: 0.8981080651283264%
Average Percentage error on the test set: 0.9229269027709961%
Comparison:
 Voltage    Predict-L Actual-L   Predict-Arc Actual-Arc
[[12000.     2252.718  2250.        0.648     0.64 ]
 [16000.     4995.825  5000.        0.458     0.45 ]
 [16000.     4990.314  5000.        6.891     6.9  ]
 ...
 [ 4000.     3368.116  3250.        4.477     4.5  ]
 [ 4000.     4112.193  4000.        4.272     4.3  ]
 [12000.     3981.267  4000.        0.185     0.19 ]]
Indices and error values of the top 10 samples with the largest errors:
Index: 78, 	Error rate: 4.241%
Index: 21, 	Error rate: 4.542%
Index: 199, 	Error rate: 5.115%
Index: 106, 	Error rate: 5.600%
Index: 116, 	Error rate: 6.171%
Index: 172, 	Error rate: 7.372%
Index: 79, 	Error rate: 12.207%
Index: 112, 	Error rate: 17.097%
Index: 257, 	Error rate: 17.515%
Index: 213, 	Error rate: 19.864%

Predicted and act