In [3]:
# Importing necessary libraries
import matplotlib.pyplot as plt
import torch
from torch import nn
import numpy as np
from icecream import ic
from tqdm import tqdm
from get_data import *
from dataloader import *

In [4]:
 #Define the LSTM model class

# Use the GPU if available
torch.set_default_dtype(torch.float64)
device = "cuda:0" if torch.cuda.is_available() else "cpu"
#device="cpu"
print(device)

class LSTMmodel(nn.Module):

    def __init__(self, input_size, hidden_size, out_size, layers, future=1):

        super().__init__()

        self.hidden_size = hidden_size
        self.input_size = input_size
        self.future = future
        # Define LSTM layer
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=layers, batch_first=True)

        # Define linear layer
        self.linear = nn.Linear(hidden_size, out_size)

    def forward(self, seq):
    
        lstm_out, hidden = self.lstm(seq)
        #lstm_out = self.act(lstm_out)
        pred = self.linear(lstm_out)
        
        if self.future==1:
            return pred, hidden
        
        out = []
        if self.future > 1:
            for t in range(self.future):
                
                seq = torch.cat((seq[:,1:,:], pred[:,-1:,:]), dim=1)
                #seq = pred[:,-1:,:]
                
                #lstm_out, hidden = self.lstm(seq, hidden)
                lstm_out, hidden = self.lstm(seq)
                
                pred = self.linear(lstm_out)
                out.append(pred) 
                
        return out, hidden          







cpu


In [5]:
def test(test_data, model, steps=600, ws=10, plot_opt=False, n = 5, rand=True):

    #test_data = test_dataloader.get_all_data() 
    model.eval()
    loss_fn = nn.MSELoss()
    test_loss = 0
    test_loss_deriv = 0
    total_loss = 0
    
    if rand:
     np.random.seed(123)
 
    ids = np.random.choice(test_data.size(dim=0), min([n, test_data.size(dim=0)]), replace=False)
    ids = np.unique(ids)


    for i, x in enumerate(test_data):
        x=x.to(device)
        if i not in ids:
            continue

        with torch.inference_mode():

            pred = torch.zeros((steps, 3), device=device)
            pred_next_step = torch.zeros((steps, 3), device=device)

            if ws > 1:
                pred[0:ws, :] = x[0:ws, :]
                pred[:, 0] = x[:, 0]
                pred_next_step[0:ws, :] = x[0:ws, :]
                pred_next_step[:, 0] = x[:, 0]
            else:
                pred[0, :] = x[0, :]
                pred[:, 0] = x[:, 0]
                pred_next_step[0, :] = x[0, :]
                pred_next_step[:, 0] = x[:, 0]

            for i in range(len(x) - ws):

                out, _ = model(pred[i:i+ws, :])
                pred[i+ws, :] = pred[i+ws-1, :] + out[-1, :]
                pred_next_step[i+ws, :] = x[i+ws-1, :] + out[-1, :]
            
            test_loss += loss_fn(pred[:, 1], x[:, 1]).detach().cpu().numpy()
            test_loss_deriv += loss_fn(pred[:, 2], x[:, 2]).detach().cpu().numpy()

            total_loss += loss_fn(pred[:, 1:], x[:, 1:]).detach().cpu().numpy()

            if plot_opt:
                figure , axs = plt.subplots(1,3,figsize=(16,9))
            
                axs[0].plot(pred.detach().cpu().numpy()[:, 1], color="red", label="pred")
                axs[0].plot(pred_next_step.detach().cpu().numpy()[:, 1], color="green", label="next step from data")
                axs[0].plot(x.detach().cpu().numpy()[:, 1], color="blue", label="true", linestyle="dashed")
                axs[0].set_title("position")
                axs[0].grid()
                axs[0].legend()

                axs[1].plot(pred.detach().cpu().numpy()[:, 2], color="red", label="pred")
                axs[1].plot(pred_next_step.detach().cpu().numpy()[:, 2], color="green", label="next step from data")
                axs[1].plot(x.detach().cpu().numpy()[:, 2], color="blue", label="true", linestyle="dashed")
                axs[1].set_title("speed")
                axs[1].grid()
                axs[1].legend()

                axs[2].plot(x.detach().cpu().numpy()[:,0], label="pressure")
                axs[2].set_title("pressure")
                axs[2].grid()
                axs[2].legend()

                plt.grid(True)
                plt.legend()
                plt.show()
            
    return np.mean(test_loss), np.mean(test_loss_deriv), np.mean(total_loss)


In [6]:
#with future:

def train(input_data, model, weight_decay, future_decay, learning_rate=0.001, ws=0, future=1):

    loss_fn = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate, weight_decay=weight_decay)

    model.train()
    total_loss = []

    for k, (inp, label) in enumerate(input_data):  # inp = (u, x) label = x

        inp=inp.to(device)
        label=label.to(device)

        # Predict one timestep :
        output, _ = model(inp)

        out = []
        for i in range(future):
            out.append(inp[:, :, :] + output[i])

        # reset the gradient
        
        optimizer.zero_grad(set_to_none=True)
        # calculate the error

        loss = loss_fn(out[0][:,-1,:], label[:, 0, :])

        for t in range(1, future):
            loss += loss_fn(out[t][:,-1,:], label[:, t, :])

        loss.backward(retain_graph=True)
        optimizer.step()

        total_loss.append(loss.detach().cpu().numpy())

   # return the average error of the next step prediction
    return np.mean(total_loss)


In [15]:

# set some parameters for learning 
                    #window_size, h_size,  l_num,  epochs, learning_rate,  part_of_data,   weight_decay,   percentage_of_data    future_decay      batch_size
parameter_sets  =    [4,             6 ,      1,       100,       0.0005,          0,           1e-5,               0.1,               0.5 ,           2000]
           # {'lr': 0.0006226762071294569, 'ws': 9, 'bs': 51, 'hs': 13, 'ls': 2}         
window_size, h_size, l_num, epochs, learning_rate, part_of_data, weight_decay,  percentage_of_data, future_decay, batch_size = parameter_sets

future = 3

# Initialize the LSTM model
model = LSTMmodel(input_size=3, hidden_size=h_size, out_size=3, layers=l_num, future=future).to(device)
#model = RNNmodel(input_size=3, hidden_size=h_size, out_size=2, layers=l_num).to(device)

# Generate input data (the data is normalized and some timesteps are cut off)
input_data = get_data(path = "save_data_test4.csv", 
                        timesteps_from_data=0, 
                        skip_steps_start = 0,
                        skip_steps_end = 0, 
                        drop_half_timesteps = True,
                        normalise_s_w=True,
                        rescale_p=False,
                        num_inits=part_of_data)

cut_off_timesteps = 800
print(input_data.size())
#Split data into train and test sets
np.random.seed(2312)
num_of_inits_train = int(len(input_data)*percentage_of_data)
train_inits = np.random.choice(np.arange(len(input_data)),num_of_inits_train,replace=False)
test_inits = np.array([x for x in range(len(input_data)) if x not in train_inits])

train_data = input_data[train_inits,:input_data.size(dim=1)-cut_off_timesteps,:]
test_data = input_data[test_inits,:,:]
print(train_data.size())

data_set  = CustomDataset(train_data, window_size=window_size, future=future)
train_dataloader = DataLoader(data_set, batch_size=batch_size, pin_memory=True, drop_last=True)

losses = []
average_traj_err_train = []
average_traj_err_test = []

for e in tqdm(range(epochs)):
    
    loss_epoch = train(train_dataloader, model, weight_decay, future_decay, learning_rate=learning_rate, ws=window_size, future=future)
    losses.append(loss_epoch)

    # Every few epochs get the error MSE of the true data
    # compared to the network prediction starting from some initial conditions
    if (e+1)%10 == 0:
        model.future = 1
        _,_, err_train = test(train_data, model, steps=train_data.size(dim=1), ws=window_size, plot_opt=False, n = 20)
        model.future = future
        if err_train < 1:
            print("stopped early")
            break
        #_,_, err_test = test(test_data, model, steps=test_data.size(dim=1), ws=window_size, plot_opt=False, n = 20)
        average_traj_err_train.append(err_train)
        #average_traj_err_test.append(err_test)
        print(f"Epoch: {e}, the average next step error was : loss_epoch")
        print(f"Average error over full trajectories: training data : {err_train}")
        #print(f"Average error over full trajectories: testing data : {err_test}")

model.future=1
_,_, err_train = test(train_data, model, steps=train_data.size(dim=1), ws=window_size, plot_opt=False, n = 100)
#_,_, err_test = test(test_data, model, steps=test_data.size(dim=1), ws=window_size, plot_opt=False, n = 100)
print(f"TRAINING FINISHED: Average error over full trajectories: training data : {err_train}")
#print(f"TRAINING FINISHED: Average error over full trajectories: testing data : {err_test}")
        

torch.Size([500, 550, 3])
torch.Size([50, 300, 3])


 10%|█         | 10/100 [00:14<02:53,  1.93s/it]

Epoch: 9, the average next step error was : loss_epoch
Average error over full trajectories: training data : 9544.69974688472


 20%|██        | 20/100 [00:30<03:04,  2.31s/it]

Epoch: 19, the average next step error was : loss_epoch
Average error over full trajectories: training data : 15832.609713694692


 30%|███       | 30/100 [00:46<02:27,  2.10s/it]

Epoch: 29, the average next step error was : loss_epoch
Average error over full trajectories: training data : 30087.7331811518


 40%|████      | 40/100 [01:02<02:20,  2.34s/it]

Epoch: 39, the average next step error was : loss_epoch
Average error over full trajectories: training data : 30549.586985085323


 50%|█████     | 50/100 [01:19<01:54,  2.29s/it]

Epoch: 49, the average next step error was : loss_epoch
Average error over full trajectories: training data : 11343.187290185257


 60%|██████    | 60/100 [01:34<01:24,  2.12s/it]

Epoch: 59, the average next step error was : loss_epoch
Average error over full trajectories: training data : 1043.4443404588756


 70%|███████   | 70/100 [01:51<01:11,  2.38s/it]

Epoch: 69, the average next step error was : loss_epoch
Average error over full trajectories: training data : 873.2963882385181


 80%|████████  | 80/100 [02:06<00:38,  1.94s/it]

Epoch: 79, the average next step error was : loss_epoch
Average error over full trajectories: training data : 1077.869497650577


 90%|█████████ | 90/100 [02:23<00:24,  2.44s/it]

Epoch: 89, the average next step error was : loss_epoch
Average error over full trajectories: training data : 1057.1844908948485


100%|██████████| 100/100 [02:41<00:00,  1.61s/it]

Epoch: 99, the average next step error was : loss_epoch
Average error over full trajectories: training data : 843.8771256572977





TRAINING FINISHED: Average error over full trajectories: training data : 2241.2416653607843


In [7]:
# Save the model
path = f"Ventil_trained_NNs\my_example_model.pth"
torch.save(model.state_dict(), path)

# Load the model and test it on the test data

path = "Ventil_trained_NNs\my_example_model.pth"
model = LSTMmodel(input_size=3, hidden_size=h_size, out_size=3, layers=l_num, future=1).to(device)
#model = RNNmodel(input_size=3, hidden_size=h_size, out_size=2, layers=l_num).to(device)

model.load_state_dict(torch.load(path, map_location=torch.device(device)))

train_data = input_data[train_inits,:,:]

test_loss, test_loss_deriv, total_loss = test(test_data, model, steps=input_data.size(dim=1), ws=window_size, plot_opt=True , n = 1, rand=False)
#test_loss, test_loss_deriv, total_loss = test(train_data, model, steps=input_data.size(dim=1), ws=window_size, plot_opt=True , n = 1, rand=False)


ic(test_loss, test_loss_deriv, total_loss)

NameError: name 'model' is not defined

In [13]:
params =                        {
                           "experiment_number" : 6,
                           "window_size" : 2,
                           "h_size" : 8,
                           "l_num" : 1,
                           "epochs" : 40,
                           "learning_rate" : 0.001,
                           "part_of_data" : 0, 
                           "weight_decay" : 1e-5,
                           "percentage_of_data" : 0.8,
                           "future_decay"  : 0.1,
                           "batch_size" : 400,
                           "future" : 4
                        }


path = "Ventil_trained_NNs\lstm_ws0.pth"

# Initialize the LSTM model
model = LSTMmodel(input_size=3, hidden_size=params["h_size"], out_size=2, layers=params["l_num"]).to(device)

model.load_state_dict(torch.load(path, map_location=torch.device(device)))

# Generate input data (the data is normalized and some timesteps are cut off)
input_data = get_data(path = "save_data_test4.csv", 
                timesteps_from_data=0, 
                skip_steps_start = 0,
                skip_steps_end = 0, 
                drop_half_timesteps = False,
                normalise_s_w="mean",
                rescale_p=False,
                num_inits=params["part_of_data"])

cut_off_timesteps = 100

#Split data into train and test sets
num_of_inits_train = int(len(input_data)*params["percentage_of_data"])
train_inits = np.random.choice(np.arange(len(input_data)),num_of_inits_train,replace=False)
test_inits = np.array([x for x in range(len(input_data)) if x not in train_inits])

train_data = input_data[train_inits,:input_data.size(dim=1)-cut_off_timesteps,:]
test_data = input_data[test_inits,:,:]

data_set  = CustomDataset(train_data, window_size=params["window_size"], future=params["future"])
train_dataloader = DataLoader(data_set, batch_size=params["batch_size"], pin_memory=True, drop_last=True)

test(test_data, model, steps=test_data.size(dim=1), ws=params["window_size"], plot_opt=True, n = 5, rand=False)

RuntimeError: Error(s) in loading state_dict for LSTMmodel:
	size mismatch for lstm.weight_ih_l0: copying a param with shape torch.Size([24, 3]) from checkpoint, the shape in current model is torch.Size([32, 3]).
	size mismatch for lstm.weight_hh_l0: copying a param with shape torch.Size([24, 6]) from checkpoint, the shape in current model is torch.Size([32, 8]).
	size mismatch for lstm.bias_ih_l0: copying a param with shape torch.Size([24]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for lstm.bias_hh_l0: copying a param with shape torch.Size([24]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for linear.weight: copying a param with shape torch.Size([2, 6]) from checkpoint, the shape in current model is torch.Size([2, 8]).