In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

# from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.metrics import auc, precision_recall_curve
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import precision_score, recall_score, f1_score
# from sklearn.metrics import mean_squared_error, mean_absolute_error

import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn.functional as F
from torch import nn, optim

import random
import copy

plt.style.use('fivethirtyeight')

In [2]:
# Shell escape: print the NVIDIA driver/GPU status table for this machine.
!nvidia-smi

Sun Apr 28 21:42:47 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 530.30.02              Driver Version: 530.30.02    CUDA Version: 12.1     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                  Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf            Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 4070 Ti      On | 00000000:65:00.0 Off |                  N/A |
| 30%   44C    P2               37W / 285W|    420MiB / 12282MiB |      8%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce RTX 4070 Ti      On | 00000000:B3:00.0 Off |  

# Functions

In [3]:
def ts_array_create(dirname, dir_list, time_seq):
    """Build sliding-window samples from a set of CSV traces.

    For every file, each row ``t`` that has a complete window yields one sample:
    the input is rows ``t .. t+time_seq-1`` of the ``features`` columns and the
    label window is rows ``t+time_seq .. t+time_seq+predict_t-1`` of the
    ``target`` columns. Windows never span two files.

    Relies on the notebook-level globals ``features``, ``target`` and
    ``predict_t``, which must be defined before this function is called.

    Args:
        dirname: directory that contains the CSV files.
        dir_list: file names (relative to ``dirname``) to load.
        time_seq: number of consecutive rows in one input window.

    Returns:
        A 4-tuple ``(X, ho, ho_step, split_time)``:
          * ``X``: array of shape (num_samples, time_seq, len(features)).
          * ``ho``: array (num_samples,), 1 if the label window sums to a
            positive value, else 0.
          * ``ho_step``: array (num_samples,), 1-based index of the first step
            of the label window in which any target column is truthy, 0 if none.
          * ``split_time``: list of ``(start, end)`` sample-index pairs (end
            exclusive), one per file, in the order of ``dir_list``.

    Raises:
        ValueError: if ``dir_list`` is empty.
    """
    def first_event_step(label_windows):
        # 1-based position of the first step with any truthy target, 0 if none.
        steps = []
        for window in label_windows:
            first = 0
            for step, row in enumerate(window, start=1):
                if row.any():
                    first = step
                    break
            steps.append(first)
        return steps

    def event_occurs(label_windows):
        # Every window gets an explicit label; nothing is carried over from the
        # previous window when the sum is not exactly 0 or positive.
        return [1 if sum(window.reshape(-1)) > 0 else 0 for window in label_windows]

    dir_list = list(dir_list)
    if not dir_list:
        raise ValueError("dir_list is empty: no CSV files to build samples from")

    horizon = time_seq + predict_t
    # The last (horizon - 1) rows of a file cannot start a complete window:
    # shift() pads them with NaN, so they are dropped.
    n_incomplete = horizon - 1

    X_parts, Y_parts, lengths = [], [], []
    for fname in tqdm(dir_list):

        df = pd.read_csv(os.path.join(dirname, fname))

        # Drop columns that are never used as model input.
        del df['Timestamp'], df['lat'], df['long'], df['gpsspeed']

        # Missing values and '-' placeholders are both mapped to 0.
        df.replace(np.nan, 0, inplace=True)
        df.replace('-', 0, inplace=True)

        X = df[features]
        Y = df[target]

        # Stack shifted copies: axis 0 is the offset inside the window.
        X_ts = np.array([X.shift(periods=-j) for j in range(time_seq)])
        Y_ts = np.array([Y.shift(periods=-j) for j in range(time_seq, horizon)])

        # -> (row, offset, column), then keep only rows with a complete window.
        n_valid = max(len(df) - n_incomplete, 0)
        X_ts = np.transpose(X_ts, (1, 0, 2))[:n_valid, :, :]
        Y_ts = np.transpose(Y_ts, (1, 0, 2))[:n_valid, :, :]

        X_parts.append(X_ts)
        Y_parts.append(Y_ts)
        lengths.append(len(X_ts))

    # Concatenate once instead of re-copying the growing array for every file.
    X_final = np.concatenate(X_parts, axis=0)
    Y_final = np.concatenate(Y_parts, axis=0)

    # Per-file [start, end) ranges into the concatenated sample axis.
    split_time = []
    start = 0
    for n in lengths:
        split_time.append((start, start + n))
        start += n

    return X_final, np.array(event_occurs(Y_final)), np.array(first_event_step(Y_final)), split_time

class RNN_Dataset_simple(Dataset):
    """
    In-memory dataset over two pre-built arrays.

    ``X`` and ``y`` are converted to float32 tensors once at construction time;
    item ``i`` is the pair ``(X[i], y[i])``. No file access happens here.
    """
    def __init__(self, X, y):
        # Cast on the array side first so any numeric dtype is accepted.
        inputs_f32 = X.astype(np.float32)
        labels_f32 = y.astype(np.float32)

        self.inputs = torch.as_tensor(inputs_f32, dtype=torch.float32)
        self.labels = torch.as_tensor(labels_f32, dtype=torch.float32)

    def __len__(self):
        # Number of samples is defined by the label tensor.
        return len(self.labels)

    def __getitem__(self, idx):
        return self.inputs[idx], self.labels[idx]

In [4]:
def days_in_file(file, dates):
    """Return True when at least one entry of ``dates`` is contained in ``file``."""
    return any(date in file for date in dates)

def train_valid_split(L, valid_size=0.2):
    """Randomly split a list into a training part and a held-out part.

    ``int(len(L) * valid_size)`` items are drawn with ``random.sample`` (so the
    draw follows the ``random`` module's seed). The remaining items are returned
    in their original order, which keeps the training list reproducible for a
    fixed seed; a set difference would order them by string hash, which varies
    between interpreter runs.

    Args:
        L: list of hashable items (e.g. file names).
        valid_size: fraction of ``L`` to hold out.

    Returns:
        ``(t_files, v_files)``: the training items and the held-out items.
    """
    v_num = int(len(L) * valid_size)
    v_files = random.sample(L, v_num)

    held_out = set(v_files)
    t_files = [item for item in L if item not in held_out]

    return t_files, v_files

In [5]:
def set_seed(seed):
    """Seed every random number generator this notebook uses and print the seed.

    Seeds Python's ``random``, NumPy's legacy global RNG and torch (CPU and all
    CUDA devices), switches cuDNN to deterministic mode, and exports two
    environment variables.
    """
    # Python / NumPy generators
    random.seed(seed)
    np.random.seed(seed)

    # torch generators: CPU, current CUDA device, all CUDA devices
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # cuDNN: deterministic kernels, no auto-tuner
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    # NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so setting it
    # here presumably only affects child processes - confirm if hash order matters.
    os.environ["PYTHONHASHSEED"] = str(seed)
    os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:2"

    print(f"Random seed set as {seed}")


# Load data

In [6]:
# Global experiment configuration (read by the data-loading and training cells).
# Time sequence length and prediction time length
seed = 55688            # passed to set_seed() for reproducibility
time_seq = 20           # rows per input window
predict_t = 10          # rows per label window (read as a global inside ts_array_create)
valid_ratio = 0.2       # fraction of files held out by train_valid_split
task = 'classification'

batch_size = 32         # batch size used when the DataLoaders are built

In [7]:
# Setup seed
set_seed(seed)

# Get GPU
device_count = torch.cuda.device_count()
num_of_gpus = device_count

# List every visible CUDA device; gpu_id ends up as the index of the last one.
for i in range(device_count):
    print("GPU {}: {}".format(i, torch.cuda.get_device_name(i)))
    gpu_id = i

# Tensors and models are moved to this device; falls back to CPU without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Directory in which model checkpoints are saved
save_path = "../model"

# Define DataSet
dirname = "../data/single"
dir_list = os.listdir(dirname)
# Keep CSV files only, and leave out files whose name contains 'sm'
# (those are appended to the training list below).
dir_list = [f for f in dir_list if ( f.endswith('.csv') and (not 'sm' in f) ) ]

# Only referenced by the commented-out date-based split below.
train_dates = ['03-26', '04-01']
test_dates = ['04-10']
    
# train_dir_list = [f for f in dir_list if ( f.endswith('.csv') and ('All' in f) and days_in_file(f, train_dates) )]
# test_dir_list = [f for f in dir_list if ( f.endswith('.csv') and ('All' in f) and days_in_file(f, test_dates) )]

# Random file-level split: valid_ratio of the files become the test list.
train_dir_list, test_dir_list = train_valid_split(dir_list, valid_ratio)
# Every 'sm' file goes to training only.
# NOTE(review): unlike dir_list above, this does not check for the '.csv' suffix - confirm
# that the directory holds no non-CSV entries with 'sm' in the name.
train_dir_list += [f for f in os.listdir(dirname) if 'sm' in f]

# Input columns fed to the model. The commented-out lists are alternative
# feature sets; exactly one assignment to `features` should be active.
# features = ['LTE_HO', 'MN_HO', 'eNB_to_ENDC', 'gNB_Rel', 'gNB_HO', 'RLF', 'SCG_RLF',
#         'num_of_neis', 'RSRP', 'RSRQ', 'RSRP1', 'RSRQ1', 'RSRP2', 'RSRQ2',
#         'nr-RSRP', 'nr-RSRQ', 'nr-RSRP1', 'nr-RSRQ1', 'nr-RSRP2', 'nr-RSRQ2' ]
features = ['LTE_HO', 'MN_HO', 'eNB_to_ENDC', 'gNB_Rel', 'gNB_HO', 'RLF', 'SCG_RLF',
        'num_of_neis', 'RSRP', 'RSRQ', 'RSRP1', 'RSRQ1','nr-RSRP', 'nr-RSRQ', 'nr-RSRP1', 'nr-RSRQ1']
# features = ['LTE_HO', 'MN_HO', 'eNB_to_ENDC', 'gNB_Rel', 'gNB_HO', 'RLF', 'SCG_RLF',
#         'num_of_neis', 'RSRP', 'RSRQ', 'RSRP1', 'RSRQ1', 'RSRP2', 'RSRQ2']

num_of_features = len(features)

# Label columns (read as a global inside ts_array_create). Exactly one
# assignment should be active; the active target columns are also part of `features`.
# target = ['LTE_HO', 'MN_HO'] # For eNB HO.
# target = ['eNB_to_ENDC'] # Setup gNB
target = ['gNB_Rel', 'gNB_HO'] # For gNB HO.
# target = ['RLF'] # For RLF
# target = ['SCG_RLF'] # For scg failure
# target = ['dl-loss'] # For DL loss
# target = ['ul-loss'] # For UL loss

# Data
# Two loaders are built per split:
#   *_dataloader1: every window, labelled with the binary flag (second return value).
#   *_dataloader2: only windows whose third return value is > 0, labelled with that value.
# NOTE(review): the training loaders use shuffle=False, so batches are always visited
# in file order - confirm this is intended.
print('Loading training data...')
X_train, y_train1, y_train2, split_time_train = ts_array_create(dirname, train_dir_list, time_seq)

train_dataset = RNN_Dataset_simple(X_train, y_train1)
train_dataloader1 = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

# Boolean mask selecting the windows with a positive y_train2 entry.
cond = y_train2 > 0
X_train_fore = X_train[cond]
y_train2_fore = y_train2[cond]
# `train_dataset` is rebound here; train_dataloader1 keeps its own reference to the first dataset.
train_dataset = RNN_Dataset_simple(X_train_fore, y_train2_fore)
train_dataloader2 = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

print('Loading testing data...')
X_test, y_test1, y_test2, split_time_test = ts_array_create(dirname, test_dir_list, time_seq)

test_dataset = RNN_Dataset_simple(X_test, y_test1)
test_dataloader1 = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Same filtering as for the training split.
cond = y_test2 > 0
X_test_fore = X_test[cond]
y_test2_fore = y_test2[cond]
test_dataset = RNN_Dataset_simple(X_test_fore, y_test2_fore)
test_dataloader2 = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


Random seed set as 55688
GPU 0: NVIDIA GeForce RTX 4070 Ti
GPU 1: NVIDIA GeForce RTX 4070 Ti
Loading training data...


  0%|          | 0/349 [00:00<?, ?it/s]

Loading testing data...


  0%|          | 0/33 [00:00<?, ?it/s]

In [8]:
# Pull one batch to read the feature dimension; the models emit a single value per window.
a,b = next(iter(train_dataloader1))
input_dim, out_dim = a.shape[2], 1
# Displayed as the cell output: (batch, time_seq, features)
a.shape

torch.Size([32, 20, 16])

# Model

In [9]:
class RNN_Cls(nn.Module):
    '''
    Recurrent classifier: LSTM or GRU followed by a linear layer and a sigmoid.

    Args:
        input_dim: number of features per time step.
        out_dim: size of the output vector per sequence.
        hidden_dim: size of the recurrent hidden state.
        num_layer: number of stacked recurrent layers.
        dropout: dropout value passed to the recurrent module.
        rnn: 'LSTM' or 'GRU'.

    Raises:
        ValueError: if ``rnn`` is neither 'LSTM' nor 'GRU'.
    '''
    def __init__(self, input_dim, out_dim, hidden_dim, num_layer, dropout, rnn):

        super().__init__()
        self.in_dim = input_dim
        self.out_dim = out_dim
        self.hid_dim = hidden_dim
        self.num_layer = num_layer
        self.dropout = dropout

        # batch_first=True: inputs are (batch, time step, feature).
        if rnn == 'LSTM':
            self.rnn = nn.LSTM(input_dim, hidden_dim, num_layer, batch_first=True, dropout=dropout)
        elif rnn == 'GRU':
            self.rnn = nn.GRU(input_dim, hidden_dim, num_layer, batch_first=True, dropout=dropout)
        else:
            # Fail at construction time instead of leaving self.rnn undefined
            # and crashing later inside forward().
            raise ValueError(f"rnn must be 'LSTM' or 'GRU', got {rnn!r}")

        # Maps the last hidden output to out_dim values.
        self.linear = nn.Linear(hidden_dim, out_dim)

    def forward(self, batch_input):
        '''Return sigmoid outputs of shape (batch, out_dim) for a (batch, seq, input_dim) input.'''
        out, _ = self.rnn(batch_input)          # (batch, seq, hidden_dim)
        out = self.linear(out[:, -1, :])        # last time step only -> (batch, out_dim)

        return torch.sigmoid(out)               # squashed into [0, 1]

class RNN_Fst(nn.Module):
    '''
    Recurrent regressor: LSTM or GRU followed by a linear layer (no activation).

    Args:
        input_dim: number of features per time step.
        out_dim: size of the output vector per sequence.
        hidden_dim: size of the recurrent hidden state.
        num_layer: number of stacked recurrent layers.
        dropout: dropout value passed to the recurrent module.
        rnn: 'LSTM' or 'GRU'.

    Raises:
        ValueError: if ``rnn`` is neither 'LSTM' nor 'GRU'.
    '''
    def __init__(self, input_dim, out_dim, hidden_dim, num_layer, dropout, rnn):

        super().__init__()
        self.in_dim = input_dim
        self.out_dim = out_dim
        self.hid_dim = hidden_dim
        self.num_layer = num_layer
        self.dropout = dropout

        # batch_first=True: inputs are (batch, time step, feature).
        if rnn == 'LSTM':
            self.rnn = nn.LSTM(input_dim, hidden_dim, num_layer, batch_first=True, dropout=dropout)
        elif rnn == 'GRU':
            self.rnn = nn.GRU(input_dim, hidden_dim, num_layer, batch_first=True, dropout=dropout)
        else:
            # Fail at construction time instead of leaving self.rnn undefined
            # and crashing later inside forward().
            raise ValueError(f"rnn must be 'LSTM' or 'GRU', got {rnn!r}")

        # Linear read-out producing unbounded real values.
        self.linear = nn.Linear(hidden_dim, out_dim)

    def forward(self, batch_input):
        '''Return raw outputs of shape (batch, out_dim) for a (batch, seq, input_dim) input.'''
        out, _ = self.rnn(batch_input)          # (batch, seq, hidden_dim)
        out = self.linear(out[:, -1, :])        # last time step only -> (batch, out_dim)

        return out

## Setting

In [20]:
# Hyperparameters
n_epochs = 600
lr = 0.001
batch_size = 32
hidden_dim = 128
num_layer = 2
dropout = 0

rnn = 'LSTM' # 'LSTM' or 'GRU'

In [21]:
# Re-seed so that weight initialisation is the same on every run of this cell.
set_seed(seed)
# Define model and optimizer

# `classifier`, `optimizer` and `criterion` are read as globals by train_cls() and test().
classifier = RNN_Cls(input_dim, out_dim, hidden_dim, num_layer, dropout, rnn).to(device)
optimizer = optim.Adam(classifier.parameters(), lr=lr)

# Binary cross-entropy on probabilities; RNN_Cls already applies the sigmoid.
criterion = nn.BCELoss()
# criterion = nn.MSELoss()

# scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[600, 1000], gamma=0.4)

Random seed set as 55688


# Training

In [22]:
def train_cls(n_epochs, train_dataloader, test_dataloader, best_model_path, early_stopping_patience=30):
    """Train the global ``classifier`` with early stopping on validation loss.

    Reads the notebook globals ``classifier``, ``optimizer``, ``criterion`` and
    ``device``, and appends one value per epoch to the global lists
    ``train_losses_for_epochs`` and ``valid_losses_for_epochs``.

    Args:
        n_epochs: maximum number of epochs.
        train_dataloader: batches of (features, labels) used for optimisation.
        test_dataloader: batches of (features, labels) used for validation.
        best_model_path: file that receives the state dict whenever the
            validation loss improves.
        early_stopping_patience: number of consecutive epochs without
            improvement after which training stops.

    Returns:
        None. Progress is printed; the best weights are written to
        ``best_model_path``.
    """
    def run_epoch(dataloader, training):
        # One pass over `dataloader`; returns (mean batch loss, labels, scores).
        batch_losses, true_chunks, pred_chunks = [], [], []

        # Autograd is only needed while optimising; validation runs without it.
        with torch.set_grad_enabled(training):
            for batch_x, batch_y in dataloader:

                batch_x = batch_x.to(device)
                batch_y = batch_y.to(device)

                if training:
                    optimizer.zero_grad()

                # Drop only the trailing output axis, so that a final batch
                # holding a single sample stays 1-D instead of becoming 0-d.
                out = classifier(batch_x).squeeze(-1)
                loss = criterion(out, batch_y)

                if training:
                    loss.backward()
                    optimizer.step()

                batch_losses.append(loss.item())
                true_chunks.append(batch_y.cpu().numpy())
                pred_chunks.append(out.detach().cpu().numpy())

        # Join the per-batch pieces once per epoch.
        return np.mean(batch_losses), np.concatenate(true_chunks), np.concatenate(pred_chunks)

    def ranking_metrics(trues, preds):
        # Returns (ROC AUC, area under the precision-recall curve).
        precision, recall, _ = precision_recall_curve(trues, preds)
        fpr, tpr, _ = roc_curve(trues, preds)
        return auc(fpr, tpr), auc(recall, precision)

    # torch.save does not create missing parent directories.
    save_dir = os.path.dirname(best_model_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    # Early-stopping state
    best_loss = float('inf')
    early_stopping_counter = 0

    for epoch in tqdm(range(1, n_epochs + 1)):

        # Train
        classifier.train()
        train_loss, trues, preds = run_epoch(train_dataloader, training=True)
        roc_auc, aucpr = ranking_metrics(trues, preds)
        train_losses_for_epochs.append(train_loss) # Record Loss

        print(f'Epoch {epoch} train loss: {train_loss}, auc: {roc_auc}, aucpr: {aucpr}', end = '; ')

        # Validate
        classifier.eval()
        valid_loss, trues, preds = run_epoch(test_dataloader, training=False)
        roc_auc, aucpr = ranking_metrics(trues, preds)
        valid_losses_for_epochs.append(valid_loss) # Record Loss

        print(f'Epoch {epoch} valid loss: {valid_loss}, auc: {roc_auc}, aucpr: {aucpr}')

        if valid_loss < best_loss:

            best_loss = valid_loss
            early_stopping_counter = 0
            torch.save(classifier.state_dict(), best_model_path)
            print(f'Best model found! Loss: {valid_loss}')

        else:
            # Validation loss did not improve: count this epoch.
            early_stopping_counter += 1

            # Stop once the counter reaches the patience value.
            if early_stopping_counter >= early_stopping_patience:
                print('Early stopping triggered.')
                break


In [23]:
# For record loss
train_losses_for_epochs = []
validation_losses_for_epochs = []
valid_losses_for_epochs = []

# Save best model to ...
best_model_path = os.path.join(save_path, 'lte_HO_cls_RNN.pt')
print(best_model_path)

early_stopping_patience = 50

../model/lte_HO_cls_RNN.pt


In [24]:
# # Visualize several samples from the validation data
# sample_value = 2
# # samples = random.sample(split_time_test, sample_value)
# samples = [split_time_test[8], split_time_test[9]]

# fig, axs = plt.subplots(1, sample_value, figsize=(14, 2.5))

# # y_test
# # preds

# for i in range(sample_value):
#     true = [y_test1[i] for i in range(samples[i][0], samples[i][1])]
#     axs[i].plot(true, label='true')
#     prediction = [preds[i] for i in range(samples[i][0], samples[i][1])]
#     # prediction = [1 if preds[i] > 0.5 else 0  for i in range(samples[i][0], samples[i][1])]
#     axs[i].plot(prediction, label='pred')

# plt.legend()
# plt.show()

In [25]:
# Train the classifier; the held-out loader drives early stopping and checkpoint selection.
train_cls(n_epochs, train_dataloader1, test_dataloader1, best_model_path, early_stopping_patience)

  0%|          | 0/600 [00:00<?, ?it/s]

Epoch 1 train loss: 0.5220834463641927, auc: 0.7534707135451741, aucpr: 0.5501946054343847; Epoch 1 valid loss: 0.5473681467163206, auc: 0.7633205559653998, aucpr: 0.6172587018045068
Best model found! Loss: 0.5473681467163206
Epoch 2 train loss: 0.49576472008875866, auc: 0.7924494666800321, aucpr: 0.6100383987147024; Epoch 2 valid loss: 0.5165096078877257, auc: 0.788462156078287, aucpr: 0.6442502933053195
Best model found! Loss: 0.5165096078877257
Epoch 3 train loss: 0.48351824672400184, auc: 0.8087010709695576, aucpr: 0.6345529642429691; Epoch 3 valid loss: 0.500070560611924, auc: 0.8108908386622166, aucpr: 0.668208002847516
Best model found! Loss: 0.500070560611924
Epoch 4 train loss: 0.4790722151664825, auc: 0.8131517320604245, aucpr: 0.6409644077930694; Epoch 4 valid loss: 0.4914995712866442, auc: 0.8223694845323126, aucpr: 0.6860832924374562
Best model found! Loss: 0.4914995712866442
Epoch 5 train loss: 0.467615180087438, auc: 0.8261380817987884, aucpr: 0.6647945728975754; Epoch 5

In [26]:
# Test
def test(test_dataloader, threshold=0.5):
    """Evaluate the checkpoint stored at ``best_model_path`` on a dataloader.

    A fresh ``RNN_Cls`` is built from the current global hyperparameters
    (``input_dim``, ``out_dim``, ``hidden_dim``, ``num_layer``, ``dropout``,
    ``rnn``), so these must match the ones the checkpoint was trained with.
    Also reads the globals ``best_model_path``, ``criterion`` and ``device``.

    Args:
        test_dataloader: batches of (features, labels).
        threshold: scores strictly above this value count as positive for
            precision / recall / F1.

    Returns:
        ``(valid_loss, roc_auc, aucpr, precision, recall, f1)``.
    """
    best_model = RNN_Cls(input_dim, out_dim, hidden_dim, num_layer, dropout, rnn).to(device)
    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine.
    best_model.load_state_dict(torch.load(best_model_path, map_location=device))
    best_model.eval()

    valid_losses, true_chunks, pred_chunks = [], [], []

    with torch.no_grad():
        for batch_x, batch_y in test_dataloader:

            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            # Drop only the trailing output axis, so that a final batch holding
            # a single sample stays 1-D instead of becoming 0-d.
            out = best_model(batch_x).squeeze(-1)
            loss = criterion(out, batch_y)

            valid_losses.append(loss.item())
            true_chunks.append(batch_y.cpu().numpy())
            pred_chunks.append(out.cpu().numpy())

    trues = np.concatenate(true_chunks)
    preds = np.concatenate(pred_chunks)

    # Threshold-free ranking metrics
    precision, recall, _ = precision_recall_curve(trues, preds)
    aucpr = auc(recall, precision)
    fpr, tpr, _ = roc_curve(trues, preds)
    roc_auc = auc(fpr, tpr)

    # Hard decisions, computed once and shared by the three scores.
    hard_preds = (preds > threshold).astype(int)
    p = precision_score(trues, hard_preds)
    r = recall_score(trues, hard_preds)
    f1 = f1_score(trues, hard_preds)

    valid_loss = np.mean(valid_losses)

    print(f'valid loss {valid_loss}, roc_auc {roc_auc}, aucpr {aucpr}')

    return valid_loss, roc_auc, aucpr, p, r, f1
        

# Evaluate the saved checkpoint on the held-out loader and print all metrics.
# test(test_dataloader1)
_, roc_auc, aucpr, p, r, f1 = test(test_dataloader1)
print()
print(f'roc_auc: {roc_auc}, aucpr: {aucpr}, precision: {p}, recall: {r}, f1: {f1}')

valid loss 0.39721528113878957, roc_auc 0.8907789714415705, aucpr 0.8201003977242141

roc_auc: 0.8907789714415705, aucpr: 0.8201003977242141, precision: 0.7314088484468152, recall: 0.7454429165334187, f1: 0.738359201773836


# Others

## Grid Search

In [21]:
from IPython.display import display, clear_output
import itertools

# Grid-search space: every combination of lrs x hidden_dims x num_layers is trained.
n_epochs = 600
lrs = [0.001, 0.01, 0.1]
hidden_dims = [32, 64, 128]
num_layers = [1, 2]
dropout = 0

early_stopping_patience = 50
# NOTE: this rebinds the global `rnn`, which test() also reads when it rebuilds the model.
rnn = 'GRU'


In [23]:
# Grid search: train one classifier per (lr, hidden_dim, num_layer) combination
# and append its held-out metrics as one CSV row.
# NOTE(review): test_dataloader1 is used both for early stopping and for the
# reported metrics, so the numbers are not from an independent test set.
grid_results_path = 'lte_ho_cls_rnn.csv'
cols_out = ['lr','hidden_dim','num_layer', 'valid_loss','auc','aucpr', 'p', 'r', 'f1']

# The context manager closes the file even when the search is interrupted
# (e.g. KeyboardInterrupt), so rows already written are not lost.
with open(grid_results_path, 'w') as f_out:
    f_out.write(','.join(cols_out)+'\n')

    for lr, hidden_dim, num_layer in itertools.product(lrs, hidden_dims, num_layers):

        # Same initialisation seed for every configuration.
        set_seed(seed)

        # Model and optimizer (globals read by train_cls and test)
        classifier = RNN_Cls(input_dim, out_dim, hidden_dim, num_layer, dropout, rnn).to(device)
        optimizer = optim.Adam(classifier.parameters(), lr=lr)

        criterion = nn.BCELoss()

        # For record loss
        train_losses_for_epochs = []
        validation_losses_for_epochs = []
        valid_losses_for_epochs = []

        # Save best model to ... (the same file is overwritten by every
        # configuration; it is evaluated right after training)
        best_model_path = os.path.join(save_path, 'lte_HO_cls_RNN.pt')

        train_cls(n_epochs, train_dataloader1, test_dataloader1, best_model_path, early_stopping_patience)
        clear_output(wait=True)

        print(f'For learning_rate = {lr}, hidden_dim = {hidden_dim}, num_layer = {num_layer}.')
        valid_loss, roc_auc, aucpr, p, r, f1 = test(test_dataloader1)

        row = [lr, hidden_dim, num_layer, valid_loss, roc_auc, aucpr, p, r, f1]
        f_out.write(','.join(str(n) for n in row)+'\n')
        # Push each finished configuration to disk immediately.
        f_out.flush()

For learning_rate = 0.001, hidden_dim = 32, num_layer = 1.
valid loss 0.549439296380997, roc_auc 0.7551104475240914, aucpr 0.6032514706158869
Random seed set as 55688


  0%|          | 0/600 [00:00<?, ?it/s]

Epoch 1 train loss: 0.541000144682842, auc: 0.7118112640753881, aucpr: 0.48047472135696795; Epoch 1 valid loss: 0.5678944449090444, auc: 0.722749202159031, aucpr: 0.5656147478250108
Best model found! Loss: 0.5678944449090444
Epoch 2 train loss: 0.514731068915407, auc: 0.7666205275832194, aucpr: 0.5752750753710775; Epoch 2 valid loss: 0.5345588791103901, auc: 0.7709914587279697, aucpr: 0.6144170002258098
Best model found! Loss: 0.5345588791103901
Epoch 3 train loss: 0.49325764254889504, auc: 0.7984053932025881, aucpr: 0.6185327048338755; Epoch 3 valid loss: 0.5354040742046042, auc: 0.7819240720280954, aucpr: 0.6282306262857146
Epoch 4 train loss: 0.4835167354683925, auc: 0.8093598748551106, aucpr: 0.6334439065249375; Epoch 4 valid loss: 0.5254436214123103, auc: 0.7894912654082193, aucpr: 0.6352728463651925
Best model found! Loss: 0.5254436214123103
Epoch 5 train loss: 0.4731254353979983, auc: 0.8202287578174378, aucpr: 0.6513563960219914; Epoch 5 valid loss: 0.540102713319302, auc: 0.77

KeyboardInterrupt: 

## Forecast

In [24]:
# Hyperparameters
n_epochs = 600
lr = 0.001
batch_size = 32
hidden_dim = 128
num_layer = 2
dropout = 0

rnn = 'GRU' # 'LSTM' or 'GRU'

In [25]:
# Re-seed so that weight initialisation is the same on every run of this cell.
set_seed(seed)
# `forecaster` is read as a global by train_fst().
forecaster = RNN_Fst(input_dim, out_dim, hidden_dim, num_layer, dropout, rnn).to(device)
# NOTE: this rebinds the globals `optimizer` and `criterion` that train_cls() and test() also read.
optimizer = optim.Adam(forecaster.parameters(), lr=lr)

# Squared-error loss on the raw (unbounded) model output.
criterion = nn.MSELoss()

Random seed set as 55688


In [26]:
def train_fst(n_epochs, train_dataloader, test_dataloader, best_model_path, early_stopping_patience=30):
    """Train the global ``forecaster`` with early stopping on validation loss.

    Reads the notebook globals ``forecaster``, ``optimizer``, ``criterion`` and
    ``device``, and appends one value per epoch to the global lists
    ``train_losses_for_epochs`` and ``valid_losses_for_epochs``.

    Args:
        n_epochs: maximum number of epochs.
        train_dataloader: batches of (features, labels) used for optimisation.
        test_dataloader: batches of (features, labels) used for validation.
        best_model_path: file that receives the state dict whenever the
            validation loss improves.
        early_stopping_patience: number of consecutive epochs without
            improvement after which training stops.

    Returns:
        None. Progress is printed; the best weights are written to
        ``best_model_path``.
    """
    def rmse(predictions, targets):
        return torch.sqrt(F.mse_loss(predictions, targets))

    def mae(predictions, targets):
        return torch.mean(torch.abs(predictions - targets))

    def run_epoch(dataloader, training):
        # One pass over `dataloader`; returns (mean batch loss, labels, predictions).
        batch_losses, true_chunks, pred_chunks = [], [], []

        # Autograd is only needed while optimising; validation runs without it.
        with torch.set_grad_enabled(training):
            for batch_x, batch_y in dataloader:

                batch_x = batch_x.to(device)
                batch_y = batch_y.to(device)

                if training:
                    optimizer.zero_grad()

                # Drop only the trailing output axis, so that a final batch
                # holding a single sample stays 1-D instead of becoming 0-d.
                out = forecaster(batch_x).squeeze(-1)
                loss = criterion(out, batch_y)

                if training:
                    loss.backward()
                    optimizer.step()

                batch_losses.append(loss.item())
                true_chunks.append(batch_y)
                pred_chunks.append(out.detach())

        # Join the per-batch pieces once per epoch; an empty loader yields
        # empty tensors.
        if true_chunks:
            trues = torch.cat(true_chunks, dim=0)
            preds = torch.cat(pred_chunks, dim=0)
        else:
            trues = torch.tensor([]).to(device)
            preds = torch.tensor([]).to(device)

        return np.mean(batch_losses), trues, preds

    # Early-stopping state
    best_loss = float('inf')
    early_stopping_counter = 0

    for epoch in range(1, n_epochs + 1):

        # Train
        forecaster.train()
        train_loss, trues, preds = run_epoch(train_dataloader, training=True)
        train_losses_for_epochs.append(train_loss) # Record Loss

        rmse_error = rmse(preds, trues)
        mae_error = mae(preds, trues)

        print(f'Epoch {epoch} train loss: {train_loss}, rmse: {rmse_error}, mae: {mae_error}', end = '; ')

        # Validate
        forecaster.eval()
        valid_loss, trues, preds = run_epoch(test_dataloader, training=False)
        valid_losses_for_epochs.append(valid_loss) # Record Loss

        rmse_error = rmse(preds, trues)
        mae_error = mae(preds, trues)

        print(f'Epoch {epoch} valid loss: {valid_loss}, rmse: {rmse_error}, mae: {mae_error}')

        if valid_loss < best_loss:

            best_loss = valid_loss
            early_stopping_counter = 0
            torch.save(forecaster.state_dict(), best_model_path)
            print(f'Best model found! Loss: {valid_loss}')

        else:
            # Validation loss did not improve: count this epoch.
            early_stopping_counter += 1

            # Stop once the counter reaches the patience value.
            if early_stopping_counter >= early_stopping_patience:
                print('Early stopping triggered.')
                break


In [27]:
# For record loss
train_losses_for_epochs = []
validation_losses_for_epochs = []
valid_losses_for_epochs = []

# Save best model to ... 
best_model_path = os.path.join(save_path, 'lte_HO_fst_RNN.pt')

early_stopping_patience = 50

In [28]:
# Train the forecaster on the filtered loaders (windows with a positive y_*2 label only).
train_fst(n_epochs, train_dataloader2, test_dataloader2, best_model_path, early_stopping_patience)

Epoch 1 train loss: 7.177642323063538, rmse: 2.6792397499084473, mae: 2.269796133041382; Epoch 1 valid loss: 6.251005211654975, rmse: 2.50063419342041, mae: 2.120218515396118
Best model found! Loss: 6.251005211654975
Epoch 2 train loss: 6.235113834912798, rmse: 2.4970955848693848, mae: 2.0826523303985596; Epoch 2 valid loss: 5.995686071259635, rmse: 2.4488203525543213, mae: 2.076934576034546
Best model found! Loss: 5.995686071259635
Epoch 3 train loss: 6.041770482485274, rmse: 2.4580719470977783, mae: 2.032453775405884; Epoch 3 valid loss: 5.85856412381542, rmse: 2.420503854751587, mae: 2.0303750038146973
Best model found! Loss: 5.85856412381542
Epoch 4 train loss: 5.9665968686078505, rmse: 2.4427034854888916, mae: 2.0079736709594727; Epoch 4 valid loss: 5.812350878910142, rmse: 2.410991668701172, mae: 2.02079701423645
Best model found! Loss: 5.812350878910142
Epoch 5 train loss: 5.937625103503202, rmse: 2.436760425567627, mae: 2.0041685104370117; Epoch 5 valid loss: 5.8309624462711565

In [29]:
# Test
def rmse(predictions, targets):
    """Root-mean-square error between two tensors, returned as a scalar tensor."""
    mean_squared = F.mse_loss(predictions, targets)
    return mean_squared.sqrt()

def mae(predictions, targets):
    """Mean absolute error between two tensors, returned as a scalar tensor."""
    residual = predictions - targets
    return residual.abs().mean()

def test2(test_dataloader):
    """Evaluate the forecaster checkpoint stored at ``best_model_path``.

    A fresh ``RNN_Fst`` is built from the current global hyperparameters
    (``input_dim``, ``out_dim``, ``hidden_dim``, ``num_layer``, ``dropout``,
    ``rnn``), so these must match the ones the checkpoint was trained with.
    Also reads the globals ``best_model_path``, ``criterion`` and ``device``
    and the module-level ``rmse`` / ``mae`` helpers.

    Args:
        test_dataloader: batches of (features, labels).

    Returns:
        ``(valid_loss, rmse, mae)`` as Python floats.
    """
    best_model = RNN_Fst(input_dim, out_dim, hidden_dim, num_layer, dropout, rnn).to(device)
    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine.
    best_model.load_state_dict(torch.load(best_model_path, map_location=device))
    best_model.eval()

    valid_losses, true_chunks, pred_chunks = [], [], []

    with torch.no_grad():
        for batch_x, batch_y in test_dataloader:

            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            # Drop only the trailing output axis, so that a final batch holding
            # a single sample stays 1-D instead of becoming 0-d.
            out = best_model(batch_x).squeeze(-1)
            loss = criterion(out, batch_y)

            valid_losses.append(loss.item())
            true_chunks.append(batch_y)
            pred_chunks.append(out)

    # Join the per-batch pieces once; an empty loader yields empty tensors.
    if true_chunks:
        trues = torch.cat(true_chunks, dim=0)
        preds = torch.cat(pred_chunks, dim=0)
    else:
        trues = torch.tensor([]).to(device)
        preds = torch.tensor([]).to(device)

    valid_loss = np.mean(valid_losses)
    rmse_error = rmse(preds, trues)
    mae_error = mae(preds, trues)

    print(f'valid loss {valid_loss}, rmse {rmse_error}, mae {mae_error}')

    return valid_loss, rmse_error.item(), mae_error.item()
        
# Evaluate the saved forecaster on the filtered held-out loader; the returned tuple is the cell output.
test2(test_dataloader2)

valid loss 5.170142305140593, rmse 2.2744812965393066, mae 1.780599594116211


(5.170142305140593, 2.2744812965393066, 1.780599594116211)

In [31]:
# Save model
# NOTE(review): this writes the *current* weights of `classifier` to the same file that
# train_cls() uses for its best-validation checkpoint ('lte_HO_cls_RNN.pt'), replacing it.
# It also rebinds the global best_model_path (previously set to the forecaster checkpoint).
# Confirm that overwriting the early-stopping checkpoint is intended.
save_path = "../model"
best_model_path = os.path.join(save_path, 'lte_HO_cls_RNN.pt')
torch.save(classifier.state_dict(), best_model_path)

In [None]:
# Load model
# m_path = os.path.join('/home/wmnlab/Documents/sheng-ru/model', 'lte_HO_cls_RNN.pt')
# classifier = RNN_Cls(input_dim, out_dim, hidden_dim, num_layer, dropout, rnn)
# classifier.load_state_dict(torch.load(m_path))