## This module is used for testing the trained decision tree (DT) models on the test data

In [1]:
import os
import IPython
import IPython.display
import matplotlib.pyplot as plt
import numpy as np
from sklearn import svm
import pickle
import matplotlib.pyplot as plt
import numpy as np
from sklearn import metrics
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Evaluate each saved decision-tree model (one per rogue-wave training
# proportion) on a single fixed test set. For every model this prints the
# confusion matrix, precision, recall and F1 score, writes them to a text
# file, and saves an annotated confusion-matrix heatmap as a JPEG.
batch_size = 64
relative_rw = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
test_accuracies = []  # overall accuracy of each model, in relative_rw order

# Every model is scored against the same test file, so it is loaded once up
# front instead of on each pass through the loop.
file_str_test = "RWs_H_g_2_tadv_5min_rw_0.5"
with np.load(file_str_test + ".npz") as data_test:
    # List the arrays stored in the archive.
    for key in data_test:
        print(key)

    wave_data_train = data_test["wave_data_train"]
    wave_data_test = data_test["wave_data_test"]
    label_train = data_test["label_train"]
    label_test = data_test["label_test"]

print(wave_data_train.shape)
print(wave_data_test.shape)

# Flatten the two trailing axes so each test sample is a single feature row.
x_test = wave_data_test.reshape(
    (wave_data_test.shape[0], wave_data_test.shape[1] * wave_data_test.shape[2])
)

base_dir = os.getcwd()
model_dir = os.path.join(base_dir, "model_saves_dt")
metrics_dir = os.path.join(base_dir, "metric_saves_dt")
figure_dir = os.path.join(base_dir, "confusion_matrices_dt")

# Make sure the output folders exist so the writes below cannot fail on a
# fresh checkout.
os.makedirs(metrics_dir, exist_ok=True)
os.makedirs(figure_dir, exist_ok=True)

# Cell annotations and tick labels are identical for every model.
# confusion_matrix() orders cells row-major by (true label, predicted label),
# so the diagonal entries (first and last) are the correct predictions.
group_names = ['Correctly predicted', 'Incorrectly predicted',
               'Incorrectly predicted', 'Correctly predicted']
yaxislabels = ['Rogue waves absent', 'Rogue waves present']
xaxislabels = ['Predicted as absent', 'Predicted as present']

for rw in relative_rw:
    file_str = f"RWs_H_g_2_tadv_5min_rw_{rw}"

    dt_save_name = os.path.join(model_dir, "best_model_" + file_str + ".pkl")
    metrics_save_name = os.path.join(metrics_dir, file_str + ".txt")

    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. Only load model files that were produced by a trusted training run.
    with open(dt_save_name, 'rb') as f:
        clf = pickle.load(f)

    label_pred = clf.predict(x_test)

    # Compute each metric once and reuse it for both the console and the file.
    confusion_matrix = metrics.confusion_matrix(label_test, label_pred)
    precision = metrics.precision_score(label_test, label_pred)
    recall = metrics.recall_score(label_test, label_pred)
    f1 = metrics.f1_score(label_test, label_pred)
    test_accuracies.append(metrics.accuracy_score(label_test, label_pred))

    print('Confusion matrix')
    print(confusion_matrix)
    print('---------------')
    print('Precision:', precision)
    print('Recall:', recall)
    print('F1 Score:', f1)

    lines = ['Confusion matrix\n', f"{confusion_matrix}\n", "---------------\n",
             f" Precision:, {precision}\n",
             f" Recall:, {recall}\n",
             f" F1 Score:, {f1}\n"]

    with open(metrics_save_name, "w") as f:
        f.writelines(lines)

    # Annotate every heatmap cell with its name, raw count and share of all
    # test samples.
    flat_counts = confusion_matrix.flatten()
    group_counts = [f"{value:0.0f}" for value in flat_counts]
    group_percentages = [f"{value:.2%}"
                         for value in flat_counts / np.sum(confusion_matrix)]
    labels = [f"{v1}\n{v2}\n{v3}" for v1, v2, v3 in
              zip(group_names, group_counts, group_percentages)]
    labels = np.asarray(labels).reshape(2, 2)

    plt.figure(figsize=[6, 6])
    s = sns.heatmap(confusion_matrix, annot=labels, yticklabels=yaxislabels,
                    xticklabels=xaxislabels, fmt='', cmap='Blues')
    s.set_xlabel("Predicted label", fontsize=10)
    s.set_ylabel("True label", fontsize=10)
    filename = os.path.join(figure_dir, file_str + '.jpg')
    plt.savefig(filename, dpi=199)

**The same file can be used for testing of the trained DT models for the different scenarios.**

### **Sample confusion matrices for the forecasting process of the DT model, using different proportions of rogue wave samples in the training process, are provided here.**

<table>
  <tr>
    <td><img src = "lw_dt_RWs_H_g_2_tadv_5min_rw_0.4.jpg" width="400"> </td>
    <td><img src = "lw_dt_RWs_H_g_2_tadv_5min_rw_0.5.jpg" width="400"> </td>
    <td><img src = "lw_dt_RWs_H_g_2_tadv_5min_rw_0.6.jpg" width="400"> </td>
  </tr>
</table>

- **The confusion matrix plots here display the rogue wave forecasting efficiency of the DT model in a specific scenario where $t_{advance}$=5 minutes and $t_{training}$=20 minutes.**
- **The plots represent the forecasting results when different proportions of rogue wave windows are utilized in the training process.**
- **It can be observed that as the proportion of rogue wave examples in the training data increases ($rw_{prop}$=0.6), better predictions can be attained for the rogue wave instances. However, the trained model in this case does not perform well for non rogue-wave instances. For lower proportions ($rw_{prop}$=0.4), better predictions are observed for the non rogue-wave instances.**
- **The best overall accuracy is thus observed for equal proportions of rogue wave and non rogue-wave instances in the training process (51%). However, if prediction of rogue waves is more critical, we can opt for a model trained with a larger number of rogue wave training examples.**