In [1]:
import os
import pandas as pd
import numpy as np

def calculate_F1_metrics(file, threshold=None):
    """Count the 2x2 contingency-table entries for one result CSV.

    A value is treated as an event when it is ``>= threshold`` and as a
    non-event when it is ``< threshold``. Rows where either value is NaN
    satisfy neither comparison and are therefore not counted in any cell.

    Parameters
    ----------
    file : str, path object or file-like
        Anything accepted by ``pandas.read_csv``. The table must contain the
        columns ``"Groud_Truth"`` (spelled exactly like that) and
        ``"Prediction"``.
    threshold : float, optional
        Event cut-off. When omitted, the module-level
        ``ALGAL_BLOOM_THRESHOLD`` is looked up at call time, which keeps
        existing one-argument calls working unchanged.

    Returns
    -------
    tuple
        ``(true_positive, false_positive, true_negative, false_negative)``
        as numpy integers. Note the order: true negatives come before
        false negatives.

    Raises
    ------
    KeyError
        If one of the two required columns is missing.
    NameError
        If ``threshold`` is omitted and ``ALGAL_BLOOM_THRESHOLD`` has not
        been defined yet.
    """
    if threshold is None:
        # Fall back to the notebook-wide constant (resolved lazily, so it may
        # be defined in a later cell as long as that cell ran before the call).
        threshold = ALGAL_BLOOM_THRESHOLD

    data = pd.read_csv(file)
    ground_truth = data.loc[:, "Groud_Truth"].to_numpy()
    prediction = data.loc[:, "Prediction"].to_numpy()

    # Build every mask once. The "non-event" masks are computed with an
    # explicit `<` rather than by negating the event masks so that NaN values
    # keep falling outside all four categories.
    observed_event = ground_truth >= threshold
    observed_nonevent = ground_truth < threshold
    predicted_event = prediction >= threshold
    predicted_nonevent = prediction < threshold

    # a (hits)
    true_positive = np.sum(np.logical_and(observed_event, predicted_event))

    # b (false alarms / false positives)
    false_positive = np.sum(np.logical_and(observed_nonevent, predicted_event))

    # c (misses)
    false_negative = np.sum(np.logical_and(observed_event, predicted_nonevent))

    # d (correct non-events)
    true_negative = np.sum(np.logical_and(observed_nonevent, predicted_nonevent))

    return true_positive, false_positive, true_negative, false_negative



In [14]:
# Cut-off read by calculate_F1_metrics: ground-truth / predicted values that
# are >= this number are counted as events, values below it as non-events.
# NOTE(review): the unit and the origin of 40.3 are not stated in this
# notebook — confirm and record them here.
ALGAL_BLOOM_THRESHOLD = 40.3

In [15]:
# Contingency-table scores for the RFR predictions.
# Other result folders mentioned in this notebook: data/Result/SVR,
# data/Result/LSTM (WA-LSTM) and data/Result/Wavelet_ANN (WA-BPNN).
print('For RFR result')
folder = "data/Result/RFR"
dataset = "NRE"  # either "NRE" or "Scripps Pier"

if __name__ == "__main__":
    assert dataset in ["NRE", "Scripps Pier"], "dataset should be NRE or Scripps Pier!"

    # NRE files are picked by "Station" in the file name, everything else by
    # "Scripps".
    file_list = [
        file
        for file in os.listdir(folder)
        if ("Station" if dataset == "NRE" else "Scripps") in file
    ]

    # Pool the four counts over every selected file.
    true_positive = false_positive = true_negative = false_negative = 0
    for file in file_list:
        result = calculate_F1_metrics(os.path.join(folder, file))
        true_positive = true_positive + result[0]    # a (hits)
        false_positive = false_positive + result[1]  # b (false alarms)
        true_negative = true_negative + result[2]    # d (correct non-events)
        false_negative = false_negative + result[3]  # c (misses)

    # POD = a / (a + c)
    POD = true_positive / (true_positive + false_negative)
    # POFD = b / (b + d)
    POFD = false_positive / (false_positive + true_negative)

    # Raw b and d, one per line (debug output kept as in the original run).
    print(false_positive, true_negative, sep="\n")

    # B = (a + b) / (a + c)
    B = (true_positive + false_positive) / (true_positive + false_negative)
    # PSS = POD - POFD
    PSS = POD - POFD
    # F1 = 2a / (2a + b + c)
    F1 = (2 * true_positive) / (2 * true_positive + false_positive + false_negative)

    print(f"true_positive: {true_positive}\nfalse_positive: {false_positive}\nfalse_negative: {false_negative}\ntrue_negative: {true_negative}")
    print(f"POD: {POD}\nPOFD: {POFD}\nB: {B}\nPSS: {PSS}\nF1: {F1}")
    

For RFR result
0
791
true_positive: 0
false_positive: 0
false_negative: 27
true_negative: 791
POD: 0.0
POFD: 0.0
B: 0.0
PSS: 0.0
F1: 0.0


In [16]:
# Contingency-table scores for the SVR predictions.
# Other result folders mentioned in this notebook: data/Result/RFR,
# data/Result/LSTM (WA-LSTM) and data/Result/Wavelet_ANN (WA-BPNN).
print('For SVR result')
folder = "data/Result/SVR"
dataset = "NRE"  # either "NRE" or "Scripps Pier"

if __name__ == "__main__":
    assert dataset in ["NRE", "Scripps Pier"], "dataset should be NRE or Scripps Pier!"

    # NRE files are picked by "Station" in the file name, everything else by
    # "Scripps".
    file_list = [
        file
        for file in os.listdir(folder)
        if ("Station" if dataset == "NRE" else "Scripps") in file
    ]

    # Pool the four counts over every selected file.
    true_positive = false_positive = true_negative = false_negative = 0
    for file in file_list:
        result = calculate_F1_metrics(os.path.join(folder, file))
        true_positive = true_positive + result[0]    # a (hits)
        false_positive = false_positive + result[1]  # b (false alarms)
        true_negative = true_negative + result[2]    # d (correct non-events)
        false_negative = false_negative + result[3]  # c (misses)

    # POD = a / (a + c)
    POD = true_positive / (true_positive + false_negative)
    # POFD = b / (b + d)
    POFD = false_positive / (false_positive + true_negative)
    # B = (a + b) / (a + c)
    B = (true_positive + false_positive) / (true_positive + false_negative)
    # PSS = POD - POFD
    PSS = POD - POFD
    # F1 = 2a / (2a + b + c)
    F1 = (2 * true_positive) / (2 * true_positive + false_positive + false_negative)

    print(f"true_positive: {true_positive}\nfalse_positive: {false_positive}\nfalse_negative: {false_negative}\ntrue_negative: {true_negative}")
    print(f"POD: {POD}\nPOFD: {POFD}\nB: {B}\nPSS: {PSS}\nF1: {F1}")
    

For SVR result
true_positive: 0
false_positive: 0
false_negative: 27
true_negative: 791
POD: 0.0
POFD: 0.0
B: 0.0
PSS: 0.0
F1: 0.0


In [17]:
# Contingency-table scores for the WA-LSTM predictions.
# Other result folders mentioned in this notebook: data/Result/RFR,
# data/Result/SVR and data/Result/Wavelet_ANN (WA-BPNN).
print('For WA-LSTM result')
folder = "data/Result/LSTM"
dataset = "NRE"  # either "NRE" or "Scripps Pier"

if __name__ == "__main__":
    assert dataset in ["NRE", "Scripps Pier"], "dataset should be NRE or Scripps Pier!"

    # NRE files are picked by "Station" in the file name, everything else by
    # "Scripps".
    file_list = [
        file
        for file in os.listdir(folder)
        if ("Station" if dataset == "NRE" else "Scripps") in file
    ]

    # Pool the four counts over every selected file.
    true_positive = false_positive = true_negative = false_negative = 0
    for file in file_list:
        result = calculate_F1_metrics(os.path.join(folder, file))
        true_positive = true_positive + result[0]    # a (hits)
        false_positive = false_positive + result[1]  # b (false alarms)
        true_negative = true_negative + result[2]    # d (correct non-events)
        false_negative = false_negative + result[3]  # c (misses)

    # POD = a / (a + c)
    POD = true_positive / (true_positive + false_negative)
    # POFD = b / (b + d)
    POFD = false_positive / (false_positive + true_negative)
    # B = (a + b) / (a + c)
    B = (true_positive + false_positive) / (true_positive + false_negative)
    # PSS = POD - POFD
    PSS = POD - POFD
    # F1 = 2a / (2a + b + c)
    F1 = (2 * true_positive) / (2 * true_positive + false_positive + false_negative)

    print(f"true_positive: {true_positive}\nfalse_positive: {false_positive}\nfalse_negative: {false_negative}\ntrue_negative: {true_negative}")
    print(f"POD: {POD}\nPOFD: {POFD}\nB: {B}\nPSS: {PSS}\nF1: {F1}")
    

For WA-LSTM result
true_positive: 9
false_positive: 5
false_negative: 16
true_negative: 910
POD: 0.36
POFD: 0.00546448087431694
B: 0.56
PSS: 0.35453551912568304
F1: 0.46153846153846156


In [18]:
# Contingency-table scores for the predictions stored in
# data/Result/Only_Wavelet_ANN.
# NOTE(review): the label printed below is identical to the one printed by
# the cell that reads data/Result/Wavelet_ANN — confirm which model variant
# this folder holds and whether the label should differ.
print('For WA-BPNN result')
folder = "data/Result/Only_Wavelet_ANN"
dataset = "NRE"  # either "NRE" or "Scripps Pier"

if __name__ == "__main__":
    assert dataset in ["NRE", "Scripps Pier"], "dataset should be NRE or Scripps Pier!"

    # NRE files are picked by "Station" in the file name, everything else by
    # "Scripps".
    file_list = [
        file
        for file in os.listdir(folder)
        if ("Station" if dataset == "NRE" else "Scripps") in file
    ]

    # Pool the four counts over every selected file.
    true_positive = false_positive = true_negative = false_negative = 0
    for file in file_list:
        result = calculate_F1_metrics(os.path.join(folder, file))
        true_positive = true_positive + result[0]    # a (hits)
        false_positive = false_positive + result[1]  # b (false alarms)
        true_negative = true_negative + result[2]    # d (correct non-events)
        false_negative = false_negative + result[3]  # c (misses)

    # POD = a / (a + c)
    POD = true_positive / (true_positive + false_negative)
    # POFD = b / (b + d)
    POFD = false_positive / (false_positive + true_negative)
    # B = (a + b) / (a + c)
    B = (true_positive + false_positive) / (true_positive + false_negative)
    # PSS = POD - POFD
    PSS = POD - POFD
    # F1 = 2a / (2a + b + c)
    F1 = (2 * true_positive) / (2 * true_positive + false_positive + false_negative)

    print(f"true_positive: {true_positive}\nfalse_positive: {false_positive}\nfalse_negative: {false_negative}\ntrue_negative: {true_negative}")
    print(f"POD: {POD}\nPOFD: {POFD}\nB: {B}\nPSS: {PSS}\nF1: {F1}")
    

For WA-BPNN result
true_positive: 16
false_positive: 7
false_negative: 9
true_negative: 908
POD: 0.64
POFD: 0.007650273224043716
B: 0.92
PSS: 0.6323497267759562
F1: 0.6666666666666666


In [19]:
# Contingency-table scores for the predictions stored in
# data/Result/Wavelet_ANN.
# NOTE(review): the cell that reads data/Result/Only_Wavelet_ANN prints this
# same 'For WA-BPNN result' label — confirm the two outputs are not confused.
print('For WA-BPNN result')
folder = "data/Result/Wavelet_ANN"
dataset = "NRE"  # either "NRE" or "Scripps Pier"

if __name__ == "__main__":
    assert dataset in ["NRE", "Scripps Pier"], "dataset should be NRE or Scripps Pier!"

    # NRE files are picked by "Station" in the file name, everything else by
    # "Scripps".
    file_list = [
        file
        for file in os.listdir(folder)
        if ("Station" if dataset == "NRE" else "Scripps") in file
    ]

    # Pool the four counts over every selected file.
    true_positive = false_positive = true_negative = false_negative = 0
    for file in file_list:
        result = calculate_F1_metrics(os.path.join(folder, file))
        true_positive = true_positive + result[0]    # a (hits)
        false_positive = false_positive + result[1]  # b (false alarms)
        true_negative = true_negative + result[2]    # d (correct non-events)
        false_negative = false_negative + result[3]  # c (misses)

    # POD = a / (a + c)
    POD = true_positive / (true_positive + false_negative)
    # POFD = b / (b + d)
    POFD = false_positive / (false_positive + true_negative)
    # B = (a + b) / (a + c)
    B = (true_positive + false_positive) / (true_positive + false_negative)
    # PSS = POD - POFD
    PSS = POD - POFD
    # F1 = 2a / (2a + b + c)
    F1 = (2 * true_positive) / (2 * true_positive + false_positive + false_negative)

    print(f"true_positive: {true_positive}\nfalse_positive: {false_positive}\nfalse_negative: {false_negative}\ntrue_negative: {true_negative}")
    print(f"POD: {POD}\nPOFD: {POFD}\nB: {B}\nPSS: {PSS}\nF1: {F1}")
    

For WA-BPNN result
true_positive: 16
false_positive: 6
false_negative: 9
true_negative: 909
POD: 0.64
POFD: 0.006557377049180328
B: 0.88
PSS: 0.6334426229508197
F1: 0.6808510638297872
