Uji Statistik antara LSTM, GRU, dan TCNN – Versi yang Memakai Walk-Forward

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import ttest_rel
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the saved ground truth and the predictions of every model.
# NOTE(review): assumes the four arrays describe the same test samples in the
# same order -- confirm against the scripts that wrote these files.
y_true, y_pred_gru, y_pred_lstm, y_pred_tcnn = (
    np.load(npy_path)
    for npy_path in (
        "y_true.npy",
        "y_pred_gru.npy",
        "y_pred_lstm.npy",
        "y_pred_tcnn.npy",
    )
)

# Compute the summary metrics plus the standard error of the MAE
def summarize(label, y_true, y_pred):
    """Print MAE, RMSE and R² for one model and return its absolute errors.

    Args:
        label: Model name shown as the heading of the printed report.
        y_true: Ground-truth values (array-like).
        y_pred: Predicted values holding as many elements as ``y_true``.

    Returns:
        1-D NumPy array with the absolute error ``|y_true - y_pred|`` of
        every element.

    Raises:
        ValueError: If ``y_true`` and ``y_pred`` hold a different number of
            elements.
    """
    truth = np.asarray(y_true)
    preds = np.asarray(y_pred)
    if truth.size != preds.size:
        raise ValueError(
            f"y_true has {truth.size} elements but y_pred has {preds.size}"
        )

    # Flatten before subtracting: an (N, 1) array minus an (N,) array would
    # otherwise broadcast into an (N, N) matrix of meaningless "errors".
    errors = np.abs(truth.ravel() - preds.ravel())

    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r2 = r2_score(y_true, y_pred)

    # The standard error of the mean is based on the *sample* standard
    # deviation (ddof=1); it is undefined for fewer than two observations.
    n_obs = errors.size
    stderr = np.std(errors, ddof=1) / np.sqrt(n_obs) if n_obs > 1 else float("nan")

    print(f"📊 {label}")
    print(f"  MAE  : {mae:.4f} ± {stderr:.4f}")
    print(f"  RMSE : {rmse:.4f}")
    print(f"  R²    : {r2:.4f}")
    print("-" * 30)
    return errors

# Print the metric summary of each model and keep its absolute errors
err_gru, err_lstm, err_tcnn = (
    summarize(model_tag, y_true, model_preds)
    for model_tag, model_preds in (
        ("GRU", y_pred_gru),
        ("LSTM", y_pred_lstm),
        ("TCNN", y_pred_tcnn),
    )
)

# Paired t-test between the errors of two models
def compare_models(err1, err2, name1, name2, alpha=0.05):
    """Run a paired t-test on two error samples and print the verdict.

    Args:
        err1: Errors of the first model.
        err2: Errors of the second model, paired element-wise with ``err1``.
        name1: Display name of the first model.
        name2: Display name of the second model.
        alpha: Significance level the p-value is compared against.

    Returns:
        Tuple ``(t_stat, p_val)`` as returned by ``scipy.stats.ttest_rel``.
    """
    t_stat, p_val = ttest_rel(err1, err2)
    print(f"🔍 Paired t-test {name1} vs {name2}:")
    print(f"  t-statistic = {t_stat:.4f}")
    print(f"  p-value     = {p_val:.4f}")
    # A NaN p-value compares False here and is therefore reported as
    # "not significant".
    if p_val < alpha:
        print(f"  ✅ SIGNIFIKAN: {name1} ≠ {name2} (p < {alpha:g})")
    else:
        print(f"  ❌ TIDAK signifikan: {name1} ≈ {name2} (p ≥ {alpha:g})")
    print("-" * 50)
    return t_stat, p_val

# Run the paired t-test for every pair of models
for (name_a, errs_a), (name_b, errs_b) in (
    (("GRU", err_gru), ("LSTM", err_lstm)),
    (("GRU", err_gru), ("TCNN", err_tcnn)),
    (("LSTM", err_lstm), ("TCNN", err_tcnn)),
):
    compare_models(errs_a, errs_b, name_a, name_b)

# 🔍 Boxplot error comparison
plt.figure(figsize=(8, 5))
plt.boxplot([err_gru, err_lstm, err_tcnn])
# Name the boxes through xticks instead of boxplot's `labels=` keyword, which
# Matplotlib 3.9 deprecated in favour of `tick_labels`; xticks works on both
# old and new releases. Boxes are drawn at x = 1..N by default.
plt.xticks([1, 2, 3], ["GRU", "LSTM", "TCNN"])
plt.title("Absolute Error Distribution per Model")
plt.ylabel("Absolute Error")
plt.grid(True)
plt.show()

# 📈 Histogram of pairwise error differences
plt.figure(figsize=(14, 4))
# One panel per model pair, each showing the distribution of the element-wise
# difference between the two models' absolute errors.
for panel, (panel_title, errs_a, errs_b) in enumerate(
    (
        ("GRU - LSTM Error Diff", err_gru, err_lstm),
        ("GRU - TCNN Error Diff", err_gru, err_tcnn),
        ("LSTM - TCNN Error Diff", err_lstm, err_tcnn),
    ),
    start=1,
):
    plt.subplot(1, 3, panel)
    plt.hist(errs_a - errs_b, bins=30, color='gray')
    plt.title(panel_title)

plt.tight_layout()
plt.show()

# 💡 Optional: Cohen’s d for effect size
def cohens_d(x, y):
    """Return Cohen's d for paired samples.

    The effect size is the mean of the element-wise differences ``x - y``
    divided by their sample standard deviation (``ddof=1``).

    Args:
        x: First sample (array-like of numbers).
        y: Second sample, paired element-wise with ``x``.

    Returns:
        The effect size as a float. ``nan`` when fewer than two differences
        exist or when every difference is zero; ``+inf`` / ``-inf`` when the
        differences are constant but non-zero.
    """
    # np.asarray lets plain lists/tuples be passed as well as ndarrays.
    diff = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    if diff.size < 2:
        # The sample standard deviation is undefined for n < 2.
        return float("nan")

    mean_diff = diff.mean()
    sd_diff = diff.std(ddof=1)
    if sd_diff == 0:
        # Same values a raw division would give, without the RuntimeWarning.
        if mean_diff == 0:
            return float("nan")
        return float(np.copysign(np.inf, mean_diff))
    return float(mean_diff / sd_diff)

# Report the paired effect size for every pair of models.
# The header string previously contained mis-decoded (mojibake) characters.
print("📏 Effect Size (Cohen’s d):")
for pair_label, errs_a, errs_b in (
    ("GRU vs LSTM", err_gru, err_lstm),
    ("GRU vs TCNN", err_gru, err_tcnn),
    ("LSTM vs TCNN", err_lstm, err_tcnn),
):
    # Left-pad the label to 12 characters so the colons line up.
    print(f"  {pair_label:<12}: {cohens_d(errs_a, errs_b):.4f}")


Uji Statistik antara LSTM, GRU, dan TCNN – Versi yang Memakai Expanding Window

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import ttest_rel
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the saved predictions of every model (expanding window, step=5).
# NOTE(review): the ground truth is read from the LSTM run's file and reused
# for GRU and TCNN -- presumably all three runs share the same test targets;
# confirm against the scripts that wrote these files.
y_true, y_pred_lstm, y_pred_gru, y_pred_tcnn = (
    np.load(npy_path)
    for npy_path in (
        "y_true_lstm_step5.npy",
        "y_pred_lstm_step5.npy",
        "y_pred_gru_step5.npy",
        "y_pred_tcnn_step5.npy",
    )
)

# Compute the summary metrics plus the standard error of the MAE
def summarize(label, y_true, y_pred):
    """Print MAE, RMSE and R² for one model and return its absolute errors.

    Args:
        label: Model name shown as the heading of the printed report.
        y_true: Ground-truth values (array-like).
        y_pred: Predicted values holding as many elements as ``y_true``.

    Returns:
        1-D NumPy array with the absolute error ``|y_true - y_pred|`` of
        every element.

    Raises:
        ValueError: If ``y_true`` and ``y_pred`` hold a different number of
            elements.
    """
    truth = np.asarray(y_true)
    preds = np.asarray(y_pred)
    if truth.size != preds.size:
        raise ValueError(
            f"y_true has {truth.size} elements but y_pred has {preds.size}"
        )

    # Flatten before subtracting: an (N, 1) array minus an (N,) array would
    # otherwise broadcast into an (N, N) matrix of meaningless "errors".
    errors = np.abs(truth.ravel() - preds.ravel())

    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r2 = r2_score(y_true, y_pred)

    # The standard error of the mean is based on the *sample* standard
    # deviation (ddof=1); it is undefined for fewer than two observations.
    n_obs = errors.size
    stderr = np.std(errors, ddof=1) / np.sqrt(n_obs) if n_obs > 1 else float("nan")

    print(f"📊 {label}")
    print(f"  MAE  : {mae:.4f} ± {stderr:.4f}")
    print(f"  RMSE : {rmse:.4f}")
    print(f"  R²    : {r2:.4f}")
    print("-" * 30)
    return errors

# Print the metric summary of each model and keep its absolute errors
err_gru, err_lstm, err_tcnn = (
    summarize(model_tag, y_true, model_preds)
    for model_tag, model_preds in (
        ("GRU", y_pred_gru),
        ("LSTM", y_pred_lstm),
        ("TCNN", y_pred_tcnn),
    )
)

# Paired t-test between the errors of two models
def compare_models(err1, err2, name1, name2, alpha=0.05):
    """Run a paired t-test on two error samples and print the verdict.

    Args:
        err1: Errors of the first model.
        err2: Errors of the second model, paired element-wise with ``err1``.
        name1: Display name of the first model.
        name2: Display name of the second model.
        alpha: Significance level the p-value is compared against.

    Returns:
        Tuple ``(t_stat, p_val)`` as returned by ``scipy.stats.ttest_rel``.
    """
    t_stat, p_val = ttest_rel(err1, err2)
    print(f"🔍 Paired t-test {name1} vs {name2}:")
    print(f"  t-statistic = {t_stat:.4f}")
    print(f"  p-value     = {p_val:.4f}")
    # A NaN p-value compares False here and is therefore reported as
    # "not significant".
    if p_val < alpha:
        print(f"  ✅ SIGNIFIKAN: {name1} ≠ {name2} (p < {alpha:g})")
    else:
        print(f"  ❌ TIDAK signifikan: {name1} ≈ {name2} (p ≥ {alpha:g})")
    print("-" * 50)
    return t_stat, p_val

# Run the paired t-test for every pair of models
for (name_a, errs_a), (name_b, errs_b) in (
    (("GRU", err_gru), ("LSTM", err_lstm)),
    (("GRU", err_gru), ("TCNN", err_tcnn)),
    (("LSTM", err_lstm), ("TCNN", err_tcnn)),
):
    compare_models(errs_a, errs_b, name_a, name_b)

# Boxplot of the absolute-error distribution of each model
plt.figure(figsize=(8, 5))
plt.boxplot([err_gru, err_lstm, err_tcnn])
# Name the boxes through xticks instead of boxplot's `labels=` keyword, which
# Matplotlib 3.9 deprecated in favour of `tick_labels`; xticks works on both
# old and new releases. Boxes are drawn at x = 1..N by default.
plt.xticks([1, 2, 3], ["GRU", "LSTM", "TCNN"])
plt.title("Absolute Error Distribution per Model (Expanding Step=5)")
plt.ylabel("Absolute Error")
plt.grid(True)
plt.show()

# Histograms of the pairwise differences between the models' absolute errors
plt.figure(figsize=(14, 4))
# One panel per model pair, drawn left to right.
for panel, (panel_title, errs_a, errs_b) in enumerate(
    (
        ("GRU - LSTM Error Diff", err_gru, err_lstm),
        ("GRU - TCNN Error Diff", err_gru, err_tcnn),
        ("LSTM - TCNN Error Diff", err_lstm, err_tcnn),
    ),
    start=1,
):
    plt.subplot(1, 3, panel)
    plt.hist(errs_a - errs_b, bins=30, color='gray')
    plt.title(panel_title)

plt.tight_layout()
plt.show()

# Cohen's d (paired-samples effect size)
def cohens_d(x, y):
    """Return Cohen's d for paired samples.

    The effect size is the mean of the element-wise differences ``x - y``
    divided by their sample standard deviation (``ddof=1``).

    Args:
        x: First sample (array-like of numbers).
        y: Second sample, paired element-wise with ``x``.

    Returns:
        The effect size as a float. ``nan`` when fewer than two differences
        exist or when every difference is zero; ``+inf`` / ``-inf`` when the
        differences are constant but non-zero.
    """
    # np.asarray lets plain lists/tuples be passed as well as ndarrays.
    diff = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    if diff.size < 2:
        # The sample standard deviation is undefined for n < 2.
        return float("nan")

    mean_diff = diff.mean()
    sd_diff = diff.std(ddof=1)
    if sd_diff == 0:
        # Same values a raw division would give, without the RuntimeWarning.
        if mean_diff == 0:
            return float("nan")
        return float(np.copysign(np.inf, mean_diff))
    return float(mean_diff / sd_diff)

# Report the paired effect size for every pair of models.
# The header string previously contained mis-decoded (mojibake) characters.
print("📏 Effect Size (Cohen’s d):")
for pair_label, errs_a, errs_b in (
    ("GRU vs LSTM", err_gru, err_lstm),
    ("GRU vs TCNN", err_gru, err_tcnn),
    ("LSTM vs TCNN", err_lstm, err_tcnn),
):
    # Left-pad the label to 12 characters so the colons line up.
    print(f"  {pair_label:<12}: {cohens_d(errs_a, errs_b):.4f}")
