In [1]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from joblib import load, dump
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score
from sklearn.model_selection import train_test_split
import optuna

# Training/validation data: FFT feature dump and the matching labels.
X_train_val, y_train_val = (
    load(dump_path)
    for dump_path in (
        '../../BEST SET/X_train_fft.joblib',
        '../../BEST SET/y_Train.joblib',
    )
)
# Evaluation data, read from the separate "international" dumps.
X_test, y_test = (
    load(dump_path)
    for dump_path in (
        '../../BEST SET/international_fft.joblib',
        '../../BEST SET/international_labels.joblib',
    )
)

def reshape_data(data):
    """Flatten every sample of ``data`` into one feature row.

    The first axis is kept as the sample axis and all remaining axes are
    merged into a single one, giving a 2-D ``(num_samples, num_features)``
    result. For the 3-D ``(samples, timesteps, channels)`` input this was
    originally written for, the result is unchanged; 2-D input (already flat)
    and inputs with more than three axes are now handled as well.

    Args:
        data: Array with at least one axis. Objects without a ``shape``
            attribute (e.g. nested lists) are converted with ``np.asarray``.

    Returns:
        ``data`` reshaped to ``(data.shape[0], product of the other axes)``.

    Raises:
        IndexError: If ``data`` has no axes at all (0-dimensional).
    """
    if not hasattr(data, "shape"):
        data = np.asarray(data)

    num_samples = data.shape[0]
    # Multiply the remaining extents explicitly instead of passing -1 to
    # reshape: -1 cannot be inferred when there are zero samples.
    num_features = 1
    for extent in data.shape[1:]:
        num_features *= extent
    return data.reshape(num_samples, num_features)


# Flatten each sample to a single row so both feature sets are 2-D before
# they are handed to the classifier.
X_train_val = reshape_data(X_train_val)  
X_test = reshape_data(X_test)  
# NOTE: no train/validation/test split is performed here; the test set was
# loaded from its own files above.

# Random-forest hyper-parameter sets, unpacked as keyword arguments into
# RandomForestClassifier. Only `base` is used in the visible cells; the other
# names presumably belong to other feature-set variants — TODO confirm.
base = dict(
    n_estimators=184,
    max_depth=15,
    min_samples_split=2,
    min_samples_leaf=2,
    max_features='sqrt',
    bootstrap=False,
)
subt = dict(
    n_estimators=292,
    max_depth=46,
    min_samples_split=4,
    min_samples_leaf=2,
    max_features='sqrt',
    bootstrap=False,
)
add = dict(
    n_estimators=280,
    max_depth=27,
    min_samples_split=3,
    min_samples_leaf=2,
    max_features='sqrt',
    bootstrap=False,
)
onlyA = dict(
    n_estimators=85,
    max_depth=45,
    min_samples_split=12,
    min_samples_leaf=6,
    max_features='sqrt',
    bootstrap=False,
)
AandB = dict(
    n_estimators=227,
    max_depth=30,
    min_samples_split=2,
    min_samples_leaf=3,
    max_features='sqrt',
    bootstrap=False,
)

# Build the forest from the `base` hyper-parameters (fixed seed, all cores).
rf = RandomForestClassifier(**base, n_jobs=-1, random_state=42)

# Train the classifier on the full training/validation set
rf.fit(X_train_val, y_train_val)

# Evaluation on test set
print("Test Stats")
predictions_test = rf.predict(X_test)
accuracy_test = accuracy_score(y_test, predictions_test)
print("Accuracy on Test Set:", accuracy_test)
f1_test = f1_score(y_test, predictions_test)
print("F1 on Test Set:", f1_test)
print(classification_report(y_test, predictions_test))
print(confusion_matrix(y_test, predictions_test))


# Same report on the data the model was fitted on, for comparison with the
# test figures. The accuracy gets its own name so that `accuracy_test` keeps
# holding the test-set accuracy computed above.
print("Train Stats")
predictions_test_train = rf.predict(X_train_val)
accuracy_train = accuracy_score(y_train_val, predictions_test_train)
print("Accuracy on Training Set:", accuracy_train)
print(classification_report(y_train_val, predictions_test_train))
print(confusion_matrix(y_train_val, predictions_test_train))

Test Stats
Accuracy on Test Set: 0.8657171922685656
F1 on Test Set: 0.8718446601941747
              precision    recall  f1-score   support

           0       0.89      0.83      0.86       484
           1       0.85      0.90      0.87       499

    accuracy                           0.87       983
   macro avg       0.87      0.87      0.87       983
weighted avg       0.87      0.87      0.87       983

[[402  82]
 [ 50 449]]
Train Stats
Accuracy on Training Set: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       826
           1       1.00      1.00      1.00       826

    accuracy                           1.00      1652
   macro avg       1.00      1.00      1.00      1652
weighted avg       1.00      1.00      1.00      1652

[[826   0]
 [  0 826]]


In [None]:
# Window data and its labels; the windows are converted to an array and
# flattened per sample with reshape_data, like the other feature sets.
window_test, window_labels = (
    load(dump_name) for dump_name in ('Windowdata.joblib', 'Windowlabels.joblib')
)
window_test = reshape_data(np.array(window_test))

In [8]:
def get_triggers(sequence, max_gap=2):
    """Group the 1-entries of a 0/1 sequence into trigger intervals.

    A trigger opens at an entry equal to 1 and keeps absorbing later 1s as
    long as they are not separated by more than ``max_gap`` zeros.

    Args:
        sequence: Iterable of per-position values. Only 1 and 0 are
            interpreted; any other value is skipped (it neither extends a
            trigger nor counts towards the gap).
        max_gap: Largest number of zeros, counted since the last 1, that may
            occur inside a trigger without closing it.

    Returns:
        List of inclusive ``(start, end)`` index tuples in order of
        appearance, where ``start`` and ``end`` are the positions of the first
        and last 1 of the trigger.
    """
    triggers = []
    start = None     # index of the first 1 of the open trigger; None when closed
    last_hit = None  # index of the most recent 1 of the open trigger
    gap_count = 0

    for i, val in enumerate(sequence):
        if val == 1:
            if start is None:
                start = i
            last_hit = i
            gap_count = 0
        elif val == 0 and start is not None:
            gap_count += 1
            if gap_count > max_gap:
                triggers.append((start, last_hit))
                start = None
                gap_count = 0

    if start is not None:
        # Close at the last 1 as well, so trailing zeros at the end of the
        # sequence are not counted as part of the trigger (previously the end
        # was the final index, unlike triggers closed mid-sequence).
        triggers.append((start, last_hit))

    return triggers

# Function to count true positives (TP), false positives (FP), and false negatives (FN)
def count_triggers(predictions, actuals, max_gap=2):
    """Compare predicted and actual triggers at the interval level.

    Both sequences are first converted to trigger intervals with
    ``get_triggers``; two intervals match when they share at least one index.

    Args:
        predictions: Per-position predicted values (0/1).
        actuals: Per-position ground-truth values (0/1).
        max_gap: Gap tolerance forwarded to ``get_triggers`` for both
            sequences. Defaults to 2, the value that was used implicitly
            before this parameter existed.

    Returns:
        Tuple ``(TP, FP, FN)``:
        TP is the number of actual triggers overlapped by at least one
        predicted trigger (each actual trigger counts once, however many
        predictions hit it), FN is the number of actual triggers with no
        overlapping prediction, and FP is the number of predicted triggers
        that overlap no actual trigger.
    """
    pred_triggers = get_triggers(predictions, max_gap=max_gap)
    actual_triggers = get_triggers(actuals, max_gap=max_gap)

    def overlaps(first, second):
        # Inclusive intervals intersect iff each one starts before the other ends.
        return first[0] <= second[1] and first[1] >= second[0]

    TP = sum(
        1
        for actual in actual_triggers
        if any(overlaps(pred, actual) for pred in pred_triggers)
    )
    # Every actual trigger is either detected (TP) or missed (FN).
    FN = len(actual_triggers) - TP
    FP = sum(
        1
        for pred in pred_triggers
        if not any(overlaps(pred, actual) for actual in actual_triggers)
    )

    return TP, FP, FN

# Predict on the window data under a dedicated name: reusing `predictions_test`
# here would replace the test-set predictions that the plotting cell compares
# against `y_test`.
window_predictions = rf.predict(window_test)
# Calculate TP, FP, FN
TP, FP, FN = count_triggers(window_predictions, window_labels)

print(f"True Positives (TP): {TP}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")

True Positives (TP): 1
False Positives (FP): 5
False Negatives (FN): 0


In [2]:
import matplotlib.pyplot as plt
def plot_data(test_set, predictions_test, y_test, channel_labels=("HHE", "HHN", "HHZ")):
    """Plot every misclassified example, one subplot per channel.

    Examples whose prediction equals the label are skipped. For each remaining
    example a figure is shown with one row per entry along the example's first
    axis, sharing the x axis; the title states the expected label.

    Args:
        test_set: Iterable of examples; each example must expose ``shape`` and
            be indexable along its first axis (one entry per channel).
        predictions_test: Predicted labels, indexable by example position.
        y_test: True labels, indexable by example position.
        channel_labels: Y-axis labels for the channels, in order. Channels
            beyond the provided labels are plotted without a y-label.
    """
    for idx, example in enumerate(test_set):
        if predictions_test[idx] == y_test[idx]:
            continue

        num_channels = example.shape[0]
        # squeeze=False always returns a 2-D array of axes, so a
        # single-channel example does not break the indexing below.
        fig, axes_grid = plt.subplots(num_channels, sharex=True, squeeze=False)
        axs = axes_grid[:, 0]

        axs[0].set_title(f"Wrong Classification, Should be {y_test[idx]}")
        for channel, ax in enumerate(axs):
            ax.plot(example[channel])
            if channel < len(channel_labels):
                ax.set_ylabel(channel_labels[channel])

        axs[-1].set_xlabel("Time")
        plt.show()
        # Release the figure; otherwise one figure per misclassified example
        # can stay alive for the lifetime of the kernel.
        plt.close(fig)

In [None]:
# NOTE(review): this dump is read from '../BEST SET' while the other datasets
# in this notebook come from '../../BEST SET' — confirm which path is right.
raw_X_test = load('../BEST SET/raw_X_test.joblib')

# Recompute the test-set predictions right here so the plot does not depend on
# whatever `predictions_test` currently holds (that name is also assigned in
# the window-evaluation cell, and cells may be run in any order).
predictions_test = rf.predict(X_test)
# Assumes raw_X_test holds the same samples as X_test, in the same order, so
# that indices line up with y_test — TODO confirm.
plot_data(raw_X_test, predictions_test, y_test)