In [2]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from joblib import load, dump
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
import optuna

# Load the persisted feature arrays and labels for the train/validation and
# test splits (presumably FFT-derived features, judging by the file names).
# NOTE(review): joblib.load unpickles its input, so only point it at trusted files.
X_train_val, y_train_val = (
    load('../Dump2/X_train_fft.joblib'),
    load('../Dump2/y_Train.joblib'),
)
X_test, y_test = (
    load('../Dump2/X_test_fft.joblib'),
    load('../Dump2/y_Test.joblib'),
)

def reshape_data(data):
    """Flatten every sample of ``data`` into a single feature vector.

    The first axis is kept as the sample axis and all remaining axes are
    collapsed into one, so ``(n, t, c)`` becomes ``(n, t * c)``.  Inputs that
    are already 2-D come back with the same shape, and arrays with more than
    three axes are flattened the same way.

    Parameters
    ----------
    data : numpy.ndarray
        Array with at least one axis; axis 0 indexes the samples.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(data.shape[0], prod(data.shape[1:]))``.
    """
    num_samples = data.shape[0]
    # Multiply the trailing dimensions out explicitly rather than passing -1:
    # numpy cannot infer -1 for an empty batch (0 samples).
    num_features = int(np.prod(data.shape[1:]))
    return data.reshape(num_samples, num_features)


# scikit-learn estimators take 2-D (samples x features) input, so flatten
# each sample of both splits first.
X_train_val, X_test = reshape_data(X_train_val), reshape_data(X_test)

# Fixed random-forest hyperparameters (their provenance is not shown in this
# notebook -- presumably the result of an earlier tuning run).
params = dict(
    n_estimators=251,
    max_depth=17,
    min_samples_split=3,
    min_samples_leaf=3,
    max_features='sqrt',
    bootstrap=False,
)

# Build the classifier with a fixed seed, using all available cores.
rf = RandomForestClassifier(random_state=42, n_jobs=-1, **params)

# Fit on the complete train/validation set.
rf.fit(X_train_val, y_train_val)

# Score the fitted model on the test set.
predictions_test = rf.predict(X_test)
accuracy_test = accuracy_score(y_test, predictions_test)
print("Accuracy on Test Set:", accuracy_test)
print(
    classification_report(y_test, predictions_test),
    confusion_matrix(y_test, predictions_test),
    sep="\n",
)

Accuracy on Test Set: 0.8869565217391304
              precision    recall  f1-score   support

           0       0.83      0.97      0.90       230
           1       0.96      0.81      0.88       230

    accuracy                           0.89       460
   macro avg       0.90      0.89      0.89       460
weighted avg       0.90      0.89      0.89       460

[[222   8]
 [ 44 186]]


In [16]:
import pandas as pd
# Load the persisted details for the test examples; the next cell indexes the
# result by the key 'Positive'.
# NOTE(review): joblib.load unpickles its input -- only use with trusted files.
test_details = load('../Dump2/test_details.joblib') 


In [17]:
# Show the 'Positive' entries of test_details as a table for inspection
# (the meaning of the individual columns is not documented in this notebook).
pd.DataFrame(test_details['Positive'])

Unnamed: 0,0,1,2,3,4
0,223,2014-08-11,2014-08-11T18:48:11.924000Z,A,2014-08-11T18:50:49.924000Z
1,223,2014-08-11,2014-08-11T15:22:52.933000Z,,2014-08-11T15:24:42.930000Z
2,222,2014-08-10,2014-08-10T17:26:09.517000Z,A,2014-08-10T17:26:46.517000Z
3,220,2014-08-08,2014-08-08T03:28:44.264000Z,B,2014-08-08T03:29:12.864000Z
4,201,2014-07-20,2014-07-20T08:15:09.795000Z,B,2014-07-20T08:17:20.794000Z
...,...,...,...,...,...
225,75,2012-03-15,2012-03-15T05:40:59.188000Z,B,2012-03-15T05:45:49.151000Z
226,73,2012-03-13,2012-03-13T06:50:00.870000Z,B,2012-03-13T06:51:26.752000Z
227,57,2012-02-26,2012-02-26T16:17:24.667000Z,A,2012-02-26T16:19:04.092000Z
228,56,2012-02-25,2012-02-25T20:34:31.127000Z,,2012-02-25T20:37:51.444000Z


In [11]:
# Inspect the test feature matrix (flattened to 2-D by reshape_data in the
# first cell); numpy abbreviates the repr of large arrays.
X_test

array([[1.62907330e+07, 8.78279555e+04, 9.63846783e+03, ...,
        2.97175650e+03, 5.08195663e+03, 1.01698143e+03],
       [1.59793390e+07, 7.00980092e+04, 4.75052925e+04, ...,
        1.89221622e+03, 7.91623486e+03, 2.85091260e+03],
       [1.66174480e+07, 1.08939247e+05, 1.87053841e+04, ...,
        3.21126093e+03, 4.35616696e+03, 4.29591845e+03],
       ...,
       [1.60610440e+07, 8.12135768e+04, 4.71158921e+04, ...,
        4.32028281e+03, 2.62598041e+03, 4.23311748e+03],
       [1.50286320e+07, 1.03354322e+05, 6.01816980e+04, ...,
        5.54975856e+03, 8.67907517e+03, 6.93215953e+03],
       [1.73258010e+07, 1.27371077e+05, 8.82917522e+04, ...,
        7.11470409e+03, 7.09293871e+03, 9.70076024e+03]])

In [6]:
import matplotlib.pyplot as plt
def plot_data(test_set, predictions_test, y_test):
    """Plot and save every misclassified example, one subplot per channel.

    For each index where the prediction differs from the label, a figure with
    ``example.shape[0]`` vertically stacked subplots is drawn, saved as
    ``./Visualisations/<idx>.png`` and shown.

    Parameters
    ----------
    test_set : iterable of array-like
        Examples to plot; ``example[ch]`` is drawn as one line per subplot.
        Assumed to be raw multi-channel traces -- TODO confirm.
    predictions_test : indexable
        Predicted label for each example, indexed by position.
    y_test : indexable
        True label for each example, indexed by position.
    """
    import os  # local import keeps this cell runnable on its own

    # Labels for the first three channels; any further channel gets a generic label.
    channel_names = ("HHE", "HHN", "HHZ")

    for idx, example in enumerate(test_set):
        if predictions_test[idx] == y_test[idx]:
            continue

        n_channels = example.shape[0]
        # squeeze=False always returns a 2-D axes array, so a single-channel
        # example works the same way as a multi-channel one.
        fig, axs = plt.subplots(n_channels, sharex=True, squeeze=False)
        axs = axs[:, 0]

        axs[0].set_title(f"Wrong Classification, Should be {y_test[idx]}")
        for ch, ax in enumerate(axs):
            ax.plot(example[ch])
            ax.set_ylabel(channel_names[ch] if ch < len(channel_names) else f"Channel {ch}")
        # The x axis is shared, so only the bottom subplot carries the label.
        axs[-1].set_xlabel("Time")

        # Create the output folder on demand so saving cannot fail on a fresh checkout.
        os.makedirs('./Visualisations', exist_ok=True)
        fig.savefig(f'./Visualisations/{idx}.png')
        plt.show()
        # Release the figure; otherwise every misclassified example stays in memory.
        plt.close(fig)

In [9]:
def plot_correct_data(test_set, predictions_test, y_test):
    """Plot every correctly classified example, one subplot per channel.

    For each index where the prediction equals the label, a figure with
    ``example.shape[0]`` vertically stacked subplots is drawn and shown.
    Nothing is written to disk.

    Parameters
    ----------
    test_set : iterable of array-like
        Examples to plot; ``example[ch]`` is drawn as one line per subplot.
        Assumed to be raw multi-channel traces -- TODO confirm.
    predictions_test : indexable
        Predicted label for each example, indexed by position.
    y_test : indexable
        True label for each example, indexed by position.
    """
    # Labels for the first three channels; any further channel gets a generic label.
    channel_names = ("HHE", "HHN", "HHZ")

    for idx, example in enumerate(test_set):
        if predictions_test[idx] != y_test[idx]:
            continue

        n_channels = example.shape[0]
        # squeeze=False always returns a 2-D axes array, so a single-channel
        # example works the same way as a multi-channel one.
        fig, axs = plt.subplots(n_channels, sharex=True, squeeze=False)
        axs = axs[:, 0]

        axs[0].set_title(f"Right Classification, {y_test[idx]}")
        for ch, ax in enumerate(axs):
            ax.plot(example[ch])
            ax.set_ylabel(channel_names[ch] if ch < len(channel_names) else f"Channel {ch}")
        # The x axis is shared, so only the bottom subplot carries the label.
        axs[-1].set_xlabel("Time")

        plt.show()
        # Release the figure; this loop can open one figure per test example.
        plt.close(fig)

In [None]:
# Load the examples to plot in this cell so it also works on a fresh kernel
# (previously test_set was only defined by a later cell, raising NameError on
# Restart & Run All).
# NOTE(review): joblib.load unpickles its input -- only use with trusted files.
test_set = load('../Dump2/raw_X_test.joblib')
plot_correct_data(test_set, predictions_test, y_test)

In [None]:
# Load the test examples (presumably the raw, un-transformed traces, going by
# the file name) and plot the misclassified ones; plot_data also saves each
# figure under ./Visualisations.
test_set = load('../Dump2/raw_X_test.joblib')
plot_data(test_set, predictions_test, y_test)