In [1]:
from temporora import * 

In [6]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import numpy as np

# Assuming metrics is defined before this loop
for dataset in datasets:
    print(f"Processing dataset: {dataset}")
    args.sub_dataset = dataset  # Set current dataset

    # Load data for training, validation, and testing
    train_loader, valid_loader, test_loader, test_loader_last, \
        num_test_windows, train_visualize, engine_id = get_dataloader(
            dir_path=args.dataset_root,
            sub_dataset=args.sub_dataset,
            max_rul=args.max_rul,
            seq_length=args.sequence_len,
            batch_size=args.batch_size,
            use_exponential_smoothing=args.use_exponential_smoothing,
            smooth_rate=args.smooth_rate)

    # Directly use the specified hyperparameters
    best_params = {
        'n_estimators': 200,
        'max_depth': 20,
        'min_samples_split': 5,
        'min_samples_leaf': 5
    }

    # Train and evaluate using the specified hyperparameters
    rf_classifier = RandomForestClassifier(
        n_estimators=best_params["n_estimators"],
        max_depth=best_params["max_depth"],
        min_samples_split=best_params["min_samples_split"],
        min_samples_leaf=best_params["min_samples_leaf"],
        random_state=42
    )

    # Prepare training data
    train_x, train_y = [], []
    for x, y in train_loader:
        train_x.append(x.view(-1, args.sequence_len * args.feature_num).numpy())
        train_y.append((y.numpy() < 0.24).astype(int))  # Convert y to binary

    # Convert lists to arrays
    train_x = np.vstack(train_x)  # Stack along rows
    train_y = np.concatenate(train_y, axis=0)  # Concatenate along rows

    # Ensure y is a 1D array
    train_y = train_y.ravel()

    # Fit the model
    rf_classifier.fit(train_x, train_y)

    # Test the model
    test_predictions, test_actuals = [], []
    for x, y in test_loader:
        x = x.view(-1, args.sequence_len * args.feature_num).numpy()
        y = (y.numpy() < 0.24).astype(int)
        preds = rf_classifier.predict(x)
        test_predictions.extend(preds)
        test_actuals.extend(y)

    test_predictions = np.array(test_predictions)
    test_actuals = np.array(test_actuals)

    # Compute Metrics
    test_accuracy = accuracy_score(test_actuals, test_predictions)
    test_precision = precision_score(test_actuals, test_predictions)
    test_recall = recall_score(test_actuals, test_predictions)
    test_f1 = f1_score(test_actuals, test_predictions)
    test_roc_auc = roc_auc_score(test_actuals, test_predictions)

    # Store metrics
    metrics[dataset] = {
        "Best Hyperparameters": best_params,
        "Test Accuracy": test_accuracy,
        "Test Precision": test_precision,
        "Test Recall": test_recall,
        "Test F1 Score": test_f1,
        "Test ROC-AUC": test_roc_auc
    }

    # Print metrics
    print(f"Dataset {dataset} Metrics:")
    print(f"Best Hyperparameters: {best_params}")
    print(f"Test - Accuracy: {test_accuracy:.4f}, Precision: {test_precision:.4f}, Recall: {test_recall:.4f}, F1 Score: {test_f1:.4f}, ROC-AUC: {test_roc_auc:.4f}")


Processing dataset: FD001
Dataset FD001 Metrics:
Best Hyperparameters: {'n_estimators': 200, 'max_depth': 20, 'min_samples_split': 5, 'min_samples_leaf': 5}
Test - Accuracy: 0.9095, Precision: 0.9444, Recall: 0.6800, F1 Score: 0.7907, ROC-AUC: 0.8333
Processing dataset: FD002
Dataset FD002 Metrics:
Best Hyperparameters: {'n_estimators': 200, 'max_depth': 20, 'min_samples_split': 5, 'min_samples_leaf': 5}
Test - Accuracy: 0.8737, Precision: 0.8646, Recall: 0.5533, F1 Score: 0.6748, ROC-AUC: 0.7632
Processing dataset: FD003
Dataset FD003 Metrics:
Best Hyperparameters: {'n_estimators': 200, 'max_depth': 20, 'min_samples_split': 5, 'min_samples_leaf': 5}
Test - Accuracy: 0.9520, Precision: 0.9419, Recall: 0.8100, F1 Score: 0.8710, ROC-AUC: 0.8988
Processing dataset: FD004
Dataset FD004 Metrics:
Best Hyperparameters: {'n_estimators': 200, 'max_depth': 20, 'min_samples_split': 5, 'min_samples_leaf': 5}
Test - Accuracy: 0.8043, Precision: 0.6667, Recall: 0.2000, F1 Score: 0.3077, ROC-AUC: 0.5