In [2]:
from temporora import * 

In [None]:
# Function for Optuna optimization
def optimize(trial, dataset, train_loader, valid_loader, args):
    # Suggest hyperparameters for SVC
    C = trial.suggest_loguniform('C', 1e-5, 1e5)  # Regularization parameter
    gamma = trial.suggest_loguniform('gamma', 1e-5, 1e1)  # Kernel coefficient
    kernel = trial.suggest_categorical('kernel', ['linear', 'rbf', 'poly', 'sigmoid'])  # Kernel type

    # Initialize SVM with the suggested hyperparameters
    svm_classifier = SVC(C=C, gamma=gamma, kernel=kernel, probability=True, random_state=42)

    # Prepare training data
    train_x, train_y = [], []
    for x, y in train_loader:
        train_x.extend(x.view(-1, args.sequence_len * args.feature_num).numpy())
        train_y.extend((y.numpy() < 0.24).astype(int))  # Convert y to binary

    train_x = np.array(train_x)
    train_y = np.array(train_y)

    # Fit the model
    svm_classifier.fit(train_x, train_y)

    # Validate on validation set
    valid_predictions, valid_actuals = [], []
    for x, y in valid_loader:
        x = x.view(-1, args.sequence_len * args.feature_num).numpy()
        y = (y.numpy() < 0.24).astype(int)
        preds = svm_classifier.predict(x)
        valid_predictions.extend(preds)
        valid_actuals.extend(y)

    valid_predictions = np.array(valid_predictions)
    valid_actuals = np.array(valid_actuals)

    # Compute F1 score as the optimization metric
    f1 = f1_score(valid_actuals, valid_predictions)
    return f1  # Optuna will maximize this score

# Assuming metrics is defined before this loop
for dataset in datasets:
    print(f"Processing dataset: {dataset}")
    args.sub_dataset = dataset  # Set current dataset

    # Load data for training, validation, and testing
    train_loader, valid_loader, test_loader, test_loader_last, \
        num_test_windows, train_visualize, engine_id = get_dataloader(
            dir_path=args.dataset_root,
            sub_dataset=args.sub_dataset,
            max_rul=args.max_rul,
            seq_length=args.sequence_len,
            batch_size=args.batch_size,
            use_exponential_smoothing=args.use_exponential_smoothing,
            smooth_rate=args.smooth_rate)

    # Optuna Study for Hyperparameter Optimization
    study = optuna.create_study(direction='maximize')
    study.optimize(lambda trial: optimize(trial, dataset, train_loader, valid_loader, args), n_trials=20)  # Number of trials

    # Best parameters found by Optuna
    print(f"Best hyperparameters for {dataset}: {study.best_params}")
    best_params = study.best_params

    # Train and evaluate using the best hyperparameters
    svm_classifier = SVC(C=best_params["C"], gamma=best_params["gamma"], kernel=best_params["kernel"], probability=True, random_state=42)

    # Prepare training data
    train_x, train_y = [], []
    for x, y in train_loader:
        train_x.append(x.view(-1, args.sequence_len * args.feature_num).numpy())
        train_y.append((y.numpy() < 0.24).astype(int))  # Convert y to binary

    train_x = np.vstack(train_x)
    train_y = np.concatenate(train_y, axis=0)
    train_y = train_y.ravel()

    # Fit the model
    svm_classifier.fit(train_x, train_y)

    # Validate and Test
    valid_predictions, valid_actuals = [], []
    test_predictions, test_actuals = [], []

    # Validation set
    for i, (x, y) in enumerate(valid_loader):
        x = x.view(-1, args.sequence_len * args.feature_num).numpy()
        y = (y.numpy() < 0.24).astype(int)
        preds = svm_classifier.predict(x)
        valid_predictions.extend(preds)
        valid_actuals.extend(y)

    # Test set
    for i, (x, y) in enumerate(test_loader):
        x = x.view(-1, args.sequence_len * args.feature_num).numpy()
        y = (y.numpy() < 0.24).astype(int)
        preds = svm_classifier.predict(x)
        test_predictions.extend(preds)
        test_actuals.extend(y)

    # Post-process predictions
    test_predictions = np.array(test_predictions)
    test_actuals = np.array(test_actuals)
    valid_predictions = np.array(valid_predictions)
    valid_actuals = np.array(valid_actuals)

    # Compute Metrics
    valid_accuracy = accuracy_score(valid_actuals, valid_predictions)
    valid_precision = precision_score(valid_actuals, valid_predictions)
    valid_recall = recall_score(valid_actuals, valid_predictions)
    valid_f1 = f1_score(valid_actuals, valid_predictions)
    valid_roc_auc = roc_auc_score(valid_actuals, valid_predictions)

    test_accuracy = accuracy_score(test_actuals, test_predictions)
    test_precision = precision_score(test_actuals, test_predictions)
    test_recall = recall_score(test_actuals, test_predictions)
    test_f1 = f1_score(test_actuals, test_predictions)
    test_roc_auc = roc_auc_score(test_actuals, test_predictions)

    # Store metrics in the metrics dictionary
    metrics[dataset] = {
        "Validation Accuracy": valid_accuracy,
        "Validation Precision": valid_precision,
        "Validation Recall": valid_recall,
        "Validation F1 Score": valid_f1,
        "Validation ROC-AUC": valid_roc_auc,
        "Test Accuracy": test_accuracy,
        "Test Precision": test_precision,
        "Test Recall": test_recall,
        "Test F1 Score": test_f1,
        "Test ROC-AUC": test_roc_auc
    }

    # Print metrics
    print(f"Dataset {dataset} Metrics:")
    print(f"Validation - Accuracy: {valid_accuracy:.4f}, Precision: {valid_precision:.4f}, Recall: {valid_recall:.4f}, F1 Score: {valid_f1:.4f}, ROC-AUC: {valid_roc_auc:.4f}")
    print(f"Test - Accuracy: {test_accuracy:.4f}, Precision: {test_precision:.4f}, Recall: {test_recall:.4f}, F1 Score: {test_f1:.4f}, ROC-AUC: {test_roc_auc:.4f}")


Processing dataset: FD001


[I 2024-11-24 13:44:56,342] A new study created in memory with name: no-name-fdb8b14a-ff78-436a-bfa6-062538f58f8f
  C = trial.suggest_loguniform('C', 1e-5, 1e5)  # Regularization parameter
  gamma = trial.suggest_loguniform('gamma', 1e-5, 1e1)  # Kernel coefficient
  y = column_or_1d(y, warn=True)
[I 2024-11-24 13:45:18,259] Trial 0 finished with value: 0.8730964467005076 and parameters: {'C': 467.5253633785387, 'gamma': 0.7674343897657696, 'kernel': 'poly'}. Best is trial 0 with value: 0.8730964467005076.
  C = trial.suggest_loguniform('C', 1e-5, 1e5)  # Regularization parameter
  gamma = trial.suggest_loguniform('gamma', 1e-5, 1e1)  # Kernel coefficient
  y = column_or_1d(y, warn=True)
[I 2024-11-24 13:45:35,942] Trial 1 finished with value: 0.8786885245901639 and parameters: {'C': 91831.25340692948, 'gamma': 9.320211473758795e-05, 'kernel': 'sigmoid'}. Best is trial 1 with value: 0.8786885245901639.
  C = trial.suggest_loguniform('C', 1e-5, 1e5)  # Regularization parameter
  gamma =

Best hyperparameters for FD001: {'C': 0.14979920180089687, 'gamma': 0.0002309901317154338, 'kernel': 'linear'}
Dataset FD001 Metrics:
Validation - Accuracy: 0.9795, Precision: 0.9428, Recall: 0.9333, F1 Score: 0.9380, ROC-AUC: 0.9610
Test - Accuracy: 0.9235, Precision: 0.9579, Recall: 0.7280, F1 Score: 0.8273, ROC-AUC: 0.8586
Processing dataset: FD002


[I 2024-11-24 14:47:37,469] A new study created in memory with name: no-name-f4b5d546-77dd-4c8f-bfe4-5b591016edff
  C = trial.suggest_loguniform('C', 1e-5, 1e5)  # Regularization parameter
  gamma = trial.suggest_loguniform('gamma', 1e-5, 1e1)  # Kernel coefficient
  y = column_or_1d(y, warn=True)
