In [4]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold

In [5]:
# Load the simulated dataset; the first CSV column becomes the row index.
csv_path = "./data_csv_files/ideal_CPMG_data_for_dt.csv"
simulation_data = pd.read_csv(csv_path, index_col=0)

In [6]:
# Feature matrix: every column of the frame except the two label columns.
label_columns = ["stationarity", "noise_type"]
vo_values = simulation_data.drop(columns=label_columns).to_numpy()

n_rows, n_cols = vo_values.shape
print(f"n_samples: {n_rows}")
print(f"n_features: {n_cols}")


n_samples: 600
n_features: 9


In [7]:
# Targets: two independent classification tasks trained on the same features.
sim_stationary_labels = simulation_data["stationarity"]
sim_noise_type_labels = simulation_data["noise_type"]

# A single seed drives both the fold shuffling and every forest, so re-running
# this cell reproduces the reported accuracies and feature importances.
# (Previously the forests were unseeded and the numbers drifted between runs.)
RANDOM_STATE = 42

n_splits = 10
kf = KFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_STATE)

# Per-fold results, one entry per fold.
accuracy_stationary = []
accuracy_noise_type = []
feature_importances_stationary = []
feature_importances_noise_type = []

# K-Fold Cross Validation for stationarity
for train_index, test_index in kf.split(vo_values):
    train_data, test_data = vo_values[train_index], vo_values[test_index]
    train_stationary_labels, test_stationary_labels = (
        sim_stationary_labels.iloc[train_index],
        sim_stationary_labels.iloc[test_index],
    )

    # NOTE: the model is a random forest; the `decision_tree` name is kept
    # unchanged in case other cells refer to it.
    decision_tree = RandomForestClassifier(random_state=RANDOM_STATE)
    decision_tree.fit(train_data, train_stationary_labels)

    # Mean accuracy on the held-out fold.
    accuracy = decision_tree.score(test_data, test_stationary_labels)
    accuracy_stationary.append(accuracy)
    feature_importances_stationary.append(decision_tree.feature_importances_)

# K-Fold Cross Validation for noise type
# The splitter has an integer seed, so this second pass iterates over the same
# folds as the stationarity pass above.
for train_index, test_index in kf.split(vo_values):
    train_data, test_data = vo_values[train_index], vo_values[test_index]
    train_noise_type_labels, test_noise_type_labels = (
        sim_noise_type_labels.iloc[train_index],
        sim_noise_type_labels.iloc[test_index],
    )

    decision_tree = RandomForestClassifier(random_state=RANDOM_STATE)
    decision_tree.fit(train_data, train_noise_type_labels)

    accuracy = decision_tree.score(test_data, test_noise_type_labels)
    accuracy_noise_type.append(accuracy)
    feature_importances_noise_type.append(decision_tree.feature_importances_)

# Compute average accuracy
avg_accuracy_stationary = np.mean(accuracy_stationary)
avg_accuracy_noise_type = np.mean(accuracy_noise_type)

# Compute average feature importances (axis=0 averages across folds, leaving
# one value per feature column of vo_values)
avg_feature_importances_stationary = np.mean(feature_importances_stationary, axis=0)
avg_feature_importances_noise_type = np.mean(feature_importances_noise_type, axis=0)

print(f"Average test accuracy for stationarity: {avg_accuracy_stationary:.2f}")
print(
    f"Average feature importances for stationarity: {avg_feature_importances_stationary.round(2)}"
)

print(f"Average test accuracy for noise type: {avg_accuracy_noise_type:.2f}")
print(
    f"Average feature importances for noise type: {avg_feature_importances_noise_type.round(2)}"
)

Average test accuracy for stationarity: 0.97
Average feature importances for stationarity: [0.22 0.09 0.17 0.08 0.13 0.04 0.05 0.08 0.15]
Average test accuracy for noise type: 0.96
Average feature importances for noise type: [0.1  0.14 0.08 0.15 0.13 0.08 0.03 0.03 0.28]
