In [36]:
import numpy as np
import pandas as pd
import joblib
import json
import os

# --- Run selection ---------------------------------------------------------
# These three values pick which saved run is inspected below.
fs_method = 'None'  # feature selection method, e.g. 'None', 'LASSO', or 'SFS-CV'
model_type = 'RandomForest'  # model type, e.g. 'RandomForest', 'XGBoost', or 'StochasticXGBoost'
fold_number = 1  # number used in the 'outer_fold_<n>' folder name

# Root folder under which all results are looked up
base_output_dir = 'model_results'

# Resulting layout: <base_output_dir>/<fs_method>_<model_type>/outer_fold_<fold_number>
fold_dir = os.path.join(
    base_output_dir,
    '_'.join((fs_method, model_type)),
    'outer_fold_' + str(fold_number),
)

# Load every artifact stored in `fold_dir` into notebook-level variables and
# print a short confirmation for each one.
#
# Optional artifacts start out as None. Without this reset, re-running the cell
# for a different fold/method whose optional files are absent would silently
# keep the values loaded by a previous run (or leave the names undefined on a
# fresh kernel).
feature_importances = None
shap_values_train = None
shap_values_test = None

# isdir rather than exists: a regular file that happens to have this name
# must not be treated as a fold directory.
if os.path.isdir(fold_dir):
    print(f"Loading data from: {fold_dir}")

    # Load train/test indices
    train_indices = np.load(os.path.join(fold_dir, 'train_indices.npy'))
    test_indices = np.load(os.path.join(fold_dir, 'test_indices.npy'))
    print("Train Indices:", train_indices)
    print("Test Indices:", test_indices)

    # Load best hyperparameters
    with open(os.path.join(fold_dir, 'best_hyperparameters.json'), 'r') as f:
        best_hyperparameters = json.load(f)
    print("Best Hyperparameters:", best_hyperparameters)

    # Load selected features
    selected_features = pd.read_csv(os.path.join(fold_dir, 'selected_features.csv'))
    print("Selected Features:", selected_features)

    # Load saved model
    # SECURITY: joblib.load unpickles the file, which can execute arbitrary
    # code. Only load model files that you produced yourself or fully trust.
    # NOTE(review): if this is a scikit-learn estimator, it should be loaded
    # with the same library versions that were used to save it -- see the
    # scikit-learn model persistence docs (security & maintainability
    # limitations).
    model = joblib.load(os.path.join(fold_dir, 'model.pkl'))
    print("Model loaded successfully.")

    # Load performance metrics
    with open(os.path.join(fold_dir, 'performance_metrics.json'), 'r') as f:
        performance_metrics = json.load(f)
    print("Performance Metrics:", performance_metrics)

    # Load confusion matrix
    confusion_matrix = np.load(os.path.join(fold_dir, 'confusion_matrix.npy'))
    print("Confusion Matrix:\n", confusion_matrix)

    # Load predictions (only the first rows are printed)
    predictions_df = pd.read_csv(os.path.join(fold_dir, 'predictions.csv'))
    print("Predictions:\n", predictions_df.head())

    # Load feature importances (if available); stays None when the file is missing
    feature_importance_path = os.path.join(fold_dir, 'feature_importances.csv')
    if os.path.exists(feature_importance_path):
        feature_importances = pd.read_csv(feature_importance_path)
        print("Feature Importances:\n", feature_importances)

    # Load SHAP values for train and test sets (if applicable).
    # Both files must be present; if either is missing, neither is loaded and
    # both variables stay None.
    shap_train_path = os.path.join(fold_dir, 'shap_values_train.pkl')
    shap_test_path = os.path.join(fold_dir, 'shap_values_test.pkl')

    if os.path.exists(shap_train_path) and os.path.exists(shap_test_path):
        # SECURITY: same pickle caveat as for the model file above.
        shap_values_train = joblib.load(shap_train_path)
        shap_values_test = joblib.load(shap_test_path)
        print("SHAP Values for Train Set Loaded.")
        print("SHAP Values for Test Set Loaded.")
else:
    # NOTE: nothing is loaded in this branch. The required variables
    # (train_indices, model, ...) are not assigned here, so do not rely on
    # them after seeing this message.
    print(f"Directory for fold {fold_number} not found.")


Loading data from: model_results\None_RandomForest\outer_fold_1
Train Indices: [ 1  2  5  6  7  8  9 10 11 12 13 14]
Test Indices: [0 3 4]
Best Hyperparameters: {'n_estimators': 75, 'max_depth': 7, 'min_samples_split': 15}
Selected Features:                                         feature
0                             g__ADurb.Bin063-1
1                                g__Abiotrophia
2   g__Absconditabacteriales_(SR1)_unclassified
3                              g__Acetatifactor
4                                g__Acetobacter
5                               g__Acholeplasma
6                            g__Acidaminococcus
7                                g__Acidibacter
8                                  g__Acidipila
9                          g__Acidithiobacillus
10                g__Acidobacteria_unclassified
11            g__Acidobacteriaceae_unclassified
12             g__Acidobacteriales_unclassified
13                                g__Acidovorax
14                             g__Acin

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
