In [1]:
import shap
import pandas as pd
import pickle
import copy
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from alibi.explainers import AnchorTabular
import time
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import  train_test_split
from sklearn.feature_selection import SelectFromModel

  from .autonotebook import tqdm as notebook_tqdm
2024-07-03 15:39:12.595793: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
import warnings
# Suppress every warning for the rest of the kernel session.
# NOTE(review): this also hides deprecation/convergence warnings from the
# libraries used below — consider narrowing the filter.
warnings.filterwarnings("ignore")

In [3]:
# Failure log; the distinct values of its 'Component' column drive every
# per-component loop further down.
failures = pd.read_csv('../data/model_data/failures.csv')
components = pd.unique(failures['Component'])

In [4]:
encoder = LabelEncoder()
for component in components:
    # Load the labelled data of one component, integer-encode the turbine
    # identifier and index the rows by timestamp. The frame is published as the
    # global `<component>_df`, which the later cells look up by name.
    #
    # The encoder must see the actual column values: encoding the literal
    # string 'Turbine_ID' repeated once per row maps every row to 0 and wipes
    # out the turbine information.
    # Note: the encoder is re-fitted for each component, so the integer codes
    # are only guaranteed to be consistent within a single component frame.
    globals()[f"{component}_df"] = (
        pd.read_csv(f'../data/model_data/labelled_data_{component}.csv', sep=',')
        .assign(Turbine_ID=lambda df: encoder.fit_transform(df['Turbine_ID']))
        # set the date as the index
        .set_index('Timestamp')
    )

In [5]:
class_target_name = "Failure (Target)"
for component in components:
    # Features are every column except the component label and the target.
    X = globals()[f"{component}_df"].drop(columns=['Component', class_target_name])
    y = globals()[f"{component}_df"][class_target_name]
    # 70/30 split with a fixed seed so the partition is reproducible.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    # Publish the four parts under `<component>_X_train`, `<component>_X_test`, ...
    globals().update({
        f"{component}_X_train": X_train,
        f"{component}_X_test": X_test,
        f"{component}_y_train": y_train,
        f"{component}_y_test": y_test,
    })

In [6]:
# Model tag used to build the pickle file names and the
# `<model_name>_<component>` global names in the cells below.
model_name = "xgb"

In [7]:
# Unpickle one stored model per component and publish it as the global
# `<model_name>_<component>`.
for component in components:
    with open("../main_pipeline/model/selected-{}_{}.pickle".format(model_name, component), "rb") as model_file:
        globals()[f"{model_name}_{component}"] = pickle.load(model_file)

In [7]:
# Unpickle the component-independent model into `model`.
with open("../main_pipeline/model/selected-xgb.pickle", "rb") as model_file:
    model = pickle.load(model_file)

In [None]:
# For every component: take the 'selector' step of its pipeline, translate its
# support mask into column names and build the reduced train/test frames with
# a fresh 0..n-1 row index.
for component in components:
    _selector = globals()[f"{model_name}_{component}"].named_steps['selector']
    _kept_columns = globals()[f"{component}_X_train"].columns[_selector.get_support()]
    globals().update({
        f"{component}_feature_selector": _selector,
        f"{component}_selected_features": _kept_columns,
        f"{component}_selected_features_train":
            globals()[f"{component}_X_train"][_kept_columns].reset_index(drop=True),
        f"{component}_selected_features_test":
            globals()[f"{component}_X_test"][_kept_columns].reset_index(drop=True),
    })



In [8]:
# Fit the unpickled `model` on the GEARBOX training split.
model.fit(GEARBOX_X_train, GEARBOX_y_train)

In [10]:
# Wrap the already-fitted model in a selector that keeps `max_features`
# columns (the -inf threshold disables the importance cut-off), then build the
# reduced GEARBOX train/test frames with a fresh 0..n-1 row index.
params = model.get_params()
selector = SelectFromModel(
    model,
    threshold=-np.inf,
    prefit=True,
    max_features=params['max_features'],
)
selected_features = GEARBOX_X_train.columns[selector.get_support()]
selected_features_train = GEARBOX_X_train[selected_features].reset_index(drop=True)
selected_features_test = GEARBOX_X_test[selected_features].reset_index(drop=True)




In [11]:
# Refit `model` on the reduced GEARBOX feature set.
# NOTE(review): `selector` was constructed around this same `model` object with
# prefit=True, so refitting here presumably changes what `selector` reports
# afterwards — confirm against the scikit-learn version in use.
model.fit(selected_features_train, GEARBOX_y_train)

In [14]:
# Prediction wrapper for the anchor explainer: incoming rows are passed
# through `selector.transform` and then classified with `model.predict`.
# NOTE(review): the explainer below is built and fitted on
# `selected_features_train`, i.e. columns that were already chosen via
# `selector.get_support()`, so `selector.transform` is applied to
# already-reduced rows — confirm this second selection is intended.
predit_fn = lambda x: model.predict(selector.transform(x))
explainer = AnchorTabular(predit_fn, selected_features_train.columns)
# Fit the explainer on the training rows as a plain NumPy array.
explainer.fit(selected_features_train.to_numpy())

AnchorTabular(meta={
  'name': 'AnchorTabular',
  'type': ['blackbox'],
  'explanations': ['local'],
  'params': {'seed': None, 'disc_perc': (25, 50, 75)},
  'version': '0.9.7.dev0'}
)

In [259]:
import numpy as np
import pandas as pd

def create_predict_fn(component_name, verbose=False):
    """Build the prediction callable handed to the explainer for one component.

    The returned function looks up, at call time, the globals
    ``<component_name>_selected_features_train`` (for the column names) and
    ``<model_name>_<component_name>`` (for the fitted model).

    Parameters
    ----------
    component_name : str
        Component whose model and feature names should be used.
    verbose : bool, default False
        When True, print the input/prediction diagnostics on every call. Off by
        default because the callable is typically invoked a very large number
        of times.

    Returns
    -------
    callable
        ``predict_fn(X)`` that accepts a 1-D or 2-D array-like and returns the
        model's ``predict_proba`` output as a ``float32`` array.
    """
    def predict_fn(X):
        try:
            # Accept anything array-like; the checks below need an ndarray.
            X = np.asarray(X)

            if verbose:
                # Debug information
                print(f"Component: {component_name}")
                print(f"Input type: {type(X)}")
                print(f"Input shape: {X.shape}")
                print(f"Input dtype: {X.dtype}")

            # Ensure X is 2D
            if X.ndim == 1:
                X = X.reshape(1, -1)

            # Convert to DataFrame so the model receives named columns
            feature_names = globals()[f"{component_name}_selected_features_train"].columns
            X_df = pd.DataFrame(X, columns=feature_names)

            # Get predictions
            model = globals()[f"{model_name}_{component_name}"]
            predictions = model.predict_proba(X_df)

            if verbose:
                print(f"Predictions shape: {predictions.shape}")
                print(f"Predictions dtype: {predictions.dtype}")

            return predictions.astype(np.float32)
        except Exception as e:
            # Report which component failed, then let the caller see the error.
            print(f"Error in predict_fn for {component_name}: {str(e)}")
            raise

    # Hand the closure back to the caller; without this the factory yields None.
    return predict_fn

# Per component: fit the model on its selected features, build the prediction
# callable and feature-name list, then create and fit an anchor explainer.
# A failing explainer fit is reported and the loop moves on to the next component.
for component in components:
    _train_frame = globals()[f"{component}_selected_features_train"]

    # Fit the model
    globals()[f"{model_name}_{component}"].fit(_train_frame, globals()[f"{component}_y_train"])

    # Prediction callable and feature names, published under their global names
    globals()[f"{component}_predict_fn"] = create_predict_fn(component)
    globals()[f"{component}_feature_names"] = _train_frame.columns.tolist()

    # Create explainer
    _explainer = AnchorTabular(
        globals()[f"{component}_predict_fn"],
        globals()[f"{component}_feature_names"],
    )
    globals()[f"{component}_explainer"] = _explainer

    # Fit the explainer
    try:
        _explainer.fit(_train_frame.to_numpy())
        print(f"Successfully fitted explainer for {component}")
    except Exception as e:
        print(f"Error fitting explainer for {component}: {str(e)}")

Successfully fitted explainer for GEARBOX
Successfully fitted explainer for GENERATOR
Successfully fitted explainer for HYDRAULIC_GROUP
Successfully fitted explainer for GENERATOR_BEARING
Successfully fitted explainer for TRANSFORMER


In [265]:
# Drop GENERATOR_BEARING from the components processed by the following cells.
# `components` becomes a plain Python list here.
components = [x for x in components if x != "GENERATOR_BEARING"]


In [266]:

# Build and fit one anchor explainer per component.
for component in components:
    # Freeze the component name as a default argument. A plain
    # `lambda x: globals()[f"{model_name}_{component}"]...` reads the global
    # `component` when it is *called*, so once the loop has finished every
    # stored predictor would silently use the last component's model.
    globals()[f"{component}_predict_fn"] = (
        lambda x, _component=component: globals()[f"{model_name}_{_component}"].predict_proba(x)
    )
    globals()[f"{component}_feature_names"] = globals()[f"{component}_selected_features_train"].columns.to_list()
    globals()[f"{component}_explainer"] = AnchorTabular(globals()[f"{component}_predict_fn"], globals()[f"{component}_feature_names"])
    # The explainer is fitted on the training rows as a NumPy array.
    globals()[f"{component}_explainer"].fit(globals()[f"{component}_selected_features_train"].to_numpy())




In [280]:
def GEARBOX_predict_fn(x):
    """Return the GEARBOX model's class probabilities for the rows in ``x``."""
    return xgb_GEARBOX.predict_proba(x)


GEARBOX_feature_names = GEARBOX_selected_features_train.columns.tolist()
GEARBOX_explainer = AnchorTabular(GEARBOX_predict_fn, GEARBOX_feature_names)
# Fit on the training rows as a NumPy array; left as the last expression so
# the fitted explainer is displayed as the cell output.
GEARBOX_explainer.fit(GEARBOX_selected_features_train.to_numpy())

AnchorTabular(meta={
  'name': 'AnchorTabular',
  'type': ['blackbox'],
  'explanations': ['local'],
  'params': {'seed': None, 'disc_perc': (25, 50, 75)},
  'version': '0.9.7.dev0'}
)

In [15]:
# Test rows as a NumPy array, plus their count for the progress message.
X_test_np = selected_features_test.to_numpy()
test_length = len(X_test_np)

In [16]:
def time_convert(sec):
    """Format a duration given in seconds as 'Time Lapsed = H:M:S'.

    Hours and minutes are truncated to integers; the seconds remainder is
    inserted as-is (so a float input keeps its fractional part).
    """
    total_minutes, seconds = divmod(sec, 60)
    hours, minutes = divmod(total_minutes, 60)
    return "Time Lapsed = {0}:{1}:{2}".format(int(hours), int(minutes), seconds)

In [None]:

# Compute one anchor explanation per test row, showing a single-line progress
# message (rewritten in place via the trailing carriage return).
start_time = time.time()
anchors = []
for i, dp in enumerate(X_test_np):
    anchors.append(explainer.explain(dp, threshold=0.95))
    current_time = time.time()
    time_lapsed = current_time - start_time
    # `i` is zero-based; the message counts *completed* steps, hence i + 1
    # (otherwise the first finished row reports 0 and the last reports N - 1).
    print("{} von {} Schritten abgeschlossen. Zeit: {}".format(i + 1, test_length, time_convert(time_lapsed)), end="\r", flush=True)

In [284]:
# Show the predictor object stored on the GEARBOX explainer.
print("Explainer predictor after setting:", GEARBOX_explainer.predictor)

Explainer predictor after setting: <alibi.utils.wrappers.ArgmaxTransformer object at 0x16224c620>
