### Clear memory

In [1]:
%reset -f
import gc
gc.collect()

0

### Import 

In [2]:
import sys, torch, shap, os, joblib
import pandas as pd
from transformers import T5Tokenizer, T5ForConditionalGeneration
from anthropic import Anthropic
from xgboost import XGBClassifier
from dotenv import load_dotenv
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [3]:
import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)

In [4]:
sys.path.append('..')
from src.scripts.data_utils import TEPDataLoader, filter_csv
from src.scripts.feature_engineering import create_lag_features, create_rolling_features, create_diff_features

# Data Loading
Load preprocessed Tennessee Eastman Process data from the previous analysis.

In [5]:
loader = TEPDataLoader(
    raw_data_path='../data/raw',
    processed_data_path='../data/processed',
)

# # keep this commented, if first notebook was run
# loader.convert_and_save_to_csv()


In [6]:
SELECTED_FAULTS = [0, 1, 13, 16]
MAX_SIMULATION = 50
files = ['TEP_fault_free_testing', 'TEP_faulty_testing']

# # keep this commented, if first notebook was run
# for f in files:
#     filter_csv(f, SELECTED_FAULTS, MAX_SIMULATION, data_path='../data/processed')
#     print(f'File {f} has been filtered and saved')

In [7]:
df_test = pd.read_csv('../data/processed/TEP_faulty_testing_filtered.csv')

# Data Preprocessing
Prepare datasets and create binary fault indicators for anomaly detection.

In [8]:
df_test['faultNumber'] = df_test['faultNumber'].astype(int)

In [9]:
df_test['faulty'] = (df_test['faultNumber'] > 0) & (df_test['sample'] > 160)

In [10]:
SELECTED_FEATURE_COLUMNS = joblib.load('../models/selected_features.pkl')

# Feature Engineering
Apply time series feature engineering (lag, rolling, differencing) to capture temporal patterns.

In [11]:
df_test = create_lag_features(data=df_test, lags=[1,2], columns=SELECTED_FEATURE_COLUMNS, group_by='simulationRun', dropna=True)

In [12]:
df_test = create_rolling_features(data=df_test, window_sizes=[3], columns=SELECTED_FEATURE_COLUMNS, group_by='simulationRun', dropna=True)

In [13]:
df_test = create_diff_features(data=df_test, columns=SELECTED_FEATURE_COLUMNS, group_by='simulationRun', dropna=True)

In [14]:
ALL_FEATURE_COLUMNS = joblib.load('../models/all_features.pkl')

In [15]:
scaler = joblib.load('../models/scaler.pkl')

X_test = scaler.transform(df_test[ALL_FEATURE_COLUMNS])

# Model Loading
Load the trained XGBoost model from the previous notebook for explainability analysis.

In [16]:
anomaly_detector = XGBClassifier(n_jobs=-1, random_state=42)
anomaly_detector.load_model('../models/xgb_model.json')

# SHAP Feature Importance Analysis
SHAP (SHapley Additive exPlanations) provides feature-level explanations for individual predictions, helping us understand which process variables contribute most to anomaly detection decisions.
### Initialize SHAP Explainer
Create a TreeExplainer optimized for XGBoost models to calculate feature contributions.

In [17]:
explainer = shap.TreeExplainer(anomaly_detector)

### Select Test Sample
Choose a specific test instance to demonstrate the explanation process.

In [18]:
test_idx = 1234
test_sample = X_test[test_idx:test_idx+1]

### Extract Prediction Confidence
Get the model's confidence score for anomaly detection on the selected sample.

In [19]:
confidence = anomaly_detector.predict_proba(test_sample)[0, 1]

### Calculate SHAP Values
Compute SHAP values that quantify each feature's contribution to the final prediction.

In [20]:
shap_values = explainer(test_sample).values

### Process Variable Descriptions
Map feature codes to human-readable descriptions of industrial process variables for better interpretability.

In [21]:
FEATURE_DESCRIPTIONS = {
    'xmeas_1': 'A feed (stream 1)',
    'xmeas_2': 'D feed (stream 2)',
    'xmeas_3': 'E feed (stream 3)',
    'xmeas_4': 'A and C feed (stream 4)',
    'xmeas_5': 'Recycle flow (stream 8)',
    'xmeas_6': 'Reactor feed rate (stream 6)',
    'xmeas_7': 'Reactor pressure',
    'xmeas_8': 'Reactor level',
    'xmeas_9': 'Reactor temperature',
    'xmeas_10': 'Purge rate (stream 9)',
    'xmeas_11': 'Product separator temperature',
    'xmeas_12': 'Product separator level',
    'xmeas_13': 'Product separator pressure',
    'xmeas_14': 'Product separator underflow (stream 10)',
    'xmeas_15': 'Stripper level',
    'xmeas_16': 'Stripper pressure',
    'xmeas_17': 'Stripper underflow (stream 11)',
    'xmeas_18': 'Stripper temperature',
    'xmeas_19': 'Stripper steam flow',
    'xmeas_20': 'Compressor work',
    'xmeas_21': 'Reactor cooling water outlet temperature',
    'xmeas_22': 'Separator cooling water outlet temperature',
    'xmeas_23': 'A composition in reactor feed (stream 6)',
    'xmeas_24': 'B composition in reactor feed (stream 6)',
    'xmeas_25': 'C composition in reactor feed (stream 6)',
    'xmeas_26': 'D composition in reactor feed (stream 6)',
    'xmeas_27': 'E composition in reactor feed (stream 6)',
    'xmeas_28': 'F composition in reactor feed (stream 6)',
    'xmeas_29': 'A composition in purge gas (stream 9)',
    'xmeas_30': 'B composition in purge gas (stream 9)',
    'xmeas_31': 'C composition in purge gas (stream 9)',
    'xmeas_32': 'D composition in purge gas (stream 9)',
    'xmeas_33': 'E composition in purge gas (stream 9)',
    'xmeas_34': 'F composition in purge gas (stream 9)',
    'xmeas_35': 'G composition in purge gas (stream 9)',
    'xmeas_36': 'H composition in purge gas (stream 9)',
    'xmeas_37': 'D composition in product (stream 11)',
    'xmeas_38': 'E composition in product (stream 11)',
    'xmeas_39': 'F composition in product (stream 11)',
    'xmeas_40': 'G composition in product (stream 11)',
    'xmeas_41': 'H composition in product (stream 11)',
    'xmv_1': 'D feed flow valve (stream 2)',
    'xmv_2': 'E feed flow valve (stream 3)',
    'xmv_3': 'A feed flow valve (stream 1)',
    'xmv_4': 'A and C feed flow valve  (stream 4)',
    'xmv_5': 'Compressor recycle valve',
    'xmv_6': 'Purge valve (stream 9)',
    'xmv_7': 'Separator pot liquid flow valve (stream 10)',
    'xmv_8': 'Stripper liquid product flow valve (stream 11)',
    'xmv_9': 'Stripper steam valve',
    'xmv_10': 'Reactor cooling water flow',
    'xmv_11': 'Condenser cooling water flow',
    'xmv_12': 'Agitator speed',
}


### Aggregate Feature Impacts
Group engineered features (lag, rolling, diff) by their base process variables to simplify explanations and focus on the most impactful process measurements.

In [22]:
base_feature_impacts = {}

for i, feature in enumerate(ALL_FEATURE_COLUMNS):
    impact = shap_values[0,i]
    base_feature = '_'.join(feature.split('_')[:2])
    base_description = FEATURE_DESCRIPTIONS[base_feature]

    if base_feature not in base_feature_impacts:
        base_feature_impacts[base_feature] = {'total_impact': 0, 'description': base_description}

    base_feature_impacts[base_feature]['total_impact'] += impact


### Rank Most Influential Variables
Sort process variables by their total contribution to the prediction and select the top 3 for concise, actionable explanations.

In [23]:
sorted_groups = sorted(base_feature_impacts.items(), key=lambda x: x[1]['total_impact'], reverse=True)

In [24]:
sorted_groups = sorted_groups[:3]

# Local LLM Experimentation
This section explores using local large language models to generate natural language explanations for XGBoost anomaly predictions. We test FLAN-T5 models with various prompt engineering techniques to create operator-friendly explanations.

## Prompt Engineering Strategy
The prompt incorporates the top 3 most impactful SHAP features and model confidence scores to provide context-aware explanations for industrial operators.

### Dynamic Context Generation
Generate risk-level context based on model confidence thresholds to prioritize operator attention appropriately.

In [25]:
if confidence >= 0.75:
    context = f'HIGH RISK: Model detected anomaly with {confidence:.1%} confidence.\n'
    context += 'Key factors: '
elif confidence >= 0.5:
    context = f'MODERATE RISK: Model detected anomaly with {confidence:.1%} confidence.\n'
    context += 'Key factors: '
else:
    context = f'NORMAL OPERATION: Model indicates normal conditions ({confidence:.1%} anomaly probability).\n'

### Feature Integration
Append the most influential process variables to provide specific context about which systems require attention.

In [26]:
factors = []
for i, (base_feature, data) in enumerate(sorted_groups):
    total_impact = data['total_impact']
    description = data['description']

    if confidence >= 0.5:
        factors.append(description)

context += ', '.join(factors)

Look at the context part of the prompt 

In [27]:
print(context)

HIGH RISK: Model detected anomaly with 100.0% confidence.
Key factors: Stripper steam valve, Stripper pressure, Stripper steam flow


## Prompt Design with Few-Shot Learning
We implement few-shot learning with domain-specific examples covering different fault scenarios. The prompt structure follows an ISSUE-CAUSE-ACTION format to provide actionable guidance for operators. Given FLAN-T5's 512-token context limit, we constrain the prompt to ~400 tokens to allow 100 tokens for generation.

In [28]:
prompt = f"""You are Tennessee Eastman plant anomaly expert. Analyze ONLY the key factors provided. Use format: ISSUE-CAUSE-ACTION.

Example 1:
NORMAL: 22.2% anomaly probability
Analysis: Normal operation. Continue monitoring.

Example 2:
HIGH RISK: 98.0% confidence
Key factors: Reactor temperature, Reactor cooling water outlet temperature, Reactor cooling water flow
Analysis: ISSUE: Reactor thermal imbalance. CAUSE: All three cooling parameters show coordinated deviation indicating cooling system malfunction. ACTION: Immediately verify cooling water valve position and increase flow rate.

Example 3:
HIGH RISK: 100.0% confidence
Key factors: A feed (stream 1), A and C feed (stream 4), A composition in reactor feed
Analysis: ISSUE: Feed system disruption. CAUSE: Multiple feed streams showing simultaneous changes suggests upstream supply issue. ACTION: Check feed source pressure and verify stream 1 control valve.

Example 4:
HIGH RISK: 99.0% confidence
Key factors: Product separator pressure, Product separator level, Product separator temperature
'Analysis: ISSUE: Separator control instability. CAUSE: Pressure, level, and temperature deviating simultaneously indicates control system failure. ACTION: Switch to manual control and check pressure relief valve.'

Example 5:
HIGH RISK: Model detected anomaly with 87.0% confidence.
Key factors: Stripper steam flow, Stripper steam valve, Stripper temperature
Analysis: ISSUE: Stripper process degradation. CAUSE: Gradual efficiency loss in separation unit. ACTION: Increase steam flow gradually and schedule maintenance inspection.

Current anomaly:
{context}
Analysis:"""


In [29]:
print(prompt)

You are Tennessee Eastman plant anomaly expert. Analyze ONLY the key factors provided. Use format: ISSUE-CAUSE-ACTION.

Example 1:
NORMAL: 22.2% anomaly probability
Analysis: Normal operation. Continue monitoring.

Example 2:
HIGH RISK: 98.0% confidence
Key factors: Reactor temperature, Reactor cooling water outlet temperature, Reactor cooling water flow
Analysis: ISSUE: Reactor thermal imbalance. CAUSE: All three cooling parameters show coordinated deviation indicating cooling system malfunction. ACTION: Immediately verify cooling water valve position and increase flow rate.

Example 3:
HIGH RISK: 100.0% confidence
Key factors: A feed (stream 1), A and C feed (stream 4), A composition in reactor feed
Analysis: ISSUE: Feed system disruption. CAUSE: Multiple feed streams showing simultaneous changes suggests upstream supply issue. ACTION: Check feed source pressure and verify stream 1 control valve.

Example 4:
HIGH RISK: 99.0% confidence
Key factors: Product separator pressure, Product

## Model Implementation
Testing both FLAN-T5 Base and Large models to evaluate the trade-off between computational requirements and explanation quality.

### Model Loading and Configuration
Load the pre-trained FLAN-T5 model and configure it for inference on industrial process data.

In [30]:
model_name = 'google/flan-t5-large'

In [31]:
model = T5ForConditionalGeneration.from_pretrained(model_name);
model.eval();

### Token Management
Load tokenizer for this model

In [32]:
tokenizer = T5Tokenizer.from_pretrained(model_name);

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Tokenize input sequence 

In [33]:
inputs = tokenizer(
    prompt,
    return_tensors='pt',
    max_length=512,
    truncation=True,
    padding=True,
)

Monitor prompt length to ensure it fits within the model's context window while preserving essential information.

In [34]:
num_tokens = inputs['input_ids'].shape[1]
print(f'The prompt length is {num_tokens} tokens')
if num_tokens > 410:
    print('Prompt might be too large!')

The prompt length is 388 tokens


### Generation Parameters
Configure generation parameters to balance creativity and factual accuracy, with temperature and sampling settings tuned for technical explanations.

In [35]:
with torch.inference_mode():
    outputs = model.generate(
        input_ids=inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_new_tokens=100,
        temperature=0.5,
        do_sample=True,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
        num_return_sequences=1,
        repetition_penalty=1.2,
        top_p=0.8,
        top_k=40,
    )


Encode tokenized generated output sequence

In [36]:
explanation = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(explanation)

ISSUE: Stripper process degradation. CAUSE: Gradual efficiency loss in separation unit. ACTION: Increase steam flow gradually and schedule maintenance inspection.


## Experimental Results and Limitations
This experiment reveals significant challenges with local model deployment for domain-specific explanation tasks.

**Observed Issues:**
- **Hallucination**: Models generate plausible but incorrect technical details
- **Template copying**: Outputs closely mirror few-shot examples without adaptation
- **Feature ignorance**: Generated explanations fail to incorporate provided SHAP insights
- **Domain knowledge gaps**: Lack of specialized industrial process understanding

**Resource Constraints:**
Hardware limitations prevent using larger models (FLAN-T5 XL) that might address these quality issues. Fine-tuning approaches are computationally prohibitive for this setup.

**Strategic Decision:**
These limitations justify transitioning to a more powerful API-based solution (Anthropic Claude) that can provide higher-quality, domain-aware explanations while maintaining practical deployment feasibility.

# External LLM Experimentation
To address the limitations of local models, we transition to Anthropic's Claude 3 Haiku, which offers superior domain reasoning capabilities while maintaining fast response times suitable for real-time industrial applications.

## Enhanced Prompt Engineering Strategy
Claude's larger context window enables more sophisticated prompt design with richer domain context and detailed formatting instructions, while maintaining compatibility with our SHAP-based feature importance approach.

### Risk-Level Context Generation
Implement consistent risk categorization to ensure operator responses align with system criticality levels.

In [37]:
if confidence >= 0.75:
    context = f'ALERT: {confidence:.1%} confidence - Key factors: '
elif confidence >= 0.5:
    context = f'CAUTION: {confidence:.1%} confidence - Key factors: '
else:
    context = f'NORMAL: {confidence:.1%} anomaly probability'


### SHAP Feature Integration
Maintain the same feature importance methodology to ensure consistent analytical foundation across different LLM implementations.

In [38]:
factors = []
for i, (base_feature, data) in enumerate(sorted_groups):
    total_impact = data['total_impact']
    description = data['description']

    if confidence >= 0.5:
        factors.append(description)

context += ', '.join(factors)

In [39]:
print(context)

ALERT: 100.0% confidence - Key factors: Stripper steam valve, Stripper pressure, Stripper steam flow


## Advanced Prompt Design
The enhanced prompt incorporates industrial domain knowledge, structured output formatting, and comprehensive few-shot examples. This approach leverages Claude's reasoning capabilities while maintaining practical operator-focused guidance.

**Domain Context:**
provide essential background about the Tennessee Eastman Process to improve technical accuracy of generated explanations.

**Structured Output Format:**
define clear response templates (STATUS/ISSUE/ROOT CAUSE/IMMEDIATE ACTION/MONITORING) to ensure consistent, actionable operator guidance.

**Comprehensive Examples:**
include diverse fault scenarios with varying confidence levels to demonstrate expected response patterns across different operational states.

In [40]:
prompt = f"""You are a Tennessee Eastman Process control engineer analyzing plant anomalies. Your role is to provide actionable technical analysis for plant operators.

CONTEXT: Tennessee Eastman is a chemical process with reactor, separator, stripper, and recycle streams producing products G and H from reactants A, C, D, E.

ANALYSIS FORMAT:
- STATUS: [NORMAL (less than 50% confidence) / CAUTION (between 50% and 75% confidence) / ALERT (more than 75% confidence)]
- ISSUE: Brief technical description (one sentence)
- ROOT CAUSE: Most likely physical/chemical cause (one sentence)
- IMMEDIATE ACTION: Single most critical operator step
- MONITORING: One key parameter to track

EXAMPLES:

Input: NORMAL: 15.2% anomaly probability
Output: STATUS: NORMAL - Continue routine monitoring of all process variables.

Input: CAUTION: 65.8% confidence - Key factors: Reactor temperature, Cooling water outlet temperature
Output: STATUS: CAUTION
ISSUE: Reactor thermal management deviation detected
ROOT CAUSE: Cooling water system efficiency reduction or heat duty increase
IMMEDIATE ACTION: Verify cooling water flow rates and heat exchanger performance
MONITORING: Track reactor temperature trend

Input: ALERT: 94.3% confidence - Key factors: Product separator pressure, Product separator level, Product separator temperature
Output: STATUS: ALERT
ISSUE: Product separator control system failure detected
ROOT CAUSE: Multiple control loops failing simultaneously indicating instrumentation malfunction
IMMEDIATE ACTION: Switch separator to manual control and verify pressure relief systems
MONITORING: Product separator pressure

CURRENT ANALYSIS:
{context}
Output:"""


In [41]:
print(prompt)

You are a Tennessee Eastman Process control engineer analyzing plant anomalies. Your role is to provide actionable technical analysis for plant operators.

CONTEXT: Tennessee Eastman is a chemical process with reactor, separator, stripper, and recycle streams producing products G and H from reactants A, C, D, E.

ANALYSIS FORMAT:
- STATUS: [NORMAL (less than 50% confidence) / CAUTION (between 50% and 75% confidence) / ALERT (more than 75% confidence)]
- ISSUE: Brief technical description (one sentence)
- ROOT CAUSE: Most likely physical/chemical cause (one sentence)
- IMMEDIATE ACTION: Single most critical operator step
- MONITORING: One key parameter to track

EXAMPLES:

Input: NORMAL: 15.2% anomaly probability
Output: STATUS: NORMAL - Continue routine monitoring of all process variables.

Input: CAUTION: 65.8% confidence - Key factors: Reactor temperature, Cooling water outlet temperature
Output: STATUS: CAUTION
ISSUE: Reactor thermal management deviation detected
ROOT CAUSE: Cooling

## API Implementation
Integrate with Anthropic's API using secure authentication and optimized generation parameters for technical content.

In [42]:
load_dotenv()
client = Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))

### Model Configuration
Configure Claude 3 Haiku with conservative temperature settings to prioritize factual accuracy over creative variation in technical explanations.

In [43]:
response = client.messages.create(
    model='claude-3-haiku-20240307',
    max_tokens=200,
    messages=[{'role': 'user', 'content': prompt}],
    temperature=0.3,
)

Look at the response 

In [44]:
print(response.content[0].text)

STATUS: ALERT
ISSUE: Stripper steam system malfunction detected
ROOT CAUSE: Stripper steam valve failure or steam supply disruption
IMMEDIATE ACTION: Manually control stripper steam flow to maintain pressure setpoint
MONITORING: Stripper steam flow and pressure


## Comparative Analysis and Results

**Performance Improvements:**
- **Response Quality**: Significantly more coherent and technically accurate explanations
- **Domain Awareness**: Better understanding of chemical process relationships
- **Consistency**: Reliable adherence to structured output format
- **Speed**: Sub-second response times compared to 15-20 seconds locally

**Trade-off Considerations:**
- **Infrastructure Dependency**: Requires stable internet connectivity for operation
- **Cost Implications**: API usage costs vs. local computational resources
- **Data Privacy**: External processing considerations for industrial data

**Practical Deployment Value:**
The quality improvement justifies the infrastructure requirements for production industrial monitoring systems where accurate, timely explanations directly impact operational safety and efficiency.

### Implementation Recommendation
For industrial anomaly detection systems, the API-based approach provides superior explainability capabilities that outweigh the connectivity requirements, making it the recommended solution for production deployment.

# Summary

## Objective
This notebook addresses the critical challenge of explainability in industrial anomaly detection systems by integrating machine learning predictions with natural language explanations for plant operators.

## Technical Approach
- **Base Model**: utilized pre-trained XGBoost anomaly detector from previous analysis
- **Feature Importance**: applied SHAP analysis to identify most influential process variables for each prediction
- **Explanation Generation**: implemented two LLM approaches for natural language explanation generation

## Methodology
1. **SHAP Integration**: extract feature-level contributions for individual anomaly predictions
2. **Local LLM Testing**: experimented with FLAN-T5 Base/Large models using few-shot learning
3. **API-Based Solution**: transitioned to Anthropic Claude 3 Haiku for improved explanation quality

## Key Findings
- **Local Model Limitations**: FLAN-T5 models suffered from hallucination, template copying, and insufficient domain knowledge
- **Resource Constraints**: hardware limitations prevented use of larger local models that might improve performance
- **API Solution Benefits**: Claude 3 Haiku provided significantly better explanation quality with structured, actionable operator guidance