In [34]:
import pickle
import pandas as pd
import numpy as np
import shap
from datetime import datetime
import joblib

In [35]:
# Consider new reading as
new_reading = {
    "Timestamp": '8/3/2021  4:26:38 PM',
    "OxEnRa": 4.5,  # Increased oxygen enrichment - will contribute to high SI
    "BlFuPeIn": 25.0,  # Increased blast furnace pressure index
    "EnOxFl": 15000.0,  # Higher enriched oxygen flow
    "CoBlFl": 75.0,  # Higher coal blast flow
    "BlMo": 160.0,  # Higher blast momentum
    "BlFuBoGaVo": 8000.0,  # Higher blast furnace bottom gas volume
    "BlFuBoGaIn": 85.0,  # Higher blast furnace bottom gas index
    "ThCoTe": 2300.0,  # Higher theoretical combustion temperature
    "ToGaPr": 250.0,  # Higher top gas pressure
    "EnOxPr": 1.8,  # Higher enriched oxygen pressure
    "CoBlPr": 0.6,  # Higher coal blast pressure
    "ToPrDr": 220.0,  # Higher top pressure drop
    "HoBlPr": 0.5,  # Higher hot blast pressure
    "AcBlVe": 280.0,  # Higher actual blast velocity
    "CoBlTe": 250.0,  # Higher coal blast temperature
    "HoBlTe": 1100.0,  # Higher hot blast temperature
    "ToTe": 230.0,  # Higher top temperature
    "BlHu": 20.0,  # Higher blast humidity
    "CoInSeVa": 50.0,  # Higher coke index and set value
    "FoSI": 0.9,  # Higher forecast SI - critical high threshold
    "HoBl": 1100.0,  # Higher hot blast
    "ToGasP": 240.0,  # Higher top gas pressure
    "CoBF": 75.0,  # Higher coke blast furnace
    "SI_lag1": 0.88,  # High lagged SI value
}

In [36]:
#converting the new reading to a DataFrame
new_reading_df = pd.DataFrame([new_reading])
#loding the preprocessing pipeline
with open('preprocessing_pipeline.pkl', 'rb') as f:
    preprocessing_pipeline = pickle.load(f)
# Apply preprocessing steps
def preprocess_data(df, pipeline):
    # Selecting feature columns
    df = df[pipeline['feature_columns']]
    # Fill null values
    df.fillna(pipeline['null_fill_values'], inplace=True)
    
    # Scale features
    df[pipeline['feature_columns']] = pipeline['scaler'].transform(df[pipeline['feature_columns']])
    
    return df

# Preprocess the new reading
new_reading_processed = preprocess_data(new_reading_df, preprocessing_pipeline)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.fillna(pipeline['null_fill_values'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[pipeline['feature_columns']] = pipeline['scaler'].transform(df[pipeline['feature_columns']])


In [37]:
new_reading_processed.head()

Unnamed: 0,OxEnRa,BlFuPeIn,EnOxFl,CoBlFl,BlMo,BlFuBoGaVo,BlFuBoGaIn,ThCoTe,ToGaPr,EnOxPr,...,CoBlTe,HoBlTe,ToTe,BlHu,CoInSeVa,FoSI,HoBl,ToGasP,CoBF,SI_lag1
0,6.186376,8.349047,-2.683261,109.636787,1.437173,0.655422,5.260941,2.878517,6.317686,2.180037,...,8.82853,0.475124,1.24104,4.778321,1.424068,5.483952,0.467971,2.653002,72.672068,4.00636


### Step 1: Set Up the Anomaly Detection System

In [38]:
#importing existing model
model = joblib.load('Final_model.pkl')

In [39]:
#SI Statistics from the model code
training_stats = {
    'si_mean': 0.46,
    'si_std': 0.11,
    'si_min': 0.18,
    'si_max': 0.72
}

## Step 2: Create the Anomaly Detection Class

In [40]:
class RealTimeAnomalyDetector:
    def __init__(self, model, training_stats):
        self.model = model
        self.training_stats = training_stats
        
        # Initialize SHAP explainer (if using tree-based model)
        self.explainer = shap.TreeExplainer(model)
        
        # Define anomaly thresholds
        self.setup_thresholds()
    
    def setup_thresholds(self):
        """Define what constitutes an anomaly"""
        mean = self.training_stats['si_mean']
        std = self.training_stats['si_std']
        
        # Statistical thresholds (2-sigma rule)
        self.upper_threshold = mean + 2 * std
        self.lower_threshold = mean - 2 * std
        
        # Business thresholds (adjust based on domain knowledge)
        self.critical_high = 0.85  # Furnace too hot
        self.critical_low = 0.25   # Furnace too cold
        
        # Severe deviation threshold
        self.severe_threshold = 0.15  # Absolute deviation
    

    def detect_anomaly(self, new_data_point):
        """
        Main anomaly detection function
        new_data_point: dict, pandas Series, or DataFrame with feature values
        """
        try:
            # Convert to DataFrame and ensure correct column order
            if isinstance(new_data_point, dict):
                input_data = pd.DataFrame([new_data_point])
            elif isinstance(new_data_point, pd.Series):
                input_data = pd.DataFrame([new_data_point])
            else:
                input_data = new_data_point

            # Ensure columns match training features
            if hasattr(self, 'feature_columns'):
                # If you pass feature_columns to the class, use it
                input_data = input_data[self.feature_columns]
            elif hasattr(self.model, 'feature_names_in_'):
                # For sklearn 1.0+
                input_data = input_data[self.model.feature_names_in_]

            # Make prediction
            predicted_si = self.model.predict(input_data)[0]

            # Classify anomaly type
            anomaly_info = self._classify_anomaly(predicted_si)

            if anomaly_info['is_anomaly']:
                # Generate explanations and recommendations
                explanations = self._explain_prediction(input_data.iloc[0])
                recommendations = self._generate_recommendations(explanations, predicted_si)

                return {
                    'timestamp': datetime.now().isoformat(),
                    'predicted_si': round(predicted_si, 4),
                    'anomaly_detected': True,
                    'anomaly_type': anomaly_info['type'],
                    'severity': anomaly_info['severity'],
                    'explanations': explanations,
                    'recommendations': recommendations,
                    'alert_message': self._create_alert_message(predicted_si, anomaly_info)
                }
            else:
                return {
                    'timestamp': datetime.now().isoformat(),
                    'predicted_si': round(predicted_si, 4),
                    'anomaly_detected': False,
                    'status': 'Normal operation',
                    'alert_message': f"SI prediction: {predicted_si:.3f}% - Within normal range"
                }

        except Exception as e:
            return {
                'error': f"Prediction failed: {str(e)}",
                'timestamp': datetime.now().isoformat()
            }
    
    def _classify_anomaly(self, predicted_si):
        """Classify the type and severity of anomaly"""
        
        anomaly_info = {'is_anomaly': False, 'type': None, 'severity': 'normal'}
        
        # Check critical thresholds first
        if predicted_si >= self.critical_high:
            anomaly_info = {
                'is_anomaly': True,
                'type': 'critical_high_si',
                'severity': 'critical'
            }
        elif predicted_si <= self.critical_low:
            anomaly_info = {
                'is_anomaly': True,
                'type': 'critical_low_si',
                'severity': 'critical'
            }
        # Check statistical thresholds
        elif predicted_si > self.upper_threshold:
            anomaly_info = {
                'is_anomaly': True,
                'type': 'high_si',
                'severity': 'moderate'
            }
        elif predicted_si < self.lower_threshold:
            anomaly_info = {
                'is_anomaly': True,
                'type': 'low_si',
                'severity': 'moderate'
            }
        
        return anomaly_info
    
    def _explain_prediction(self, data_point):
        """Use SHAP to explain why the prediction is anomalous"""
        
        # Get SHAP values
        shap_values = self.explainer.shap_values([data_point])
        feature_names = data_point.index.tolist()
        
        # Create explanation dictionary
        explanations = {}
        for i, feature in enumerate(feature_names):
            explanations[feature] = {
                'shap_value': round(shap_values[0][i], 4),
                'feature_value': round(data_point.iloc[i], 4),
                'contribution': 'positive' if shap_values[0][i] > 0 else 'negative'
            }
        
        # Get top 5 most influential features
        top_features = sorted(explanations.items(), 
                            key=lambda x: abs(x[1]['shap_value']), 
                            reverse=True)[:5]
        
        return {
            'all_features': explanations,
            'top_contributors': dict(top_features)
        }
    
    def _generate_recommendations(self, explanations, predicted_si):
        """Generate corrective action recommendations"""
        
        recommendations = []
        top_contributors = explanations['top_contributors']
        
        for feature, info in top_contributors.items():
            shap_val = info['shap_value']
            
            # Only recommend for significant contributors
            if abs(shap_val) > 0.02:
                
                if 'BlTe' in feature or 'BlastTemp' in feature:
                    if shap_val > 0 and predicted_si > 0.6:
                        recommendations.append({
                            'action': 'Reduce blast temperature',
                            'reason': f'High temperature contributing +{shap_val:.3f} to elevated SI',
                            'priority': 'high' if abs(shap_val) > 0.05 else 'medium'
                        })
                    elif shap_val < 0 and predicted_si < 0.4:
                        recommendations.append({
                            'action': 'Increase blast temperature',
                            'reason': f'Low temperature contributing {shap_val:.3f} to reduced SI',
                            'priority': 'high' if abs(shap_val) > 0.05 else 'medium'
                        })
                
                elif 'OxEnRa' in feature or 'Oxygen' in feature:
                    if shap_val > 0 and predicted_si > 0.6:
                        recommendations.append({
                            'action': 'Reduce oxygen enrichment',
                            'reason': f'High oxygen contributing +{shap_val:.3f} to elevated SI',
                            'priority': 'medium'
                        })
                
                elif 'ToGaPr' in feature or 'Pressure' in feature:
                    if shap_val < 0 and predicted_si < 0.4:
                        recommendations.append({
                            'action': 'Increase top gas pressure',
                            'reason': f'Low pressure contributing {shap_val:.3f} to reduced SI',
                            'priority': 'medium'
                        })
        
        # If no specific recommendations, provide general ones
        if not recommendations:
            if predicted_si > 0.7:
                recommendations.append({
                    'action': 'Review overall thermal conditions',
                    'reason': 'SI elevated but no clear single cause identified',
                    'priority': 'low'
                })
            elif predicted_si < 0.3:
                recommendations.append({
                    'action': 'Check furnace thermal state',
                    'reason': 'SI low but no clear single cause identified',
                    'priority': 'low'
                })
        
        return recommendations
    
    def _create_alert_message(self, predicted_si, anomaly_info):
        """Create human-readable alert message"""
        
        severity = anomaly_info['severity']
        anomaly_type = anomaly_info['type']
        
        if severity == 'critical':
            if 'high' in anomaly_type:
                return f"🚨 CRITICAL ALERT: SI predicted at {predicted_si:.3f}% - Furnace overheating risk!"
            else:
                return f"🚨 CRITICAL ALERT: SI predicted at {predicted_si:.3f}% - Furnace too cold!"
        elif severity == 'moderate':
            if 'high' in anomaly_type:
                return f"⚠️ WARNING: SI predicted at {predicted_si:.3f}% - Above normal range"
            else:
                return f"⚠️ WARNING: SI predicted at {predicted_si:.3f}% - Below normal range"
        
        return f"ℹ️ INFO: SI predicted at {predicted_si:.3f}% - Normal operation"

In [41]:
# Initialize the anomaly detector
detector = RealTimeAnomalyDetector(model, training_stats)

In [44]:
# Run anomaly detection

result = detector.detect_anomaly(new_reading_processed)
print(f"Alert: {result['alert_message']}")

Alert: SI prediction: 0.522% - Within normal range




In [45]:
if result['anomaly_detected']:
    print(f"Severity: {result['severity']}")
    print("Recommendations:")
    for rec in result['recommendations']:
        print(f"- {rec['action']}: {rec['reason']}")

Step 4: Integration for Real-Time Monitoring

In [46]:
def simulate_real_time_monitoring(detector, test_data, n_samples=10):
    """Simulate real-time monitoring with your test data"""
    
    print("=== REAL-TIME ANOMALY DETECTION SIMULATION ===\n")
    
    anomaly_count = 0
    
    for i in range(min(n_samples, len(test_data))):
        sample = test_data.iloc[i]
        result = detector.detect_anomaly(sample)
        
        print(f"Time {i+1}: {result['alert_message']}")
        
        if result['anomaly_detected']:
            anomaly_count += 1
            print(f"  Severity: {result['severity']}")
            if result.get('recommendations'):
                print("  Top Recommendation:", result['recommendations'][0]['action'])
            print()
    
    print(f"Summary: {anomaly_count}/{n_samples} samples flagged as anomalies")


In [47]:
# Run simulation with your test data
# simulate_real_time_monitoring(detector, X_test)