In [1]:
# --- Original Setup Script (v4.0 - Probabilistic Anomaly) ---

# 1. Install necessary libraries
print("🚀 Installing libraries...")
!pip install streamlit streamlit-autorefresh pyngrok joblib plotly -q

# 2. Imports
import pandas as pd
import numpy as np
import joblib
import xgboost as xgb
from sklearn.preprocessing import StandardScaler
from scipy.stats import chi2
import os

# 3. Enhanced Data Generation Function with more parameters
def generate_sku_data(sku_name, num_normal_batches=10, samples_per_batch=300, seed=None):
    """Simulate process data for one SKU and write its golden/demo batches to CSV.

    Parameters
    ----------
    sku_name : str
        Product name. "Oatmeal Raisin" has its own base profile; every other
        name (including "Chocolate Chip") uses the 180 / 12.5 / 15.0 profile.
    num_normal_batches : int, default 10
        Number of numbered training batches. Batches whose id is a multiple of
        four (0, 4, 8, ...) get an injected anomaly.
    samples_per_batch : int, default 300
        Number of one-second samples in every batch.
    seed : int or None, default None
        Seed for the random generator. ``None`` draws fresh entropy on every
        call; an integer makes the generated values repeatable.

    Returns
    -------
    pandas.DataFrame
        The golden batch followed by the numbered training batches, including
        the derived ``temp_deviation`` and ``airflow_deviation`` columns. The
        per-batch row index is kept as-is (it repeats across batches).

    Side effects
    ------------
    Writes ``golden_<sku>.csv`` and ``demo_<sku>.csv`` to the current working
    directory, where ``<sku>`` is the lower-cased name with spaces replaced by
    underscores.
    """
    print(f"🏭 Generating enhanced data for SKU: {sku_name}...")

    # One generator per call so a given seed reproduces the whole data set.
    rng = np.random.default_rng(seed)

    # Define SKU-specific golden profiles: (temperature, protein, airflow).
    default_profile = (180, 12.5, 15.0)
    sku_profiles = {
        "Chocolate Chip": default_profile,
        "Oatmeal Raisin": (175, 11.8, 14.5),
    }
    base_temp, base_protein, base_airflow = sku_profiles.get(sku_name, default_profile)

    # Anomaly windows are expressed as fractions of the batch so that short
    # batches still receive a visible excursion. At the default of 300 samples
    # these are exactly [100:200] and [50:150].
    temp_window = slice(samples_per_batch // 3, (2 * samples_per_batch) // 3)
    airflow_window = slice(samples_per_batch // 6, samples_per_batch // 2)

    # Inner function to generate a single batch
    def generate_single_batch(batch_id, is_golden=False, has_anomaly=False):
        time_steps = np.arange(samples_per_batch)
        temp_profile = base_temp + 20 * np.sin(np.pi * time_steps / samples_per_batch)
        airflow_profile = base_airflow + 1 * np.sin(0.5 * np.pi * time_steps / samples_per_batch)  # Dynamic airflow
        protein_profile = base_protein * np.ones(samples_per_batch)

        # The golden batch is generated with half the measurement noise.
        noise = 0.5 if is_golden else 1.0
        temp_actual = temp_profile + rng.normal(0, noise * 0.5, samples_per_batch)
        pressure_actual = (
            2.5
            + 0.1 * np.sin(2 * np.pi * time_steps / samples_per_batch)
            + rng.normal(0, noise * 0.1, samples_per_batch)
        )
        airflow_actual = airflow_profile + rng.normal(0, noise * 0.2, samples_per_batch)
        raw_material_protein = protein_profile + rng.normal(0, noise * 0.05, samples_per_batch)

        if has_anomaly:
            # An anomaly is always injected here; only its type is random.
            anomaly_type = rng.choice(['low_protein', 'high_temp', 'low_airflow'])
            if anomaly_type == 'low_protein':
                raw_material_protein -= 1.5          # whole batch is affected
            elif anomaly_type == 'high_temp':
                temp_actual[temp_window] += 5
            else:
                airflow_actual[airflow_window] -= 2.0

        moisture = (
            8.0
            - 0.05 * (temp_actual - base_temp)
            + 0.5 * (raw_material_protein - base_protein)
            - 0.1 * (airflow_actual - base_airflow)
            + rng.normal(0, 0.1, samples_per_batch)
        )

        return pd.DataFrame({
            'batch_id': batch_id, 'timestamp': pd.to_datetime(pd.Timestamp.now().normalize() + pd.to_timedelta(time_steps, unit='s')),
            'temp_setpoint': temp_profile, 'temp_actual': temp_actual,
            'pressure_actual': pressure_actual, 'airflow_setpoint': airflow_profile, 'airflow_actual': airflow_actual,
            'raw_material_protein': raw_material_protein, 'product_moisture_qc': moisture,
            'is_anomaly_source': has_anomaly
        })

    # Generate Golden, Demo (always anomalous, random anomaly type), and Training batches
    golden_df = generate_single_batch('golden', is_golden=True)
    demo_df = generate_single_batch('live_demo', has_anomaly=True)
    training_data_list = [golden_df] + [
        generate_single_batch(i, has_anomaly=(i % 4 == 0)) for i in range(num_normal_batches)
    ]

    # golden_df is also the first element of training_data_list; adding the
    # derived columns to it twice simply recomputes the same values.
    for df in [golden_df, demo_df] + training_data_list:
        df['temp_deviation'] = df['temp_actual'] - df['temp_setpoint']
        df['airflow_deviation'] = df['airflow_actual'] - df['airflow_setpoint']

    sku_prefix = sku_name.lower().replace(" ", "_")
    golden_df.to_csv(f'golden_{sku_prefix}.csv', index=False)
    demo_df.to_csv(f'demo_{sku_prefix}.csv', index=False)
    return pd.concat(training_data_list)

# --- Main Execution ---
sku_list = ["Chocolate Chip", "Oatmeal Raisin"]
sku_frames = [generate_sku_data(sku) for sku in sku_list]
combined_training_data = pd.concat(sku_frames)

print("🤖 Training models on data with original anomaly frequency...")
features = [
    'temp_actual',
    'pressure_actual',
    'raw_material_protein',
    'airflow_actual',
    'temp_deviation',
    'airflow_deviation',
]
target = 'product_moisture_qc'

# The statistical (T²) model only sees rows from batches without an injected anomaly.
normal_mask = combined_training_data['is_anomaly_source'].eq(False)
normal_batches_df = combined_training_data[normal_mask].copy()
print(f"Training anomaly detection model on {len(normal_batches_df)} 'normal' data points.")

# Fit the scaler on the normal rows, then estimate the covariance of the
# standardized normal rows (columns are variables, hence rowvar=False).
scaler = StandardScaler().fit(normal_batches_df[features])
normal_scaled = scaler.transform(normal_batches_df[features])
cov_matrix = np.cov(normal_scaled, rowvar=False)

# The moisture regressor is trained on every row, anomalous batches included.
xgb_model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100)
xgb_model = xgb_model.fit(combined_training_data[features], combined_training_data[target])

# Persist every artefact the Streamlit app loads, in the same order as before.
artifacts = [
    (xgb_model, 'xgb_model.pkl'),
    (scaler, 'scaler.pkl'),
    (np.linalg.inv(cov_matrix), 'inv_cov_matrix.pkl'),
    (features, 'features.pkl'),
    (chi2.ppf(0.99, df=len(features)), 't2_threshold.pkl'),
]
for artifact, artifact_path in artifacts:
    joblib.dump(artifact, artifact_path)

print("✅ Original data generated and models trained. Ready to launch.")

🚀 Installing libraries...
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m47.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m700.8/700.8 kB[0m [31m29.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m88.3 MB/s[0m eta [36m0:00:00[0m
[?25h🏭 Generating enhanced data for SKU: Chocolate Chip...
🏭 Generating enhanced data for SKU: Oatmeal Raisin...
🤖 Training models on data with original anomaly frequency...
Training anomaly detection model on 4800 'normal' data points.
✅ Original data generated and models trained. Ready to launch.


In [2]:
%%writefile app.py

import streamlit as st
import pandas as pd
import numpy as np
import joblib
import time
import plotly.graph_objects as go
import shap
import xgboost
from streamlit_autorefresh import st_autorefresh

# --- Page Configuration ---
# Wide layout plus the title shown for the page.
st.set_page_config(
    page_title="F&B Predictive Maintenance System",
    layout="wide",
)

# --- Load Models & Static Assets ---
@st.cache_resource
def load_models():
    """Load the persisted model artefacts (cached through ``st.cache_resource``).

    Returns
    -------
    dict
        ``xgb_model``, ``scaler``, ``inv_cov_matrix``, ``features``,
        ``t2_threshold`` as read from their ``.pkl`` files, plus ``explainer``,
        a ``shap.TreeExplainer`` built on the loaded regressor.
    """
    # Unpickle the regressor once and hand that same object to SHAP instead of
    # reading xgb_model.pkl a second time.
    xgb_model = joblib.load('xgb_model.pkl')
    return {
        "xgb_model": xgb_model,
        "scaler": joblib.load('scaler.pkl'),
        "inv_cov_matrix": joblib.load('inv_cov_matrix.pkl'),
        "features": joblib.load('features.pkl'),
        "t2_threshold": joblib.load('t2_threshold.pkl'),
        "explainer": shap.TreeExplainer(xgb_model),
    }
models = load_models()

# --- Dynamic Data Loading based on SKU ---
@st.cache_data
def load_sku_data(sku_name):
    """Read the demo and golden CSVs for ``sku_name`` and ensure the deviation columns exist.

    The file names are built from the lower-cased SKU name with spaces replaced
    by underscores. Returns a dict with the keys ``live_demo_batch`` and
    ``golden_batch_df``.
    """
    file_stem = sku_name.lower().replace(" ", "_")
    demo_frame = pd.read_csv(f'demo_{file_stem}.csv', parse_dates=['timestamp'])
    golden_frame = pd.read_csv(f'golden_{file_stem}.csv', parse_dates=['timestamp'])

    # (derived column, measured column, setpoint column)
    derived_columns = (
        ('temp_deviation', 'temp_actual', 'temp_setpoint'),
        ('airflow_deviation', 'airflow_actual', 'airflow_setpoint'),
    )
    for frame in (demo_frame, golden_frame):
        for derived, actual, setpoint in derived_columns:
            # Only compute a deviation when the CSV does not already carry it.
            if derived not in frame.columns:
                frame[derived] = frame[actual] - frame[setpoint]

    return {"live_demo_batch": demo_frame, "golden_batch_df": golden_frame}

# --- Helper Functions ---
def calculate_health_score(t2_score, t2_threshold):
    """Map a T² anomaly score onto a 0-100 health percentage.

    The scale is linear: a score of 0 gives 100, a score of three times
    ``t2_threshold`` (or more) gives 0. Results are clamped to ``[0, 100]``.

    A NaN result (for example from a NaN score) is reported as 0 rather than
    100, so a reading that cannot be scored is never shown as healthy.
    """
    health = 100 * (1 - (t2_score / (t2_threshold * 3)))
    # min(100, nan) evaluates to 100, so NaN has to be caught before clamping.
    if np.isnan(health):
        return 0
    return max(0, min(100, health))

def get_alert_details(data_point):
    """Build the alert payload (cause, impact, playbook) for one observation.

    Parameters
    ----------
    data_point : pandas.DataFrame
        Frame containing the model feature columns; the alert is derived from
        its first row.

    Returns
    -------
    dict
        ``cause`` and ``impact`` as markdown strings and ``playbook`` as a dict
        of ``step_1`` .. ``step_3`` instructions.
    """
    feature_names = models["features"]
    feature_frame = data_point[feature_names]

    # Rank the SHAP contributions of the first row only, so the index always
    # maps onto feature_names (assumes .values is laid out as rows x features).
    shap_row = models["explainer"](feature_frame).values[0]
    top_driver = feature_names[int(np.argmax(np.abs(shap_row)))]

    # Scale once and reuse the result on both sides of the quadratic form.
    scaled = models['scaler'].transform(feature_frame)
    t2_score = (scaled @ models['inv_cov_matrix'] @ scaled.T)[0][0]
    severity = t2_score / models['t2_threshold']

    if "protein" in top_driver:
        impact = f"**Projected Yield Loss: {(0.5 + severity * 2):.1f}%**"
    else:
        impact = f"**Projected Cpk Drift: {max(0.8, 1.33 - severity * 0.2):.2f}** (Target: >1.33)"

    cause = f"Primary Driver: **{top_driver.replace('_', ' ').title()}**."
    playbook = {
        "step_1": "Acknowledge the alert and verify process area.",
        "step_2": "Notify Shift Supervisor of the deviation.",
    }
    # The third step depends on which kind of feature drove the alert.
    if "temp" in top_driver:
        playbook["step_3"] = "Check oven heating elements and thermostats."
    elif "protein" in top_driver:
        playbook["step_3"] = "Contact QA to verify raw material lot ID."
    elif "airflow" in top_driver:
        playbook["step_3"] = "Inspect fans, filters, and ductwork for blockages."
    else:
        playbook["step_3"] = "Follow standard procedure for pressure deviations."
    return {"cause": cause, "impact": impact, "playbook": playbook}

# --- Initialize Session State ---
# The presence of 'sku' marks a session that has already been initialised.
if 'sku' not in st.session_state:
    initial_state = {
        'sku': "Chocolate Chip",
        'running': False,
        'step': 0,
        'history': pd.DataFrame(),
        'is_in_anomaly': False,
        'last_alert': None,
        'event_log': [],
    }
    for state_key, initial_value in initial_state.items():
        st.session_state[state_key] = initial_value

# --- Auto-Refresh Component ---
# Only mounted while the simulation is running; interval is in milliseconds.
if st.session_state.running:
    st_autorefresh(key="auto_refresher", interval=2000, limit=None)

# --- Sidebar ---
def _clear_simulation_progress():
    """Stop the run and discard step, history and alert state (the event log is kept)."""
    st.session_state.running = False
    st.session_state.step = 0
    st.session_state.history = pd.DataFrame()
    st.session_state.is_in_anomaly = False
    st.session_state.last_alert = None


st.sidebar.title("Configuration & Controls")
sku_list = ["Chocolate Chip", "Oatmeal Raisin"]
selected_sku = st.sidebar.selectbox(
    "Select Product (SKU)",
    sku_list,
    index=sku_list.index(st.session_state.sku),
)
# Switching product restarts the simulation from scratch for the new SKU.
if selected_sku != st.session_state.sku:
    st.session_state.sku = selected_sku
    _clear_simulation_progress()
    st.rerun()

sku_data = load_sku_data(st.session_state.sku)
st.sidebar.markdown("---")

# Label of the primary button reflects where the simulation currently stands.
if st.session_state.running:
    button_label = "Running..."
elif 0 < st.session_state.step < len(sku_data['live_demo_batch']):
    button_label = "▶️ Resume"
else:
    button_label = "▶️ Start"

if st.sidebar.button(button_label, key="start_resume", disabled=st.session_state.running):
    st.session_state.running = True
    st.rerun()
if st.sidebar.button("⏹️ Stop", key="stop"):
    st.session_state.running = False
    st.rerun()
if st.sidebar.button("🔁 Reset", key="reset"):
    _clear_simulation_progress()
    st.rerun()

# --- Main App Logic ---
demo_batch = sku_data['live_demo_batch']

# Advance by one sample per rerun while running and samples remain.
if st.session_state.running and st.session_state.step < len(demo_batch):
    st.session_state.step += 1

# Before the first step the dashboard shows the first sample of the batch.
current_step_index = max(st.session_state.step - 1, 0)
current_data = demo_batch.iloc[current_step_index:current_step_index + 1]

# Refresh the history while running, or rebuild it when it is empty although
# steps have already been taken.
needs_backfill = st.session_state.step > 0 and st.session_state.history.empty
if needs_backfill or st.session_state.running:
    st.session_state.history = demo_batch.iloc[0:st.session_state.step]

# --- Main App Display ---
def _t2_scores(frame):
    """Return the T² anomaly score (x · inv_cov · xᵀ on scaled features) for every row of ``frame``."""
    scaled = models['scaler'].transform(frame[models['features']])
    return np.sum((scaled @ models['inv_cov_matrix']) * scaled, axis=1)


def _parameter_figure(history, golden, column, live_color, setpoint_column=None):
    """Build one deep-dive chart: live trace, optional setpoint trace, golden-batch trace."""
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=history['timestamp'], y=history[column], name="Live", line=dict(color=live_color)))
    if setpoint_column is not None:
        fig.add_trace(go.Scatter(x=history['timestamp'], y=history[setpoint_column], name="Setpoint", line=dict(color='gray', dash='dash')))
    # Slice the golden batch to the samples seen so far so x and y have equal length.
    fig.add_trace(go.Scatter(x=history['timestamp'], y=golden[column].iloc[:len(history)], name="Golden Batch", line=dict(color='gold', dash='dot')))
    return fig


st.title(f"🏭 Predictive Maintenance System: {st.session_state.sku}")
main_tabs = st.tabs(["📊 Live Dashboard", "📂 Event Log & Reporting"])

with main_tabs[0]:
    status_placeholder = st.empty()
    st.markdown("---")
    col1, col2 = st.columns([1, 2])

    t2_score = _t2_scores(current_data)[0]
    health_score = calculate_health_score(t2_score, models['t2_threshold'])
    # Raise an alert (and pause) only on the transition into the anomalous
    # state; the flag is cleared again once health is back at 70 or above.
    if health_score < 70 and not st.session_state.is_in_anomaly:
        st.session_state.is_in_anomaly = True
        st.session_state.last_alert = get_alert_details(current_data)
        st.session_state.running = False
    elif health_score >= 70:
        st.session_state.is_in_anomaly = False

    with col1:
        gauge_color = "green" if health_score > 80 else "orange" if health_score > 65 else "red"
        fig_gauge = go.Figure(go.Indicator(
            mode="gauge+number",
            value=health_score,
            title={'text': "Process Health Score"},
            gauge={'axis': {'range': [None, 100]}, 'bar': {'color': gauge_color}},
        ))
        fig_gauge.update_layout(height=250, margin=dict(l=10, r=10, b=10, t=50, pad=4))
        st.plotly_chart(fig_gauge, use_container_width=True)
        predicted_moisture = models['xgb_model'].predict(current_data[models['features']])[0]
        st.metric("💧 Predicted Moisture", f"{predicted_moisture:.2f} %", f"{predicted_moisture - 8.0:.2f} vs Target")
        st.metric("📈 Anomaly Score (T²)", f"{t2_score:.2f}", f"Threshold: {models['t2_threshold']:.1f}")

    with col2:
        history = st.session_state.history
        if not history.empty:
            # Work on a copy so the extra column is never written into a slice
            # of the loaded demo batch, and score all rows in one transform
            # instead of one scaler call per row.
            history = history.copy()
            history['Health Score'] = [
                calculate_health_score(row_t2, models['t2_threshold'])
                for row_t2 in _t2_scores(history)
            ]
            anomaly_points = history[history['Health Score'] < 70]
            fig_chart = go.Figure()
            fig_chart.add_trace(go.Scatter(x=history['timestamp'], y=history['Health Score'], mode='lines', name='Health Score', line=dict(color='royalblue')))
            if not anomaly_points.empty:
                fig_chart.add_trace(go.Scatter(x=anomaly_points['timestamp'], y=anomaly_points['Health Score'], mode='markers', name='Anomaly', marker=dict(color='red', size=10, symbol='x')))
            fig_chart.update_layout(title="Health Score Over Time", yaxis_range=[0, 105])
            st.plotly_chart(fig_chart, use_container_width=True)

    if st.session_state.last_alert:
        with st.expander("🚨 ALERT: Corrective Action Required", expanded=True):
            st.error(st.session_state.last_alert['impact'])
            st.warning(st.session_state.last_alert['cause'])
            with st.form("playbook_form"):
                responses = {step: st.checkbox(label=st.session_state.last_alert['playbook'][step]) for step in st.session_state.last_alert['playbook']}
                submitted = st.form_submit_button("Acknowledge & Log Event")
                # The event is logged only when every playbook step is ticked.
                if submitted and all(responses.values()):
                    st.session_state.event_log.append({
                        "Timestamp": pd.Timestamp.now(),
                        "SKU": st.session_state.sku,
                        "Alert Cause": st.session_state.last_alert['cause'],
                        "Quality Impact": st.session_state.last_alert['impact'],
                        "Actions Taken": ", ".join([st.session_state.last_alert['playbook'][s] for s, r in responses.items() if r]),
                        "Acknowledged By": "Operator_01",
                    })
                    st.success("Event logged successfully!")
                    st.session_state.last_alert = None
                    time.sleep(1)  # leave the success message visible briefly before the rerun
                    st.rerun()
                elif submitted:
                    st.error("Please complete all playbook steps before logging.")

    with st.expander("Deep Dive: Process Parameter Charts", expanded=True):
        if not st.session_state.history.empty:
            golden = sku_data['golden_batch_df']
            param_tabs = st.tabs(["Temperature", "Airflow", "Pressure", "Raw Material"])
            with param_tabs[0]:
                st.plotly_chart(_parameter_figure(history, golden, 'temp_actual', 'red', setpoint_column='temp_setpoint'), use_container_width=True)
            with param_tabs[1]:
                st.plotly_chart(_parameter_figure(history, golden, 'airflow_actual', 'deepskyblue', setpoint_column='airflow_setpoint'), use_container_width=True)
            with param_tabs[2]:
                st.plotly_chart(_parameter_figure(history, golden, 'pressure_actual', 'blueviolet'), use_container_width=True)
            with param_tabs[3]:
                st.plotly_chart(_parameter_figure(history, golden, 'raw_material_protein', 'brown'), use_container_width=True)

    if st.session_state.running:
        status_placeholder.header(f"▶️ Running... (Health: {health_score:.0f}%)")
    elif st.session_state.step >= len(sku_data['live_demo_batch']):
        status_placeholder.success("✅ Simulation Finished.")
    elif st.session_state.is_in_anomaly:
        status_placeholder.error("🚨 Simulation Paused on Anomaly. Review details and complete playbook.")
    elif st.session_state.step > 0:
        status_placeholder.info("ℹ️ Simulation is paused. Press 'Resume' to continue.")
    else:
        status_placeholder.info("ℹ️ Simulation is ready. Press 'Start' to begin.")

with main_tabs[1]:
    st.header("📂 Historical Event Log")
    logged_events = st.session_state.event_log
    if logged_events:
        # Newest acknowledged event first.
        log_df = pd.DataFrame(logged_events).sort_values(by="Timestamp", ascending=False)
        st.dataframe(log_df, use_container_width=True)
        csv = log_df.to_csv(index=False).encode('utf-8')
        st.download_button(
            label="📥 Download Log as CSV",
            data=csv,
            file_name='anomaly_event_log.csv',
            mime='text/csv',
        )
    else:
        st.info("No events have been logged during this session.")

Writing app.py


In [3]:
from pyngrok import ngrok

# Terminate open tunnels if any
ngrok.kill()

# Set your ngrok authtoken
NGROK_AUTH_TOKEN = "31eXTJ1vwYC80Mb3tqVZ2y65uqT_7o1EHT2PgnjYURTVzj4iw"  # Replace with your token
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# Run streamlit in background
!nohup streamlit run app.py --server.port 8501 &

# Open a tunnel to the streamlit port
public_url = ngrok.connect(8501)
print(f"Click the URL to open the app: {public_url}")

nohup: appending output to 'nohup.out'
Click the URL to open the app: NgrokTunnel: "https://f891ee8325b4.ngrok-free.app" -> "http://localhost:8501"


In [4]:
# Write the currently installed package versions to requirements.txt
# (the shell redirect overwrites any existing file of that name).
!pip freeze > requirements.txt