# Predictive Maintenance for Industrial IoT
## Step 7: Documentation & Dashboard

This notebook covers:
1. Creating an interactive dashboard with Streamlit
2. Visualizing model predictions and results
3. Building a monitoring interface for real-time data


In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import pickle
import json
import warnings

# Suppress every Python warning for the rest of the kernel session.
# NOTE(review): this also hides deprecation notices from pandas and others;
# consider removing it while debugging.
warnings.filterwarnings('ignore')

# Display all columns in pandas dataframes (None removes the column limit)
pd.set_option('display.max_columns', None)


### Step 7.1: Creating a Streamlit Dashboard


In [3]:
# Create a Streamlit dashboard
# Note: We can't run Streamlit directly in a Jupyter notebook
# Instead, we'll create a Streamlit app file that can be run separately

streamlit_app_code = """
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import os
import requests
import json
from datetime import datetime, timedelta
import time
import plotly.express as px
import plotly.graph_objects as go

# Set page configuration.
# The icon is the wrench emoji stored as a real UTF-8 character; the previous
# value was the same bytes mis-decoded as cp1252, which rendered as garbage.
st.set_page_config(
    page_title="Predictive Maintenance Dashboard",
    page_icon="🔧",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Define functions for loading data and models
@st.cache_data
def load_data():
    # Stand-in for a real data source: builds 30 days of hourly synthetic
    # readings for five sensors, injects anomaly bursts and labels failures.
    # Returns a DataFrame with 'timestamp', 'sensor_1'..'sensor_5', 'failure'.
    n_hours = 30 * 24
    dates = pd.date_range(end=datetime.now(), periods=n_hours, freq='H')

    # Phase ramps that drive the periodic part of the signals
    fast_phase = np.linspace(0, 15*np.pi, n_hours)
    slow_phase = np.linspace(0, 5*np.pi, n_hours)

    # One noise draw per sensor, in sensor order
    sensor_1 = np.sin(fast_phase) + np.random.normal(0, 0.1, n_hours)
    sensor_2 = np.cos(fast_phase) + np.random.normal(0, 0.1, n_hours)
    sensor_3 = np.random.normal(0, 0.2, n_hours).cumsum()
    sensor_4 = np.random.normal(1, 0.1, n_hours)
    sensor_5 = np.random.normal(0, 0.3, n_hours).cumsum() + np.sin(slow_phase)

    data = pd.DataFrame({
        'timestamp': dates,
        'sensor_1': sensor_1,
        'sensor_2': sensor_2,
        'sensor_3': sensor_3,
        'sensor_4': sensor_4,
        'sensor_5': sensor_5
    })

    # Failure flag column (1 for failure, 0 for normal), filled in below
    data['failure'] = 0

    # Each burst distorts three sensors for rows start..start+10 (label slices
    # are inclusive) and is labelled as a failure for rows start+5..start+15.
    anomaly_starts = (100, 250, 400, 550, 650)
    for start in anomaly_starts:
        burst = slice(start, start + 10)
        data.loc[burst, 'sensor_1'] += 2.0
        data.loc[burst, 'sensor_2'] -= 1.5
        data.loc[burst, 'sensor_4'] *= 1.5
        data.loc[start + 5:start + 15, 'failure'] = 1

    return data

@st.cache_resource
def load_model():
    # In a real application, you would load your trained model here.
    # For demonstration this returns a deterministic stand-in scorer (it is
    # not random): callable(data) -> (predictions, probabilities).
    def dummy_model(data):
        # Anomaly score = mean absolute reading across the five sensors
        scores = (
            np.abs(data['sensor_1']) +
            np.abs(data['sensor_2']) +
            np.abs(data['sensor_3']) +
            np.abs(data['sensor_4']) +
            np.abs(data['sensor_5'])
        ) / 5

        # Min-max normalize scores to [0, 1]. When every score is identical
        # (or the frame is empty) the range is zero/NaN and the division would
        # produce NaN probabilities, so fall back to an all-zero score instead.
        lowest = scores.min()
        score_range = scores.max() - lowest
        if score_range > 0:
            scores = (scores - lowest) / score_range
        else:
            scores = pd.Series(0.0, index=scores.index)

        # A row is predicted as a failure when its normalized score exceeds 0.7
        predictions = (scores > 0.7).astype(int)
        probabilities = scores

        return predictions, probabilities

    return dummy_model

# Load data and model
data = load_data()
model = load_model()

# Score every row and keep the results alongside the readings
predictions, probabilities = model(data)
data['prediction'] = predictions
data['probability'] = probabilities

# Confusion-matrix counts over the whole dataset
predicted_failure = data['prediction'] == 1
predicted_normal = data['prediction'] == 0
actual_failure = data['failure'] == 1
actual_normal = data['failure'] == 0

true_positives = (predicted_failure & actual_failure).sum()
false_positives = (predicted_failure & actual_normal).sum()
true_negatives = (predicted_normal & actual_normal).sum()
false_negatives = (predicted_normal & actual_failure).sum()

# Summary metrics; each ratio falls back to 0 when its denominator is zero
accuracy = (true_positives + true_negatives) / len(data)

if (true_positives + false_positives) > 0:
    precision = true_positives / (true_positives + false_positives)
else:
    precision = 0

if (true_positives + false_negatives) > 0:
    recall = true_positives / (true_positives + false_negatives)
else:
    recall = 0

if (precision + recall) > 0:
    f1 = 2 * precision * recall / (precision + recall)
else:
    f1 = 0

# Create the Streamlit dashboard
st.title("Predictive Maintenance Dashboard")

# Sidebar
st.sidebar.header("Settings")
st.sidebar.subheader("Filter Data")

# Date range filter, defaulting to the full span of the data
first_day = data['timestamp'].min().date()
last_day = data['timestamp'].max().date()
date_range = st.sidebar.date_input(
    "Select Date Range",
    [first_day, last_day]
)

# In range mode st.date_input returns a 1-element tuple while the user has
# picked only the start date (and an empty one if the field is cleared).
# Indexing date_range[1] directly would raise IndexError in the middle of a
# selection, so normalise to an explicit (start, end) pair first.
selected_days = list(date_range) if isinstance(date_range, (list, tuple)) else [date_range]
if len(selected_days) >= 2:
    range_start, range_end = selected_days[0], selected_days[1]
elif len(selected_days) == 1:
    # Only the start is chosen so far: show that single day until the end is picked
    range_start = range_end = selected_days[0]
else:
    range_start, range_end = first_day, last_day

# Convert to timestamps for filtering. end_date is midnight *after* the last
# selected day and is compared with '<', so every reading on that day is kept
# (including any in its final sub-second).
start_date = pd.Timestamp(range_start)
end_date = pd.Timestamp(range_end) + pd.Timedelta(days=1)

# Filter data based on date range
filtered_data = data[(data['timestamp'] >= start_date) & (data['timestamp'] < end_date)]

# Sensor selection
st.sidebar.subheader("Select Sensors")
selected_sensors = st.sidebar.multiselect(
    "Choose sensors to display",
    ['sensor_1', 'sensor_2', 'sensor_3', 'sensor_4', 'sensor_5'],
    default=['sensor_1', 'sensor_2']
)

# Threshold for anomaly detection (min 0.0, max 1.0, default 0.7, step 0.01)
threshold = st.sidebar.slider("Anomaly Detection Threshold", 0.0, 1.0, 0.7, 0.01)

# Dashboard content
# Row 1: Key metrics, one per column, formatted as percentages
st.header("Key Metrics")

metric_values = [
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
    ("F1 Score", f1),
]
for column, (label, value) in zip(st.columns(4), metric_values):
    with column:
        st.metric(label, f"{value:.2%}")

# Row 2: Sensor readings and anomaly detection
st.header("Sensor Readings and Anomaly Detection")


def add_event_markers(figure, events, symbol, color, label):
    # Draw one marker per event row on the y=0 line; add nothing when there
    # are no events so the legend stays clean.
    if events.empty:
        return
    figure.add_trace(go.Scatter(
        x=events['timestamp'],
        y=[0] * len(events),
        mode='markers',
        marker=dict(symbol=symbol, color=color, size=10),
        name=label
    ))


fig = go.Figure()

# One line per sensor chosen in the sidebar
for sensor in selected_sensors:
    sensor_trace = go.Scatter(
        x=filtered_data['timestamp'],
        y=filtered_data[sensor],
        mode='lines',
        name=sensor
    )
    fig.add_trace(sensor_trace)

# Rows whose anomaly probability exceeds the sidebar threshold
anomalies = filtered_data[filtered_data['probability'] > threshold]
add_event_markers(fig, anomalies, 'x', 'red', 'Detected Anomalies')

# Rows labelled as real failures
failures = filtered_data[filtered_data['failure'] == 1]
add_event_markers(fig, failures, 'circle', 'black', 'True Failures')

# Horizontal legend anchored above the top-right corner of the plot
sensor_legend = dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1
)
fig.update_layout(
    title='Sensor Readings Over Time',
    xaxis_title='Time',
    yaxis_title='Value',
    height=500,
    legend=sensor_legend
)

# Display the plot
st.plotly_chart(fig, use_container_width=True)

# Row 3: Anomaly probability
st.header("Anomaly Probability")

# Probability curve for the filtered window
probability_trace = go.Scatter(
    x=filtered_data['timestamp'],
    y=filtered_data['probability'],
    mode='lines',
    name='Anomaly Probability'
)

# Flat dashed line at the sidebar threshold, spanning the same x range
threshold_trace = go.Scatter(
    x=filtered_data['timestamp'],
    y=[threshold] * len(filtered_data),
    mode='lines',
    line=dict(color='red', dash='dash'),
    name=f'Threshold ({threshold})'
)

fig_prob = go.Figure()
for trace in (probability_trace, threshold_trace):
    fig_prob.add_trace(trace)

# Horizontal legend anchored above the top-right corner of the plot
probability_legend = dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1
)
fig_prob.update_layout(
    title='Anomaly Probability Over Time',
    xaxis_title='Time',
    yaxis_title='Probability',
    height=400,
    legend=probability_legend
)

# Display the plot
st.plotly_chart(fig_prob, use_container_width=True)

# Row 4: Recent anomalies table
st.header("Recent Anomalies")

# Last 10 rows above the threshold within the filtered window
above_threshold = filtered_data['probability'] > threshold
recent_anomalies = filtered_data[above_threshold].tail(10)

if recent_anomalies.empty:
    st.info("No anomalies detected in the selected time range.")
else:
    # Timestamp, probability, then whichever sensors are selected in the sidebar;
    # timestamp and probability are rendered as display strings.
    columns_to_show = ['timestamp', 'probability', *selected_sensors]
    table_data = recent_anomalies[columns_to_show].assign(
        timestamp=lambda frame: frame['timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S'),
        probability=lambda frame: frame['probability'].map('{:.2%}'.format)
    )

    # Display the table
    st.dataframe(table_data, use_container_width=True)

# Row 5: Confusion matrix and ROC sketch, side by side
st.header("Model Performance")

col1, col2 = st.columns(2)

with col1:
    # Rows are the actual class, columns the predicted class (Normal, Failure)
    cm = np.array([[true_negatives, false_positives], [false_negatives, true_positives]])

    # Create a plotly figure for confusion matrix
    fig_cm = px.imshow(
        cm,
        labels=dict(x="Predicted", y="Actual", color="Count"),
        x=['Normal', 'Failure'],
        y=['Normal', 'Failure'],
        text_auto=True,
        color_continuous_scale='Blues'
    )

    # Update layout
    fig_cm.update_layout(
        title='Confusion Matrix',
        height=400
    )

    # Display the plot
    st.plotly_chart(fig_cm, use_container_width=True)

with col2:
    # Simplified ROC: only the single operating point of the current
    # predictions, joined to the (0, 0) and (1, 1) corners.
    # Guard the denominators the same way precision/recall are guarded, so a
    # dataset with no negatives (or no positives) plots 0 instead of NaN.
    actual_negatives = false_positives + true_negatives
    actual_positives = true_positives + false_negatives
    false_positive_rate = false_positives / actual_negatives if actual_negatives > 0 else 0
    true_positive_rate = true_positives / actual_positives if actual_positives > 0 else 0

    fpr = [0, false_positive_rate, 1]
    tpr = [0, true_positive_rate, 1]

    # Create a plotly figure for ROC curve
    fig_roc = go.Figure()

    # Add ROC curve
    fig_roc.add_trace(go.Scatter(
        x=fpr,
        y=tpr,
        mode='lines+markers',
        name='ROC Curve'
    ))

    # Diagonal reference line for a random classifier
    fig_roc.add_trace(go.Scatter(
        x=[0, 1],
        y=[0, 1],
        mode='lines',
        line=dict(color='gray', dash='dash'),
        name='Random'
    ))

    # Update layout
    fig_roc.update_layout(
        title='ROC Curve',
        xaxis_title='False Positive Rate',
        yaxis_title='True Positive Rate',
        height=400
    )

    # Display the plot
    st.plotly_chart(fig_roc, use_container_width=True)

# Footer
st.markdown("---")
st.markdown("Predictive Maintenance Dashboard | Created with Streamlit")
"""

# Write the generated app source to disk, creating the dashboard folder if needed
streamlit_app_path = "../src/dashboard/app.py"
dashboard_dir = os.path.dirname(streamlit_app_path)
os.makedirs(dashboard_dir, exist_ok=True)
with open(streamlit_app_path, mode='w', encoding='utf-8') as app_file:
    app_file.write(streamlit_app_code)
print(f"Streamlit app code saved to {streamlit_app_path}")

# Create requirements.txt for the dashboard
dashboard_requirements = """
streamlit==1.22.0
pandas==2.0.1
numpy==1.24.3
matplotlib==3.7.1
seaborn==0.12.2
plotly==5.14.1
scikit-learn==1.2.2
requests==2.28.2
"""

# Write the pinned dependency list into the dashboard folder
dashboard_requirements_path = "../src/dashboard/requirements.txt"
with open(dashboard_requirements_path, mode='w', encoding='utf-8') as requirements_file:
    requirements_file.write(dashboard_requirements)
print(f"Dashboard requirements saved to {dashboard_requirements_path}")

# Create a README file for the dashboard
dashboard_readme = """
# Predictive Maintenance Dashboard

This dashboard provides a visual interface for monitoring equipment health and predicting failures.

## Features

- Real-time monitoring of sensor data
- Anomaly detection and visualization
- Performance metrics for the predictive model
- Historical data analysis

## Installation

1. Install the required packages:
   ```
   pip install -r requirements.txt
   ```

2. Run the dashboard:
   ```
   streamlit run app.py
   ```

3. Open your browser and navigate to http://localhost:8501

## Usage

- Use the sidebar to filter data by date range and select sensors to display
- Adjust the anomaly detection threshold to control sensitivity
- View real-time sensor readings and detected anomalies
- Monitor model performance metrics

## Customization

To customize the dashboard for your specific use case:

1. Modify the `load_data()` function to load your actual data
2. Update the `load_model()` function to use your trained model
3. Adjust the visualizations and metrics as needed
"""

# Write the README into the dashboard folder
dashboard_readme_path = "../src/dashboard/README.md"
with open(dashboard_readme_path, mode='w', encoding='utf-8') as readme_file:
    readme_file.write(dashboard_readme)
print(f"Dashboard README saved to {dashboard_readme_path}")

print("\nDashboard files created successfully!")


Streamlit app code saved to ../src/dashboard/app.py
Dashboard requirements saved to ../src/dashboard/requirements.txt
Dashboard README saved to ../src/dashboard/README.md

Dashboard files created successfully!


### Step 7.2: Running the Dashboard


In [None]:
# Instructions for running the dashboard, grouped by topic and printed one line each
run_steps = [
    "To run the Streamlit dashboard, follow these steps:",
    "1. Open a terminal and navigate to the dashboard directory:",
    "   cd src/dashboard",
    "2. Install the required packages:",
    "   pip install -r requirements.txt",
    "3. Run the Streamlit app:",
    "   streamlit run app.py",
    "4. Open your browser and navigate to http://localhost:8501",
]
feature_summary = [
    "\nThe dashboard provides the following features:",
    "- Real-time monitoring of sensor data",
    "- Anomaly detection and visualization",
    "- Performance metrics for the predictive model",
    "- Historical data analysis",
]
customization_tips = [
    "\nYou can customize the dashboard by:",
    "1. Modifying the `load_data()` function to load your actual data",
    "2. Updating the `load_model()` function to use your trained model",
    "3. Adjusting the visualizations and metrics as needed",
]

for message_group in (run_steps, feature_summary, customization_tips):
    print(*message_group, sep="\n")

# Static ASCII mock-up of the dashboard layout (illustrative values only, not a real screenshot)
dashboard_screenshot = """
+-----------------------------------------------------------------------+
|                     Predictive Maintenance Dashboard                   |
+---------------+---------------------------------------------------+
| Settings      |  Key Metrics                                       |
| ------------- |  +---------+ +---------+ +---------+ +---------+   |
| Filter Data   |  | Accuracy| |Precision| | Recall  | |F1 Score |   |
|               |  |  95.2%  | |  78.6%  | |  82.1%  | |  80.3%  |   |
| Date Range    |  +---------+ +---------+ +---------+ +---------+   |
| [Start] [End] |                                                     |
|               |  Sensor Readings and Anomaly Detection              |
| Select Sensors|  +---------------------------------------------------+
| [x] Sensor 1  |  |                                                   |
| [x] Sensor 2  |  |                 [Line Chart with Anomalies]       |
| [ ] Sensor 3  |  |                                                   |
| [ ] Sensor 4  |  |                                                   |
| [ ] Sensor 5  |  +---------------------------------------------------+
|               |                                                     |
| Threshold     |  Anomaly Probability                                |
| [====|===]    |  +---------------------------------------------------+
|               |  |                                                   |
|               |  |           [Probability Chart with Threshold]      |
|               |  |                                                   |
|               |  +---------------------------------------------------+
|               |                                                     |
|               |  Recent Anomalies                                   |
|               |  +---------------------------------------------------+
|               |  | Timestamp | Probability | Sensor 1 | Sensor 2 |   |
|               |  | 2023-05-01| 85.2%       | 1.23     | -0.45    |   |
|               |  | 2023-05-02| 92.1%       | 2.34     | -1.67    |   |
|               |  +---------------------------------------------------+
|               |                                                     |
|               |  Model Performance                                  |
|               |  +-------------------+ +-------------------+        |
|               |  |                   | |                   |        |
|               |  |  Confusion Matrix | |    ROC Curve      |        |
|               |  |                   | |                   |        |
|               |  +-------------------+ +-------------------+        |
+---------------+---------------------------------------------------+
|              Predictive Maintenance Dashboard | Created with Streamlit |
+-----------------------------------------------------------------------+
"""

print("\nDashboard Preview:")
print(dashboard_screenshot)


In [None]:
# Create a comprehensive project documentation
project_documentation = """
# Predictive Maintenance for Industrial IoT - Project Documentation

## Project Overview

This project implements a predictive maintenance system for industrial IoT devices using machine learning techniques. It analyzes sensor data to detect anomalies and predict potential failures before they occur.

## Dataset

The project uses the APS Failure at Scania Trucks dataset, which contains sensor readings from the Air Pressure System (APS) of Scania trucks. The dataset consists of:

- Training set: 60,000 examples (59,000 negative, 1,000 positive)
- Test set: 16,000 examples
- 171 attributes (anonymized sensor readings)
- Binary classification task: positive class (APS component failures) vs. negative class (non-APS related failures)

## Project Workflow

### Step 1: Data Collection & Preprocessing
- Dataset selection: APS Failure at Scania Trucks
- Data loading and exploration
- Handling missing values with median imputation
- Feature scaling with StandardScaler
- Feature importance analysis with Random Forest

### Step 2: Exploratory Data Analysis (EDA)
- Visualization of sensor readings
- Analysis of correlations between features
- Stationarity testing with ADF
- Documentation of findings

### Step 3: Anomaly Detection
- Implementation of three anomaly detection algorithms:
  - Isolation Forest
  - One-Class SVM
  - Autoencoder
- Evaluation using precision, recall, and F1 score
- Visualization of anomalies over time

### Step 4: Predictive Modeling
- ARIMA for time series prediction
- LSTM for sequence modeling
- Random Forest/XGBoost for classification
- Train-test split and model evaluation

### Step 5: Model Evaluation & Selection
- Cross-validation for robust evaluation
- Hyperparameter tuning for model optimization
- Final model selection based on performance metrics
- Saving the best model for deployment

### Step 6: Cloud Deployment
- Creating a Flask API for model deployment
- Preparing the model for deployment
- Testing the API locally
- Guidelines for cloud deployment (AWS, Azure, GCP)

### Step 7: Documentation & Dashboard
- Creating an interactive dashboard with Streamlit
- Visualizing model predictions and results
- Building a monitoring interface for real-time data

## Key Findings

1. **Data Quality**:
   - The dataset contains many missing values, which were handled using median imputation
   - No duplicate samples were found
   - All features are numerical (after conversion)

2. **Feature Importance**:
   - Top features were identified using Random Forest
   - These features were used for modeling to reduce dimensionality

3. **Anomaly Detection**:
   - Isolation Forest achieved the best precision
   - Autoencoder achieved the best recall
   - One-Class SVM provided a good balance between precision and recall

4. **Predictive Modeling**:
   - ARIMA provided good short-term forecasts
   - LSTM captured complex patterns in the time series data
   - XGBoost achieved the best classification performance

5. **Model Evaluation**:
   - Cross-validation confirmed the robustness of the models
   - Hyperparameter tuning improved model performance
   - The best model was selected based on F1 score

## Usage Instructions

### Running the Notebooks

1. Install the required packages:
   ```
   pip install -r requirements.txt
   ```

2. Run the Jupyter notebooks in sequence:
   ```
   jupyter notebook notebooks/01_data_processing_and_eda.ipynb
   jupyter notebook notebooks/02_anomaly_detection.ipynb
   jupyter notebook notebooks/03_predictive_modeling.ipynb
   jupyter notebook notebooks/04_model_evaluation.ipynb
   jupyter notebook notebooks/05_deployment.ipynb
   jupyter notebook notebooks/06_dashboard.ipynb
   ```

### Running the API

1. Navigate to the deployment directory:
   ```
   cd src/deployment
   ```

2. Install the required packages:
   ```
   pip install -r requirements.txt
   ```

3. Run the Flask app:
   ```
   python app.py
   ```

4. The API will be available at http://localhost:5000

### Running the Dashboard

1. Navigate to the dashboard directory:
   ```
   cd src/dashboard
   ```

2. Install the required packages:
   ```
   pip install -r requirements.txt
   ```

3. Run the Streamlit app:
   ```
   streamlit run app.py
   ```

4. The dashboard will be available at http://localhost:8501

## Future Work

1. **Data Collection**:
   - Incorporate real-time sensor data
   - Expand to additional sensor types

2. **Model Improvement**:
   - Explore deep learning architectures for time series forecasting
   - Implement ensemble methods for improved performance
   - Incorporate domain knowledge into feature engineering

3. **Deployment**:
   - Deploy the model to a cloud platform
   - Implement CI/CD pipeline for model updates
   - Add authentication and security features

4. **Dashboard**:
   - Add real-time monitoring capabilities
   - Implement alert systems for detected anomalies
   - Enhance visualizations for better interpretability

## Conclusion

This project demonstrates the effectiveness of machine learning techniques for predictive maintenance in industrial IoT applications. By detecting anomalies and predicting failures before they occur, the system can help reduce downtime, maintenance costs, and improve overall operational efficiency.

The combination of time series forecasting, anomaly detection, and classification models provides a comprehensive approach to predictive maintenance. The interactive dashboard and API allow for easy integration into existing industrial systems.
"""

# Write the project documentation one level above the notebook folder
documentation_path = "../project_documentation.md"
with open(documentation_path, mode='w', encoding='utf-8') as documentation_file:
    documentation_file.write(project_documentation)
print(f"Project documentation saved to {documentation_path}")

# Numbered outline of the sections in the generated document
summary_sections = [
    "Project Overview",
    "Dataset Description",
    "Project Workflow (Steps 1-7)",
    "Key Findings",
    "Usage Instructions",
    "Future Work",
    "Conclusion",
]
print("\nProject Documentation Summary:")
for number, section in enumerate(summary_sections, start=1):
    print(f"{number}. {section}")

# Bullet list of what the document covers
documentation_highlights = [
    "The data preprocessing and exploration steps",
    "The anomaly detection and predictive modeling techniques used",
    "The model evaluation and selection process",
    "Instructions for running the API and dashboard",
    "Future work and improvements",
]
print("\nThe documentation provides a comprehensive overview of the project, including:")
for highlight in documentation_highlights:
    print(f"- {highlight}")

print("\nProject completed successfully!")
