In [None]:
-- One-time setup for the P&ID demo: database, image stage, and the table that
-- pairs each staged image with its generated description.
CREATE DATABASE IF NOT EXISTS PID_DEMO;

-- IF NOT EXISTS (rather than OR REPLACE) so that re-running this cell does not
-- drop the stage together with the diagram that was uploaded to it.
-- Names are fully qualified because later cells address PID_DEMO.PUBLIC explicitly.
CREATE STAGE IF NOT EXISTS PID_DEMO.PUBLIC.PID_IMAGES
  DIRECTORY = (ENABLE = TRUE)
  ENCRYPTION = (TYPE = 'SNOWFLAKE_SSE');

-- Recreated on every run so the INSERT in the following cell starts from an empty table.
CREATE OR REPLACE TABLE PID_DEMO.PUBLIC.images_table (img FILE, pid_description VARCHAR);

Upload the P&ID (piping and instrumentation diagram) image from https://en.wikipedia.org/wiki/Piping_and_instrumentation_diagram#/media/File:Pump_with_tank_pid_en.svg to the stage `PID_IMAGES` before running the next cell.

In [None]:
-- Make sure the stage's directory table lists files uploaded since the last refresh.
ALTER STAGE PID_DEMO.PUBLIC.PID_IMAGES REFRESH;

-- Register every staged file as a FILE value with an empty description.
INSERT INTO PID_DEMO.PUBLIC.images_table
  SELECT TO_FILE(file_url) AS img, '' AS pid_description
  FROM DIRECTORY(@PID_DEMO.PUBLIC.PID_IMAGES);

-- Generate the description row by row from each row's own image.
-- (An uncorrelated scalar subquery over images_table fails as soon as the
-- table holds more than one image, and would not tie a description to its row.)
UPDATE PID_DEMO.PUBLIC.images_table
SET pid_description = snowflake.cortex.complete(
    'pixtral-large'
    ,'You are technical writer, interpret the Piping and instrumentation diagram according to EN ISO 10628 instruments according to EN 62424 and create a technical documentation for the process flow, asset hierarchy and the tag list.'
    , img
);

SELECT * FROM PID_DEMO.PUBLIC.images_table;

In [None]:
# Import python packages
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import IsolationForest
from datetime import datetime, timedelta

from snowflake.snowpark.context import get_active_session
# Grab the currently active Snowpark session; the SQL queries and
# write_pandas uploads in the cells below all go through this handle.
session = get_active_session()

In [None]:
# --- Synthetic one-minute sensor data for the pump/tank loop -----------------

# First sample of the simulated day.
start_time = datetime(2024, 1, 1, 0, 0)

# Normal operating point
normal_tank_level = 65  # Normal tank level (%)
normal_pressure = 6.0   # Normal pressure (barg)
flow_rate = 6.0         # Normal flow rate (m³/h)

# Injected anomalies, as half-open minute ranges [start, stop) counted from start_time
CAVITATION_WINDOW = (180, 210)   # 03:00-03:30 pressure/flow drop
HIGH_LEVEL_WINDOW = (600, 630)   # 10:00-10:30 tank level ~88 %
PUMP_TRIP_WINDOW = (900, 930)    # 15:00-15:30 pump stopped

# Fixed seed so that Restart & Run All reproduces the same data set.
RANDOM_SEED = 42


def generate_pid_sensor_data(seed=RANDOM_SEED, n_minutes=1440, start_time=start_time):
    """Build a reproducible frame of simulated sensor readings, one row per minute.

    Gaussian noise is added around the normal operating point, then three
    anomaly windows are overlaid: a cavitation-like pressure/flow drop, a
    high tank level, and a pump trip.

    Parameters
    ----------
    seed : int
        Seed for the NumPy random generator; the same seed gives the same frame.
    n_minutes : int
        Number of one-minute samples to generate.
    start_time : datetime
        Timestamp of the first sample.

    Returns
    -------
    pandas.DataFrame
        Columns: Timestamp, T001_Level, P001_Outlet_Pressure, P001_Flow_Rate,
        P001_Status (1 = running, 0 = stopped) and the three 0/1 alarm flags.
    """
    rng = np.random.default_rng(seed)
    minute = np.arange(n_minutes)

    def in_window(window):
        first, stop = window
        return (minute >= first) & (minute < stop)

    # Normal variation around the operating point
    level = normal_tank_level + rng.normal(0, 0.5, n_minutes)
    pressure = normal_pressure + rng.normal(0, 0.1, n_minutes)
    flow = flow_rate + rng.normal(0, 0.2, n_minutes)

    # Anomaly 1: sudden pressure drop (pump cavitation)
    cavitation = in_window(CAVITATION_WINDOW)
    pressure[cavitation] *= 0.7
    flow[cavitation] *= 0.6

    # Anomaly 2: high tank level
    high_level = in_window(HIGH_LEVEL_WINDOW)
    level[high_level] = 88 + rng.normal(0, 0.5, int(high_level.sum()))

    # Anomaly 3: pump trip - residual pressure only, no flow
    tripped = in_window(PUMP_TRIP_WINDOW)
    pressure[tripped] = 0.2 + rng.normal(0, 0.05, int(tripped.sum()))
    flow[tripped] = 0.0

    return pd.DataFrame({
        'Timestamp': pd.date_range(start_time, periods=n_minutes, freq='min'),
        'T001_Level': np.round(level, 2),
        'P001_Outlet_Pressure': np.round(pressure, 2),
        'P001_Flow_Rate': np.round(flow, 2),
        'P001_Status': np.where(tripped, 0, 1),
        # Alarm flags are derived from the unrounded readings.
        'L0011_High_Level_Alarm': (level > 85).astype(int),
        'L0012_Low_Level_Alarm': (level < 20).astype(int),
        'PI0013_High_Pressure_Alarm': (pressure > 7).astype(int),
    })


df = generate_pid_sensor_data()

# Display first few rows
print(df.head())

# Display summary of anomalies
print("\nAnomaly Summary:")
print("1. Pump Cavitation (3:00-3:30 AM): Pressure drop to ~4.2 barg")
print("2. High Tank Level (10:00-10:30 AM): Level ~88%")
print("3. Pump Trip (15:00-15:30 PM): Pressure ~0.2 barg, zero flow")

In [None]:
# Plot the three process signals from the frame generated above.
#
# The data is taken from the in-memory `df` rather than from PID_IOT_DATA:
# that table is only written by the write_pandas cell that follows, so
# querying it here fails on a fresh Run All, and rebinding `df` to a
# 4-column query result would make that later cell upload a truncated table.
signal_columns = ['T001_Level', 'P001_Outlet_Pressure', 'P001_Flow_Rate']
plot_df = df[['Timestamp', *signal_columns]].sort_values('Timestamp')

# One stacked panel per signal: (column, title, y-label, colour)
panels = [
    ('T001_Level', 'Tank Level Over Time', 'Level', 'blue'),
    ('P001_Outlet_Pressure', 'Outlet Pressure Over Time', 'Pressure', 'red'),
    ('P001_Flow_Rate', 'Flow Rate Over Time', 'Flow Rate', 'green'),
]

fig, axes = plt.subplots(len(panels), 1, figsize=(15, 12))
fig.suptitle('PID IoT Data Time Series', fontsize=16)

for ax, (column, title, ylabel, color) in zip(axes, panels):
    sns.lineplot(data=plot_df, x='Timestamp', y=column, ax=ax, color=color)
    ax.set_title(title)
    ax.set_xlabel('')
    ax.set_ylabel(ylabel)
    ax.grid(True)

# Only the bottom panel carries the shared x-axis label.
axes[-1].set_xlabel('Timestamp')

# Adjust layout to prevent overlap
plt.tight_layout()

# Save the plot (optional)
plt.savefig('pid_iot_timeseries.png')

# Show the plot
plt.show()


In [None]:
# Upload the `df` frame to the PID_IOT_DATA table, creating the table when it
# is missing and replacing whatever an earlier run stored there.
session.write_pandas(
    df,
    "PID_IOT_DATA",
    overwrite=True,
    auto_create_table=True,
    use_logical_type=True,
)

In [None]:
-- Event log: one row per anomaly flagged on a sensor signal.
CREATE OR REPLACE TABLE PID_DEMO.PUBLIC.EVENT_DATA (
    EVENT_ID       INTEGER IDENTITY(1,1),                       -- auto-numbered, starting at 1 in steps of 1
    TIMESTAMP      TIMESTAMP_NTZ,                               -- time of the flagged reading
    SENSOR_NAME    VARCHAR,
    ANOMALY_SCORE  FLOAT,
    DESCRIPTION    VARCHAR,
    CREATED_AT     TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP()    -- filled in at insert time
);

In [None]:
# Function to detect anomalies using Isolation Forest
def detect_anomalies(data, sensor_name, contamination=0.1):
    """Flag outliers in a single sensor series with an Isolation Forest.

    Parameters
    ----------
    data : object exposing ``.values`` (e.g. a pandas Series)
        Readings of one sensor; reshaped into a single-feature column.
    sensor_name : str
        Accepted for the caller's convenience; not used by the computation.
    contamination : float
        Passed through to ``IsolationForest`` as the expected outlier share.

    Returns
    -------
    tuple
        ``(anomalies, scores)``: a boolean mask that is True where the model
        predicted -1 (anomaly), and the per-sample ``score_samples`` values.
    """
    features = data.values.reshape(-1, 1)

    # random_state is pinned so repeated runs on the same data agree.
    forest = IsolationForest(contamination=contamination, random_state=42)
    labels = forest.fit_predict(features)

    return labels == -1, forest.score_samples(features)


# Load the three analogue sensor signals in time order.
sensor_data = session.sql("SELECT \"Timestamp\", \"T001_Level\", \"P001_Outlet_Pressure\", \"P001_Flow_Rate\" FROM PID_IOT_DATA ORDER BY \"Timestamp\"").collect()
sensor_data_df = pd.DataFrame(sensor_data)

# An empty result has no columns at all; fail with a clear message instead of
# a KeyError on the 'Timestamp' lookup below.
if sensor_data_df.empty:
    raise ValueError("PID_IOT_DATA returned no rows; generate and upload the sensor data first.")

# List of sensor columns (excluding timestamp)
sensor_columns = sensor_data_df.columns.drop('Timestamp').tolist()

# Run the detector on each sensor independently and collect one event per flagged reading.
anomaly_events = []
for sensor in sensor_columns:
    anomalies, scores = detect_anomalies(sensor_data_df[sensor], sensor)

    # Timestamps and scores of the flagged readings, in matching order
    anomaly_timestamps = sensor_data_df.loc[anomalies, 'Timestamp']
    anomaly_scores = scores[anomalies]

    anomaly_events.extend(
        {
            'timestamp': ts,
            'sensor_name': sensor,
            'anomaly_score': score,
            'description': f'Anomaly detected in {sensor} with score {score:.3f}',
        }
        for ts, score in zip(anomaly_timestamps, anomaly_scores)
    )

# Load the events into the EVENT_DATA table declared earlier in this notebook.
if anomaly_events:
    events_df = pd.DataFrame(anomaly_events)
    # auto_create_table=False + overwrite=True replaces the rows but keeps the
    # declared table (auto_create_table=True would drop and recreate it,
    # losing the event_id and created_at columns).
    # quote_identifiers=False lets the lower-case frame columns resolve to the
    # table's unquoted column names. The target is addressed as
    # PID_DEMO.PUBLIC.EVENT_DATA, matching the DDL and the later SELECT.
    session.write_pandas(
        events_df,
        "EVENT_DATA",
        database="PID_DEMO",
        schema="PUBLIC",
        quote_identifiers=False,
        auto_create_table=False,
        overwrite=True,
        use_logical_type=True,
    )

In [None]:
# Fetch every row of the sensor table; the result is the list returned by collect().
sensor_query = "SELECT * from PID_DEMO.PUBLIC.PID_IOT_DATA"
sensor_data_df = session.sql(sensor_query).collect()

In [None]:
# Fetch every recorded anomaly event; the result is the list returned by collect().
event_query = "SELECT * from PID_DEMO.PUBLIC.EVENT_DATA"
event_data_df = session.sql(event_query).collect()

In [None]:
# Fetch the generated P&ID description text; the result is the list returned by collect().
pid_query = "SELECT pid_description FROM PID_DEMO.PUBLIC.images_table"
pid_df = session.sql(pid_query).collect()

In [None]:
# Leading instruction text of the analysis prompt.
instructions = (
    "You are a mechanical engineer, "
    "Use following data from the process instrumentation diagrams and the sensors "
    "to analyse the performance and status of the process."
)

In [None]:
# Model that will receive the prompt.
model = 'claude-3-5-sonnet'

# Prompt = instructions followed by the stringified P&ID description, event
# rows and sensor rows, in that order and with no separators in between.
prompt_parts = [instructions, str(pid_df), str(event_data_df), str(sensor_data_df)]
prompt = "".join(prompt_parts)

In [None]:
# Send model and prompt as bind parameters and read the single value returned
# (first column of the first row).
completion_rows = session.sql(
    "SELECT snowflake.cortex.complete(?,?)",
    (model, prompt),
).collect()
performance_analysis = completion_rows[0][0]
print(performance_analysis)