## Setting up Pipeline Alerts

# Level 1 (Beginner)

To start with, we will explore different options to monitor the **health** of **Tasks, Pipes and Dynamic Tables**. 

We can apply checks either to individual objects or to all objects within a Schema or Database. The latter is recommended as it automatically includes any future objects.

## 1. Setting up message destinations

To send out notifications from Snowflake we first need a **Notification Integration** for each destination.

For this demo we will use **email** (only works for verified user emails!) and a **Slack webhook** (https://api.slack.com/messaging/webhooks) (you can also use MS Teams or PagerDuty):

In [None]:
# Optional helper cell: it is not needed to run the demo. It only renders a small
# Streamlit form that collects the two values referenced by the SQL cells below
# as {{MY_DEMO_SLACK_SECRET}} and {{MY_DEMO_EMAIL}}.

import streamlit as st
from snowflake.snowpark.context import get_active_session
session = get_active_session()

st.divider()
col1, col2 = st.columns([1,1])
# Mask the webhook secret while typing and drop whitespace picked up by copy/paste.
MY_DEMO_SLACK_SECRET = col1.text_input("Enter your slack webhook secret", type="password").strip()
# The email goes into the second column, which was created but left empty before.
MY_DEMO_EMAIL = col2.text_input("Enter your verified user email").strip()
# Stop here until both values are present, so later cells never run with empty placeholders.
if MY_DEMO_SLACK_SECRET == "" or MY_DEMO_EMAIL == "":
    raise Exception("Webhook string and Email needed to configure notifications below")

In [None]:
-- Email destination for our Alert messages:
-- a notification integration of type EMAIL, enabled right away.
create or replace notification integration DEMO_EMAIL_NOTIFICATIONS
    type    = email
    enabled = true;

In [None]:
-- Smoke test for the email integration: send one plain-text message.
call SYSTEM$SEND_SNOWFLAKE_NOTIFICATION(
    -- message payload
    SNOWFLAKE.NOTIFICATION.TEXT_PLAIN('Hello from Snowflake'),
    -- destination arguments, in order:
    -- notification integration, email header, recipients, CC list, BCC list
    SNOWFLAKE.NOTIFICATION.EMAIL_INTEGRATION_CONFIG(
        'DEMO_EMAIL_NOTIFICATIONS',
        'Snowflake DEMO Pipeline Alert',
        ARRAY_CONSTRUCT('{{MY_DEMO_EMAIL}}'),
        NULL,
        NULL
    )
);

In [None]:
-- Store the secret part of the Slack webhook URL as a Snowflake secret object.
-- How to obtain the webhook for your channel is described in the Slack documentation.
create or replace secret DEMO_SLACK_WEBHOOK
    type          = GENERIC_STRING
    secret_string = '{{MY_DEMO_SLACK_SECRET}}';

In [None]:
--- setting Slack notification integration as destination for our Alert messages
--- https://docs.snowflake.com/sql-reference/sql/create-notification-integration-webhooks
---
--- SNOWFLAKE_WEBHOOK_SECRET in the URL and SNOWFLAKE_WEBHOOK_MESSAGE in the body template
--- are placeholders: the former is filled from the secret object, the latter from the
--- message passed to SYSTEM$SEND_SNOWFLAKE_NOTIFICATION.

create or replace notification integration SLACK_CHANNEL_PIPELINE_ALERTS
    type = WEBHOOK
    enabled = TRUE
    webhook_url = 'https://hooks.slack.com/services/SNOWFLAKE_WEBHOOK_SECRET'
    -- unqualified on purpose: resolves to the secret created above in the current
    -- database/schema of this notebook (fully qualify it if your secret lives elsewhere)
    webhook_secret = DEMO_SLACK_WEBHOOK
    webhook_body_template = '{"text": "SNOWFLAKE_WEBHOOK_MESSAGE"}'
    -- the body template is JSON, so declare the standard JSON media type
    webhook_headers = ('Content-Type'='application/json')
    comment = 'posting to Demo Slack workspace in channel PIPELINE_ALERTS'
;

In [None]:
-- Smoke test for the Slack webhook integration: post a single message to the channel.
call SYSTEM$SEND_SNOWFLAKE_NOTIFICATION(
    SNOWFLAKE.NOTIFICATION.APPLICATION_JSON(
        'Hello from Snowflake'                  -- message text
    ),
    SNOWFLAKE.NOTIFICATION.INTEGRATION(
        'SLACK_CHANNEL_PIPELINE_ALERTS'         -- webhook integration to post through
    )
);

In [None]:
-- Send one sample notification to both destinations in a single call.
-- First argument: the message in several formats.
-- Second argument: the list of destinations.
call SYSTEM$SEND_SNOWFLAKE_NOTIFICATION(
    array_construct(
        -- JSON-typed message, used for Slack
        SNOWFLAKE.NOTIFICATION.APPLICATION_JSON('Hello from Snowflake'),
        -- HTML-typed message, used for emails
        SNOWFLAKE.NOTIFICATION.TEXT_HTML('<b>Hello from Snowflake!</b>')
    ),
    array_construct(
        -- Slack webhook integration
        SNOWFLAKE.NOTIFICATION.INTEGRATION('SLACK_CHANNEL_PIPELINE_ALERTS'),
        -- email integration, email header, validated user email addresses
        SNOWFLAKE.NOTIFICATION.EMAIL_INTEGRATION_CONFIG(
            'DEMO_EMAIL_NOTIFICATIONS',
            'Snowflake DEMO Pipeline Alert',
            ARRAY_CONSTRUCT('{{MY_DEMO_EMAIL}}')
        )
    )
);

## 2. Failed Task Run alert

Keep in mind that all following Alert objects will be created in the Schema of this notebook.

(you can also just add your database or schema to the object names below)

In [None]:
-- Show the database and schema in which the Alert objects below will be created.
select
    current_database(),
    current_schema()
;

We start by setting up an alert for any failed Task run within our Database by checking INFORMATION_SCHEMA.TASK_HISTORY for any entries with "FAILED" or "FAILED_AND_AUTO_SUSPENDED" state.

Let's first test run our condition:

In [None]:
-- Dry run of the alert condition:
-- distinct tasks (as SCHEMA.NAME) with at least one failed run during the last day.
select distinct
    SCHEMA_NAME || '.' || NAME as TASK
from
    table(
        INFORMATION_SCHEMA.TASK_HISTORY(
            SCHEDULED_TIME_RANGE_START => timeadd('DAY', -1, current_timestamp),
            SCHEDULED_TIME_RANGE_END   => current_timestamp,
            ERROR_ONLY                 => True      -- failed runs only
        )
    )
;

Now we can create an alert that lists all the names of Tasks that had at least one failed run since the last check and send this as a message to our Slack channel.

To reuse the result from our condition query in our notification message we can use the GET_CONDITION_QUERY_UUID() system function (https://docs.snowflake.com/en/sql-reference/functions/get_condition_query_uuid).

In [None]:
-- Alert: posts the names of all Tasks with at least one failed run since the last
-- successful check to the Slack channel.
--   condition: any row in TASK_HISTORY (ERROR_ONLY) between the last successful
--              scheduled time and the current scheduled time
--   action:    re-reads the condition result via GET_CONDITION_QUERY_UUID() and
--              sends the aggregated task names through the Slack integration
create or replace alert FAILED_TASK_ALERT
--- no warehouse selected to run serverless
-- Day-of-month is left as '*': when both day-of-month and day-of-week are set, the
-- schedule fires when EITHER matches, which would add runs on that day on weekends.
schedule='using CRON 0 8 * * MON-FRI UTC'          -- weekdays at 08:00 UTC; adjust to your timezone or preferred frequency
if (exists (
    select 
        NAME,
        SCHEMA_NAME
    from 
        table(INFORMATION_SCHEMA.TASK_HISTORY(
            SCHEDULED_TIME_RANGE_START => (greatest(timeadd('DAY', -7, current_timestamp),  SNOWFLAKE.ALERT.LAST_SUCCESSFUL_SCHEDULED_TIME())),     -- if last check is beyond history retention period then use last week instead
            SCHEDULED_TIME_RANGE_END => SNOWFLAKE.ALERT.SCHEDULED_TIME(),
            ERROR_ONLY => True)) 
        )
    ) 
then          
    begin
        -- comma-separated list of distinct SCHEMA.TASK names taken from the condition result
        let TASK_NAMES string := (
            select
                listagg(distinct(SCHEMA_NAME||'.'||NAME),', ') as FAILED_TASKS
            from 
                table(result_scan(SNOWFLAKE.ALERT.GET_CONDITION_QUERY_UUID()))); -- results of the condition query above
          
        call SYSTEM$SEND_SNOWFLAKE_NOTIFICATION(
                    SNOWFLAKE.NOTIFICATION.APPLICATION_JSON(
                        'Tasks '||:TASK_NAMES ||' failed since '||(greatest(timeadd('DAY', -7, current_timestamp), SNOWFLAKE.ALERT.LAST_SUCCESSFUL_SCHEDULED_TIME()))        -- my json message for slack
                        ),                                               
                    SNOWFLAKE.NOTIFICATION.INTEGRATION(
                        'SLACK_CHANNEL_PIPELINE_ALERTS'                                -- slack integration
                        )           
        );
     end;

In [None]:
-- Resume the alert so that its schedule becomes active (a newly created alert is suspended).
alter alert FAILED_TASK_ALERT
    resume;

In [None]:
-- Run the alert once right now instead of waiting for the next scheduled time.
execute alert
    FAILED_TASK_ALERT;

## 3. Pipe Alert setup

Now we set up a similar alert but for a specific Pipe by checking INFORMATION_SCHEMA.COPY_HISTORY for failed copies:

In [None]:
-- Dry run of the pipe alert condition: copies into IMPORTED_WEATHER made by pipe
-- LOAD_DAILY_WEATHER during the last day whose status is anything other than LOADED.
select
    STATUS,
    to_char(
        convert_timezone('Europe/Berlin', PIPE_RECEIVED_TIME),
        'YYYY-MM-DD at HH:MI:SS'
    ) as PIPE_RECEIVED_TIME                     -- shown in local time; adjust the timezone as needed
from
    table(
        INFORMATION_SCHEMA.COPY_HISTORY(
            TABLE_NAME => 'IMPORTED_WEATHER',
            START_TIME => timeadd('day', -1, current_timestamp)
        )
    )
where
    upper(STATUS) <> 'LOADED'
    and PIPE_NAME = 'LOAD_DAILY_WEATHER'
order by
    PIPE_RECEIVED_TIME desc
;

This time we send the message to our email address:

In [None]:
-- Alert: emails the number of failed or partial copies made by pipe LOAD_DAILY_WEATHER
-- into table IMPORTED_WEATHER since the last successful check.
--   condition: any COPY_HISTORY row for that pipe whose status is not LOADED
--   action:    counts the rows of the condition result and sends the count by email
create or replace alert DAILY_WEATHER_PIPE_INCIDENT
--- no warehouse selected to run serverless
schedule = '60 minutes'
if (exists(
    select 
        PIPE_RECEIVED_TIME
    from
        table(INFORMATION_SCHEMA.COPY_HISTORY(
            TABLE_NAME => 'IMPORTED_WEATHER',
            -- check since last alert run; same clamp pattern as FAILED_TASK_ALERT:
            -- COPY_HISTORY is documented to cover the last 14 days, so if the last
            -- successful check is older than that, use the last 14 days instead
            START_TIME => greatest(timeadd('DAY', -14, current_timestamp), SNOWFLAKE.ALERT.LAST_SUCCESSFUL_SCHEDULED_TIME()),
            END_TIME => SNOWFLAKE.ALERT.SCHEDULED_TIME()                        -- avoiding overlap or gaps
            )
        )
    where
        PIPE_NAME = 'LOAD_DAILY_WEATHER'
        and upper(STATUS) != 'LOADED'
    ))
    
then
    begin
        -- number of offending copies found by the condition query
        let COPY_ISSUES string := (
            select 
                count(PIPE_RECEIVED_TIME)
            from
                table(result_scan(SNOWFLAKE.ALERT.GET_CONDITION_QUERY_UUID()))); -- results of the condition query above
             
        call SYSTEM$SEND_SNOWFLAKE_NOTIFICATION(
            SNOWFLAKE.NOTIFICATION.TEXT_HTML(
                'Pipe LOAD_DAILY_WEATHER had '||:COPY_ISSUES||' failed or partial copies!'        -- my html message for emails
                ),
            SNOWFLAKE.NOTIFICATION.EMAIL_INTEGRATION_CONFIG(
                'DEMO_EMAIL_NOTIFICATIONS',                       -- email integration
                'Snowflake DEMO Pipeline Alert',                  -- email header
                array_construct('{{MY_DEMO_EMAIL}}')              -- validated user email addresses
                )
        );
    end;

In [None]:
-- Resume the alert so that its 60-minute schedule becomes active.
alter alert DAILY_WEATHER_PIPE_INCIDENT
    resume;

In [None]:
-- Run the alert once right now instead of waiting for the next scheduled time.
execute alert
    DAILY_WEATHER_PIPE_INCIDENT;

## 4. Dynamic Tables Alert setup

For Dynamic Tables we set up an alert not just for failed refreshes but more generally for data lag (freshness): it fires when any Dynamic Table in our database has been within its target lag for less than 90% of the last 24 hours.

Here we send notification to both email and Slack channel:

In [None]:
-- Alert: notifies Slack and email about Dynamic Tables whose share of time within
-- target lag dropped below 90%.
--   condition: any row from INFORMATION_SCHEMA.DYNAMIC_TABLES with
--              TIME_WITHIN_TARGET_LAG_RATIO < 0.9, with metrics starting at the
--              last successful scheduled time of this alert
--   action:    aggregates the SCHEMA.NAME of those tables from the condition result
--              and sends the list in JSON (Slack) and HTML (email) form
create or replace alert DT_LAGGING
--- no warehouse selected to run serverless
-- Day-of-month is left as '*': when both day-of-month and day-of-week are set, the
-- schedule fires when EITHER matches, which would add runs on that day on weekends.
schedule='using CRON 0 8 * * MON-FRI UTC'          -- weekdays at 08:00 UTC; adjust to your timezone or preferred frequency
if (exists (
    select 
        NAME,
        SCHEMA_NAME
    from 
        table(INFORMATION_SCHEMA.DYNAMIC_TABLES(
                REFRESH_DATA_TIMESTAMP_START => SNOWFLAKE.ALERT.LAST_SUCCESSFUL_SCHEDULED_TIME(),
                RESULT_LIMIT => 10000
            )) 
    where 
        TIME_WITHIN_TARGET_LAG_RATIO < 0.9
      )
  )
then          
    begin
        -- comma-separated list of distinct SCHEMA.NAME values taken from the condition result
        let DT_NAMES string := (
            select
                listagg(distinct(SCHEMA_NAME||'.'||NAME),', ') as LATE_DTS
            from 
                table(result_scan(SNOWFLAKE.ALERT.GET_CONDITION_QUERY_UUID()))); -- results of the condition query above

        call SYSTEM$SEND_SNOWFLAKE_NOTIFICATION(
            array_construct(                                                -- providing multiple message formats
                    SNOWFLAKE.NOTIFICATION.APPLICATION_JSON(
                        'Dynamic Tables(s) '||:DT_NAMES ||' less than 90% of the last 24 hours within target lag.'        -- my json message for slack
                        ),
                    SNOWFLAKE.NOTIFICATION.TEXT_HTML(
                        '<b>Dynamic Tables(s) '||:DT_NAMES ||' less than 90% of the last 24 hours within target lag.</b>'        -- my html message for emails
                        )
            ),
            array_construct(                                                -- multiple destinations
                    SNOWFLAKE.NOTIFICATION.INTEGRATION(
                        'SLACK_CHANNEL_PIPELINE_ALERTS'                     -- slack integration
                        ),
                    SNOWFLAKE.NOTIFICATION.EMAIL_INTEGRATION_CONFIG(
                        'DEMO_EMAIL_NOTIFICATIONS',                         -- email integration
                        'Snowflake DEMO Pipeline Alert',                    -- email header
                        ARRAY_CONSTRUCT('{{MY_DEMO_EMAIL}}')                -- validated user email addresses
                    )
            )
        );
    end;
In [None]:
-- Resume the alert so that its schedule becomes active.
alter alert DT_LAGGING
    resume;

In [None]:
-- Run the alert once right now instead of waiting for the next scheduled time.
execute alert
    DT_LAGGING;

## 5. Check Alerts History and Notification History

Now we can see which Alerts ran and if their condition triggered a notification.
We can also see when notifications were sent out.

In [None]:
-- The 20 most recent alert evaluations, leaving out entries that are only scheduled.
select
    to_char(
        convert_timezone('Europe/Berlin', SCHEDULED_TIME),
        'YYYY-MM-DD at HH:MI:SS'
    ) as SCHEDULED_TIME,                        -- shown in local time; adjust the timezone as needed
    NAME,
    STATE,
    SQL_ERROR_MESSAGE,                          -- in case an Alert itself failed
    TIMEDIFF(second, SCHEDULED_TIME, COMPLETED_TIME) as DURATION_IN_S,
    SCHEMA_NAME
from
    table(INFORMATION_SCHEMA.ALERT_HISTORY())
where
    STATE <> 'SCHEDULED'
order by
    SCHEDULED_TIME desc
limit 20
;

In [None]:
-- Notifications processed during the last 24 hours for the two demo integrations,
-- newest first, with delivery status and error message.
select
    to_char(
        convert_timezone('Europe/Berlin', PROCESSED),
        'YYYY-MM-DD at HH:MI:SS'
    ) as PROCESSED,                             -- shown in local time; adjust the timezone as needed
    INTEGRATION_NAME,
    STATUS,
    ERROR_MESSAGE
from
    table(
        INFORMATION_SCHEMA.NOTIFICATION_HISTORY(
            START_TIME => dateadd('hour', -24, current_timestamp()),
            END_TIME   => current_timestamp()
        )
    )
where
    INTEGRATION_NAME in ('SLACK_CHANNEL_PIPELINE_ALERTS', 'DEMO_EMAIL_NOTIFICATIONS')
order by
    PROCESSED desc;

### Bonus tip:

Build your custom Alerts Monitoring Dashboard with Streamlit or Snowsight Dashboards

* adjust the `'Europe/Berlin'` argument of `convert_timezone(...)` in the first query below to your local timezone

In [None]:
# Streamlit dashboard for alert monitoring. It renders, top to bottom:
#   1. a bar chart of distinct alerts per hour and outcome over the last 7 days,
#   2. a table with the latest run and the state history of every alert,
#   3. an expandable list of the most recent alert runs.
import streamlit as st
import pandas as pd
import altair as alt
# Imported here as well so this cell also works when the optional credentials cell was skipped.
from snowflake.snowpark.context import get_active_session
session = get_active_session()

st.header('My Pipeline Alerts')

# One row per alert: latest scheduled time, outcome of that latest run and the list of
# outcomes of the last 7 days (newest first). RESULT_LIMIT is raised to its maximum so
# the 7-day window is not cut down to the function's small default number of rows.
# Adjust 'Europe/Berlin' below to your local timezone.
ALERTS = session.sql("""
        with LATEST_ALERTS as (
            select
                NAME as ALERT_NAME,
                DATABASE_NAME,
                SCHEMA_NAME,
                max(SCHEDULED_TIME) as LATEST_SCHEDULED_TIME,
                array_agg(case 
                            when STATE = 'TRIGGERED' then '🚨'
                            when STATE = 'CONDITION_FALSE' then '✅'
                            else '⚠️' end) within group (order by SCHEDULED_TIME desc) as STATE_HISTORY
            from
                table (SNOWFLAKE.INFORMATION_SCHEMA.ALERT_HISTORY(RESULT_LIMIT => 10000))
            where
                SCHEDULED_TIME between timeadd(day, -7, current_timestamp) and current_timestamp
            group by
                NAME,
                DATABASE_NAME,
                SCHEMA_NAME
        )
        select
            L.ALERT_NAME,
            --LATEST_SCHEDULED_TIME,
            concat(to_char(convert_timezone('Europe/Berlin', LATEST_SCHEDULED_TIME), 'YYYY-MM-DD at HH:MI:SS'),' (',(timediff(minute, LATEST_SCHEDULED_TIME, current_timestamp())),' minutes ago)') as LAST_RUN,
            case when D.STATE = 'TRIGGERED' then ('🚨 Triggered')
                 when D.STATE = 'CONDITION_FALSE' then ('✅ Condition False')
                 when D.STATE = 'CONDITION_FAILED' then ('⚠️ Condition Failed')
                 when D.STATE = 'ACTION_FAILED' then ('⚠️ Action Failed')
                 else concat('❌ ', D.STATE)
                 end as LAST_RESULT,
            STATE_HISTORY,
            L.DATABASE_NAME,
            L.SCHEMA_NAME
        from
            LATEST_ALERTS L
        join
            table (SNOWFLAKE.INFORMATION_SCHEMA.ALERT_HISTORY(RESULT_LIMIT => 10000)) D
            on L.ALERT_NAME = D.NAME
            and L.DATABASE_NAME = D.DATABASE_NAME
            and L.SCHEMA_NAME = D.SCHEMA_NAME
            and L.LATEST_SCHEDULED_TIME = D.SCHEDULED_TIME
        order by
            LAST_RUN desc
        limit 
            100
        """).to_pandas()



# Per hour of the last 7 days: number of distinct alerts (name|schema|database) that
# triggered, evaluated to false, or failed in their condition or action.
ALL_ALERTS_HISTOGRAM = session.sql("""
    select
        count(distinct case when STATE = 'TRIGGERED'                            then NAME || '|' || SCHEMA_NAME || '|' || DATABASE_NAME end) as TRIGGERED,
        count(distinct case when STATE = 'CONDITION_FALSE'                      then NAME || '|' || SCHEMA_NAME || '|' || DATABASE_NAME end) as CONDITION_FALSE,
        count(distinct case when STATE in ('ACTION_FAILED', 'CONDITION_FAILED') then NAME || '|' || SCHEMA_NAME || '|' || DATABASE_NAME end) as ALERT_FAILED,
        date_trunc(hour,SCHEDULED_TIME) as HOUR
    from
        table (SNOWFLAKE.INFORMATION_SCHEMA.ALERT_HISTORY(RESULT_LIMIT => 10000))
    where
        SCHEDULED_TIME between timeadd(day, -7, current_timestamp) and current_timestamp
    group by
        HOUR
    order by
        HOUR desc
    """).to_pandas()
 
# Long format (HOUR, RESULT, COUNTER) so each outcome becomes its own colored bar series.
MELTED_DF = ALL_ALERTS_HISTOGRAM.melt('HOUR', var_name='RESULT', value_name='COUNTER')
    
CHART = alt.Chart(MELTED_DF).mark_bar(size=5).encode(
        x=alt.X('HOUR:T', axis=alt.Axis(title='Distinct Alerts running per hour')), 
        y=alt.Y('COUNTER:Q', axis=alt.Axis(title=None)), 
        # fixed colors per outcome: triggered = red, condition false = green, failed = orange
        color=alt.Color('RESULT:N', legend=None,
                scale=alt.Scale(domain=['TRIGGERED', 'CONDITION_FALSE', 'ALERT_FAILED'], range=['#FF0000', '#008000', '#FFA500']))
        ).properties(height=240)

st.altair_chart(CHART, use_container_width=True)






# STATE_HISTORY is an array column, so render it as a list column.
st.dataframe(ALERTS,
             column_config={
                "STATE_HISTORY": st.column_config.ListColumn("History (last 7 days)")
             },
             hide_index= True, use_container_width=True)




# Raw list of the latest 100 alert runs of the last 7 days, hidden until expanded.
with st.expander('Show Alerts History'):
    ALERTS_HISTORY = session.sql("""
        select
            SCHEDULED_TIME,
            NAME,
            STATE,
            TIMEDIFF(second, SCHEDULED_TIME, COMPLETED_TIME) as DURATION_IN_S,
            DATABASE_NAME,
            SCHEMA_NAME
        from 
            table (SNOWFLAKE.INFORMATION_SCHEMA.ALERT_HISTORY())
        where
            SCHEDULED_TIME between timeadd(day, -7, current_timestamp) and current_timestamp
        order by
            SCHEDULED_TIME desc
        limit 
            100
    """).collect()
    st.dataframe(ALERTS_HISTORY, hide_index= True, use_container_width=True)