# Great Expectations (Pandas + Slack Notification)

Use Great Expectations Core v1.0+ in a code-first workflow to validate a dataset and send a Slack alert when validation fails.

In [None]:
!pip install great_expectations pandas

In [None]:
import pandas as pd
import great_expectations as gx
from great_expectations.expectations import (
    ExpectColumnValuesToNotBeNull,
    ExpectColumnValuesToBeUnique,
    ExpectColumnValuesToBeBetween,
    ExpectColumnValuesToBeInSet
)

# Read the data
df = pd.read_csv("/content/drive/MyDrive/Colab Data/Amazon Sale Report.csv", low_memory=False)

# DataFrame.info() writes its summary straight to stdout and returns None,
# so wrapping it in print() only appended a stray "None" line.
df.info()


In [None]:
# Clean up the column names: spaces become underscores and everything is
# lower-cased, e.g. 'Order ID' -> 'order_id'
df.columns = [c.replace(' ', '_').lower() for c in df.columns]

# Inspect the resulting schema. DataFrame.info() prints by itself and
# returns None, so it is called directly rather than through print().
df.info()

In [None]:
# Show how often each distinct value occurs in the "status" column
print(df.loc[:, "status"].value_counts())

In [None]:
# 1. Obtain the GX data context
context = gx.get_context()

# 2. Register a new, empty Expectation Suite with the context
suite = context.suites.add(gx.ExpectationSuite(name="amazon_orders_suite"))

# Values the "status" column is permitted to contain
allowed_status = [
    "Shipped",
    "Shipped - Delivered to Buyer",
    "Cancelled",
    "Shipped - Returned to Seller",
    "Shipped - Picked Up",
    "Pending",
    "Pending - Waiting for Pick Up",
    "Shipped - Returning to Seller",
    "Shipped - Out for Delivery",
    "Shipped - Rejected by Buyer",
    "Shipping",
    "Shipped - Lost in Transit",
    "Shipped - Damaged",
]

# 3. Attach the expectations to the suite, in this order:
#    order_id is never null and unique, qty and amount are >= 0,
#    and status only takes one of the permitted values.
for expectation in (
    ExpectColumnValuesToNotBeNull(column="order_id"),
    ExpectColumnValuesToBeUnique(column="order_id"),
    ExpectColumnValuesToBeBetween(column="qty", min_value=0),
    ExpectColumnValuesToBeBetween(column="amount", min_value=0),
    ExpectColumnValuesToBeInSet(column="status", value_set=allowed_status),
):
    suite.add_expectation(expectation)

In [None]:
import requests

# Each step below first looks up an already-registered object by name and
# only creates it when that lookup fails, so the cell can be re-run against
# the same context.
# `except Exception` replaces the former bare `except:` so that
# KeyboardInterrupt / SystemExit are no longer swallowed.
# NOTE(review): the exact "not found" exception type raised by each getter
# was not verified here; narrow the handlers once confirmed.

# 1. Data Source
try:
    data_source = context.data_sources.get(name="my_source")
except Exception:
    data_source = context.data_sources.add_pandas(name="my_source")

# 2. Data Asset
try:
    data_asset = data_source.get_asset(name="my_asset")
except Exception:
    data_asset = data_source.add_dataframe_asset(name="my_asset")

# 3. Batch Definition (GX v1.0+)
# Look the definition up with get_batch_definition(); the previous call to
# `get_batch_definition_daily` is not a lookup method on a dataframe asset,
# so the try branch could never succeed and the failure was hidden by the
# bare except.
try:
    batch_definition = data_asset.get_batch_definition("my_batch_def")
except Exception:
    batch_definition = data_asset.add_batch_definition_whole_dataframe(name="my_batch_def")

# 4. Validation Definition: ties the batch definition to the expectation suite
try:
    validation_def = context.validation_definitions.get(name="amazon_val")
except Exception:
    validation_def = context.validation_definitions.add(
        gx.ValidationDefinition(
            name="amazon_val",
            data=batch_definition,
            suite=suite
        )
    )

# 5. Run the validation against the in-memory DataFrame
results = validation_def.run(batch_parameters={"dataframe": df})

# Slack Notification Function
def send_slack_alert(results, webhook_url):
    """Post a summary of a failed GX validation run to a Slack webhook.

    Args:
        results: Validation result object. It must expose ``statistics``
            (a mapping with the keys ``"successful_expectations"`` and
            ``"unsuccessful_expectations"``) and ``results`` (an iterable of
            per-expectation results, each with ``success`` and
            ``expectation_config.type``).
        webhook_url: URL the JSON payload is POSTed to.

    Raises:
        requests.RequestException: If the request cannot be sent, times out,
            or the webhook answers with an HTTP error status.
    """
    stats = results.statistics
    # Types of the expectations that did not pass
    failures = [res.expectation_config.type for res in results.results if not res.success]

    # The emoji are written as escapes so they survive a re-encoding of the
    # file; the previous literals had been corrupted into mojibake.
    summary = (
        "\U0001F6A8 *GX Validation Failed!*\n"
        f"\u2705 Successful: {stats['successful_expectations']}\n"
        f"\u274C Unsuccessful: {stats['unsuccessful_expectations']}\n"
        f"\U0001F6E0 Errors: {', '.join(failures)}"
    )

    # A timeout keeps the notebook from hanging on an unresponsive endpoint;
    # raise_for_status() surfaces a rejected webhook instead of ignoring it.
    response = requests.post(webhook_url, json={"text": summary}, timeout=10)
    response.raise_for_status()

# Report the outcome; a failed validation additionally triggers a Slack alert
if results.success:
    print("All checks were passed successfully.")
else:
    MY_WEBHOOK_URL = "YOUR_WEBHOOK_URL_HERE"
    send_slack_alert(results, MY_WEBHOOK_URL)
    print("An error has been detected and a Slack notification has been sent!")