### Task 1: Validate Data with a Custom Expectation in Great Expectations
**Description**: Create a custom expectation and validate data with Great Expectations.

**Load a sample DataFrame**

```python
data = {
    'age': [25, 30, 35, 40, 45],
    'income': [50000, 60000, 75000, None, 100000]
}
```

In [8]:
import pandas as pd
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import great_expectations as ge
from great_expectations.core import ExpectationConfiguration
import time

# Task 1: Custom Expectation using Great Expectations

# Toy dataset: five rows, one of which has no income value.
data = {
    'age': [25, 30, 35, 40, 45],
    'income': [50000, 60000, 75000, None, 100000],
}
df = pd.DataFrame(data)

# Wrap the pandas frame so the expect_* methods can be called on it.
ge_df = ge.from_pandas(df)

# Built-in range expectation on the "age" column (bounds 18 and 120).
# The validation result is not stored or inspected here.
ge_df.expect_column_values_to_be_between(column="age", min_value=18, max_value=120)

# Task 2: Email Alert System with Enhanced Error Handling

def send_email_alert(
    subject,
    body,
    recipient_email,
    *,
    sender_email="your_email@example.com",
    password="your_password",
    smtp_server="smtp.example.com",
    smtp_port=587,
    timeout=30,
):
    """Send a plain-text alert email over SMTP, upgrading to TLS first.

    Sending is best-effort: every failure is reported on stdout and turned
    into a ``False`` return value instead of propagating, so a caller such as
    a monitoring loop keeps running when the mail server is unavailable.

    Args:
        subject: Subject line of the email.
        body: Plain-text body of the email.
        recipient_email: Address the alert is delivered to.
        sender_email: Address used both as the ``From`` header and as the
            SMTP login name.
        password: Password used for the SMTP login.
        smtp_server: Host name of the SMTP server.
        smtp_port: Port of the SMTP server.
        timeout: Seconds to wait on blocking socket operations before giving
            up, so an unreachable server cannot stall the caller forever.

    Returns:
        True if the message was handed to the server, False otherwise.
    """
    # Imported locally so this function does not rely on a module-level
    # ``import ssl`` being present.
    import ssl

    try:
        # Create email message
        msg = MIMEMultipart()
        msg['From'] = sender_email
        msg['To'] = recipient_email
        msg['Subject'] = subject
        msg.attach(MIMEText(body, 'plain'))

        # Connect to the email server and send the email
        with smtplib.SMTP(smtp_server, smtp_port, timeout=timeout) as server:
            # A default context verifies the server certificate and host
            # name; starttls() without a context performs no verification.
            server.starttls(context=ssl.create_default_context())
            server.login(sender_email, password)
            server.sendmail(sender_email, recipient_email, msg.as_string())
    except smtplib.SMTPException as e:
        print(f"SMTP error occurred: {e}")
        return False
    except Exception as e:
        # Deliberately broad: connection errors, TLS failures and malformed
        # input must not crash the caller of a best-effort alert.
        print(f"An error occurred: {e}")
        return False
    else:
        print(f"Email sent successfully to {recipient_email}")
        return True

# Task 3: Real-time Data Quality Monitoring

def fetch_new_data():
    """Return a fixed one-row DataFrame standing in for freshly ingested data.

    This is a placeholder: swap the hard-coded row for a real source
    (database query, API call, ...) when one is available.
    """
    return pd.DataFrame({'age': [50], 'income': [120000]})

def check_data_quality(data):
    """Report whether the batch passes the (placeholder) quality check.

    The only rule applied is that the 'income' column has no missing values.

    Returns:
        True when 'income' has no nulls, False when at least one is present.
    """
    income_has_gaps = data['income'].isnull().any()
    return not income_has_gaps

def monitor_data_quality_stream(
    *,
    poll_interval=60,
    max_iterations=None,
    recipient_email="recipient@example.com",
):
    """Poll for new data and email an alert whenever a batch fails its check.

    Each cycle fetches a batch with ``fetch_new_data``, evaluates it with
    ``check_data_quality`` and, on failure, calls ``send_email_alert``.

    Args:
        poll_interval: Seconds to sleep between two consecutive cycles.
        max_iterations: Number of cycles to run. ``None`` (the default) keeps
            polling until the process is interrupted; ``0`` returns at once.
        recipient_email: Address that receives the alert emails.

    Returns:
        The number of batches that failed the quality check. Only reachable
        when ``max_iterations`` is not ``None``.
    """
    failed_batches = 0
    completed = 0

    while max_iterations is None or completed < max_iterations:
        # Simulate data ingestion (Replace this with actual data fetching)
        data = fetch_new_data()

        # Perform data quality checks
        if not check_data_quality(data):
            failed_batches += 1
            send_email_alert(
                "Data quality issue detected",
                "Data quality is below acceptable thresholds.",
                recipient_email,
            )

        completed += 1
        # Skip the pause after the final cycle so a bounded run returns
        # immediately instead of sleeping for nothing.
        if max_iterations is not None and completed >= max_iterations:
            break

        # Wait for new data or events
        time.sleep(poll_interval)

    return failed_batches

# Call the function to start monitoring.
# NOTE: called with no arguments, this polls in an endless loop, so the cell
# keeps running until the process/kernel is interrupted.
monitor_data_quality_stream()


ImportError: cannot import name 'ExpectationConfiguration' from 'great_expectations.core' (/home/vscode/.local/lib/python3.10/site-packages/great_expectations/core/__init__.py)

### Task 2: Implement a Basic Alert System for Data Quality Drops
**Description**: Set up a basic alert system that triggers when data quality drops.

In [None]:
# Write your code from here
import pandas as pd
import great_expectations as ge
from great_expectations.dataset import PandasDataset
import smtplib
from email.mime.text import MIMEText

# Function to send an email alert
def send_email_alert(
    message,
    *,
    from_email="your_email@example.com",
    to_email="recipient_email@example.com",
    password="your_password",
    smtp_server="smtp.example.com",
    smtp_port=587,
    timeout=30,
):
    """Email a data-quality alert with the fixed subject "Data Quality Alert".

    The connection is upgraded with STARTTLS before logging in, so the
    credentials are not transmitted over an unencrypted connection. Delivery
    is best-effort: any failure while talking to the server is printed and
    reported through the return value rather than raised.

    Args:
        message: Plain-text body of the alert.
        from_email: Sender address, also used as the SMTP login name.
        to_email: Recipient address.
        password: Password used for the SMTP login.
        smtp_server: Host name of the SMTP server.
        smtp_port: Port of the SMTP server.
        timeout: Seconds to wait on blocking socket operations.

    Returns:
        True if the alert was handed to the server, False otherwise.
    """
    # Imported locally so this function does not rely on a module-level
    # ``import ssl`` being present.
    import ssl

    msg = MIMEText(message)
    msg["Subject"] = "Data Quality Alert"
    msg["From"] = from_email
    msg["To"] = to_email

    try:
        with smtplib.SMTP(smtp_server, smtp_port, timeout=timeout) as server:
            # Encrypt the session (with certificate verification) before any
            # credentials are sent.
            server.starttls(context=ssl.create_default_context())
            server.login(from_email, password)
            server.sendmail(from_email, to_email, msg.as_string())
    except Exception as e:
        print(f"Failed to send alert: {e}")
        return False

    print("Alert sent successfully!")
    return True

# Sample data: five rows, one of them without an income value.
data = {
    'age': [25, 30, 35, 40, 45],
    'income': [50000, 60000, 75000, None, 100000],
}
df = pd.DataFrame(data)

# Wrap the pandas DataFrame as a Great Expectations dataset.
ge_df = ge.from_pandas(df)

# Report success when 'income' is complete; otherwise raise an email alert.
if not ge_df['income'].isnull().any():
    print("Data Quality Check Passed: No missing values in 'income'.")
else:
    send_email_alert("Data Quality Alert: Missing values found in the 'income' column.")


ModuleNotFoundError: No module named 'great_expectations.dataset'

### Task 3: Real-time Data Quality Monitoring with Python and Great Expectations
**Description**: Implement a system that monitors data quality in real-time.

In [None]:
# Write your code from here
import pandas as pd
import great_expectations as ge
from great_expectations.dataset import PandasDataset
import time

# Function to simulate real-time data monitoring
def real_time_data_quality_check(df):
    """Validate the 'income' column of ``df`` and print the outcome.

    Two expectations are evaluated on a Great Expectations wrapper of ``df``:

    * no value in 'income' is missing, and
    * every present value satisfies ``0 <= income < 1000000``.

    Args:
        df: pandas DataFrame containing an 'income' column.

    Returns:
        True when both expectations succeed, False otherwise.
    """
    ge_df = ge.from_pandas(df)

    # Value-level expectations (set membership, ranges, ...) skip null cells,
    # so missing values are only caught by an explicit not-null expectation.
    completeness = ge_df.expect_column_values_to_not_be_null('income')

    # A range check replaces membership in set(range(0, 1000000)): it avoids
    # building a million-element set on every call and also accepts
    # non-integer incomes inside the same bounds.
    validity = ge_df.expect_column_values_to_be_between(
        'income', min_value=0, max_value=1000000, strict_max=True
    )

    passed = bool(completeness['success'] and validity['success'])

    if passed:
        print("Data Quality Check Passed.")
    else:
        print("Data Quality Issue Detected! Missing or invalid values found in the 'income' column.")

    return passed

# Endless polling loop: rebuild the sample frame, validate it, pause, repeat.
while True:
    # Stand-in batch; a real deployment would load fresh data here instead.
    data = {
        'age': [25, 30, 35, 40, 45],
        'income': [50000, 60000, 75000, None, 100000],
    }
    df = pd.DataFrame(data)

    # Run the quality check on this batch.
    real_time_data_quality_check(df)

    # Pause for five seconds before the next pass.
    time.sleep(5)


ModuleNotFoundError: No module named 'great_expectations.dataset'