### Task 1: Validate Data with a Custom Expectation in Great Expectations
**Description**: Create a custom expectation and validate data with Great Expectations.

**Load a sample DataFrame**

```python
data = {
    'age': [25, 30, 35, 40, 45],
    'income': [50000, 60000, 75000, None, 100000]
}
```

In [7]:
import pandas as pd
import great_expectations as ge
from great_expectations.dataset import PandasDataset

# Sample records; the 'income' column has one missing entry (None).
data = {
    'age': [25, 30, 35, 40, 45],
    'income': [50000, 60000, 75000, None, 100000],
}
df = pd.DataFrame(data)

# Wrap the frame as a Great Expectations dataset.
ge_df = ge.from_pandas(df)

# Custom expectation: every 'age' value must be greater than or equal to 18
def expect_age_greater_than_18(self):
    """Expect every value in the ``age`` column to be 18 or greater.

    Written as a method (it takes ``self``) so it can be attached to a
    Great Expectations dataset class and called on dataset instances.

    A lower-bound range check is used instead of membership in
    ``set(range(18, 120))``: the set form reports non-integer ages
    (e.g. 18.5) and ages of 120 or more as failures, and builds a
    throwaway set on every call.

    Returns:
        Whatever ``self.expect_column_values_to_be_between`` returns for
        the check.
    """
    return self.expect_column_values_to_be_between("age", min_value=18)

# Attach the custom expectation to PandasDataset under the same name,
# so it can be called as a method on the dataset below.
setattr(PandasDataset, "expect_age_greater_than_18", expect_age_greater_than_18)

# Run the custom expectation against the dataset and show the outcome.
result = ge_df.expect_age_greater_than_18()
print(result)


ModuleNotFoundError: No module named 'great_expectations.dataset'

### Task 2: Implement a Basic Alert System for Data Quality Drops
**Description**: Set up a basic alert system that triggers when data quality drops.

In [None]:
# Write your code from here
import pandas as pd
import great_expectations as ge
from great_expectations.dataset import PandasDataset
import smtplib
from email.mime.text import MIMEText

# Function to send an email alert
def send_email_alert(
    message,
    *,
    from_email="your_email@example.com",
    to_email="recipient_email@example.com",
    smtp_host="smtp.example.com",
    smtp_port=0,
    username="your_email@example.com",
    password="your_password",
    timeout=10,
):
    """Send a "Data Quality Alert" email and report whether it was delivered.

    All keyword arguments default to the placeholder values, so
    ``send_email_alert(message)`` keeps working unchanged. Replace the
    placeholders (ideally from configuration or the environment rather than
    source code) before real use.

    Args:
        message: Plain-text body of the alert.
        from_email: Sender address, also used as the envelope sender.
        to_email: Recipient address.
        smtp_host: SMTP server host name.
        smtp_port: SMTP server port; 0 selects smtplib's default port.
        username: Login name for the SMTP server.
        password: Login password for the SMTP server.
        timeout: Seconds to wait on blocking socket operations, so an
            unreachable server fails instead of hanging the caller.

    Returns:
        True if the message was handed to the server, False if connecting,
        authenticating, or sending failed (the failure is printed).
    """
    msg = MIMEText(message)
    msg["Subject"] = "Data Quality Alert"
    msg["From"] = from_email
    msg["To"] = to_email

    try:
        with smtplib.SMTP(smtp_host, smtp_port, timeout=timeout) as server:
            # Upgrade to TLS when the server offers it, so the credentials
            # are not sent in clear text.
            server.ehlo_or_helo_if_needed()
            if server.has_extn("starttls"):
                server.starttls()
            server.login(username, password)
            server.sendmail(from_email, to_email, msg.as_string())
    except (smtplib.SMTPException, OSError) as e:
        # Alerting is best-effort: report the failure rather than crash the
        # data-quality check that triggered it.
        print(f"Failed to send alert: {e}")
        return False

    print("Alert sent successfully!")
    return True

# Sample records; the 'income' column has one missing entry (None).
data = {
    'age': [25, 30, 35, 40, 45],
    'income': [50000, 60000, 75000, None, 100000],
}
df = pd.DataFrame(data)

# Wrap the frame as a Great Expectations dataset.
ge_df = ge.from_pandas(df)

# Email an alert when 'income' has any missing value; otherwise report success.
income_has_missing = ge_df['income'].isnull().any()
if not income_has_missing:
    print("Data Quality Check Passed: No missing values in 'income'.")
else:
    send_email_alert("Data Quality Alert: Missing values found in the 'income' column.")


ModuleNotFoundError: No module named 'great_expectations.dataset'

### Task 3: Real-time Data Quality Monitoring with Python and Great Expectations
**Description**: Implement a system that monitors data quality in real-time.

In [None]:
# Write your code from here
import pandas as pd
import great_expectations as ge
from great_expectations.dataset import PandasDataset
import time

# Function to simulate real-time data monitoring
def real_time_data_quality_check(df):
    """Run the 'income' data-quality checks on ``df`` and print the outcome.

    Two expectations are evaluated on the 'income' column:

    * no value is missing, and
    * every value lies between 0 and 999,999 inclusive.

    The missing-value check is explicit because Great Expectations'
    column-value expectations (set membership, ranges, ...) ignore null
    entries by default, so a set or range check alone reports success for a
    column that contains gaps. A range check also accepts fractional
    incomes and avoids building a million-element set on every call.

    Args:
        df: pandas DataFrame with an 'income' column.

    Returns:
        True when both checks succeed, False otherwise.
    """
    ge_df = ge.from_pandas(df)

    not_null = ge_df.expect_column_values_to_not_be_null('income')
    in_range = ge_df.expect_column_values_to_be_between(
        'income', min_value=0, max_value=999999
    )

    passed = bool(not_null['success'] and in_range['success'])
    if passed:
        print("Data Quality Check Passed.")
    else:
        print("Data Quality Issue Detected! Missing or invalid values found in the 'income' column.")
    return passed

# Simulate real-time data monitoring until the run is interrupted
CHECK_INTERVAL_SECONDS = 5  # pause between consecutive checks

try:
    while True:
        # Sample Data (This could be dynamically loaded in real-time)
        data = {'age': [25, 30, 35, 40, 45], 'income': [50000, 60000, 75000, None, 100000]}
        df = pd.DataFrame(data)

        # Perform a real-time data quality check
        real_time_data_quality_check(df)

        # Wait before checking again (simulate real-time monitoring)
        time.sleep(CHECK_INTERVAL_SECONDS)
except KeyboardInterrupt:
    # The loop has no other exit; treat Ctrl-C / a kernel interrupt as a
    # normal stop instead of ending with a traceback.
    print("Monitoring stopped.")


ModuleNotFoundError: No module named 'great_expectations.dataset'