In [None]:
# Step 1: Install Great Expectations (only run once)
# The leading "!" is an IPython/Jupyter shell escape: the rest of the line is
# run as a shell command, not as Python.
# NOTE(review): the install is unpinned, while the code below relies on
# `ge.from_pandas` and on the `great_expectations` CLI -- confirm that the
# release pip resolves still provides both, or pin a known-good version.
!pip install great_expectations

# Step 2: Initialize Great Expectations (only run once per project)
# Runs the `great_expectations init` CLI command in the current working
# directory. NOTE(review): presumably this scaffolds the project config that
# `ge.get_context()` later picks up -- confirm for the installed version.
!great_expectations init

# Step 3: Import required libraries
import pandas as pd
import great_expectations as ge
from great_expectations.render.renderer import ValidationResultsPageRenderer
from great_expectations.render.view import DefaultJinjaPageView

# Step 4: Load your dataset
df = pd.read_csv("your_dataset.csv")  # Replace with actual path

# Step 5: Create a Great Expectations context and expectation suite
# The context method's keyword is `expectation_suite_name`; passing
# `suite_name=` leaves the required argument unset and raises TypeError.
context = ge.get_context()
suite_name = "data_quality_suite"
suite = context.create_expectation_suite(
    expectation_suite_name=suite_name,
    overwrite_existing=True,
)

# Step 6: Convert to GE DataFrame
# Attach the suite created above so that every expectation added below is
# recorded under `suite_name` instead of the dataset's default suite.
ge_df = ge.from_pandas(df, expectation_suite=suite)

# Step 7: Add expectations (customize these for your dataset)
# Each call both evaluates the expectation against `df` immediately and
# records it in the dataset's expectation suite.
ge_df.expect_column_to_exist("column_1")
ge_df.expect_column_values_to_not_be_null("column_1")
ge_df.expect_column_values_to_be_between("column_2", min_value=0, max_value=100)
ge_df.expect_column_values_to_be_in_set("column_3", ["A", "B", "C"])

# Step 8: Save the expectation suite
# The dataset's own `save_expectation_suite` takes a file path, not a suite
# name, so persist through the context instead. Keep expectations that failed
# on this data (`discard_failed_expectations=False`): for a data-quality suite
# the failing rules are exactly the ones that must not be silently dropped.
context.save_expectation_suite(
    ge_df.get_expectation_suite(discard_failed_expectations=False),
    expectation_suite_name=suite_name,
)

# Step 9: Validate the data
# `validate()` has no `expectation_suite_name` parameter; called without
# arguments it validates against the suite already attached to `ge_df`.
results = ge_df.validate()

# Step 10: Generate validation report (HTML)
# Render the validation results into a document model, then into an HTML page.
document_model = ValidationResultsPageRenderer().render(results)
html = DefaultJinjaPageView().render(document_model)
# Write explicitly as UTF-8: without `encoding=`, open() uses the platform's
# default encoding, which varies between machines and can fail on non-ASCII
# characters in the rendered page.
with open("data_quality_report.html", "w", encoding="utf-8") as f:
    f.write(html)

print("Validation complete. Report saved as 'data_quality_report.html'.")