In [1]:
# Install the library
!pip install -q "great-expectations==0.18.19"

# Create a data context
from great_expectations.data_context import FileDataContext

context = FileDataContext.create(project_root_dir="./")

# Give a name to a Datasource. This name must be unique between Datasources.
datasource_name = 'fclimate-change'
datasource = context.sources.add_pandas(datasource_name)

# Give a name to a data asset
asset_name = 'climate_agriculture'
path_to_data = '/Users/firaldichandra/Documents/Final Project/climate_change_impact_on_agriculture_2024.csv'
asset = datasource.add_csv_asset(asset_name, filepath_or_buffer=path_to_data)

# Build batch request
batch_request = asset.build_batch_request()

# Create an expectation suite
expectation_suite_name = 'expectation-climate-agriculture'
context.add_or_update_expectation_suite(expectation_suite_name)

# Create a validator using above expectation suite
validator = context.get_validator(
    batch_request=batch_request,
    expectation_suite_name=expectation_suite_name
)

# Check the validator
validator.head()



Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,Year,Country,Region,Crop_Type,Average_Temperature_C,Total_Precipitation_mm,CO2_Emissions_MT,Crop_Yield_MT_per_HA,Extreme_Weather_Events,Irrigation_Access_%,Pesticide_Use_KG_per_HA,Fertilizer_Use_KG_per_HA,Soil_Health_Index,Adaptation_Strategies,Economic_Impact_Million_USD
0,2001,India,West Bengal,Corn,1.55,447.06,15.22,1.737,8,14.54,10.08,14.78,83.25,Water Management,808.13
1,2024,China,North,Corn,3.23,2913.57,29.82,1.737,8,11.05,33.06,23.25,54.02,Crop Rotation,616.22
2,2001,France,Ile-de-France,Wheat,21.11,1301.74,25.75,1.719,5,84.42,27.41,65.53,67.78,Water Management,796.96
3,2001,Canada,Prairies,Coffee,27.85,1154.36,13.91,3.89,5,94.06,14.38,87.58,91.39,No Adaptation,790.32
4,1998,India,Tamil Nadu,Sugarcane,2.19,1627.48,11.81,1.08,9,95.75,44.35,88.08,49.61,Crop Rotation,401.72


In [5]:
# Year must be an integer between 1990 and 2025.
year_column = "Year"
validator.expect_column_values_to_be_between(column=year_column, min_value=1990, max_value=2025)
validator.expect_column_values_to_be_of_type(column=year_column, type_="int64")

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "observed_value": "int64"
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [6]:
# Country, Region, Crop_Type and Adaptation_Strategies must be strings
# (pandas dtype "object") and must not be empty.
text_columns = ("Country", "Region", "Crop_Type", "Adaptation_Strategies")
for column_name in text_columns:
    validator.expect_column_values_to_not_be_null(column=column_name)
    validator.expect_column_values_to_be_of_type(column=column_name, type_="object")

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

In [7]:
# Numeric columns such as Average_Temperature_C, Total_Precipitation_mm and
# CO2_Emissions_MT must not be NULL and must have the expected pandas dtype.
#
# Each column is mapped to its own dtype instead of assuming float64 for all:
# the validator.head() preview shows Extreme_Weather_Events holding whole
# numbers (8, 5, 9, ...), which pandas loads as int64, so a float64
# expectation on that column would fail without any visible output from
# inside the loop.
numeric_column_dtypes = {
    "Average_Temperature_C": "float64",
    "Total_Precipitation_mm": "float64",
    "CO2_Emissions_MT": "float64",
    "Crop_Yield_MT_per_HA": "float64",
    "Irrigation_Access_%": "float64",
    "Extreme_Weather_Events": "int64",
    "Economic_Impact_Million_USD": "float64",
}
for col, expected_dtype in numeric_column_dtypes.items():
    validator.expect_column_values_to_not_be_null(col)
    validator.expect_column_values_to_be_of_type(col, expected_dtype)

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/1 [00:00<?, ?it/s]

In [8]:
# NUMERIC VALUE VALIDATION
# Each entry pairs a column with the bounds passed to the range expectation;
# only the bounds listed here are supplied to the call.
value_ranges = [
    ("Average_Temperature_C", {"min_value": -10, "max_value": 50}),
    ("Total_Precipitation_mm", {"min_value": 0}),
    ("CO2_Emissions_MT", {"min_value": 0}),
    ("Crop_Yield_MT_per_HA", {"min_value": 0}),
    ("Irrigation_Access_%", {"min_value": 0, "max_value": 100}),
    ("Extreme_Weather_Events", {"min_value": 0}),
    ("Economic_Impact_Million_USD", {"min_value": 0}),
]
for column_name, bounds in value_ranges:
    last_result = validator.expect_column_values_to_be_between(column_name, **bounds)

# Display the result of the final check (Economic_Impact_Million_USD) as the cell output
last_result

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 10000,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [9]:
# Crop_Type may only contain values from the list of valid crops
valid_crops = [
    "Wheat",
    "Rice",
    "Corn",
    "Coffee",
    "Sugarcane",
    "Fruits",
    "Barley",
    "Vegetables",
    "Soybeans",
    "Cotton",
]
validator.expect_column_values_to_be_in_set(column="Crop_Type", value_set=valid_crops)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 10000,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [10]:
# Adaptation_Strategies may only contain values from the list of valid strategies
valid_strategies = ["Water Management", "Crop Rotation", "No Adaptation"]
valid_strategies += ["Organic Farming", "Drought-resistant Crops"]
validator.expect_column_values_to_be_in_set(
    column="Adaptation_Strategies",
    value_set=valid_strategies,
)

Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

{
  "success": true,
  "result": {
    "element_count": 10000,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "partial_unexpected_list": [],
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}