In [37]:
import os
import re

import great_expectations as gx

# 1. Initialise the Great Expectations context
context = gx.get_context()

# 2. Datasource name and base directory of the data files
datasource_name = "air_quality"
base_dir = "../data/preprocessed/air"

# 3. Create the datasource (if it does not exist yet).
# list_datasources() is documented to return configuration dicts, so the name
# is read by key; attribute access is kept only as a fallback for entries that
# turn out to be datasource objects.
existing_datasource_names = {
    ds.get("name") if isinstance(ds, dict) else ds.name
    for ds in context.list_datasources()
}

if datasource_name not in existing_datasource_names:
    datasource = context.sources.add_pandas_filesystem(
        name=datasource_name,
        base_directory=base_dir,
    )
else:
    datasource = context.get_datasource(datasource_name)

# 4. Find all CSV files named like "Exxx.csv".
# Sorted because os.listdir() returns entries in arbitrary order.
csv_files = sorted(
    f for f in os.listdir(base_dir) if f.startswith("E") and f.endswith(".csv")
)

# 5. Create one asset for every file found.
# The set of known asset names is built once and kept in sync inside the loop
# instead of being rebuilt on every iteration.
existing_asset_names = {asset.name for asset in datasource.assets}

for csv_file in csv_files:
    # Strip only the trailing extension to get the station code (e.g. "E403").
    station_code = os.path.splitext(csv_file)[0]
    asset_name = f"air_quality_data_{station_code}"

    if asset_name not in existing_asset_names:
        datasource.add_csv_asset(
            name=asset_name,
            # Escape the file name so that "." is matched literally rather
            # than as the regex wildcard.
            batching_regex=re.escape(csv_file),
        )
        existing_asset_names.add(asset_name)
        print(f"✅ Added asset: {asset_name}")
    else:
        print(f"ℹ️ Asset already exists: {asset_name}")


✅ Added asset: air_quality_data_E403
✅ Added asset: air_quality_data_E404
✅ Added asset: air_quality_data_E405
✅ Added asset: air_quality_data_E407
✅ Added asset: air_quality_data_E409
✅ Added asset: air_quality_data_E410
✅ Added asset: air_quality_data_E411
✅ Added asset: air_quality_data_E412
✅ Added asset: air_quality_data_E413
✅ Added asset: air_quality_data_E414
✅ Added asset: air_quality_data_E415
✅ Added asset: air_quality_data_E417
✅ Added asset: air_quality_data_E418
✅ Added asset: air_quality_data_E419
✅ Added asset: air_quality_data_E420
✅ Added asset: air_quality_data_E421
✅ Added asset: air_quality_data_E423
✅ Added asset: air_quality_data_E424
✅ Added asset: air_quality_data_E801
✅ Added asset: air_quality_data_E802
✅ Added asset: air_quality_data_E803
✅ Added asset: air_quality_data_E804


In [38]:
import json
import os
import re

import great_expectations as gx
from great_expectations.core.expectation_suite import ExpectationSuite

# 1. Initialise the context
context = gx.get_context()

# 2. Basic parameters
datasource_name = "air_quality"
base_dir = "../data/preprocessed/air"
expectation_suite_name = "air_quality_suite"
suite_file = "air_quality_suite.json"

# 3. Import the single shared Expectation Suite from the JSON file.
# The encoding is given explicitly so the read does not depend on the
# platform's default locale.
with open(suite_file, "r", encoding="utf-8") as f:
    suite_dict = json.load(f)

suite = ExpectationSuite(**suite_dict)
# Upsert the suite in the context's store (replaces the deprecated
# save_expectation_suite call).
context.add_or_update_expectation_suite(expectation_suite=suite)

# 4. Get the datasource
datasource = context.get_datasource(datasource_name)

# 5. Find all CSV files in the directory (e.g. E410.csv, E411.csv, ...).
# Sorted because os.listdir() returns entries in arbitrary order.
csv_files = sorted(
    f for f in os.listdir(base_dir) if f.startswith("E") and f.endswith(".csv")
)

# Known asset names, built once and kept in sync inside the loop.
existing_asset_names = {asset.name for asset in datasource.assets}

# 6. Go through every file
for csv_file in csv_files:
    # Strip only the trailing extension to get the station code (e.g. "E403").
    station_code = os.path.splitext(csv_file)[0]
    asset_name = f"air_quality_data_{station_code}"
    checkpoint_name = f"checkpoint_{station_code}"

    print(f"\n🔧 Obdelujem: {csv_file}")

    # 6a. Add the asset (if it does not exist yet)
    if asset_name not in existing_asset_names:
        datasource.add_csv_asset(
            name=asset_name,
            # Escape the file name so that "." is matched literally rather
            # than as the regex wildcard.
            batching_regex=re.escape(csv_file),
        )
        existing_asset_names.add(asset_name)

    # 6b. Fetch the asset and build a batch request for it
    asset = datasource.get_asset(asset_name)
    batch_request = asset.build_batch_request()

    # 6c. Create the checkpoint, or refresh it if it already exists, so a
    # stored checkpoint always points at the current batch request and suite.
    checkpoint = context.add_or_update_checkpoint(
        name=checkpoint_name,
        validations=[
            {
                "batch_request": batch_request,
                "expectation_suite_name": expectation_suite_name,
            }
        ],
    )

    # 6d. Run the checkpoint
    result = checkpoint.run(run_id=f"{station_code}_run")

    # 6e. Report the validation outcome
    if result["success"]:
        print(f"✅ {station_code}: Validacija uspešna.")
    else:
        print(f"❌ {station_code}: Validacija padla.")

# 7. Generate the data docs and open them
context.build_data_docs()
context.open_data_docs()



🔧 Obdelujem: E403.csv


Calculating Metrics:   0%|          | 0/38 [00:00<?, ?it/s]

✅ E403: Validacija uspešna.

🔧 Obdelujem: E404.csv


Calculating Metrics:   0%|          | 0/38 [00:00<?, ?it/s]

✅ E404: Validacija uspešna.

🔧 Obdelujem: E405.csv


Calculating Metrics:   0%|          | 0/38 [00:00<?, ?it/s]

✅ E405: Validacija uspešna.

🔧 Obdelujem: E407.csv


Calculating Metrics:   0%|          | 0/38 [00:00<?, ?it/s]

✅ E407: Validacija uspešna.

🔧 Obdelujem: E409.csv


Calculating Metrics:   0%|          | 0/38 [00:00<?, ?it/s]

✅ E409: Validacija uspešna.

🔧 Obdelujem: E410.csv


Calculating Metrics:   0%|          | 0/38 [00:00<?, ?it/s]

✅ E410: Validacija uspešna.

🔧 Obdelujem: E411.csv


Calculating Metrics:   0%|          | 0/38 [00:00<?, ?it/s]

✅ E411: Validacija uspešna.

🔧 Obdelujem: E412.csv


Calculating Metrics:   0%|          | 0/38 [00:00<?, ?it/s]

✅ E412: Validacija uspešna.

🔧 Obdelujem: E413.csv


Calculating Metrics:   0%|          | 0/38 [00:00<?, ?it/s]

✅ E413: Validacija uspešna.

🔧 Obdelujem: E414.csv


Calculating Metrics:   0%|          | 0/38 [00:00<?, ?it/s]

✅ E414: Validacija uspešna.

🔧 Obdelujem: E415.csv


Calculating Metrics:   0%|          | 0/38 [00:00<?, ?it/s]

✅ E415: Validacija uspešna.

🔧 Obdelujem: E417.csv


Calculating Metrics:   0%|          | 0/38 [00:00<?, ?it/s]

✅ E417: Validacija uspešna.

🔧 Obdelujem: E418.csv


Calculating Metrics:   0%|          | 0/38 [00:00<?, ?it/s]

✅ E418: Validacija uspešna.

🔧 Obdelujem: E419.csv


Calculating Metrics:   0%|          | 0/38 [00:00<?, ?it/s]

✅ E419: Validacija uspešna.

🔧 Obdelujem: E420.csv


Calculating Metrics:   0%|          | 0/38 [00:00<?, ?it/s]

✅ E420: Validacija uspešna.

🔧 Obdelujem: E421.csv


Calculating Metrics:   0%|          | 0/38 [00:00<?, ?it/s]

✅ E421: Validacija uspešna.

🔧 Obdelujem: E423.csv


Calculating Metrics:   0%|          | 0/38 [00:00<?, ?it/s]

✅ E423: Validacija uspešna.

🔧 Obdelujem: E424.csv


Calculating Metrics:   0%|          | 0/38 [00:00<?, ?it/s]

✅ E424: Validacija uspešna.

🔧 Obdelujem: E801.csv


Calculating Metrics:   0%|          | 0/38 [00:00<?, ?it/s]

✅ E801: Validacija uspešna.

🔧 Obdelujem: E802.csv


Calculating Metrics:   0%|          | 0/38 [00:00<?, ?it/s]

✅ E802: Validacija uspešna.

🔧 Obdelujem: E803.csv


Calculating Metrics:   0%|          | 0/38 [00:00<?, ?it/s]

✅ E803: Validacija uspešna.

🔧 Obdelujem: E804.csv


Calculating Metrics:   0%|          | 0/38 [00:00<?, ?it/s]

✅ E804: Validacija uspešna.
