In [1]:
import glob
import os

from tfx.components import CsvExampleGen
from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext
from tfx.proto import example_gen_pb2

# Interactive context used below to execute the component from this session.
context = InteractiveContext()

# Input location: the "data2" folder under the current working directory.
base_dir = os.getcwd()
data_dir = os.path.join(base_dir, "data2")

# Split layout as (split name, hash bucket count) pairs -> buckets in ratio 6:2:2.
split_buckets = (("train", 6), ("eval", 2), ("test", 2))
output = example_gen_pb2.Output(
    split_config=example_gen_pb2.SplitConfig(
        splits=[
            example_gen_pb2.SplitConfig.Split(name=bucket_name, hash_buckets=bucket_count)
            for bucket_name, bucket_count in split_buckets
        ]
    )
)

# CSV ingestion component wired to the input folder and the split layout above.
example_gen = CsvExampleGen(input_base=data_dir, output_config=output)

# Execute the component through the interactive context.
context.run(example_gen)


# Inspect the 'examples' output of example_gen: print every artifact's base URI,
# the sub-directories found under it, and the files inside each sub-directory.
artifacts = example_gen.outputs['examples'].get()

print("\n=== Generated Artifacts ===")
for artifact in artifacts:
    print(f"Artifact URI (base): {artifact.uri}")

    # os.listdir() returns entries in arbitrary order; sort them so that
    # re-running the cell prints the split directories in a stable order.
    split_dirs = sorted(
        entry for entry in os.listdir(artifact.uri)
        if os.path.isdir(os.path.join(artifact.uri, entry))
    )

    for split_name in split_dirs:
        split_path = os.path.join(artifact.uri, split_name)
        print(f"  Split: {split_name}")
        print(f"  Split Path: {split_path}")

        # glob.glob() is unordered as well; the "*" pattern skips dot-files.
        files = sorted(glob.glob(os.path.join(split_path, "*")))
        for file in files:
            print(f"    File: {file}")

    print("----------------------------")

2025-06-11 21:30:48.609509: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-11 21:30:48.610052: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-06-11 21:30:48.611920: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-06-11 21:30:48.617997: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-06-11 21:30:48.630037: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registe


=== Generated Artifacts ===
Artifact URI (base): /tmp/tfx-interactive-2025-06-11T21_30_51.356247-pbsm9bfd/CsvExampleGen/examples/1
  Split: Split-train
  Split Path: /tmp/tfx-interactive-2025-06-11T21_30_51.356247-pbsm9bfd/CsvExampleGen/examples/1/Split-train
    File: /tmp/tfx-interactive-2025-06-11T21_30_51.356247-pbsm9bfd/CsvExampleGen/examples/1/Split-train/data_tfrecord-00000-of-00001.gz
  Split: Split-eval
  Split Path: /tmp/tfx-interactive-2025-06-11T21_30_51.356247-pbsm9bfd/CsvExampleGen/examples/1/Split-eval
    File: /tmp/tfx-interactive-2025-06-11T21_30_51.356247-pbsm9bfd/CsvExampleGen/examples/1/Split-eval/data_tfrecord-00000-of-00001.gz
  Split: Split-test
  Split Path: /tmp/tfx-interactive-2025-06-11T21_30_51.356247-pbsm9bfd/CsvExampleGen/examples/1/Split-test
    File: /tmp/tfx-interactive-2025-06-11T21_30_51.356247-pbsm9bfd/CsvExampleGen/examples/1/Split-test/data_tfrecord-00000-of-00001.gz
----------------------------


In [2]:
!cp /tmp/tfx-interactive-*/CsvExampleGen/examples/1/Split-eval/*.gz ~/Data\ Ingestion\ with\ TFX/
