From 1968e8ebcf3e02ccc44213bffde508318f154245 Mon Sep 17 00:00:00 2001
From: Sunnyhaze
Date: Sun, 13 Jul 2025 17:11:40 +0800
Subject: [PATCH] [storage] add error logging when step() is not called before the first run.

---
 dataflow/utils/storage.py           | 7 +++++++
 test/small_functions/translation.py | 8 +++++---
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/dataflow/utils/storage.py b/dataflow/utils/storage.py
index ed5e6595..7514aabc 100644
--- a/dataflow/utils/storage.py
+++ b/dataflow/utils/storage.py
@@ -40,6 +40,9 @@ def __init__(self,
         self.logger = get_logger()
 
     def _get_cache_file_path(self, step) -> str:
+        if step == -1:
+            self.logger.error("You must call storage.step() before reading or writing data. Please call storage.step() first for each operator step.")
+            raise ValueError("You must call storage.step() before reading or writing data. Please call storage.step() first for each operator step.")
         if step == 0:
             # If it's the first step, use the first entry file name
             return os.path.join(self.first_entry_file_name)
@@ -56,6 +59,10 @@ def reset(self):
 
     def _load_local_file(self, file_path: str, file_type: str) -> pd.DataFrame:
         """Load data from local file based on file type."""
+        # check if file exists
+        if not os.path.exists(file_path):
+            raise FileNotFoundError(f"File {file_path} does not exist. Please check the path.")
+        # Load file based on type
         try:
             if file_type == "json":
                 return pd.read_json(file_path)
diff --git a/test/small_functions/translation.py b/test/small_functions/translation.py
index 0ffa2b31..ea773db0 100644
--- a/test/small_functions/translation.py
+++ b/test/small_functions/translation.py
@@ -5,7 +5,7 @@ class GPT_generator():
 
     def __init__(self):
         self.storage = FileStorage(
-            first_entry_file_name="../../dataflow/example/GeneralTextPipeline/translation.jsonl",
+            first_entry_file_name="../../dataflow/example/GeneralTextPipeline/translation.jsonl1",
             cache_path="./cache",
             file_name_prefix="translation",
             cache_type="jsonl",
@@ -16,13 +16,15 @@ def __init__(self):
             model_name="gpt-4o",
             max_workers=2
         )
-        self.prompt_generator = PromptedGenerator(llm_serving = self.llm_serving)
+        self.prompt_generator = PromptedGenerator(
+            llm_serving = self.llm_serving,
+            system_prompt = "Please translate to Chinese.",
+        )
 
     def forward(self):
         # Initial filters
         self.prompt_generator.run(
             storage = self.storage.step(),
-            system_prompt = "Please translate to Chinese.",
             input_key = "raw_content",
         )
 
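
Reviewer note (not part of the patch): the new guard in _get_cache_file_path turns a forgotten storage.step() call into an immediate, descriptive ValueError plus a log entry instead of a confusing cache-path failure later, and _load_local_file now reports a missing input file up front. The stand-alone sketch below mimics the step-guard behavior for illustration only; the attribute name operator_step and the step-N cache-file naming are assumptions made for this sketch, not FileStorage's actual internals.

# Sketch only: a self-contained mimic of the guard this patch adds. The
# attribute name operator_step and the "<prefix>_step<N>.<type>" cache
# naming below are illustrative assumptions, not FileStorage's real code.
import os

class StepGuardedStorage:
    def __init__(self, first_entry_file_name, cache_path, file_name_prefix, cache_type):
        self.first_entry_file_name = first_entry_file_name
        self.cache_path = cache_path
        self.file_name_prefix = file_name_prefix
        self.cache_type = cache_type
        self.operator_step = -1  # -1 means step() has never been called

    def step(self):
        # Advance to the next operator step; return self so the call can be
        # chained, e.g. operator.run(storage=storage.step(), ...).
        self.operator_step += 1
        return self

    def _get_cache_file_path(self, step):
        if step == -1:
            # Fail fast, mirroring the error added by this patch.
            raise ValueError("You must call storage.step() before reading or writing data.")
        if step == 0:
            # The first step reads the original entry file.
            return self.first_entry_file_name
        # Later steps read the previous operator's cached output (naming assumed).
        return os.path.join(self.cache_path, f"{self.file_name_prefix}_step{step}.{self.cache_type}")

storage = StepGuardedStorage("translation.jsonl", "./cache", "translation", "jsonl")

try:
    storage._get_cache_file_path(storage.operator_step)  # step() was never called
except ValueError as err:
    print(err)

print(storage.step()._get_cache_file_path(storage.operator_step))  # -> translation.jsonl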