PySport · koenvo · Jan 27, 2025 · Jan 27, 2025 · Jan 27, 2025
diff --git a/ingestify/application/dataset_store.py b/ingestify/application/dataset_store.py
@@ -294,9 +294,7 @@ def load_files(
 
             def get_stream(file_):
                 return reader(
-                    self.file_repository.load_content(
-                        bucket=self.bucket, storage_path=file_.storage_path
-                    )
+                    self.file_repository.load_content(storage_path=file_.storage_path)
                 )
 
             loaded_file = LoadedFile(

diff --git a/ingestify/domain/models/dataset/file.py b/ingestify/domain/models/dataset/file.py
@@ -116,7 +116,7 @@ class LoadedFile(BaseModel):
     data_serialization_format: Optional[str]  # Example: 'json'
     storage_compression_method: Optional[str]  # Example: 'gzip'
     storage_path: Path
-    _stream: Union[BinaryIO, Callable[[], Awaitable[BinaryIO]]]
+    _stream: Union[BinaryIO, BytesIO, Callable[[], Awaitable[Union[BinaryIO, BytesIO]]]]
     revision_id: Optional[int] = None  # This can be used when a Revision is squashed
 
     def load_stream(self):

diff --git a/ingestify/domain/models/ingestion/ingestion_job_summary.py b/ingestify/domain/models/ingestion/ingestion_job_summary.py
@@ -112,7 +112,7 @@ def output_report(self):
 
         print(f"    - Failed tasks: {self.failed_tasks}")
         print(f"    - Successful tasks: {self.successful_tasks}")
-        print(f"    - Successful ignored tasks: {self.successful_tasks}")
+        print(f"    - Successful ignored tasks: {self.ignored_successful_tasks}")
         print(f"    - Skipped datasets: {self.skipped_datasets}")
         print("--------------------")
 

diff --git a/ingestify/tests/test_engine.py b/ingestify/tests/test_engine.py
@@ -245,6 +245,13 @@ def test_engine(config_file):
     items = list(engine.store.dataset_repository.session.query(IngestionJobSummary))
     print(items)
 
+    # Make sure we can load the files
+    files = engine.store.load_files(datasets.first(), lazy=True)
+    assert files.get_file("file1").stream.read() == b"content1"
+
+    files = engine.store.load_files(datasets.first(), lazy=False)
+    assert files.get_file("file1").stream.read() == b"content1"
+
 
 def test_iterator_source(config_file):
     """Test when a Source returns a Iterator to do Batch processing.