-
Notifications
You must be signed in to change notification settings - Fork 6
Open
Labels
bug — Something isn't working; content: cloud — Content related issues/PRs for notebooks with cloud hosted data relevance; upstream
Description
Example failing job: https://github.com/Caltech-IPAC/irsa-tutorials/actions/runs/19221240496/job/54939630573#step:5:1105
It's persistent enough to still appear after a manual restart 2 hours after the first run. I would expect, though, that the cause is something upstream.
_____ tutorials\parquet-catalog-demos\irsa-hats-with-lsdb.ipynb::Cell 30 ______
Notebook cell execution failed
Cell 30: Cell execution caused an exception
Input:
def get_nworkers(catalog):
return min(os.cpu_count(), catalog.npartitions + 1)
with Client(n_workers=get_nworkers(euclid_x_ztf),
threads_per_worker=2,
memory_limit='auto') as client:
print(f"This may take more than a few minutes to complete. You can monitor progress in Dask dashboard at {client.dashboard_link}")
euclid_x_ztf_df = euclid_x_ztf.compute() # this will load the data into memory finally
Traceback:
---------------------------------------------------------------------------
ParamValidationError Traceback (most recent call last)
Cell In[1], line 8
4 with Client(n_workers=get_nworkers(euclid_x_ztf),
5 threads_per_worker=2,
6 memory_limit='auto') as client:
7 print(f"This may take more than a few minutes to complete. You can monitor progress in Dask dashboard at {client.dashboard_link}")
----> 8 euclid_x_ztf_df = euclid_x_ztf.compute() # this will load the data into memory finally
File D:\a\irsa-tutorials\irsa-tutorials\.tox\py312-test\Lib\site-packages\lsdb\catalog\dataset\dataset.py:62, in Dataset.compute(self)
60 def compute(self) -> npd.NestedFrame:
61 """Compute dask distributed dataframe to pandas dataframe"""
---> 62 return self._ddf.compute()
File D:\a\irsa-tutorials\irsa-tutorials\.tox\py312-test\Lib\site-packages\lsdb\nested\core.py:436, in NestedFrame.compute(self, **kwargs)
434 def compute(self, **kwargs):
435 """Compute this Dask collection, returning the underlying dataframe or series."""
--> 436 return npd.NestedFrame(super().compute(**kwargs))
File D:\a\irsa-tutorials\irsa-tutorials\.tox\py312-test\Lib\site-packages\dask\base.py:373, in DaskMethodsMixin.compute(self, **kwargs)
349 def compute(self, **kwargs):
350 """Compute this dask collection
351
352 This turns a lazy Dask collection into its in-memory equivalent.
(...) 371 dask.compute
372 """
--> 373 (result,) = compute(self, traverse=False, **kwargs)
374 return result
File D:\a\irsa-tutorials\irsa-tutorials\.tox\py312-test\Lib\site-packages\dask\base.py:681, in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
678 expr = expr.optimize()
679 keys = list(flatten(expr.__dask_keys__()))
--> 681 results = schedule(expr, keys, **kwargs)
683 return repack(results)
File D:\a\irsa-tutorials\irsa-tutorials\.tox\py312-test\Lib\site-packages\lsdb\loaders\hats\read_hats.py:428, in read_pixel()
393 def read_pixel(
394 pixel: HealpixPixel,
395 *,
(...) 403 **kwargs,
404 ) -> npd.NestedFrame:
405 """Utility method to read a single pixel's parquet file from disk.
406
407 NB: `columns` is necessary as an argument, even if None, so that dask-expr
(...) 426 The pixel data, as read from its parquet file.
427 """
--> 428 return _read_parquet_file(
429 path_generator(
430 catalog_base_dir,
431 pixel,
432 query_url_params,
433 npix_suffix,
434 ),
435 columns=columns,
436 schema=schema,
437 index_column=index_column,
438 **kwargs,
439 )
File D:\a\irsa-tutorials\irsa-tutorials\.tox\py312-test\Lib\site-packages\lsdb\loaders\hats\read_hats.py:457, in _read_parquet_file()
450 if (
451 columns is not None
452 and schema is not None
[NOTE: one or more traceback frames appear to be missing from this paste here — the numbering jumps from line 452 of lsdb's read_hats.py to line 353 of what looks like aiobotocore's client.py (presumably inside _make_api_call), given the _emit_api_params frame below.]
    353 logger.debug(
354 'Warning: %s.%s() is deprecated', service_name, operation_name
355 )
356 request_context = {
357 'client_region': self.meta.region_name,
358 'client_config': self.meta.config,
(...) 362 'auth_options': self._service_model.metadata.get('auth'),
363 }
--> 365 api_params = await self._emit_api_params(
366 api_params=api_params,
367 operation_model=operation_model,
368 context=request_context,
369 )
370 (
371 endpoint_url,
372 additional_headers,
(...) 375 operation_model, api_params, request_context
376 )
377 if properties:
378 # Pass arbitrary endpoint info with the Request
379 # for use during construction.
File D:\a\irsa-tutorials\irsa-tutorials\.tox\py312-test\Lib\site-packages\aiobotocore\client.py:488, in _emit_api_params()
480 responses = await self.meta.events.emit(
481 f'provide-client-params.{service_id}.{operation_name}',
482 params=api_params,
483 model=operation_model,
484 context=context,
485 )
486 api_params = first_non_none_response(responses, default=api_params)
--> 488 await self.meta.events.emit(
489 f'before-parameter-build.{service_id}.{operation_name}',
490 params=api_params,
491 model=operation_model,
492 context=context,
493 )
494 return api_params
File D:\a\irsa-tutorials\irsa-tutorials\.tox\py312-test\Lib\site-packages\aiobotocore\hooks.py:68, in _emit()
65 logger.debug('Event %s: calling handler %s', event_name, handler)
67 # Await the handler if its a coroutine.
---> 68 response = await resolve_awaitable(handler(**kwargs))
69 responses.append((handler, response))
70 if stop_on_response and response is not None:
File D:\a\irsa-tutorials\irsa-tutorials\.tox\py312-test\Lib\site-packages\botocore\handlers.py:322, in validate_bucket_name()
316 if not VALID_BUCKET.search(bucket) and not VALID_S3_ARN.search(bucket):
317 error_msg = (
318 f'Invalid bucket name "{bucket}": Bucket name must match '
319 f'the regex "{VALID_BUCKET.pattern}" or be an ARN matching '
320 f'the regex "{VALID_S3_ARN.pattern}"'
321 )
--> 322 raise ParamValidationError(report=error_msg)
ParamValidationError: Parameter validation failed:
Invalid bucket name "ipac-irsa-ztf\contributed\dr23\objects\hats\ztf_dr23_objects-hats_margin_10arcsec\dataset\Norder=4\Dir=0\Npix=639.parquet": Bucket name must match the regex "^[a-zA-Z0-9.\-_]{1,255}$" or be an ARN matching the regex "^arn:(aws).*:(s3|s3-object-lambda):[a-z\-0-9]*:[0-9]{12}:accesspoint[/:][a-zA-Z0-9\-.]{1,63}$|^arn:(aws).*:s3-outposts:[a-z\-0-9]+:[0-9]{12}:outpost[/:][a-zA-Z0-9\-]{1,63}[/:]accesspoint[/:][a-zA-Z0-9\-]{1,63}$"
Metadata
Metadata
Assignees
Labels
bug — Something isn't working; content: cloud — Content related issues/PRs for notebooks with cloud hosted data relevance; upstream