OutOfRangeInput Errors when copying blob from S3 Presigned URL #721

@yovelcohen

Description

I'm trying to copy a blob into Azure Blob Storage from an S3 presigned URL.

import base64
import math
import mimetypes

import httpx
import logfire
from azure.storage.blob import ContentSettings
from azure.storage.blob.aio import BlobClient

# Project helpers (get_blob_client, get_blob_properties, settings, serialize_enum_value,
# BlobNameType, AwaitableObjectMixin) come from elsewhere in my codebase.

class VideoCopier(AwaitableObjectMixin[str]):

	def __init__(self, source_get_url: str, source_head_url: str, target_blob: BlobNameType):
		self.head_source_url = source_head_url
		self.source_get_url = source_get_url
		self.target_blob = target_blob

	async def _run(self) -> str:
		try:
			total_size = await self.head_content_length()
		except Exception as e:
			total_size = None

		if total_size is None or total_size > (256 * 1024 * 1024):
			return await self.copy_large_blob_from_url(total_size=total_size)
		return await self.copy_directly()

	def get_target_blob_client(self) -> BlobClient:
		return get_blob_client(settings.PROJECT_BLOB_CONTAINER, self.target_blob)

	async def head_content_length(self) -> int:
		"""
		Send an HTTP HEAD request to `url` and return the integer value
		of its Content-Length header.
		"""
		async with httpx.AsyncClient() as client:
			resp = await client.head(self.head_source_url)
			length = resp.headers.get("Content-Length")
			if length is None:
				raise ValueError("No Content-Length header in response")
		return int(length)

	async def copy_large_blob_from_url(self, total_size: int, block_size: int = 32 * 1024 * 1024) -> str:
		with logfire.span('Starting video copy from url in chunks', _level='debug'):
			block_ids = list()
			dest_blob = self.get_target_blob_client()
			# async with asyncio.TaskGroup() as tg:
			for idx in range(math.ceil(total_size / block_size)):
				offset = idx * block_size
				length = min(block_size, total_size - offset)
				block_id = base64.b64encode(f"{idx:06}".encode()).decode()
				block_ids.append(block_id)
				await dest_blob.stage_block_from_url(
					block_id=block_id,
					source_url=self.source_get_url,
					source_offset=offset,
					source_length=length
				)

			mime_type, content_settings = mimetypes.guess_type(self.source_get_url)[0], None
			if mime_type is not None:
				content_settings = ContentSettings(content_type=mime_type)

			await dest_blob.commit_block_list(
				block_ids,
				validate_content=False,
				content_settings=content_settings
			)
			props = await get_blob_properties(
				container_name=settings.PROJECT_BLOB_CONTAINER,
				blob_name=self.target_blob
			)
		status = serialize_enum_value(props.copy.status)
		return status
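
For context, AwaitableObjectMixin just makes the instance awaitable by delegating to _run(); the real implementation lives elsewhere in my codebase, but a minimal stand-in behaves roughly like this:

from typing import Generic, TypeVar

T = TypeVar("T")


class AwaitableObjectMixin(Generic[T]):
    """Minimal stand-in: awaiting the object runs its async _run() method."""

    async def _run(self) -> T:  # overridden by subclasses such as VideoCopier
        raise NotImplementedError

    def __await__(self):
        return self._run().__await__()


# Usage is roughly:
#     status = await VideoCopier(source_get_url, source_head_url, target_blob)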

The S3 presigned URL I'm using is 376 characters long, so it's under the limit.
The target blob name is 65 characters, so that's fine as well.
The file itself is about 350 MB.
The operation fails while staging the block (await dest_blob.stage_block_from_url) with the following error:

HttpResponseError('One of the request inputs is out of range.\nRequestId:a51eb161-701e-001a-47cc-09acb0000000\nTime:2025-08-10T08:00:08.6770484Z\nErrorCode:OutOfRangeInput')

From what I've found online, the limit per staged block seems to be 100 MB, and I'm staging chunks of 32 MB; chunks of 4 MB failed as well. With a ~350 MB file and 32 MB blocks that's 11 blocks, each with a source_length of at most 32 MB, so every range should be well under that cap.
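
For reference, here is the failing call pulled out of the loop. The two URL constants are placeholders (the real ones are the 376-character presigned GET URL and the destination blob URL with a SAS token), and BlobClient.from_blob_url just stands in for my get_blob_client helper; otherwise this is the same request shape that errors out:

import asyncio
import base64

from azure.storage.blob.aio import BlobClient

# Placeholders for the real URLs.
SOURCE_PRESIGNED_URL = "https://bucket.s3.amazonaws.com/video.mp4?X-Amz-Algorithm=..."
DEST_BLOB_SAS_URL = "https://account.blob.core.windows.net/container/target.mp4?sv=..."


async def stage_first_block() -> None:
    async with BlobClient.from_blob_url(DEST_BLOB_SAS_URL) as dest_blob:
        # Same block-id scheme as in copy_large_blob_from_url above.
        block_id = base64.b64encode(b"000000").decode()
        # This is the call that raises OutOfRangeInput, here for the first 32 MB
        # range only (4 MB ranges fail the same way).
        await dest_blob.stage_block_from_url(
            block_id=block_id,
            source_url=SOURCE_PRESIGNED_URL,
            source_offset=0,
            source_length=32 * 1024 * 1024,
        )
        # commit_block_list is never reached because staging already fails.


asyncio.run(stage_first_block())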
