I'm trying to copy a blob from an S3 Presigned URL.
import asyncio
import base64
import math
import mimetypes

import httpx
import logfire
from azure.storage.blob import ContentSettings
from azure.storage.blob.aio import BlobClient

# AwaitableObjectMixin, BlobNameType, settings, get_blob_client,
# get_blob_properties and serialize_enum_value are project-level helpers.


class VideoCopier(AwaitableObjectMixin[str]):
    def __init__(self, source_get_url: str, source_head_url: str, target_blob: BlobNameType):
        self.head_source_url = source_head_url
        self.source_get_url = source_get_url
        self.target_blob = target_blob

    async def _run(self) -> str:
        try:
            total_size = await self.head_content_length()
        except Exception:
            total_size = None
        if total_size is None or total_size > (256 * 1024 * 1024):
            return await self.copy_large_blob_from_url(total_size=total_size)
        return await self.copy_directly()  # small-file path, omitted from this snippet

    def get_target_blob_client(self) -> BlobClient:
        return get_blob_client(settings.PROJECT_BLOB_CONTAINER, self.target_blob)

    async def head_content_length(self) -> int:
        """
        Send an HTTP HEAD request to `url` and return the integer value
        of its Content-Length header.
        """
        async with httpx.AsyncClient() as client:
            resp = await client.head(self.head_source_url)
        length = resp.headers.get("Content-Length")
        if length is None:
            raise ValueError("No Content-Length header in response")
        return int(length)

    async def copy_large_blob_from_url(self, total_size: int, block_size: int = 32 * 1024 * 1024) -> str:
        with logfire.span('Starting video copy from url in chunks', _level='debug'):
            block_ids = []
            dest_blob = self.get_target_blob_client()
            # async with asyncio.TaskGroup() as tg:
            for idx in range(math.ceil(total_size / block_size)):
                offset = idx * block_size
                length = min(block_size, total_size - offset)
                block_id = base64.b64encode(f"{idx:06}".encode()).decode()
                block_ids.append(block_id)
                # This is the call that fails with OutOfRangeInput (see below).
                await dest_blob.stage_block_from_url(
                    block_id=block_id,
                    source_url=self.source_get_url,
                    source_offset=offset,
                    source_length=length,
                )

            mime_type, content_settings = mimetypes.guess_type(self.source_get_url)[0], None
            if mime_type is not None:
                content_settings = ContentSettings(content_type=mime_type)

            await dest_blob.commit_block_list(
                block_ids,
                validate_content=False,
                content_settings=content_settings,
            )

            props = await get_blob_properties(
                container_name=settings.PROJECT_BLOB_CONTAINER,
                blob_name=self.target_blob,
            )
            status = serialize_enum_value(props.copy.status)
            return status

The S3 presigned URL I'm using has 376 characters, so it's within the limit.
The target blob name has 65 characters, so that's also fine.
The file itself weighs about 350 MB.
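As a sanity check on the range arithmetic (assuming the HEAD request really reports about 350 MB), the offsets and lengths produced by the loop all stay inside the source object:

import math

total_size = 350 * 1024 * 1024   # roughly the reported file size
block_size = 32 * 1024 * 1024    # same block size as above

for idx in range(math.ceil(total_size / block_size)):    # 11 blocks
    offset = idx * block_size
    length = min(block_size, total_size - offset)
    assert offset + length <= total_size                  # no range runs past the end
    print(idx, offset, length)                            # last block (idx 10) is 30 MB

So the requested ranges themselves don't look out of bounds.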
The operation fails while staging a block from the URL (await dest_blob.stage_block_from_url) with the following error:
HttpResponseError('One of the request inputs is out of range.\nRequestId:a51eb161-701e-001a-47cc-09acb0000000\nTime:2025-08-10T08:00:08.6770484Z\nErrorCode:OutOfRangeInput')
From what I can find online, the limit seems to be 100 MB, and I'm staging blocks of 32 MB; blocks of 4 MB failed as well.
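For reference, the failure can presumably be reduced to a single stage_block_from_url call, independent of the rest of the class. A stripped-down sketch (the connection string, container, blob name and source URL below are placeholders, not the real values):

import asyncio
import base64

from azure.storage.blob.aio import BlobServiceClient

AZURE_CONN_STR = "..."  # placeholder storage account connection string
CONTAINER = "videos"    # placeholder target container
SOURCE_URL = "https://<bucket>.s3.amazonaws.com/<key>?X-Amz-..."  # placeholder presigned GET URL

async def main() -> None:
    async with BlobServiceClient.from_connection_string(AZURE_CONN_STR) as service:
        blob = service.get_blob_client(CONTAINER, "repro.bin")
        block_id = base64.b64encode(b"000000").decode()
        # Stage a single small block from the presigned URL; this is the call
        # that raises OutOfRangeInput, even for a 4 MB range.
        await blob.stage_block_from_url(
            block_id=block_id,
            source_url=SOURCE_URL,
            source_offset=0,
            source_length=4 * 1024 * 1024,
        )

asyncio.run(main())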