Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add presigned url multi part upload #291

Merged
merged 9 commits into from
Oct 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 57 additions & 9 deletions evalai/utils/common.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import click
import json
import random
import requests
import string
Expand All @@ -9,6 +10,8 @@
from datetime import datetime
from dateutil import tz
from http import HTTPStatus
from pathlib import Path
from tqdm import tqdm

from evalai.utils.auth import get_request_header, get_host_url
from evalai.utils.config import EVALAI_ERROR_CODES
Expand Down Expand Up @@ -37,7 +40,7 @@ def convert(self, value, param, ctx):
)


def upload_file_to_s3(file, presigned_url):
def upload_file_to_s3(file, presigned_urls, max_chunk_size):
"""
Function to upload a file, given the target presigned s3 url

Expand All @@ -54,10 +57,26 @@ def upload_file_to_s3(file, presigned_url):
)

try:
response = requests.put(
presigned_url,
data=file
)
parts = []
index = 0
file_size = Path(file.name).stat().st_size
for chunk_size in tqdm(range(0, file_size, max_chunk_size)):
presigned_url_object = presigned_urls[index]
part = presigned_url_object["partNumber"]
url = presigned_url_object["url"]
file_data = file.read(max_chunk_size)
response = requests.put(url, data=file_data)
if response.status_code != HTTPStatus.OK:
response.raise_for_status()

etag = response.headers['ETag']
parts.append({"ETag": etag, "PartNumber": part})
index += 1

response = {
"success": True,
"parts": parts
}
except Exception as err:
echo(style("\nThere was an error while uploading the file: {}".format(err), fg="red", bold=True))
sys.exit(1)
Expand Down Expand Up @@ -133,34 +152,63 @@ def generate_random_string(length):
def upload_file_using_presigned_url(challenge_phase_pk, file, file_type, submission_metadata={}):
if file_type == "submission":
url = "{}{}".format(get_host_url(), URLS.get_presigned_url_for_submission_file.value)
finish_upload_url = "{}{}".format(get_host_url(), URLS.finish_upload_for_submission_file.value)
elif file_type == "annotation":
url = "{}{}".format(get_host_url(), URLS.get_presigned_url_for_annotation_file.value)
finish_upload_url = "{}{}".format(get_host_url(), URLS.finish_upload_for_annotation_file.value)
url = url.format(challenge_phase_pk)
headers = get_request_header()

# Limit to max 100 MB chunk for multipart upload
max_chunk_size = 100 * 1024 * 1024

try:
# Fetching the presigned url
if file_type == "submission":
data = {"status": "submitting", "file_name": file.name}
file_size = Path(file.name).stat().st_size
num_file_chunks = int(file_size / max_chunk_size) + 1
data = {"status": "submitting", "file_name": file.name, "num_file_chunks": num_file_chunks}
data = dict(data, **submission_metadata)
response = requests.post(
url, headers=headers, data=data
)

if response.status_code is not HTTPStatus.CREATED:
response.raise_for_status()

# Update url params for multipart upload on S3
finish_upload_url = finish_upload_url.format(challenge_phase_pk, response.json().get("submission_pk"))
elif file_type == "annotation":
data = {"file_name": file.name}
file_size = Path(file.name).stat().st_size
num_file_chunks = int(file_size / max_chunk_size) + 1

data = {"file_name": file.name, "num_file_chunks": num_file_chunks}
response = requests.get(url, headers=headers, data=data)
if response.status_code is not HTTPStatus.OK:
response.raise_for_status()

# Update url params for multipart upload on S3
finish_upload_url = finish_upload_url.format(challenge_phase_pk)

response = response.json()
presigned_url = response.get("presigned_url")
presigned_urls = response.get("presigned_urls")
upload_id = response.get("upload_id")
if file_type == "submission":
submission_pk = response.get("submission_pk")

# Uploading the file to S3
response = upload_file_to_s3(file, presigned_url)
response = upload_file_to_s3(file, presigned_urls, max_chunk_size)

data = {
"parts": json.dumps(response.get("parts")),
"upload_id": upload_id
}

# Complete multipart S3 upload
response = requests.post(
finish_upload_url, headers=headers, data=data
)

if response.status_code is not HTTPStatus.OK:
response.raise_for_status()

Expand Down
2 changes: 2 additions & 0 deletions evalai/utils/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,6 @@ class URLS(Enum):
phase_details_using_slug = "/api/challenges/phase/{}/"
get_presigned_url_for_annotation_file = "/api/challenges/phases/{}/get_annotation_file_presigned_url/"
get_presigned_url_for_submission_file = "/api/jobs/phases/{}/get_submission_file_presigned_url/"
finish_upload_for_submission_file = "/api/jobs/phases/{}/submission_file_upload_complete/{}/"
finish_upload_for_annotation_file = "/api/challenges/phases/{}/annotation_file_upload_complete/"
send_submission_message = "/api/jobs/phases/{}/send_submission_message/{}/"
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ python-dateutil==2.7.3
requests==2.20.0
validators==0.12.6
termcolor==1.1.0
tqdm==4.49.0
RishabhJain2018 marked this conversation as resolved.
Show resolved Hide resolved
71 changes: 71 additions & 0 deletions tests/data/challenge_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,3 +421,74 @@
"results": []
}
"""

get_submission_file_presigned_url = """
{
"presigned_urls": [
{
"partNumber": 1,
"url": "https://staging-evalai.s3.amazonaws.com/media/submission_files/submission_403/5fb32be8-1fcf-42d3-9b9c-230e4027d656.json?uploadId=R8AlOJggehmUU87Ar2cDp2tF9p6Rez8iKmmWpwUD0Wi.VumJ6faNO_RFvkD3bJ4NzmokztAncqsd0JsUeslHGtK9m1B533gIQHbuxkwNgx_7F6_YTxtoUpqMHxFtY4Si&partNumber=1&Signature=Bga2IIsvDfJFOAMnZ5G64y0%3D&Expires=1603971221"
}
],
"upload_id": "R8AlOJggehmUU87Ar2cDp2tF9p6Rez8iKmmWpwUD0Wi.VumJ6faNO_RFvkD3bJ4NzmokztAncqsd0JsUeslHGtK9m1B533gIQHbuxkwNgx_7F6_YTxtoUpqMHxFtY4Si",
"submission_pk": 9
}
"""

upload_file_to_s3 = """
{
"parts": "[
{
"ETag": "\\"8e31830e7ed2b537d7fff83ef2525384\\"",
"PartNumber": 1
}
]",
"upload_id": "R8AlOJggehmUU87Ar2cDp2tF9p6Rez8iKmmWpwUD0Wi.VumJ6faNO_RFvkD3bJ4NzmokztAncqsd0JsUeslHGtK9m1B533gIQHbuxkwNgx_7F6_YTxtoUpqMHxFtY4Si"
}
"""

finish_submission_file_upload = """
{
"upload_id": "R8AlOJggehmUU87Ar2cDp2tF9p6Rez8iKmmWpwUD0Wi.VumJ6faNO_RFvkD3bJ4NzmokztAncqsd0JsUeslHGtK9m1B533gIQHbuxkwNgx_7F6_YTxtoUpqMHxFtY4Si",
"submission_pk": 9
}
"""

part_file_upload_to_s3 = """
{
"ETag": "\\"8e31830e7ed2b537d7fff83ef2525384\\""
}
"""

send_submission_message = """
{
"submission_pk": 9,
"phase_pk": 2,
"challenge_pk": 1,
}
"""

get_annotation_file_presigned_url = """
{
"presigned_urls": [
{
"partNumber": 1,
"url": "https://staging-evalai.s3.amazonaws.com/media/test_annotations/8af3d688-f559-49be-ab20-16e02805d228.txt?uploadId=40_2O5xMNg6dBZonEAIXNJdEmcwbAHDQpXzdM9ITvEawkBW96BCSZTcZf4qxNMfzK2ZhkJfjonuG6a4aP40UCY6EK8y66trEMf1AzlOs1VjNrg.T9nAaMPOIavDQLKJw&partNumber=1&Signature=lkHj9JhsvodXNnbKE%2F7y9t3E%3D&Expires=1603971332"
}
],
"upload_id": "40_2O5xMNg6dBZonEAIXNJdEmcwbAHDQpXzdM9ITvEawkBW96BCSZTcZf4qxNMfzK2ZhkJfjonuG6a4aP40UCY6EK8y66trEMf1AzlOs1VjNrg.T9nAaMPOIavDQLKJw"
}
"""

finish_annotation_file_upload = """
{
"upload_id": "40_2O5xMNg6dBZonEAIXNJdEmcwbAHDQpXzdM9ITvEawkBW96BCSZTcZf4qxNMfzK2ZhkJfjonuG6a4aP40UCY6EK8y66trEMf1AzlOs1VjNrg.T9nAaMPOIavDQLKJw",
"challenge_phase_pk": 2
}
"""

annotation_part_file_upload_to_s3 = """
{
"ETag": "\\"30bb2b9819ff69e7891523af9ad66b49\\""
}
"""
123 changes: 123 additions & 0 deletions tests/test_submissions.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,79 @@ def setup(self):
status=200,
)

# To get presigned URLs for submission upload parts
url = "{}{}"
responses.add(
responses.POST,
url.format(API_HOST_URL, URLS.get_presigned_url_for_submission_file.value).format(
"2",
),
json=json.loads(challenge_response.get_submission_file_presigned_url),
status=200,
)

# To finish mulitpart file upload
url = "{}{}"
responses.add(
responses.POST,
url.format(API_HOST_URL, URLS.finish_upload_for_submission_file.value).format(
"2", "9"
),
json=json.loads(challenge_response.finish_submission_file_upload),
status=200,
)

# To get presigned URL for part
presigned_url_response = json.loads(challenge_response.get_submission_file_presigned_url)
part_file_upload_url = presigned_url_response["presigned_urls"][0]["url"]
responses.add(
responses.PUT,
part_file_upload_url,
headers=json.loads(challenge_response.part_file_upload_to_s3),
status=200,
)

# To publish submission message
url = "{}{}"
responses.add(
responses.POST,
url.format(API_HOST_URL, URLS.send_submission_message.value).format(
"2", "9"
)
)

# To get presigned URLs for submission upload parts
url = "{}{}"
responses.add(
responses.GET,
url.format(API_HOST_URL, URLS.get_presigned_url_for_annotation_file.value).format(
"2",
),
json=json.loads(challenge_response.get_annotation_file_presigned_url),
status=200,
)

# To finish mulitpart file upload
url = "{}{}"
responses.add(
responses.POST,
url.format(API_HOST_URL, URLS.finish_upload_for_annotation_file.value).format(
"2"
),
json=json.loads(challenge_response.finish_annotation_file_upload),
status=200,
)

# To get presigned URL for part
presigned_url_response = json.loads(challenge_response.get_annotation_file_presigned_url)
part_file_upload_url = presigned_url_response["presigned_urls"][0]["url"]
responses.add(
responses.PUT,
part_file_upload_url,
headers=json.loads(challenge_response.annotation_part_file_upload_to_s3),
status=200,
)

responses.add_passthru("http+docker://localhost/")

@responses.activate
Expand Down Expand Up @@ -293,6 +366,56 @@ def test_make_submission_for_docker_based_challenge(
)
assert result.exit_code == 0

@responses.activate
def test_make_submission_using_presigned_url(self, request):
expected = (
"Do you want to include the Submission Details? [y/N]: N\n"
"Uploading the file...\n\n"
"Your submission test_file.txt with the id 9 is successfully submitted for evaluation.\n\n"
)
runner = CliRunner()
with runner.isolated_filesystem():
with open("test_file.txt", "w") as f:
f.write("1 2 3 4 5 6")

result = runner.invoke(
challenge,
["1", "phase", "2", "submit", "--file", "test_file.txt", "--large"],
input="N"
)
response = result.output

# Remove progress bar from response
splitted_response = response.split("\n")
splitted_response.pop(2)
response = "\n".join(splitted_response)
assert response == expected

@responses.activate
def test_upload_annotation_using_presigned_url(self, request):
expected = (
"Uploading the file...\n\n"
"The annotation file test_file.txt for challenge phase 2 is successfully uploaded.\n\n"
)
runner = CliRunner()
with runner.isolated_filesystem():
with open("test_file.txt", "w") as f:
f.write("1 2 3 4 5 6")

result = runner.invoke(
challenge,
["1", "phase", "2", "submit", "--file", "test_file.txt", "--large", "--annotation"],
input="N"
)
response = result.output

# Remove progress bar from response
splitted_response = response.split("\n")
splitted_response.pop(1)

response = "\n".join(splitted_response)
assert response == expected


class TestPush(BaseTestClass):
def setup(self):
Expand Down