Skip to content

Commit

Permalink
fix: prevent blocking of non-browser clients accessing ML4K API
Browse files Browse the repository at this point in the history
Use the edge function to make requests to the relevant API
endpoints look like they come from curl, regardless of which
Python or App Inventor user-agent they actually provide. Cloudflare
appears to accept this.

Signed-off-by: Dale Lane <dale.lane@uk.ibm.com>
  • Loading branch information
dalelane committed Sep 15, 2022
1 parent 40b2ccd commit 0b3fe1a
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 31 deletions.
22 changes: 6 additions & 16 deletions cis/edge-functions.js
Original file line number Diff line number Diff line change
Expand Up @@ -89,28 +89,18 @@ function forwardRequest(request, targetHost) {
// newRequest.headers.set('X-Forwarded-Host', request.url.hostname);
newRequest.headers.set('host', targetHost);

if (!includesUserAgent(request) && isUrlExemptFromBrowserIntegrityCheck(request)) {
// the request didn't include a user-agent, but it is trying to
// access a resource that shouldn't be blocked for that reason,
// so we inject a dummy user-agent so that it can get through
// the Cloudflare layer in front of Code Engine
newRequest.headers.set('user-agent', 'MachineLearningForKids');
if (isUrlExemptFromBrowserIntegrityCheck(request)) {
// URLs for some Scratch APIs are commonly accessed by non-browser
// clients (Python/Tensorflow/AppInventor/etc.), so for these we
// set a user-agent that pretends to be curl, so that the Cloudflare
// layer in front of Code Engine is less likely to block them
newRequest.headers.set('user-agent', 'curl/7.79.1');
}

return fetch(newUrl, newRequest);
}


/**
 * Reports whether the given request carries a User-Agent header.
 *
 * @param {object} request - object whose `headers` exposes a `has()` method
 * @returns {boolean} the result of `request.headers.has('User-Agent')`, or
 *          true when that lookup throws (we assume a user-agent is present
 *          if we are unable to check)
 */
function includesUserAgent(request) {
    // default used when the header lookup cannot be performed
    let userAgentPresent = true;
    try {
        userAgentPresent = request.headers.has('User-Agent');
    }
    catch (err) {
        // we couldn't check - keep the assumption that there is one
    }
    return userAgentPresent;
}

function isUrlExemptFromBrowserIntegrityCheck(request) {
try {
return BIC_EXEMPT_URLS.some((urlRegexTest) => urlRegexTest.test(request.url));
Expand Down
17 changes: 2 additions & 15 deletions resources/mlforkids.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def __init__(self, scratchkey: str):
print("MLFORKIDS: Downloading information about your machine learning project")
self.scratchkey = scratchkey
try:
apiurl = self.__switchToTemporarySite("https://machinelearningforkids.co.uk/api/scratch/" + scratchkey + "/train")
apiurl = "https://machinelearningforkids.co.uk/api/scratch/" + scratchkey + "/train"
with urllib.request.urlopen(apiurl) as url:
self.__downloaded_training_images_list = json.loads(url.read().decode())
except urllib.error.HTTPError:
Expand All @@ -55,7 +55,7 @@ def __get_training_images_generator(self):
projectcachedir = str(os.path.expanduser(os.path.join(cachedir, cachelocation)))
for trainingitem in self.__downloaded_training_images_list:
try:
tf.keras.utils.get_file(origin=self.__switchToTemporarySite(trainingitem["imageurl"]),
tf.keras.utils.get_file(origin=trainingitem["imageurl"],
cache_dir=cachedir,
cache_subdir=os.path.join(cachelocation, trainingitem["label"]),
fname=self.__get_fname(trainingitem))
Expand Down Expand Up @@ -115,19 +115,6 @@ def __train_model(self, trainingimagesdata):
print("MLFORKIDS: Model training complete")


# Cloudflare is currently blocking access to the Machine Learning for Kids API
# from non-browser user agents
# While I raise this with them to get this unblocked, switching to this
# temporary URL should avoid the problem
#
# TODO: remove this function as soon as Cloudflare have
# stopped breaking Python apps
#
def __switchToTemporarySite(self, url):
    """Return *url* with the main site's Scratch API prefix swapped out.

    Every occurrence of the machinelearningforkids.co.uk Scratch API
    prefix is replaced with the equivalent prefix on the Code Engine
    host. URLs that do not contain the prefix are returned unchanged.
    """
    main_site_prefix = "https://machinelearningforkids.co.uk/api/scratch/"
    temporary_site_prefix = "https://mlforkids-api.j8clybxvjr0.us-south.codeengine.appdomain.cloud/api/scratch/"
    return url.replace(main_site_prefix, temporary_site_prefix)


#
# public methods
#
Expand Down

0 comments on commit 0b3fe1a

Please sign in to comment.