# FLAIR on DLaaS

In [1]:
# Client for reading/writing from/to COS:
import ibm_boto3
from ibm_botocore.client import Config

# Client for interacting with WML service:
from watson_machine_learning_client import WatsonMachineLearningAPIClient

# For operating system commands:
import os
# For json handling:
import json
# For the creation of (random) unique string identifiers:
from uuid import uuid4
# For handling numerical arrays:
import numpy as np


Import cos credentials

In [2]:
with open("cos_credentials.json") as file:
    cos_credentials = json.load(file)

print(json.dumps(cos_credentials, indent=2))


{
  "apikey": "4iruOvDK4c9rQTR9nKk9tsiH6m5FC5oGchehD48xyEdc",
  "cos_hmac_keys": {
    "access_key_id": "1babb9ff7aba454b9f0bef5aefbf3355",
    "secret_access_key": "2e6a8a7376e839e05af2f276be5e4bfbd7fa2d560aea75e5"
  },
  "endpoints": "https://control.cloud-object-storage.cloud.ibm.com/v2/endpoints",
  "iam_apikey_description": "Auto-generated for key 1babb9ff-7aba-454b-9f0b-ef5aefbf3355",
  "iam_apikey_name": "HBCP-COS-credentials",
  "iam_role_crn": "crn:v1:bluemix:public:iam::::serviceRole:Writer",
  "iam_serviceid_crn": "crn:v1:bluemix:public:iam-identity::a/57dd42208dce4fd88998d8976d0edfa5::serviceid:ServiceId-6cffe870-c806-4e4f-be09-c4ab70ca02a2",
  "resource_instance_id": "crn:v1:bluemix:public:cloud-object-storage:global:a/57dd42208dce4fd88998d8976d0edfa5:cd94ed59-e442-4d10-b18e-c70ab33b71c3::"
}


Define COS service and authorization endpoints:

In [3]:

service_endpoint = 'https://s3-api.us-geo.objectstorage.softlayer.net'
auth_endpoint = 'https://iam.bluemix.net/oidc/token'

Create a Boto client instance:

In [4]:
cos = ibm_boto3.resource('s3',
                         ibm_api_key_id=cos_credentials['apikey'],
                         ibm_service_instance_id=cos_credentials['resource_instance_id'],
                         ibm_auth_endpoint=auth_endpoint,
                         config=Config(signature_version='oauth'),
                         endpoint_url=service_endpoint)

Load the WML service credentials from wml_credentials.json

In [5]:
with open("wml_credentials.json") as file:
    wml_credentials = json.load(file)

    
print(json.dumps(wml_credentials, indent=2))

{
  "apikey": "xpIzCakY0YpYaxJ0rAT2MqGXWXHGz6UL1q4FxqZ93nXp",
  "iam_apikey_description": "Auto-generated for key b316c971-2643-47d0-807a-cdb5205bb7c4",
  "iam_apikey_name": "HBCP-wml-credentials",
  "iam_role_crn": "crn:v1:bluemix:public:iam::::serviceRole:Writer",
  "iam_serviceid_crn": "crn:v1:bluemix:public:iam-identity::a/57dd42208dce4fd88998d8976d0edfa5::serviceid:ServiceId-ac8ea6a2-f280-4e3b-910b-aed6a06b641f",
  "instance_id": "a25b50e4-e423-4bbe-a626-37f4544b6efd",
  "url": "https://us-south.ml.cloud.ibm.com"
}


Create a WML client instance:

In [6]:
wml = WatsonMachineLearningAPIClient(wml_credentials)


Watson Machine Learning retrieves training/testing data from Cloud Object Store (COS). Therefore we need to upload the data to COS before we can start the model training, if we have not already.

If the buckets and training data have already been set up, skip the bucket creation.

In [7]:
### skip this step if bucket are already created
my_name = 'hbcp'
bucket_uid = str(uuid4())
buckets = [my_name + '-training-data-' + bucket_uid, my_name + '-training-results-' + bucket_uid]

for bucket in buckets:
    if not cos.Bucket(bucket) in cos.buckets.all():
        print('Creating bucket "{}"...'.format(bucket))
        try:
            cos.create_bucket(Bucket=bucket)
        except ibm_boto3.exceptions.ibm_botocore.client.ClientError as e:
            print('Error: {}.'.format(e.response['Error']['Message']))



Creating bucket "hbcp-training-data-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c"...
Creating bucket "hbcp-training-results-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c"...


In [7]:
# if buckets already exist with this prefix, we will use them
my_name = 'hbcp'
buckets = []
for bucket in cos.buckets.all():
    if bucket.name.startswith(my_name + '-training-'):
        buckets.append(bucket.name)

In [8]:
list(cos.buckets.all())

[s3.Bucket(name='hbcp-training-data-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c'),
 s3.Bucket(name='hbcp-training-results-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c')]

In [9]:
### Skip if the data has already been uploaded
#Next we upload those four files into the training/testing data bucket:
# Note that this data is in parent directory
for filename in ['train.csv',
              'test.csv']:
    print('Uploading data {} to {}...'.format(filename, buckets[0]))
    with open('../' + filename, 'rb') as file:
        cos.Bucket(buckets[0]).upload_fileobj(file, filename)
        print('{} is uploaded.'.format(filename))

Uploading data train.csv to hbcp-training-data-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c...
train.csv is uploaded.
Uploading data test.csv to hbcp-training-data-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c...
test.csv is uploaded.


In [10]:
#Let's now take a look at the content of our training/testing data bucket:


print(buckets[0])
for obj in cos.Bucket(buckets[0]).objects.all():
    print("  File: {}, {:4.2f}kB".format(obj.key, obj.size/1024))

hbcp-training-data-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c
  File: _wml_checkpoints/, 0.00kB
  File: best-model.pt, 435795.67kB
  File: test.csv, 1769.08kB
  File: testfile/Mohiuddin-2007.pdf.txt, 32.09kB
  File: testfile/Morgan_1996_Reaching midlife and older smokers tailored.pdf.txt, 37.30kB
  File: testfile/Mueller 2012.pdf.txt, 32.06kB
  File: testfile/Murray 2008.pdf.txt, 44.34kB
  File: testfile/Myles 1996.pdf.txt, 0.00kB
  File: testfile/Nagle 2005.pdf.txt, 48.99kB
  File: testfile/Nevid 1997.pdf.txt, 86.10kB
  File: testfile/Ng 2010 DONE IN PILOT.pdf.txt, 46.00kB
  File: testfile/OXCHECK 1994.pdf.txt, 45.60kB
  File: testfile/Ockene 1991.pdf.txt, 40.73kB
  File: testfile/Okuyemi 2007.pdf.txt, 32.51kB
  File: testfile/Okuyemi 2013 (c) primary paper.pdf.txt, 52.80kB
  File: testfile/Orleans 1991.pdf.txt, 57.51kB
  File: testfile/Orleans 1998.pdf.txt, 43.45kB
  File: testfile/Osinubi 2003.pdf.txt, 37.14kB
  File: testfile/Ossip-Klein 1991.pdf.txt, 47.36kB
  File: testfile/Ostroff 201

##  Upload model training artifacts to WML

There are three different artifacts that need to be uploaded to WML for model training:

the actual code for model training (in a .zip file)
model_definition_metadata: a dictionary specifying the framework, runtime and command for launching the training
training_configuration_metadata: a dictionary specifying the compute configuration and input/output buckets in COS
We'll use the WML client instance that we created above for that purpose. Note: the complete watson-machine-learning-client API documentation can be found here: http://wml-api-pyclient.mybluemix.net/.



In [11]:
# Upload the train.py file the logic for training
model_filename='hbcp-train-model.zip'

In [12]:
#upload the model definition metadata and fill it according to what you need here:

with open("model_definition_metadata.json") as file:
    model_definition_metadata = json.load(file)
print(json.dumps(model_definition_metadata, indent=2))



{
  "name": "hbcp-flair",
  "description": "Train Flair NER model for HBCP information extraction",
  "author_name": "hbcp",
  "framework_name": "pytorch",
  "framework_version": "1.0",
  "runtime_name": "python",
  "runtime_version": "3.5",
  "command": "sh hbcpFlairTrain.sh"
}


In [13]:
# Upload and configure training configuration metadata

with open("training_configuration_metadata.json") as file:
    training_configuration_metadata = json.load(file)
    
#specify connection with the bucket

connection = {  "endpoint_url": service_endpoint,
                "aws_access_key_id": cos_credentials['cos_hmac_keys']['access_key_id'],
                "aws_secret_access_key": cos_credentials['cos_hmac_keys']['secret_access_key']
            }


training_configuration_metadata['training_data']['connection'] = connection
training_configuration_metadata['training_data']['source'] = {"bucket": buckets[0]}
training_configuration_metadata['training_results']['connection'] = connection
training_configuration_metadata['training_results']['target'] = {"bucket": buckets[1]}

print(json.dumps(training_configuration_metadata, indent=2))


{
  "name": "hbcp-flair-train",
  "author_name": "hbcp",
  "description": "Train Flair NER model for HBCP information extraction",
  "compute_configuration_name": {
    "name": "k80"
  },
  "training_data": {
    "connection": {
      "endpoint_url": "https://s3-api.us-geo.objectstorage.softlayer.net",
      "aws_access_key_id": "1babb9ff7aba454b9f0bef5aefbf3355",
      "aws_secret_access_key": "2e6a8a7376e839e05af2f276be5e4bfbd7fa2d560aea75e5"
    },
    "source": {
      "bucket": "hbcp-training-data-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c"
    },
    "type": "s3"
  },
  "training_results": {
    "connection": {
      "endpoint_url": "https://s3-api.us-geo.objectstorage.softlayer.net",
      "aws_access_key_id": "1babb9ff7aba454b9f0bef5aefbf3355",
      "aws_secret_access_key": "2e6a8a7376e839e05af2f276be5e4bfbd7fa2d560aea75e5"
    },
    "target": {
      "bucket": "hbcp-training-results-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c"
    },
    "type": "s3"
  }
}


In [14]:
#Now post the model code / definition metadata and obtain the definition_uid which we will need later:
definition_details = wml.repository.store_definition(model_filename, model_definition_metadata)
definition_uid = wml.repository.get_definition_uid(definition_details)

print('model definition_uid: ' + definition_uid)

model definition_uid: 526554a9-e702-4248-a46d-6a3edc70c857


## Train the model
Now we have all the ingredients to launch the model training on the WML server side.

Note: to run the training in the background, we set the optional parameter asynchronous=True (its default value). To run the training in active mode, set asynchronous=False.

We also extract the training_run_guid_async which we will need to poll the status of the training run.


In [15]:

training_run_details = wml.training.run(definition_uid, training_configuration_metadata, asynchronous=True)
training_run_guid_async = wml.training.get_run_uid(training_run_details)
print('training_run_guid_async: ' + training_run_guid_async)


In [22]:
#let's look at the status

status = wml.training.get_status(training_run_guid_async)
print(json.dumps(status, indent=2))

{
  "state": "completed",
  "submitted_at": "2019-09-04T16:50:57.930Z",
  "running_at": "2019-09-04T16:51:06.554Z",
  "message": "training-DwLIKz5ZR: ",
  "metrics": [],
  "current_at": "2019-09-05T16:38:33.890Z"
}


Or to monitor the training log (note: an update of the log is available every 30 seconds):

In [37]:
wml.training.monitor_logs(training_run_guid_async)



####################################################

Log monitor started for training run: model-n4pj0bkb

####################################################


training-pum4n9FWg: Training with training/test data at:

training-pum4n9FWg:   DATA_DIR: /mnt/data/hbcp-training-data-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c

training-pum4n9FWg:   MODEL_DIR: /job/model-code

training-pum4n9FWg:   TRAINING_COMMAND: sh DLaaS-Flair-HBCP.sh

training-pum4n9FWg:   LOG_DIR: /job/logs

training-pum4n9FWg:   RESULT_DIR: /mnt/results/hbcp-training-results-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c/training-pum4n9FWg

training-pum4n9FWg:   NUM_LEARNERS: 1

training-pum4n9FWg: Mon Sep  2 16:29:35 UTC 2019: Running PyTorch job

training-pum4n9FWg: Collecting flair

training-pum4n9FWg:   Downloading https://files.pythonhosted.org/packages/77/e3/389c2dd8d0e6ca1d8fad11aa4940e8df6909a26a5d954c0eff01f0d78b57/flair-0.4.3-py3-none-any.whl (180kB)


training-pum4n9FWg: Collecting pytorch-transformers>=1.1.0 (from fla

training-pum4n9FWg:   Building wheel for smart-open (setup.py): finished with status 'done'

training-pum4n9FWg:   Stored in directory: /home/gpuuser/.cache/pip/wheels/5f/ea/fb/5b1a947b369724063b2617011f1540c44eb00e28c3d2ca8692

training-pum4n9FWg: Successfully built sklearn langdetect tabulate regex sqlitedict mpld3 segtok smart-open

training-pum4n9FWg: awscli 1.16.220 has requirement botocore==1.12.210, but you'll have botocore 1.12.220 which is incompatible.

training-pum4n9FWg: awscli 1.16.220 has requirement colorama<=0.3.9,>=0.2.5, but you'll have colorama 0.4.1 which is incompatible.

training-pum4n9FWg: Installing collected packages: regex, botocore, boto3, sentencepiece, pytorch-transformers, sklearn, langdetect, smart-open, gensim, bpemb, deprecated, tabulate, pymongo, hyperopt, sqlitedict, ipython, mpld3, segtok, flair

training-pum4n9FWg:   Found existing installation: botocore 1.12.210

training-pum4n9FWg:     Uninstalling botocore-1.12.210:

training-pum4n9FWg:       Suc

KeyboardInterrupt: 

Download the model (currently using `best-model.pt`) from COS.  This can be done from the web interface or with the following code.

In [57]:
# model to download and upload
downloaded_model = 'best-model.pt'

# TODO FIXME
#obj = cos.Object(buckets[1], training_run_guid_async +'/training-status.json')
obj = cos.Object(buckets[1], 'training-pum4n9FWg' +'/training-status.json')  # specify training run if necessary
with open(downloaded_model, 'wb') as data:
    obj.download_fileobj(data)
    print('{} is downloaded.'.format(downloaded_model))




ClientError: An error occurred (404) when calling the HeadObject operation: Not Found

In [40]:
# model to download and upload
downloaded_model = 'best-model.pt'

# add model to training bucket

print('Uploading model {} to {}...'.format(downloaded_model, buckets[0]))
with open(downloaded_model, 'rb') as file:
    cos.Bucket(buckets[0]).upload_fileobj(file, downloaded_model)
    print('{} is uploaded.'.format(downloaded_model))

Uploading model best-model.pt to hbcp-training-data-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c...
best-model.pt is uploaded.


We need to add the files to preform the FLAIR NER prection on.

In [48]:
# upload all the text files that Flair does NER precition on. These should be local under 'testfile'
dirname = "testfile/"
for filename in os.listdir(dirname):
    print('Uploading data {} to {}...'.format(dirname + filename, buckets[0]))
    with open(dirname + filename, 'rb') as file:
        cos.Bucket(buckets[0]).upload_fileobj(file, dirname + filename)
        print('{} is uploaded.'.format(dirname + filename))

Uploading data testfile/Rimer 1994.pdf.txt to hbcp-training-data-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c...
testfile/Rimer 1994.pdf.txt is uploaded.
Uploading data testfile/Solomon 2005.pdf.txt to hbcp-training-data-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c...
testfile/Solomon 2005.pdf.txt is uploaded.
Uploading data testfile/Smith 2013.pdf.txt to hbcp-training-data-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c...
testfile/Smith 2013.pdf.txt is uploaded.
Uploading data testfile/Reitzel-2011.pdf.txt to hbcp-training-data-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c...
testfile/Reitzel-2011.pdf.txt is uploaded.
Uploading data testfile/Stotts 2002.pdf.txt to hbcp-training-data-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c...
testfile/Stotts 2002.pdf.txt is uploaded.
Uploading data testfile/Pbert 2004 (c)primary paper.pdf.txt to hbcp-training-data-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c...
testfile/Pbert 2004 (c)primary paper.pdf.txt is uploaded.
Uploading data testfile/Romand-2005.pdf.txt to hbcp-training-data-ae0f81a5-4e2a-

testfile/Russell_1983_Effect of nicotine chewing gum as.pdf.txt is uploaded.
Uploading data testfile/Sorensen 2007.pdf.txt to hbcp-training-data-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c...
testfile/Sorensen 2007.pdf.txt is uploaded.
Uploading data testfile/Reid 2008 (c) primary paper.pdf.txt to hbcp-training-data-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c...
testfile/Reid 2008 (c) primary paper.pdf.txt is uploaded.
Uploading data testfile/Orleans 1991.pdf.txt to hbcp-training-data-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c...
testfile/Orleans 1991.pdf.txt is uploaded.
Uploading data testfile/Rodondi 2012 primary paper.pdf.txt to hbcp-training-data-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c...
testfile/Rodondi 2012 primary paper.pdf.txt is uploaded.
Uploading data testfile/Russell_1979_Effect of general practitioners advice against.pdf.txt to hbcp-training-data-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c...
testfile/Russell_1979_Effect of general practitioners advice against.pdf.txt is uploaded.
Uploading data test

testfile/Zhu 1996.pdf.txt is uploaded.
Uploading data testfile/Ng 2010 DONE IN PILOT.pdf.txt to hbcp-training-data-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c...
testfile/Ng 2010 DONE IN PILOT.pdf.txt is uploaded.
Uploading data testfile/Prochaska 2001.pdf.txt to hbcp-training-data-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c...
testfile/Prochaska 2001.pdf.txt is uploaded.
Uploading data testfile/Parkes 2008 primary.pdf.txt to hbcp-training-data-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c...
testfile/Parkes 2008 primary.pdf.txt is uploaded.
Uploading data testfile/Solomon 2000.pdf.txt to hbcp-training-data-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c...
testfile/Solomon 2000.pdf.txt is uploaded.
Uploading data testfile/Rose_1992_Randomised controlled trial of anti smoking.pdf.txt to hbcp-training-data-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c...
testfile/Rose_1992_Randomised controlled trial of anti smoking.pdf.txt is uploaded.
Uploading data testfile/Mohiuddin-2007.pdf.txt to hbcp-training-data-ae0f81a5-4e2a-4d88-ae92-

We need to run a separate script for predicting labels on the test data.  There might be a better way of doing this, but for now we'll do something similar to the training phase:
1. Define and store model definition (JSON).
2. Define and traing configuration (JSON).
3. Upload testing code (ZIP).
4. Run script ('train').

**NOTE: We overwrite the model and training metadata variable.**

In [44]:
# check model JSON
with open("predict_model_definition_metadata.json") as file:
    model_definition_metadata = json.load(file)
print(json.dumps(model_definition_metadata, indent=2))

{
  "name": "HBCPpredict",
  "description": "Run Flair NER prediction for HBCP information extraction",
  "author_name": "HBCP",
  "framework_name": "pytorch",
  "framework_version": "1.0",
  "runtime_name": "python",
  "runtime_version": "3.5",
  "command": "sh hbcpFlairPrediction.sh"
}


In [46]:
# check 'training' configuration
# training configuration is the same except we need to use the shell script for prediction instead
training_configuration_metadata["command"] = model_definition_metadata["command"]
print(json.dumps(training_configuration_metadata, indent=2))

{
  "name": "FLAIR-HBCP",
  "author_name": "francesca",
  "description": "training on Abby parsed sentences without the dummy sentences of the tables!",
  "compute_configuration_name": {
    "name": "k80"
  },
  "training_data": {
    "connection": {
      "endpoint_url": "https://s3-api.us-geo.objectstorage.softlayer.net",
      "aws_access_key_id": "1babb9ff7aba454b9f0bef5aefbf3355",
      "aws_secret_access_key": "2e6a8a7376e839e05af2f276be5e4bfbd7fa2d560aea75e5"
    },
    "source": {
      "bucket": "hbcp-training-data-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c"
    },
    "type": "s3"
  },
  "training_results": {
    "connection": {
      "endpoint_url": "https://s3-api.us-geo.objectstorage.softlayer.net",
      "aws_access_key_id": "1babb9ff7aba454b9f0bef5aefbf3355",
      "aws_secret_access_key": "2e6a8a7376e839e05af2f276be5e4bfbd7fa2d560aea75e5"
    },
    "target": {
      "bucket": "hbcp-training-results-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c"
    },
    "type": "s3"
  },
  "command

In [52]:
# upload 'training' code
predict_zip = "flairDLaaSpredict.zip"

#Now post the model code / definition metadata and obtain the definition_uid which we will need later:
definition_details = wml.repository.store_definition(predict_zip, model_definition_metadata)
definition_uid = wml.repository.get_definition_uid(definition_details)

print('model definition_uid: ' + definition_uid)

model definition_uid: 73e184dc-5265-45a8-adc7-547923332f59


In [53]:
# now run this 'training'

training_run_details = wml.training.run(definition_uid, training_configuration_metadata, asynchronous=True)
training_run_guid_async = wml.training.get_run_uid(training_run_details)
print('training_run_guid_async: ' + training_run_guid_async)

training_run_guid_async: model-zpdxnisf


In [60]:
#let's look at the status

status = wml.training.get_status(training_run_guid_async)
print(json.dumps(status, indent=2))

{
  "state": "completed",
  "finished_at": "2019-09-04T10:22:10.373Z",
  "submitted_at": "2019-09-04T09:53:25.123Z",
  "running_at": "2019-09-04T09:58:00.986Z",
  "message": "training-AIMFagcWg: ",
  "metrics": [],
  "current_at": "2019-09-04T10:23:01.248Z"
}


In [81]:
# download COS storage for a particular training run
training_id = status['message'].split(':')[0]
print(buckets[1])
#obj = cos.Object(buckets[1], training_run_guid_async +'/training-status.json')
#training_id = json.dumps(obj.get()['Body'].read().decode('utf-8'))[0]
print("Model location:" + training_id)
for obj in cos.Bucket(buckets[1]).objects.all():
    if obj.key.startswith(training_id) and obj.key.endswith('json'):
        print("  Downloading {}...".format(obj.key))
        cos.Object(obj.bucket_name, obj.key).download_file('/tmp/' + obj.key)
        print("  .Downloaded {}.".format(obj.key))

        

hbcp-training-results-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c
Model location:training-AIMFagcWg
  Downloading training-AIMFagcWg/Mohiuddin-2007.pdf.txt.json...
  .Downloaded training-AIMFagcWg/Mohiuddin-2007.pdf.txt.json.
  Downloading training-AIMFagcWg/Morgan_1996_Reaching midlife and older smokers tailored.pdf.txt.json...
  .Downloaded training-AIMFagcWg/Morgan_1996_Reaching midlife and older smokers tailored.pdf.txt.json.
  Downloading training-AIMFagcWg/Mueller 2012.pdf.txt.json...
  .Downloaded training-AIMFagcWg/Mueller 2012.pdf.txt.json.
  Downloading training-AIMFagcWg/Murray 2008.pdf.txt.json...
  .Downloaded training-AIMFagcWg/Murray 2008.pdf.txt.json.
  Downloading training-AIMFagcWg/Myles 1996.pdf.txt.json...
  .Downloaded training-AIMFagcWg/Myles 1996.pdf.txt.json.
  Downloading training-AIMFagcWg/Nagle 2005.pdf.txt.json...
  .Downloaded training-AIMFagcWg/Nagle 2005.pdf.txt.json.
  Downloading training-AIMFagcWg/Nevid 1997.pdf.txt.json...
  .Downloaded training-AIMFagcWg/Ne

  .Downloaded training-AIMFagcWg/Senore_1998_Predictors of smoking cessation following physician.pdf.txt.json.
  Downloading training-AIMFagcWg/Seversen_1997_Reducing maternal smoking and relapse long.pdf.txt.json...
  .Downloaded training-AIMFagcWg/Seversen_1997_Reducing maternal smoking and relapse long.pdf.txt.json.
  Downloading training-AIMFagcWg/Siddiqi 2013 primary paper.pdf.txt.json...
  .Downloaded training-AIMFagcWg/Siddiqi 2013 primary paper.pdf.txt.json.
  Downloading training-AIMFagcWg/Simmons 2013.pdf.txt.json...
  .Downloaded training-AIMFagcWg/Simmons 2013.pdf.txt.json.
  Downloading training-AIMFagcWg/Simmons_2007_Secondary Smoking Prevention in a University_10.10370278-6133.26.3.268.pdf.txt.json...
  .Downloaded training-AIMFagcWg/Simmons_2007_Secondary Smoking Prevention in a University_10.10370278-6133.26.3.268.pdf.txt.json.
  Downloading training-AIMFagcWg/Simon 2003 DONE IN PILOT.pdf.txt.json...
  .Downloaded training-AIMFagcWg/Simon 2003 DONE IN PILOT.pdf.txt.jso

In [84]:
# clean COS storage for a particular training run
training_id = 'training-AIMFagcWg' # specify training id to remove
print(buckets[1])
#obj = cos.Object(buckets[1], training_run_guid_async +'/training-status.json')
#training_id = json.dumps(obj.get()['Body'].read().decode('utf-8'))[0]
print("Model to remove:" + training_id)
for obj in cos.Bucket(buckets[1]).objects.all():
    if obj.key.startswith(training_id):
        print("  Deleting {}...".format(obj.key))
        obj.delete()
        print("  .Deleted {}.".format(obj.key))

hbcp-training-results-ae0f81a5-4e2a-4d88-ae92-28aff36bb51c
Model to remove:training-AIMFagcWg
  Deleting training-AIMFagcWg/Mohiuddin-2007.pdf.txt.json...
  .Deleted training-AIMFagcWg/Mohiuddin-2007.pdf.txt.json.
  Deleting training-AIMFagcWg/Morgan_1996_Reaching midlife and older smokers tailored.pdf.txt.json...
  .Deleted training-AIMFagcWg/Morgan_1996_Reaching midlife and older smokers tailored.pdf.txt.json.
  Deleting training-AIMFagcWg/Mueller 2012.pdf.txt.json...
  .Deleted training-AIMFagcWg/Mueller 2012.pdf.txt.json.
  Deleting training-AIMFagcWg/Murray 2008.pdf.txt.json...
  .Deleted training-AIMFagcWg/Murray 2008.pdf.txt.json.
  Deleting training-AIMFagcWg/Myles 1996.pdf.txt.json...
  .Deleted training-AIMFagcWg/Myles 1996.pdf.txt.json.
  Deleting training-AIMFagcWg/Nagle 2005.pdf.txt.json...
  .Deleted training-AIMFagcWg/Nagle 2005.pdf.txt.json.
  Deleting training-AIMFagcWg/Nevid 1997.pdf.txt.json...
  .Deleted training-AIMFagcWg/Nevid 1997.pdf.txt.json.
  Deleting trainin

  .Deleted training-AIMFagcWg/Siddiqi 2013 primary paper.pdf.txt.json.
  Deleting training-AIMFagcWg/Simmons 2013.pdf.txt.json...
  .Deleted training-AIMFagcWg/Simmons 2013.pdf.txt.json.
  Deleting training-AIMFagcWg/Simmons_2007_Secondary Smoking Prevention in a University_10.10370278-6133.26.3.268.pdf.txt.json...
  .Deleted training-AIMFagcWg/Simmons_2007_Secondary Smoking Prevention in a University_10.10370278-6133.26.3.268.pdf.txt.json.
  Deleting training-AIMFagcWg/Simon 2003 DONE IN PILOT.pdf.txt.json...
  .Deleted training-AIMFagcWg/Simon 2003 DONE IN PILOT.pdf.txt.json.
  Deleting training-AIMFagcWg/Sims 2013.pdf.txt.json...
  .Deleted training-AIMFagcWg/Sims 2013.pdf.txt.json.
  Deleting training-AIMFagcWg/Slama_1990_The effectiveness of two smoking cessation.pdf.txt.json...
  .Deleted training-AIMFagcWg/Slama_1990_The effectiveness of two smoking cessation.pdf.txt.json.
  Deleting training-AIMFagcWg/Smith 2001 primary paper.pdf.txt.json...
  .Deleted training-AIMFagcWg/Smith 

Save model (wait and see if this is necessary):

In [26]:

saved_model_name = "flair-model-pumedambeddings-with-test-as-dev"
saved_model_details = wml.repository.store_model(training_run_guid_async, {"name": saved_model_name})
model_uid = wml.repository.get_model_uid(saved_model_details)

print("Saved model uid: " + model_uid)



KeyError: 'secret_access_key'

