Skip to content

Commit

Permalink
CU-862jzmnm1: improve load_examples to also include the example SNOMED CT model
Browse files Browse the repository at this point in the history
  • Loading branch information
tomolopolis committed Jun 14, 2023
1 parent 267376b commit 0e30c84
Showing 1 changed file with 26 additions and 12 deletions.
38 changes: 26 additions & 12 deletions webapp/load_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@


def main(port=8000,
cdb_tmp_file='/home/cdb.dat',
umls_cdb_tmp_file='/home/cdb.dat',
snomed_cdb_tmp_file='/home/snomed-cdb.dat',
vocab_tmp_file='/home/vocab.dat',
dataset_tmp_file='/home/ds.csv',
initial_wait=15):
Expand Down Expand Up @@ -45,24 +46,32 @@ def main(port=8000,
if all(codes) and all(json.loads(r.text)['count'] == 0 for r in all_resps):
print("Found No Objects. Populating Example: Concept DB, Vocabulary, Dataset and Project...")
# download example cdb, vocab, dataset
print("Downloading example CDB...")
cdb_file = requests.get('https://medcat.rosalind.kcl.ac.uk/media/cdb-medmen-v1.dat')
with open(cdb_tmp_file, 'wb') as f:
print("Downloading example UMLS CDB...")
cdb_file = requests.get('https://cogstack-medcat-example-models.s3.eu-west-2.amazonaws.com/medcat-example-models/cdb-medmen-v1.dat')
with open(umls_cdb_tmp_file, 'wb') as f:
f.write(cdb_file.content)
print("Downloading example SNOMED CT CDB...")
snomed_cdb_file = requests.get('https://cogstack-medcat-example-models.s3.eu-west-2.amazonaws.com/medcat-example-models/snomed-cdb-mc-v1.cdb')
with open(snomed_cdb_tmp_file, 'wb') as f:
f.write(snomed_cdb_file.content)
print("Downloading example vocab...")
vocab_file = requests.get('https://medcat.rosalind.kcl.ac.uk/media/vocab.dat')
vocab_file = requests.get('https://cogstack-medcat-example-models.s3.eu-west-2.amazonaws.com/medcat-example-models/vocab.dat')
with open(vocab_tmp_file, 'wb') as f:
f.write(vocab_file.content)
print("Downloading example dataset")
ds = requests.get('https://raw.githubusercontent.com/CogStack/MedCATtrainer/master/notebook_docs/example_data/psych.csv')
ds = requests.get('https://raw.githubusercontent.com/CogStack/MedCATtrainer/master/notebook_docs/example_data/ortho.csv')
with open(dataset_tmp_file, 'w') as f:
f.write(ds.text)

ds_dict = pd.read_csv(dataset_tmp_file).loc[:, 'text'].to_dict()
create_example_project(URL, headers, cdb_tmp_file, vocab_tmp_file, ds_dict)
ds_dict = pd.read_csv(dataset_tmp_file).loc[:, ['name', 'text']].to_dict()
create_example_project(URL, headers, umls_cdb_tmp_file, vocab_tmp_file, ds_dict, 'umls_cdb',
'Example Project - UMLS (Diseases / Symptoms / Findings')
create_example_project(URL, headers, snomed_cdb_tmp_file, vocab_tmp_file, ds_dict, 'snomed_cdb',
'Example Project - SNOMED CT All')

# clean up temp files
os.remove(cdb_tmp_file)
os.remove(umls_cdb_tmp_file)
os.remove(snomed_cdb_tmp_file)
os.remove(vocab_tmp_file)
os.remove(dataset_tmp_file)
break
Expand All @@ -73,10 +82,10 @@ def main(port=8000,
sleep(5)


def create_example_project(url, headers, cdb, vocab, ds_dict):
def create_example_project(url, headers, cdb, vocab, ds_dict, cdb_name, project_name):
print('Creating CDB / Vocab / Dataset / Project in the Trainer')
res_cdb_mk = requests.post(f'{url}concept-dbs/', headers=headers,
data={'name': 'api_upload_cdb', 'use_for_training': True},
data={'name': cdb_name, 'use_for_training': True},
files={'cdb_file': open(cdb, 'rb')})
cdb_id = json.loads(res_cdb_mk.text)['id']
res_vocab_mk = requests.post(f'{url}vocabs/', headers=headers,
Expand All @@ -96,7 +105,7 @@ def create_example_project(url, headers, cdb, vocab, ds_dict):

# Create the project
payload = {
'name': 'Example Annotation Project - UMLS (Diseases / Symptoms / Findings)',
'name': project_name,
'description': 'Example projects using example psychiatric clinical notes from '
'https://www.mtsamples.com/',
'cuis': '',
Expand All @@ -112,3 +121,8 @@ def create_example_project(url, headers, cdb, vocab, ds_dict):

if __name__ == '__main__':
main()
# main(port=8001,
# umls_cdb_tmp_file='/Users/tom/phd/MedCATtrainer/scratch/cdb.dat',
# snomed_cdb_tmp_file='/Users/tom/phd/MedCATtrainer/scratch/snomed-cdb.dat',
# vocab_tmp_file='/Users/tom/phd/MedCATtrainer/scratch/vocab.dat',
# dataset_tmp_file='/Users/tom/phd/MedCATtrainer/scratch/ds.csv')

0 comments on commit 0e30c84

Please sign in to comment.