CU-862jzmnm1: improve load_examples to also include the example SNOMED CT model
CogStack · Jun 14, 2023 · 0e30c84 · 0e30c84
1 parent 267376b
commit 0e30c84
Showing 1 changed file with 26 additions and 12 deletions.
diff --git a/webapp/load_examples.py b/webapp/load_examples.py
@@ -7,7 +7,8 @@
 
 
 def main(port=8000,
-         cdb_tmp_file='/home/cdb.dat',
+         umls_cdb_tmp_file='/home/cdb.dat',
+         snomed_cdb_tmp_file='/home/snomed-cdb.dat',
          vocab_tmp_file='/home/vocab.dat',
          dataset_tmp_file='/home/ds.csv',
          initial_wait=15):
@@ -45,24 +46,32 @@ def main(port=8000,
             if all(codes) and all(json.loads(r.text)['count'] == 0 for r in all_resps):
                 print("Found No Objects. Populating Example: Concept DB, Vocabulary, Dataset and Project...")
                 # download example cdb, vocab, dataset
-                print("Downloading example CDB...")
-                cdb_file = requests.get('https://medcat.rosalind.kcl.ac.uk/media/cdb-medmen-v1.dat')
-                with open(cdb_tmp_file, 'wb') as f:
+                print("Downloading example UMLS CDB...")
+                cdb_file = requests.get('https://cogstack-medcat-example-models.s3.eu-west-2.amazonaws.com/medcat-example-models/cdb-medmen-v1.dat')
+                with open(umls_cdb_tmp_file, 'wb') as f:
                     f.write(cdb_file.content)
+                print("Downloading example SNOMED CT CDB...")
+                snomed_cdb_file = requests.get('https://cogstack-medcat-example-models.s3.eu-west-2.amazonaws.com/medcat-example-models/snomed-cdb-mc-v1.cdb')
+                with open(snomed_cdb_tmp_file, 'wb') as f:
+                    f.write(snomed_cdb_file.content)
                 print("Downloading example vocab...")
-                vocab_file = requests.get('https://medcat.rosalind.kcl.ac.uk/media/vocab.dat')
+                vocab_file = requests.get('https://cogstack-medcat-example-models.s3.eu-west-2.amazonaws.com/medcat-example-models/vocab.dat')
                 with open(vocab_tmp_file, 'wb') as f:
                     f.write(vocab_file.content)
                 print("Downloading example dataset")
-                ds = requests.get('https://raw.githubusercontent.com/CogStack/MedCATtrainer/master/notebook_docs/example_data/psych.csv')
+                ds = requests.get('https://raw.githubusercontent.com/CogStack/MedCATtrainer/master/notebook_docs/example_data/ortho.csv')
                 with open(dataset_tmp_file, 'w') as f:
                     f.write(ds.text)
 
-                ds_dict = pd.read_csv(dataset_tmp_file).loc[:, 'text'].to_dict()
-                create_example_project(URL, headers, cdb_tmp_file, vocab_tmp_file, ds_dict)
+                ds_dict = pd.read_csv(dataset_tmp_file).loc[:, ['name', 'text']].to_dict()
+                create_example_project(URL, headers, umls_cdb_tmp_file, vocab_tmp_file, ds_dict, 'umls_cdb',
+                                       'Example Project - UMLS (Diseases / Symptoms / Findings')
+                create_example_project(URL, headers, snomed_cdb_tmp_file, vocab_tmp_file, ds_dict, 'snomed_cdb',
+                                       'Example Project - SNOMED CT All')
 
                 # clean up temp files
-                os.remove(cdb_tmp_file)
+                os.remove(umls_cdb_tmp_file)
+                os.remove(snomed_cdb_tmp_file)
                 os.remove(vocab_tmp_file)
                 os.remove(dataset_tmp_file)
                 break
@@ -73,10 +82,10 @@ def main(port=8000,
         sleep(5)
 
 
-def create_example_project(url, headers, cdb, vocab, ds_dict):
+def create_example_project(url, headers, cdb, vocab, ds_dict, cdb_name, project_name):
     print('Creating CDB / Vocab / Dataset / Project in the Trainer')
     res_cdb_mk = requests.post(f'{url}concept-dbs/', headers=headers,
-                               data={'name': 'api_upload_cdb', 'use_for_training': True},
+                               data={'name': cdb_name, 'use_for_training': True},
                                files={'cdb_file': open(cdb, 'rb')})
     cdb_id = json.loads(res_cdb_mk.text)['id']
     res_vocab_mk = requests.post(f'{url}vocabs/', headers=headers,
@@ -96,7 +105,7 @@ def create_example_project(url, headers, cdb, vocab, ds_dict):
 
     # Create the project
     payload = {
-        'name': 'Example Annotation Project - UMLS (Diseases / Symptoms / Findings)',
+        'name': project_name,
         'description': 'Example projects using example psychiatric clinical notes from '
                        'https://www.mtsamples.com/',
         'cuis': '',
@@ -112,3 +121,8 @@ def create_example_project(url, headers, cdb, vocab, ds_dict):
 
 if __name__ == '__main__':
     main()
+    # main(port=8001,
+    # umls_cdb_tmp_file='/Users/tom/phd/MedCATtrainer/scratch/cdb.dat',
+    # snomed_cdb_tmp_file='/Users/tom/phd/MedCATtrainer/scratch/snomed-cdb.dat',
+    # vocab_tmp_file='/Users/tom/phd/MedCATtrainer/scratch/vocab.dat',
+    # dataset_tmp_file='/Users/tom/phd/MedCATtrainer/scratch/ds.csv')