From 8df734cea10600711db103ab25b419004b3c052e Mon Sep 17 00:00:00 2001 From: Tom Searle Date: Mon, 20 Oct 2025 17:10:04 +0100 Subject: [PATCH] CU-869awpaf3: (chore): medcat-trainer: remove cdb_utils, fix vestigial medcat-v2 migration issues --- medcat-trainer/webapp/api/api/medcat_utils.py | 68 ------------------- medcat-trainer/webapp/api/api/metrics.py | 2 +- medcat-trainer/webapp/api/api/model_cache.py | 1 + medcat-trainer/webapp/api/api/views.py | 16 ++--- 4 files changed, 10 insertions(+), 77 deletions(-) delete mode 100644 medcat-trainer/webapp/api/api/medcat_utils.py diff --git a/medcat-trainer/webapp/api/api/medcat_utils.py b/medcat-trainer/webapp/api/api/medcat_utils.py deleted file mode 100644 index 7015645c5..000000000 --- a/medcat-trainer/webapp/api/api/medcat_utils.py +++ /dev/null @@ -1,68 +0,0 @@ -import logging -from collections import defaultdict -from typing import List - -from medcat.cdb import CDB - - -logger = logging.getLogger(__name__) - - -def ch2pt_from_pt2ch(cdb: CDB): - ch2pt = defaultdict(list) - for k, vals in cdb.addl_info['pt2ch'].items(): - for v in vals: - ch2pt[v].append(k) - return ch2pt - - -def get_all_ch(parent_cui: str, cdb: CDB): - all_ch = [parent_cui] - for cui in cdb.addl_info['pt2ch'].get(parent_cui, []): - cui_chs = get_all_ch(cui, cdb) - all_ch += cui_chs - return dedupe_preserve_order(all_ch) - - -def dedupe_preserve_order(items: List[str]) -> List[str]: - seen = set() - deduped_list = [] - for item in items: - if item not in seen: - seen.add(item) - deduped_list.append(item) - return deduped_list - - -def snomed_ct_concept_path(cui: str, cdb: CDB): - try: - top_level_parent_node = '138875005' - - def find_parents(cui, cuis2nodes, child_node=None): - parents = list(cdb.addl_info['ch2pt'][cui]) - all_links = [] - if cui not in cuis2nodes: - curr_node = {'cui': cui, 'pretty_name': cdb.cui2preferred_name[cui]} - if child_node: - curr_node['children'] = [child_node] - cuis2nodes[cui] = curr_node - if len(parents) > 0: - 
all_links += find_parents(parents[0], cuis2nodes, child_node=curr_node) - for p in parents[1:]: - links = find_parents(p, cuis2nodes) - all_links += [{'parent': p, 'child': cui}] + links - else: - if child_node: - if 'children' not in cuis2nodes[cui]: - cuis2nodes[cui]['children'] = [] - cuis2nodes[cui]['children'].append(child_node) - return all_links - cuis2nodes = dict() - all_links = find_parents(cui, cuis2nodes) - return { - 'node_path': cuis2nodes[top_level_parent_node], - 'links': all_links - } - except KeyError as e: - logger.warning(f'Cannot find path concept path:{e}') - return [] diff --git a/medcat-trainer/webapp/api/api/metrics.py b/medcat-trainer/webapp/api/api/metrics.py index 79d534724..1cdfaf93d 100644 --- a/medcat-trainer/webapp/api/api/metrics.py +++ b/medcat-trainer/webapp/api/api/metrics.py @@ -420,7 +420,7 @@ def meta_anns_concept_summary(self) -> List[Dict]: # Store results for this concept meta_performance[cui] = { 'cui': cui, - 'concept_name': self.cat.cdb.cui2preferred_name[cui], + 'concept_name': self.cat.cdb.cui2info[cui]['preferred_name'], 'meta_tasks': meta_task_results } diff --git a/medcat-trainer/webapp/api/api/model_cache.py b/medcat-trainer/webapp/api/api/model_cache.py index e21048517..e3e2a9a84 100644 --- a/medcat-trainer/webapp/api/api/model_cache.py +++ b/medcat-trainer/webapp/api/api/model_cache.py @@ -202,6 +202,7 @@ def clear_cached_medcat(project, cat_map: Dict[str, CAT]=CAT_MAP): def get_cached_cdb(cdb_id: str, cdb_map: Dict[str, CDB]=CDB_MAP) -> CDB: + from api.utils import clear_cdb_cnf_addons if cdb_id not in cdb_map: cdb_obj = ConceptDB.objects.get(id=cdb_id) cdb = CDB.load(cdb_obj.cdb_file.path) diff --git a/medcat-trainer/webapp/api/api/views.py b/medcat-trainer/webapp/api/api/views.py index 7b5b83328..b2e5215ae 100644 --- a/medcat-trainer/webapp/api/api/views.py +++ b/medcat-trainer/webapp/api/api/views.py @@ -11,6 +11,7 @@ from django.shortcuts import render from django.utils import timezone from django_filters 
import rest_framework as drf +from medcat.utils.cdb_utils import ch2pt_from_pt2ch, get_all_ch, snomed_ct_concept_path from rest_framework import viewsets from rest_framework.decorators import api_view from rest_framework.response import Response @@ -19,7 +20,6 @@ from .admin import download_projects_with_text, download_projects_without_text, \ import_concepts_from_cdb from .data_utils import upload_projects_export -from .medcat_utils import ch2pt_from_pt2ch, get_all_ch, dedupe_preserve_order, snomed_ct_concept_path from .metrics import calculate_metrics from .model_cache import get_medcat, get_cached_cdb, VOCAB_MAP, clear_cached_medcat, CAT_MAP, CDB_MAP, is_model_loaded from .permissions import * @@ -858,11 +858,11 @@ def cdb_cui_children(request, cdb_id): # currently assumes this is using the SNOMED CT terminology try: - root_term = {'cui': '138875005', 'pretty_name': cdb.cui2preferred_name['138875005']} + root_term = {'cui': '138875005', 'pretty_name': cdb.cui2info['138875005']['preferred_name']} if parent_cui is None: return Response({'results': [root_term]}) else: - child_concepts = [{'cui': cui, 'pretty_name': cdb.cui2preferred_name[cui]} + child_concepts = [{'cui': cui, 'pretty_name': cdb.cui2info[cui]['preferred_name']} for cui in cdb.addl_info.get('pt2ch')[parent_cui]] return Response({'results': child_concepts}) except KeyError: @@ -894,7 +894,7 @@ def generate_concept_filter_flat_json(request): for cui in cuis: ch_nodes = get_all_ch(cui, cdb) final_filter += [n for n in ch_nodes if n not in excluded_nodes] - final_filter = dedupe_preserve_order(final_filter) + final_filter = list(dict.fromkeys(final_filter)) filter_json = json.dumps(final_filter) response = HttpResponse(filter_json, content_type='application/json') response['Content-Disposition'] = 'attachment; filename=filter.json' @@ -911,8 +911,8 @@ def generate_concept_filter(request): # get all children from 'parent' concepts above. 
final_filter = {} for cui in cuis: - final_filter[cui] = [{'cui': c, 'pretty_name': cdb.cui2preferred_name[c]} for c in get_all_ch(cui, cdb) - if c in cdb.cui2preferred_name and c != cui] + final_filter[cui] = [{'cui': c, 'pretty_name': cdb.cui2info[c]['preferred_name']} for c in get_all_ch(cui, cdb) + if c in cdb.cui2info and c != cui] resp = {'filter_len': sum(len(f) for f in final_filter.values()) + len(final_filter.keys())} if resp['filter_len'] < 10000: # only send across concept filters that are small enough to render @@ -928,12 +928,12 @@ def cuis_to_concepts(request): if cdb_id is not None: if cuis is not None: cdb = get_cached_cdb(cdb_id, CDB_MAP) - concept_list = [{'cui': cui, 'name': cdb.cui2preferred_name[cui]} for cui in cuis] + concept_list = [{'cui': cui, 'name': cdb.cui2info[cui]['preferred_name']} for cui in cuis] resp = {'concept_list': concept_list} return Response(resp) else: cdb = get_cached_cdb(cdb_id, CDB_MAP) - concept_list = [{'cui': cui, 'name': cdb.cui2preferred_name[cui]} for cui in cdb.cui2preferred_name.keys()] + concept_list = [{'cui': cui, 'name': cdb.cui2info[cui]['preferred_name']} for cui in cdb.cui2info.keys()] resp = {'concept_list': concept_list} return Response(resp) return HttpResponseBadRequest('Missing either cuis or cdb_id param. Cannot produce concept list.')