In [1]:
!pip install simple-icd-10-cm



In [2]:
# Relative locations used throughout the notebook:
#   DATA_DIR   - where the ICD diagnosis dictionary (d_icd_diagnoses.csv.gz) is read from
#   OUTPUT_DIR - where pairs.jsonl is read from and touples.csv is written to
DATA_DIR = "./../data/"
OUTPUT_DIR = "./../output/datasets/"

In [36]:
import simple_icd_10_cm as cm
import pandas as pd
import numpy as np
import json
import random

In [125]:
# Load the ICD diagnosis dictionary and keep only the rows with icd_version == 10.
diagnoses = pd.read_csv(DATA_DIR + "d_icd_diagnoses.csv.gz", compression="gzip")
diagnoses = diagnoses[diagnoses["icd_version"] == 10]

# Lookup tables built from the "icd_code" / "long_title" columns:
#   cod2ents: code -> set of upper-cased terms linked to that code (filled below)
#   desc2cod: long title -> code (if a title occurs more than once, the last code wins)
#   cod2desc: code -> long title
cod2ents = {}
desc2cod = {}
cod2desc = {}
# Walk the two needed columns directly; iterrows() would build a Series per row.
for icd_code, long_title in zip(diagnoses["icd_code"], diagnoses["long_title"]):
    cod2ents[icd_code] = set()
    desc2cod[long_title] = icd_code
    cod2desc[icd_code] = long_title


pp = []

# Every line of pairs.jsonl is a JSON object with a "pairs" list whose items
# carry a "term" and a "label". Only pairs whose label matches a known long
# title are kept; the rest are skipped silently.
with open(OUTPUT_DIR + "pairs.jsonl", 'r') as f:
    for line in f:
        record_pairs = json.loads(line)["pairs"]
        for pair in record_pairs:
            if pair["label"] in desc2cod:
                label_code = desc2cod[pair["label"]]
                cod2ents[label_code].add(pair["term"].upper())
                pp.append((pair["term"], label_code))

# Final list of (term, code) tuples consumed by the following cells.
pairs = pp

In [68]:
# Every code present in cod2desc, materialised as a list so it can be passed to random.sample.
tot = list(cod2desc)

In [154]:
def get_cousines(code):
    """Return the "cousins" of ``code`` in random order.

    A cousin is a child of one of the siblings of ``code``'s parent, i.e. a
    node that shares the grandparent of ``code`` but not its parent.

    Args:
        code: A code understood by ``simple_icd_10_cm`` (imported as ``cm``).

    Returns:
        A list of cousin codes, shuffled in place with the module-level
        ``random`` generator. The list is empty when ``code`` has no parent
        or no grandparent.
    """
    parent = cm.get_parent(code)
    # simple_icd_10_cm documents get_parent as returning an empty string for a
    # node without a parent; looking that value up again would fail, so stop here.
    if not parent:
        return []
    grandpa = cm.get_parent(parent)
    if not grandpa:
        return []

    # Children of every sibling of the parent ("uncles"), flattened into one list.
    cousines = [
        cousin
        for uncle in cm.get_children(grandpa)
        if uncle != parent
        for cousin in cm.get_children(uncle)
    ]
    random.shuffle(cousines)
    return cousines


In [155]:
# Fixed seed so that re-running this cell yields the same shuffles and samples
# (get_cousines and random.sample both draw from the module-level generator).
SEED = 42
# Every row carries NUM_NEGATIVES negatives; at most MAX_HARD_NEGATIVES of them
# are "hard" negatives taken from codes related to the positive one.
MAX_HARD_NEGATIVES = 5
NUM_NEGATIVES = 10

random.seed(SEED)

# Each element is (term, positive description, negative description 1..NUM_NEGATIVES).
touples = []
for term, code in pairs:
    upperTerm = term.upper()
    # Pairs whose code is unknown to simple_icd_10_cm are skipped.
    if not cm.is_valid_item(code):
        continue

    # NOTE(review): get_excludes1/get_excludes2 appear to return free-text notes
    # rather than bare codes; if so, is_valid_item discards all of them and only
    # the cousins contribute hard negatives - confirm against the library.
    related = cm.get_excludes1(code) + cm.get_excludes2(code) + get_cousines(code)

    hardNegatives = []
    for candidate in related:
        plain = candidate.replace(".", "")  # dictionaries are keyed on dot-less codes
        if plain == code or not cm.is_valid_item(candidate) or not cm.is_leaf(candidate):
            continue
        # Keep only codes present in the dictionary that are not linked to this term.
        if plain in cod2ents and upperTerm not in cod2ents[plain]:
            hardNegatives.append(plain)
            if len(hardNegatives) == MAX_HARD_NEGATIVES:
                break

    # Fill the remaining slots with random codes. The whole batch is redrawn
    # until every sampled code passes the filter (rejection sampling).
    numDistractors = NUM_NEGATIVES - len(hardNegatives)
    distractors = []
    while len(distractors) < numDistractors:
        distractors = [
            x
            for x in random.sample(tot, numDistractors)
            if x != code
            and x not in hardNegatives
            and x.replace(".", "") in cod2ents
            and upperTerm not in cod2ents[x.replace(".", "")]
        ]

    negatives = hardNegatives + distractors
    touples.append(tuple([term, cod2desc[code]] + [cod2desc[x] for x in negatives]))

In [148]:
# Preview only the first rows; echoing the whole list floods the notebook output.
touples[:5]

[('Anterior exenteration',
  'Malignant neoplasm of bladder neck',
  'Malignant neoplasm of right kidney, except renal pelvis',
  'Malignant neoplasm of left kidney, except renal pelvis',
  'Malignant neoplasm of unspecified kidney, except renal pelvis',
  'Malignant neoplasm of right renal pelvis',
  'Malignant neoplasm of left renal pelvis',
  'Cytomegaloviral pancreatitis',
  'Chronic gout due to renal impairment, right knee, with tophus (tophi)',
  'Blister (nonthermal) of right front wall of thorax, sequela',
  'Toxic reaction to local anesthesia during pregnancy, third trimester',
  'Unspecified car occupant injured in collision with sport utility vehicle in nontraffic accident, subsequent encounter'),
 ('Atorvastatin',
  'Hyperlipidemia, unspecified',
  'Classical phenylketonuria',
  'Other hyperphenylalaninemias',
  'Disorders of tryptophan metabolism',
  'Disorder of aromatic amino-acid metabolism, unspecified',
  'Maple-syrup-urine disease',
  'Adverse effect of thrombolytic 

In [170]:
# Draw one row at random and print its fields (term, positive, negatives) one
# per line, followed by an empty line.
prova = random.sample(touples, 1)
for sampled_row in prova:
    for field in sampled_row:
        print(field)
    print()



Myocarditis
Personal history of other diseases of the circulatory system
Personal history of tuberculosis
Personal history of gestational diabetes
Personal history of latent tuberculosis infection
Personal history of other infectious and parasitic diseases
Personal history of diabetic foot ulcer
Radiculopathy, lumbosacral region
Military operation involving other effects of nuclear weapons, military personnel
Contusion of right ring finger with damage to nail, subsequent encounter
Unspecified fracture of second metacarpal bone, right hand, initial encounter for open fracture
Hereditary and idiopathic neuropathy



In [171]:
# Save the dataset as a pandas DataFrame with an "anchor" column, a "positive"
# column and ten "negative_<i>" columns, then write it to CSV without the index.
column_names = ["anchor", "positive"] + ["negative_{}".format(i) for i in range(1, 11)]
df = pd.DataFrame(touples, columns=column_names)
df.to_csv(OUTPUT_DIR + "touples.csv", index=False)


In [172]:
# Display the frame that was just saved (the rendered output is abbreviated to the first and last rows).
df

Unnamed: 0,anchor,positive,negative_1,negative_2,negative_3,negative_4,negative_5,negative_6,negative_7,negative_8,negative_9,negative_10
0,Anterior exenteration,Malignant neoplasm of bladder neck,"Malignant neoplasm of unspecified kidney, exce...",Malignant neoplasm of unspecified renal pelvis,Malignant neoplasm of left ureter,Malignant neoplasm of paraurethral glands,Malignant neoplasm of left renal pelvis,"Unspecified kyphosis, cervical region",Unspecified superficial injuries of left back ...,Dome fracture of acetabulum,"Other fracture of left great toe, initial enco...",Unspecified fracture of upper end of unspecifi...
1,Atorvastatin,"Hyperlipidemia, unspecified",Other lactose intolerance,"Lipomatosis, not elsewhere classified","Mucopolysaccharidosis, type II",Hyperuricemia without signs of inflammatory ar...,"Volume depletion, unspecified","Glaucoma secondary to other eye disorders, uns...","Fracture of one rib, left side, subsequent enc...","Toxic effect of other tobacco and nicotine, ac...",Puncture wound without foreign body of left ri...,Nondisplaced fracture of epiphysis (separation...
2,Urostomy,Malignant neoplasm of bladder neck,"Malignant neoplasm of urinary organ, unspecified",Malignant neoplasm of overlapping sites of uri...,Malignant neoplasm of left ureter,Malignant neoplasm of urethra,Malignant neoplasm of left renal pelvis,Indeterminate leprosy,"Poisoning by other viral vaccines, accidental ...","Fracture of unspecified metatarsal bone(s), ri...",Sprain of tarsometatarsal ligament of unspecif...,Burn of first degree of multiple sites of left...
3,Bladder cancer,Malignant neoplasm of bladder neck,Malignant neoplasm of right renal pelvis,Malignant neoplasm of urethra,Malignant neoplasm of right ureter,"Malignant neoplasm of urinary organ, unspecified",Malignant neoplasm of left ureter,"Crepitant synovitis (acute) (chronic), unspeci...","Legal intervention, means unspecified, law enf...","Other paralytic strabismus, left eye",Contusion of right great toe with damage to na...,Displaced oblique fracture of shaft of left fe...
4,Losartan,Essential (primary) hypertension,Portal vein thrombosis,Cardiomyopathy in diseases classified elsewhere,Endocarditis and heart valve disorders in dise...,"Endocarditis, valve unspecified",Rheumatic fever without heart involvement,Complete traumatic metacarpophalangeal amputat...,Poisoning by other primarily systemic and hema...,Other specified injury of other blood vessels ...,Breakdown (mechanical) of internal fixation de...,Maternal care for unstable lie
...,...,...,...,...,...,...,...,...,...,...,...,...
473541,Lamivudine,Adverse effect of antineoplastic and immunosup...,Poisoning by antineoplastic and immunosuppress...,Poisoning by antineoplastic and immunosuppress...,Poisoning by antineoplastic and immunosuppress...,Poisoning by antineoplastic and immunosuppress...,Underdosing of antineoplastic and immunosuppre...,Pain associated with micturition,Other specified injury of blood vessel of righ...,"Laceration of extensor or abductor muscles, fa...","Toxic effect of cobra venom, assault","Obstructive and reflux uropathy, unspecified"
473542,Rituxan,Encounter for antineoplastic chemotherapy,Encounter for other specified aftercare,Encounter for therapeutic drug level monitoring,"Unspecified superficial injury of left thigh, ...",Nondisplaced fracture of triquetrum [cuneiform...,Diseases of the circulatory system complicatin...,Displaced fracture of olecranon process with i...,Legal intervention involving unspecified sharp...,"Displaced apophyseal fracture of right femur, ...",Displaced fracture of medial wall of unspecifi...,Displaced fracture of lateral end of left clav...
473543,MGUS,Monoclonal gammopathy,Neoplasm of uncertain behavior of prostate,Neoplasm of uncertain behavior of respiratory ...,Refractory cytopenia with multilineage dysplasia,Neoplasm of uncertain behavior of colon,Neoplasm of uncertain behavior of larynx,"Calcific tendinitis, multiple sites",Asphyxiation due to being trapped in a (discar...,Concussion with loss of consciousness greater ...,Poisoning by other primarily systemic and hema...,"Paper entering through skin, initial encounter"
473544,Aminotransferases,Nonspecific elevation of levels of transaminas...,Elevated erythrocyte sedimentation rate,Other abnormality of red blood cells,Other specified abnormalities of plasma proteins,"Abnormal finding of blood chemistry, unspecified",Finding of other drugs of addictive potential ...,"Burn of first degree of left axilla, sequela",Type 1 diabetes mellitus with diabetic macular...,Other specified injury of unspecified blood ve...,Fracture of radius or ulna following insertion...,"Other fracture of upper end of left tibia, sub..."
