In [None]:
# Import libraries
from datetime import timedelta
import os

import numpy as np
import pandas as pd
import re
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Rich-display helpers for rendering dataframes, HTML and images in the notebook
from IPython.display import display, HTML, Image
%matplotlib inline

plt.style.use('ggplot')
plt.rcParams.update({'font.size': 20})

# Access data using Google BigQuery.
from google.colab import auth
from google.cloud import bigquery

In [None]:
# Authenticate the Colab user (google.colab.auth). This runs before any of the
# BigQuery queries issued further down in the notebook.
auth.authenticate_user()

In [None]:
# Set up environment variables
# GCP project under which the BigQuery queries in this notebook are run.
project_id = 'autoicd'
if project_id == 'CHANGE-ME':
  # Guard for readers who copied the notebook without editing the placeholder.
  raise ValueError('You must change project_id to your GCP project.')
# Publish the project through the process environment as well.
os.environ.update({"GOOGLE_CLOUD_PROJECT": project_id})

# Read data from BigQuery into pandas dataframes.
def run_query(query, project_id=project_id):
  """Run a SQL query on BigQuery and return the result via pandas.

  Args:
    query: SQL text to execute (standard SQL dialect).
    project_id: GCP project to run the query under. Defaults to the value the
      module-level `project_id` had when this function was defined.

  Returns:
    Whatever `pd.io.gbq.read_gbq` returns for the query.
  """
  read_options = {'project_id': project_id, 'dialect': 'standard'}
  return pd.io.gbq.read_gbq(query, **read_options)

# Dataset prefix interpolated into the table names of later queries.
# if you want to use the demo, change this to mimic_demo
dataset = 'mimic'

# Smoke test: a single known subject_id must come back as exactly one row.
# NOTE(review): the table below is hard-coded to mimic_core and does not follow
# `dataset` -- confirm whether that is intended when switching to the demo.
df = run_query(
    """
SELECT subject_id
FROM `physionet-data.mimic_core.patients`
WHERE subject_id = 10012853
"""
)
assert len(df) == 1, 'unable to query MIMIC-IV!'

In [None]:
# Load every coded diagnosis together with its human-readable ICD title.
#
# The dictionary table must be joined on BOTH icd_code and icd_version: a code
# string is only unique within one ICD revision, so joining on icd_code alone
# can pair an ICD-10 diagnosis with an ICD-9 title (and vice versa) and emit
# duplicate rows for the same diagnosis.
query = f"""
SELECT dx.*, di.long_title
FROM `physionet-data.{dataset}_hosp.diagnoses_icd` dx
INNER JOIN `physionet-data.{dataset}_hosp.d_icd_diagnoses` di
  ON dx.icd_code = di.icd_code
  AND dx.icd_version = di.icd_version
ORDER BY dx.seq_num
"""
dx = run_query(query)
dx

Unnamed: 0,subject_id,hadm_id,seq_num,icd_code,icd_version,long_title
0,17377265,21007512,1,27801,9,Morbid obesity
1,11534138,29080299,1,64891,9,Other current conditions classifiable elsewher...
2,13950979,24478406,1,4414,9,Abdominal aneurysm without mention of rupture
3,10687380,28820008,1,99769,9,Other amputation stump complication
4,12820032,25925201,1,2182,9,Subserous leiomyoma of uterus
...,...,...,...,...,...,...
4669934,17396940,26463898,39,R740,10,Nonspecific elevation of levels of transaminas...
4669935,13881772,26842900,39,42731,9,Atrial fibrillation
4669936,17449370,29432247,39,V1582,9,Personal history of tobacco use
4669937,15263884,27091162,39,V1582,9,Personal history of tobacco use


Getting primary ICD-10-CM Codes.

In [None]:
# Keep only rows with seq_num == 1 that are coded in ICD version 10.
is_first_listed = dx['seq_num'] == 1
is_icd10 = dx['icd_version'] == 10
dx = dx.loc[is_first_listed & is_icd10]
dx

Unnamed: 0,subject_id,hadm_id,seq_num,icd_code,icd_version,long_title
2400,19617946,21607191,1,O4103X0,10,"Oligohydramnios, third trimester, not applicab..."
2433,16479239,28035417,1,O701,10,Second degree perineal laceration during delivery
2447,11858186,23568917,1,O76,10,Abnormality in fetal heart rate and rhythm com...
2453,16440000,25853106,1,M1712,10,"Unilateral primary osteoarthritis, left knee"
2493,11582633,20826664,1,F333,10,"Major depressive disorder, recurrent, severe w..."
...,...,...,...,...,...,...
438928,19024087,23464962,1,I2510,10,Atherosclerotic heart disease of native corona...
438929,18307375,20168701,1,I2510,10,Atherosclerotic heart disease of native corona...
438930,15045698,22802131,1,I2510,10,Atherosclerotic heart disease of native corona...
438931,15658630,25746044,1,I2510,10,Atherosclerotic heart disease of native corona...


Obtaining CXR Records

In [None]:
# Study-level table: subject, study and the path stored for each study.
query = """
select
  subject_id, study_id, path
from `physionet-data.mimic_cxr.study_list`
ORDER BY study_id
"""
cxr_studies = run_query(query)

# Record-level table: one DICOM id (and its path) per row.
query = """
select
  subject_id, study_id, dicom_id, path
from `physionet-data.mimic_cxr.record_list`
ORDER BY study_id, dicom_id
"""
cxr_records = run_query(query)

# Preview the first rows of both tables, records first.
display(cxr_records.head(), cxr_studies.head())

Unnamed: 0,subject_id,study_id,dicom_id,path
0,11941242,50000014,dffc8ab2-ff37704f-2fb29e6d-51e08075-88bca914,files/p11/p11941242/s50000014/dffc8ab2-ff37704...
1,19995127,50000028,1ff9650f-4cb5af18-f6caef33-e53686b7-983cca76,files/p19/p19995127/s50000028/1ff9650f-4cb5af1...
2,19995127,50000028,33708b7f-979612cb-46e1424c-318354bc-6f043189,files/p19/p19995127/s50000028/33708b7f-979612c...
3,15752761,50000052,a09e5a6c-9635efa7-de26c04a-28104ce9-e5f798d6,files/p15/p15752761/s50000052/a09e5a6c-9635efa...
4,15641478,50000103,2494a682-83c9ec78-4727272f-25733f42-58d9f47c,files/p15/p15641478/s50000103/2494a682-83c9ec7...


Unnamed: 0,subject_id,study_id,path
0,11941242,50000014,files/p11/p11941242/s50000014.txt
1,19995127,50000028,files/p19/p19995127/s50000028.txt
2,15752761,50000052,files/p15/p15752761/s50000052.txt
3,15641478,50000103,files/p15/p15641478/s50000103.txt
4,19309850,50000125,files/p19/p19309850/s50000125.txt


In [None]:
# DICOM metadata for each image. `ORDER BY 4, 5` sorts by the 4th and 5th
# selected columns, i.e. StudyDate then StudyTime.
query = """
SELECT
  dicom, StudyID, PatientID, StudyDate, StudyTime
from `physionet-data.mimic_cxr.dicom_metadata_string`
ORDER BY 4, 5
"""
cxr_metadata = run_query(query)
display(cxr_metadata)

Unnamed: 0,dicom,StudyID,PatientID,StudyDate,StudyTime
0,c2a3cbe8-ad80fc7b-9696e471-e41fb7a7-b0a2d9d9,55012421,17195991,21100111,220111.359
1,1b9352eb-9ae051ae-8fd2c6ad-165db0b8-e74211a2,59967459,17195991,21100112,110815.593
2,06980cc1-94b37c18-8297f246-90ffe664-bc51fd35,56500894,17195991,21100113,092700.921
3,43520d74-e2328ddf-3ee9c45e-d08e6e10-c999234c,53577512,17195991,21100114,040913.203
4,b865264e-a6737a8e-55666fa7-1fcf16ed-bbf5df04,53577512,17195991,21100114,040913.203
...,...,...,...,...,...
377105,7301ca4f-64a07df9-5d7e9239-4d95f52d-870bac17,57603462,10409970,22081113,123817.453
377106,21596d1e-9a8bf3e0-27f3ed29-45c8e9c0-afc52952,57603462,10409970,22081113,123817.453
377107,4c9017e9-27497504-93954496-4f5d3160-8c74aa64,58645858,13774741,22081115,171526.656
377108,79543d28-5ccda488-59d0b120-614445f4-c91a0249,58645858,13774741,22081115,171526.656


In [None]:
# Build one row per DICOM image carrying its acquisition date/time and the path
# of the study-level text file:
#   1. records  x metadata on dicom_id == dicom
#   2. result   x studies  on study_id (overlapping columns get _x/_y suffixes)
#   3. rename the columns we keep and drop the redundant ones
cxr_combined = (
    cxr_records
    .merge(cxr_metadata, how='inner', left_on='dicom_id', right_on='dicom')
    .merge(cxr_studies, how='inner', on='study_id')
    .rename(columns={
        'path_y': 'text_path',
        'subject_id_x': 'subject_id',
        'StudyDate': 'study_date',
        'StudyTime': 'study_time',
    })
    .drop(columns=['dicom', 'subject_id_y', 'path_x', 'StudyID', 'PatientID'])
)
cxr_combined

Unnamed: 0,subject_id,study_id,dicom_id,study_date,study_time,text_path
0,11941242,50000014,dffc8ab2-ff37704f-2fb29e6d-51e08075-88bca914,21720525,132301.281,files/p11/p11941242/s50000014.txt
1,19995127,50000028,1ff9650f-4cb5af18-f6caef33-e53686b7-983cca76,21380307,185131.484,files/p19/p19995127/s50000028.txt
2,19995127,50000028,33708b7f-979612cb-46e1424c-318354bc-6f043189,21380307,185131.484,files/p19/p19995127/s50000028.txt
3,15752761,50000052,a09e5a6c-9635efa7-de26c04a-28104ce9-e5f798d6,21660519,033434.562,files/p15/p15752761/s50000052.txt
4,15641478,50000103,2494a682-83c9ec78-4727272f-25733f42-58d9f47c,21451120,140206.187,files/p15/p15641478/s50000103.txt
...,...,...,...,...,...,...
377105,18081075,59999880,c21f1512-0788cbd5-1eb83a90-d5278f83-ef513d83,21490404,153359.453,files/p18/p18081075/s59999880.txt
377106,15411545,59999888,4dc598f1-4c3ca6c9-795de4eb-93d90886-28af63e5,21890920,143731.000,files/p15/p15411545/s59999888.txt
377107,15411545,59999888,b15a3c64-92fe10c7-2a7885cc-f199a8f6-a16e469a,21890920,143731.000,files/p15/p15411545/s59999888.txt
377108,11069411,59999924,1cf7dea0-479cf50f-ee813007-eef46fd5-4184dd07,21911110,144137.000000,files/p11/p11069411/s59999924.txt


In [None]:
# Peek at the raw study_date value of the first row.
cxr_combined['study_date'].iloc[0]

'21720525'

In [None]:
from tqdm.notebook import tqdm_notebook

def get_date_time(row):
  """Combine a row's study date and study time strings into one timestamp.

  Args:
    row: mapping or Series with string fields 'study_date' and 'study_time'.

  Returns:
    The result of `pd.to_datetime` applied to '<study_date>T<study_time>'.
  """
  stamp_text = row['study_date'] + 'T' + row['study_time']
  return pd.to_datetime(stamp_text)

# Register `progress_apply` on pandas objects. The bar label has to be passed
# as `desc=`: a positional string is not interpreted as the description (the
# bar is rendered without a label, and recent tqdm releases reject positional
# arguments to `.pandas()` altogether).
tqdm_notebook.pandas(desc='getting study times')
# Parse each row's study_date/study_time pair into a single timestamp column.
cxr_combined['study_date_time'] = cxr_combined.progress_apply(get_date_time, axis=1)
cxr_combined

HBox(children=(FloatProgress(value=0.0, max=377110.0), HTML(value='')))




Unnamed: 0,subject_id,study_id,dicom_id,study_date,study_time,text_path,study_date_time
0,11941242,50000014,dffc8ab2-ff37704f-2fb29e6d-51e08075-88bca914,21720525,132301.281,files/p11/p11941242/s50000014.txt,2172-05-25 13:23:01.281
1,19995127,50000028,1ff9650f-4cb5af18-f6caef33-e53686b7-983cca76,21380307,185131.484,files/p19/p19995127/s50000028.txt,2138-03-07 18:51:31.484
2,19995127,50000028,33708b7f-979612cb-46e1424c-318354bc-6f043189,21380307,185131.484,files/p19/p19995127/s50000028.txt,2138-03-07 18:51:31.484
3,15752761,50000052,a09e5a6c-9635efa7-de26c04a-28104ce9-e5f798d6,21660519,033434.562,files/p15/p15752761/s50000052.txt,2166-05-19 03:34:34.562
4,15641478,50000103,2494a682-83c9ec78-4727272f-25733f42-58d9f47c,21451120,140206.187,files/p15/p15641478/s50000103.txt,2145-11-20 14:02:06.187
...,...,...,...,...,...,...,...
377105,18081075,59999880,c21f1512-0788cbd5-1eb83a90-d5278f83-ef513d83,21490404,153359.453,files/p18/p18081075/s59999880.txt,2149-04-04 15:33:59.453
377106,15411545,59999888,4dc598f1-4c3ca6c9-795de4eb-93d90886-28af63e5,21890920,143731.000,files/p15/p15411545/s59999888.txt,2189-09-20 14:37:31.000
377107,15411545,59999888,b15a3c64-92fe10c7-2a7885cc-f199a8f6-a16e469a,21890920,143731.000,files/p15/p15411545/s59999888.txt,2189-09-20 14:37:31.000
377108,11069411,59999924,1cf7dea0-479cf50f-ee813007-eef46fd5-4184dd07,21911110,144137.000000,files/p11/p11069411/s59999924.txt,2191-11-10 14:41:37.000


In [None]:
# Reduce each study timestamp to its calendar date so studies can later be
# matched to orders by day. `.dt.date` is the vectorised accessor and yields
# the same `datetime.date` objects as calling `Timestamp.date` per element.
# Assumes study_date_time is a datetime64 column, as the output above shows.
cxr_combined['date_object'] = cxr_combined['study_date_time'].dt.date
cxr_combined

Unnamed: 0,subject_id,study_id,dicom_id,study_date,study_time,text_path,study_date_time,date_object
0,11941242,50000014,dffc8ab2-ff37704f-2fb29e6d-51e08075-88bca914,21720525,132301.281,files/p11/p11941242/s50000014.txt,2172-05-25 13:23:01.281,2172-05-25
1,19995127,50000028,1ff9650f-4cb5af18-f6caef33-e53686b7-983cca76,21380307,185131.484,files/p19/p19995127/s50000028.txt,2138-03-07 18:51:31.484,2138-03-07
2,19995127,50000028,33708b7f-979612cb-46e1424c-318354bc-6f043189,21380307,185131.484,files/p19/p19995127/s50000028.txt,2138-03-07 18:51:31.484,2138-03-07
3,15752761,50000052,a09e5a6c-9635efa7-de26c04a-28104ce9-e5f798d6,21660519,033434.562,files/p15/p15752761/s50000052.txt,2166-05-19 03:34:34.562,2166-05-19
4,15641478,50000103,2494a682-83c9ec78-4727272f-25733f42-58d9f47c,21451120,140206.187,files/p15/p15641478/s50000103.txt,2145-11-20 14:02:06.187,2145-11-20
...,...,...,...,...,...,...,...,...
377105,18081075,59999880,c21f1512-0788cbd5-1eb83a90-d5278f83-ef513d83,21490404,153359.453,files/p18/p18081075/s59999880.txt,2149-04-04 15:33:59.453,2149-04-04
377106,15411545,59999888,4dc598f1-4c3ca6c9-795de4eb-93d90886-28af63e5,21890920,143731.000,files/p15/p15411545/s59999888.txt,2189-09-20 14:37:31.000,2189-09-20
377107,15411545,59999888,b15a3c64-92fe10c7-2a7885cc-f199a8f6-a16e469a,21890920,143731.000,files/p15/p15411545/s59999888.txt,2189-09-20 14:37:31.000,2189-09-20
377108,11069411,59999924,1cf7dea0-479cf50f-ee813007-eef46fd5-4184dd07,21911110,144137.000000,files/p11/p11069411/s59999924.txt,2191-11-10 14:41:37.000,2191-11-10


Combining MIMIC IV with MIMIC CXR

In [None]:
# Pull every provider order whose subtype is 'General Xray' from the MIMIC-IV
# POE table, ordered by poe_seq.
query = """
SELECT *
FROM `physionet-data.{dataset}_hosp.poe`
where order_subtype = 'General Xray'
ORDER BY poe_seq
""".format(dataset=dataset)
poe = run_query(query)

Unnamed: 0,subject_id,poe_id,poe_seq,ordertime,order_type,order_subtype
0,11475331,11475331-2,2,2185-06-03 16:10:25,Radiology,General Xray
1,17113027,17113027-2,2,2158-05-13 19:25:52,Radiology,General Xray
2,12496782,12496782-2,2,2119-09-15 12:29:01,Radiology,General Xray
3,15430844,15430844-2,2,2125-08-30 18:22:37,Radiology,General Xray
4,18469699,18469699-2,2,2127-05-31 16:04:08,Radiology,General Xray
...,...,...,...,...,...,...
801033,12468016,12468016-19802,19802,2139-08-02 15:41:07,Radiology,General Xray
801034,12468016,12468016-19828,19828,2139-08-02 20:10:40,Radiology,General Xray
801035,12468016,12468016-19862,19862,2139-08-02 23:03:56,Radiology,General Xray
801036,12468016,12468016-19914,19914,2139-08-03 07:43:12,Radiology,General Xray


In [None]:
# Keep only the columns needed for linking. Take an explicit copy so that
# adding a column below writes to an independent frame instead of a slice of
# the query result (which triggers pandas' SettingWithCopyWarning).
poe = poe[['subject_id','hadm_id','ordertime','order_subtype']].copy()
# Calendar day on which the x-ray was ordered. `pd.to_datetime` leaves a
# datetime64 column unchanged and lets `.dt.date` work on object columns too.
poe['date_object'] = pd.to_datetime(poe['ordertime']).dt.date
poe

Unnamed: 0,subject_id,hadm_id,ordertime,order_subtype,date_object
0,11475331,29674976,2185-06-03 16:10:25,General Xray,2185-06-03
1,17113027,23207932,2158-05-13 19:25:52,General Xray,2158-05-13
2,12496782,25315429,2119-09-15 12:29:01,General Xray,2119-09-15
3,15430844,24693864,2125-08-30 18:22:37,General Xray,2125-08-30
4,18469699,22445985,2127-05-31 16:04:08,General Xray,2127-05-31
...,...,...,...,...,...
801033,12468016,22516568,2139-08-02 15:41:07,General Xray,2139-08-02
801034,12468016,22516568,2139-08-02 20:10:40,General Xray,2139-08-02
801035,12468016,22516568,2139-08-02 23:03:56,General Xray,2139-08-02
801036,12468016,22516568,2139-08-03 07:43:12,General Xray,2139-08-03


In [None]:
# Link MIMIC-IV x-ray orders to MIMIC-CXR images: same patient, same calendar
# day. Every order on a day pairs with every image from that day, so a study
# can appear several times in the result.
cxr_poe_merged = pd.merge(
    poe,
    cxr_combined,
    how='inner',
    on=['subject_id', 'date_object'],
)
cxr_poe_merged

Unnamed: 0,subject_id,hadm_id,ordertime,order_subtype,date_object,study_id,dicom_id,study_date,study_time,text_path,study_date_time
0,15430844,24693864,2125-08-30 18:22:37,General Xray,2125-08-30,51251638,44db65a3-49003319-d9c1e768-07da789d-dcfdd1f6,21250830,201028.906,files/p15/p15430844/s51251638.txt,2125-08-30 20:10:28.906
1,15430844,24693864,2125-08-30 18:22:37,General Xray,2125-08-30,51251638,b21addf0-04628816-f174f412-c05589d5-65ac4d6a,21250830,201028.906,files/p15/p15430844/s51251638.txt,2125-08-30 20:10:28.906
2,15430844,24693864,2125-08-30 18:22:37,General Xray,2125-08-30,51251638,dba16c05-4457f07f-758cba05-e7b0838f-048c16c2,21250830,201028.906,files/p15/p15430844/s51251638.txt,2125-08-30 20:10:28.906
3,15430844,24693864,2125-08-30 19:34:55,General Xray,2125-08-30,51251638,44db65a3-49003319-d9c1e768-07da789d-dcfdd1f6,21250830,201028.906,files/p15/p15430844/s51251638.txt,2125-08-30 20:10:28.906
4,15430844,24693864,2125-08-30 19:34:55,General Xray,2125-08-30,51251638,b21addf0-04628816-f174f412-c05589d5-65ac4d6a,21250830,201028.906,files/p15/p15430844/s51251638.txt,2125-08-30 20:10:28.906
...,...,...,...,...,...,...,...,...,...,...,...
357753,11296936,25867062,2151-08-16 11:04:18,General Xray,2151-08-16,59486272,42dbf3a0-3e46dce8-53610755-14793e12-2512c54c,21510816,052012.375,files/p11/p11296936/s59486272.txt,2151-08-16 05:20:12.375
357754,11296936,25867062,2151-08-17 18:35:41,General Xray,2151-08-17,58187935,23ed1288-1582225f-5eb63a04-a9f7990f-5e58c491,21510817,185959.281,files/p11/p11296936/s58187935.txt,2151-08-17 18:59:59.281
357755,11296936,25867062,2151-08-25 13:54:18,General Xray,2151-08-25,53649038,90c4f39e-ce53e4d5-b5c38cd1-d260b557-e74e9c00,21510825,141012.812,files/p11/p11296936/s53649038.txt,2151-08-25 14:10:12.812
357756,11296936,25867062,2151-09-06 17:42:08,General Xray,2151-09-06,55576079,35ef8e34-c9081932-a2854367-a18a02a8-5093f85b,21510906,175113.109,files/p11/p11296936/s55576079.txt,2151-09-06 17:51:13.109


In [None]:
# Attach the diagnosis rows in `dx` to each linked study, matching on patient
# and hospital admission.
X = pd.merge(
    cxr_poe_merged[['subject_id','hadm_id','study_id','text_path']],
    dx,
    how='inner',
    on=['subject_id', 'hadm_id'],
)
X

Unnamed: 0,subject_id,hadm_id,study_id,text_path,seq_num,icd_code,icd_version,long_title
0,14807107,23337910,57252050,files/p14/p14807107/s57252050.txt,1,S36438A,10,"Laceration of other part of small intestine, i..."
1,14807107,23337910,57252050,files/p14/p14807107/s57252050.txt,1,S36438A,10,"Laceration of other part of small intestine, i..."
2,14807107,23337910,57252050,files/p14/p14807107/s57252050.txt,1,S36438A,10,"Laceration of other part of small intestine, i..."
3,14807107,23337910,57252050,files/p14/p14807107/s57252050.txt,1,S36438A,10,"Laceration of other part of small intestine, i..."
4,13591121,25946841,55568815,files/p13/p13591121/s55568815.txt,1,I5181,10,Takotsubo syndrome
...,...,...,...,...,...,...,...,...
85165,10882916,20990038,55329501,files/p10/p10882916/s55329501.txt,1,K50912,10,"Crohn's disease, unspecified, with intestinal ..."
85166,10882916,20990038,50049131,files/p10/p10882916/s50049131.txt,1,K50912,10,"Crohn's disease, unspecified, with intestinal ..."
85167,10882916,28979953,57203292,files/p10/p10882916/s57203292.txt,1,K9189,10,Other postprocedural complications and disorde...
85168,17716210,28309008,54485107,files/p17/p17716210/s54485107.txt,1,T827XXA,10,Infection and inflammatory reaction due to oth...


In [None]:
!gsutil -m cp -r gs://mimic-cxr-2.0.0.physionet.org/mimic-cxr-reports.zip ./
!unzip /content/mimic-cxr-reports.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
   creating: files/p13/p13664951/
  inflating: files/p13/p13664951/s53174477.txt  
  inflating: files/p13/p13664951/s59971272.txt  
  inflating: files/p13/p13664951/s59776531.txt  
   creating: files/p13/p13162864/
  inflating: files/p13/p13162864/s58968783.txt  
   creating: files/p13/p13138256/
  inflating: files/p13/p13138256/s53796371.txt  
   creating: files/p13/p13349882/
  inflating: files/p13/p13349882/s51884101.txt  
   creating: files/p13/p13558272/
  inflating: files/p13/p13558272/s58519040.txt  
   creating: files/p13/p13139059/
  inflating: files/p13/p13139059/s58456977.txt  
  inflating: files/p13/p13139059/s53760808.txt  
  inflating: files/p13/p13139059/s59925397.txt  
   creating: files/p13/p13496169/
  inflating: files/p13/p13496169/s58654265.txt  
  inflating: files/p13/p13496169/s50627096.txt  
  inflating: files/p13/p13496169/s57048001.txt  
  inflating: files/p13/p13496169/s50868325.txt  
   creating

Now we can pull in the report text files.

In [None]:
# Base directory that each relative `text_path` (e.g. files/p14/.../s57252050.txt)
# is appended to when a report file is opened.
pathdir = '/content/'

from tqdm.notebook import tqdm_notebook
def get_text(row, base_dir=None):
    """Read one report file and return its full contents as a string.

    Args:
        row: relative path of the report file (despite the name, this is the
            path string itself, e.g. a value of the `text_path` column).
        base_dir: directory the relative path is resolved against. When
            omitted, the module-level `pathdir` is used, which keeps existing
            one-argument calls (e.g. via `progress_apply`) working unchanged.

    Returns:
        The text of the file, read in text mode with the default encoding.

    Raises:
        OSError: if the file cannot be opened (e.g. it does not exist).
    """
    base = pathdir if base_dir is None else base_dir
    # os.path.join copes with a base directory given with or without a
    # trailing separator, unlike plain string concatenation.
    fpath = os.path.join(base, row)
    with open(fpath, 'r') as file:
        return file.read()
# Register a labelled progress bar for pandas, then read the report text for
# every row's text_path and store it alongside the diagnosis columns.
tqdm_notebook.pandas(desc='assigning text for radiology studies...')
report_texts = X['text_path'].progress_apply(get_text)
X['study_text'] = report_texts
X

HBox(children=(FloatProgress(value=0.0, description='assigning text for radiology studies...', max=85170.0, st…




Unnamed: 0,subject_id,hadm_id,study_id,text_path,seq_num,icd_code,icd_version,long_title,study_text
0,14807107,23337910,57252050,files/p14/p14807107/s57252050.txt,1,S36438A,10,"Laceration of other part of small intestine, i...",FINAL REPORT\...
1,14807107,23337910,57252050,files/p14/p14807107/s57252050.txt,1,S36438A,10,"Laceration of other part of small intestine, i...",FINAL REPORT\...
2,14807107,23337910,57252050,files/p14/p14807107/s57252050.txt,1,S36438A,10,"Laceration of other part of small intestine, i...",FINAL REPORT\...
3,14807107,23337910,57252050,files/p14/p14807107/s57252050.txt,1,S36438A,10,"Laceration of other part of small intestine, i...",FINAL REPORT\...
4,13591121,25946841,55568815,files/p13/p13591121/s55568815.txt,1,I5181,10,Takotsubo syndrome,FINAL REPORT\...
...,...,...,...,...,...,...,...,...,...
85165,10882916,20990038,55329501,files/p10/p10882916/s55329501.txt,1,K50912,10,"Crohn's disease, unspecified, with intestinal ...",FINAL REPORT\...
85166,10882916,20990038,50049131,files/p10/p10882916/s50049131.txt,1,K50912,10,"Crohn's disease, unspecified, with intestinal ...",FINAL REPORT\...
85167,10882916,28979953,57203292,files/p10/p10882916/s57203292.txt,1,K9189,10,Other postprocedural complications and disorde...,FINAL REPORT\...
85168,17716210,28309008,54485107,files/p17/p17716210/s54485107.txt,1,T827XXA,10,Infection and inflammatory reaction due to oth...,FINAL REPORT\...


In [None]:
# Checkpoint the linked table as a pickle and load it straight back.
# NOTE(review): the path is under /content/drive; no Drive mount is visible in
# this notebook -- confirm it is mounted before running this cell.
x_pickle_path = '/content/drive/My Drive/auto-icd/cxr_iv_linked/X'
X.to_pickle(x_pickle_path)
X = pd.read_pickle(x_pickle_path)

In [None]:
# Keep a single row per report file: the first occurrence of each text_path
# wins and later duplicates are discarded.
X = X.drop_duplicates(subset=['text_path'])
X

Unnamed: 0,subject_id,hadm_id,study_id,text_path,seq_num,icd_code,icd_version,long_title,study_text
0,14807107,23337910,57252050,files/p14/p14807107/s57252050.txt,1,S36438A,10,"Laceration of other part of small intestine, i...",FINAL REPORT\...
4,13591121,25946841,55568815,files/p13/p13591121/s55568815.txt,1,I5181,10,Takotsubo syndrome,FINAL REPORT\...
5,11223126,28538959,59531949,files/p11/p11223126/s59531949.txt,1,R0789,10,Other chest pain,FINAL REPORT\...
11,13233757,27632812,58341345,files/p13/p13233757/s58341345.txt,1,S272XXA,10,"Traumatic hemopneumothorax, initial encounter",FINAL REPORT\...
13,13233757,27632812,57050295,files/p13/p13233757/s57050295.txt,1,S272XXA,10,"Traumatic hemopneumothorax, initial encounter",FINAL REPORT\...
...,...,...,...,...,...,...,...,...,...
85164,10882916,20990038,55329501,files/p10/p10882916/s55329501.txt,1,K50912,10,"Crohn's disease, unspecified, with intestinal ...",FINAL REPORT\...
85166,10882916,20990038,50049131,files/p10/p10882916/s50049131.txt,1,K50912,10,"Crohn's disease, unspecified, with intestinal ...",FINAL REPORT\...
85167,10882916,28979953,57203292,files/p10/p10882916/s57203292.txt,1,K9189,10,Other postprocedural complications and disorde...,FINAL REPORT\...
85168,17716210,28309008,54485107,files/p17/p17716210/s54485107.txt,1,T827XXA,10,Infection and inflammatory reaction due to oth...,FINAL REPORT\...


In [None]:
import plotly.express as px
# Histogram of ICD codes across the linked reports, normalised as a
# probability density.
df = X
fig = px.histogram(
    data_frame=df,
    x="icd_code",
    histnorm='probability density',
)
fig.show()