# Notebook for extracting the images for annotation

This notebook extracts all PadChest images labelled with at least one of the following four tube types, regardless of how the label was produced (physician or RNN model, see the `MethodLabel` column):
- 'chest drain tube'
- 'nsg tube'
- 'endotracheal tube'
- 'tracheostomy tube'

In [1]:
# Imports
import pandas as pd
from collections import Counter

In [2]:
# Loading the preprocessed dataframe (first CSV column is used as the index).
# low_memory=False makes pandas infer each column's dtype from the whole file rather than
# chunk by chunk, which avoids mixed-type columns and the warning pandas emits for them.
data = pd.read_csv("../Data/preprocessed_df.csv", index_col=0, low_memory=False)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [4]:
# Display the loaded dataframe (pandas truncates the rendering to the first/last rows and columns)
data

Unnamed: 0,ImageID,ImageDir,StudyDate_DICOM,StudyID,PatientID,PatientBirth,PatientSex_DICOM,ViewPosition_DICOM,Projection,MethodProjection,...,ExposureTime,RelativeXRayExposure_DICOM,ReportID,Report,MethodLabel,Labels,Localizations,LabelsLocalizationsBySentence,labelCUIS,LocalizationsCUIS
0,20536686640136348236148679891455886468_k6ga29.png,0,20140915,20536686640136348236148679891455886468,839860488694292331637988235681460987,1930.0,F,POSTEROANTERIOR,PA,Manual review of DICOM fields,...,10,-1.42,4765777,sin hallazg patolog edad pacient .,Physician,['normal'],[],"[['normal'], ['normal']]",[],[]
1,135803415504923515076821959678074435083_fzis7b...,0,20150914,135803415504923515076821959678074435083,313572750430997347502932654319389875966,1929.0,M,POSTEROANTERIOR,PA,Manual review of DICOM fields,...,10,,4991845,cambi pulmonar cronic sever . sign fibrosis b...,Physician,"['pseudonodule', 'chronic changes', 'ground gl...","['loc basal', 'loc basal bilateral']","[['pulmonary fibrosis', 'loc basal bilateral']...",['C0034069' 'C0742362' 'C2115817' 'C3544344'],['C1282378']
2,113855343774216031107737439268243531979_3k951l...,0,20150717,113855343774216031107737439268243531979,50783093527901818115346441867348318648,1925.0,F,POSTEROANTERIOR,PA,Manual review of DICOM fields,...,8,,4955977,. . siluet cardi mediastin dentr normal . cam...,Physician,['chronic changes'],"['loc cardiac', 'loc mediastinum', 'loc costop...","[['chronic changes'], ['chronic changes'], ['n...",['C0742362'],['C1522601' 'C0025066' 'C0230151']
3,313903302629300007485735352869488750471_75sg0k...,0,20170125,313903302629300007485735352869488750471,3707275269754751392247446521222810702,1978.0,M,,AP,Manual review of DICOM fields,...,,1192,5310833,objet mejori radiolog con practic resolucion ...,Physician,"['interstitial pattern', 'alveolar pattern', '...","['loc basal', 'loc left']","[['unchanged'], ['alveolar pattern', 'intersti...",['C1332240' 'C2073538'],['C1282378' 'C0443246']
4,3137231742710829928-247610802266403640553_kine...,0,20150804,3137231742710829928-247610802266403640553,93535126770783451980359712286922420997,1957.0,M,,PA,Manual review of DICOM fields,...,0,299,4969767,radiografi actual comp con previ 26 juni pers...,Physician,"['sternotomy', 'suture material', 'laminar ate...","['loc basal', 'loc subsegmental', 'loc pleural...","[['laminar atelectasis', 'loc subsegmental', '...",['C2073625' 'C4305366' 'C0185792'],['C1282378' 'C0929165' 'C0032225' 'C0444532' '...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109039,1284011361929414522814654121696751542351444145...,49,20110321,1284011361929414522814654121696751542351444145...,112930952416074060371371014599496493673,1948.0,M,POSTEROANTERIOR,PA,Manual review of DICOM fields,...,10,-0.69,4018689,import sign radiolog epoc . pinzamient ambos ...,RNN_model,"['costophrenic angle blunting', 'copd signs']",['loc costophrenic angle'],"['COPD signs', 'costophrenic angle blunting', ...",['C0024117' 'C0742855'],['C0230151']
109040,1284011361929414522094646571696751542351444145...,49,20090609,1284011361929414522094646571696751542351444145...,282743729971423358706056731890510600934,1944.0,F,POSTEROANTERIOR,PA,Manual review of DICOM fields,...,10,-0.85,3639865,dentr normal .,RNN_model,['normal'],[],['normal'],[],[]
109041,1284011361929414522086390631696751542351444145...,49,20110415,1284011361929414522086390631696751542351444145...,52648743308541843883453242716226652771,1965.0,M,,AP_horizontal,Manual review of DICOM fields,...,,784,4035503,tub endotraqueal sond nasogastr situacion cor...,RNN_model,"['nsg tube', 'endotracheal tube']",['loc tracheal'],"['NSG tube', ' endotracheal tube', 'loc trache...",['C0336630'],['C0040578']
109042,1284011361929414522084108901696751542351444145...,49,20101214,1284011361929414522084108901696751542351444145...,228646130593152933811948996634154201216,1943.0,F,POSTEROANTERIOR,PA,Manual review of DICOM fields,...,10,-1,3958189,dentr normal .,RNN_model,['normal'],[],['normal'],[],[]


## Extraction

In [3]:
# The four tube labels of interest; these strings are matched against the 'Labels' column
tube_types = [
    'chest drain tube',
    'nsg tube',
    'endotracheal tube',
    'tracheostomy tube',
]

In [4]:
# Extracting the images with positive tube labels.
# 'Labels' stores each image's label list as a string, so a substring match per tube type is
# used; the per-type masks are OR-ed together so an image with any of the tubes is kept.
# regex=False matches the tube names literally, and na=False treats a missing label as
# "no tube" instead of putting NaN into the boolean mask (which would break the indexing).
has_tube = pd.Series(False, index=data.index)
for tube_type in tube_types:
    has_tube |= data['Labels'].str.contains(tube_type, regex=False, na=False)

tube_df = data[has_tube].reset_index(drop=True)

print('Number of tube images: ', len(tube_df))

Number of tube images:  6946


In [5]:
# Preview the first two extracted rows
tube_df.head(2)

Unnamed: 0,ImageID,ImageDir,StudyDate_DICOM,StudyID,PatientID,PatientBirth,PatientSex_DICOM,ViewPosition_DICOM,Projection,MethodProjection,...,ExposureTime,RelativeXRayExposure_DICOM,ReportID,Report,MethodLabel,Labels,Localizations,LabelsLocalizationsBySentence,labelCUIS,LocalizationsCUIS
0,313723174271082992847610802266403640553-2_40kx...,0,20150420,313723174271082992847610802266403640553-2,93535126770783451980359712286922420997,1957.0,M,,AP_horizontal,Manual review of DICOM fields,...,,441,4901773,derram pleural bilateral predomini derech . p...,Physician,"['pleural effusion', 'unchanged', 'interstitia...","['loc pleural', 'loc right', 'loc hemithorax',...","[['alveolar pattern', 'interstitial pattern', ...",['C1332240' 'C2073625' 'C0336630' 'C0185792' '...,['C0032225' 'C0444532' 'C0934569' 'C0040578' '...
1,313723174271082992847610802266403640553_w8dk8c...,0,20150417,313723174271082992847610802266403640553,93535126770783451980359712286922420997,1957.0,M,,AP_horizontal,Manual review of DICOM fields,...,,534,4900821,derram pleural bilateral predomini derech . p...,Physician,"['pleural effusion', 'endotracheal tube', 'inf...","['loc pleural', 'loc right', 'loc hemithorax',...","[['pleural effusion both sides', 'loc pleural'...",['C2073625' 'C0277877' 'C0336630' 'C0185792'],['C0032225' 'C0444532' 'C0934569' 'C0040578' '...


### Saving in an annotation file

In [10]:
# Shuffling the rows; the fixed random_state keeps the order reproducible between runs
shuffled = tube_df.sample(frac=1, random_state=123).reset_index(drop=True)

# Grabbing just the two necessary columns
to_be_annotated_new_df = shuffled[['ImageDir', 'ImageID']].copy()

# Adding one empty column per tube type for the tube annotations
for annotation_col in ["Chest_drain_tube", "NSG_tube", "Endotracheal_tube", "Tracheostomy_tube"]:
    to_be_annotated_new_df[annotation_col] = ""

# Saving the list in an excel file to be used for annotating
# (the row index of the shuffled frame is written as the first column)
to_be_annotated_new_df.to_excel("../Annotation/Annotations_a1.xlsx")

In [19]:
# Display the annotation sheet: ImageDir, ImageID and the four (still empty) tube columns
to_be_annotated_new_df

Unnamed: 0,ImageDir,ImageID,Chest_drain_tube,NSG_tube,Endotracheal_tube,Tracheostomy_tube
0,47,216840111366964012339356563862009041090824095_...,,,,
1,13,216840111366964013686042548532013298085500915_...,,,,
2,39,216840111366964013076187734852011188142138830_...,,,,
3,30,216840111366964013451228379692012296085047250_...,,,,
4,46,216840111366964012339356563862009054142757213_...,,,,
...,...,...,...,...,...,...
6941,39,216840111366964013076187734852011188085128751_...,,,,
6942,32,216840111366964012819207061112010263092410622_...,,,,
6943,16,216840111366964012810946289282010225111709457_...,,,,
6944,28,216840111366964013217898866992011349101317577_...,,,,
