diff --git a/multimodal/openi_multilabel_classification_transchex/README.md b/multimodal/openi_multilabel_classification_transchex/README.md index 970e3a5969..d9ec346185 100644 --- a/multimodal/openi_multilabel_classification_transchex/README.md +++ b/multimodal/openi_multilabel_classification_transchex/README.md @@ -14,10 +14,8 @@ completed, the dataset can be readily used for the tutorial. ### Preprocessing Steps 1) Create a new folder named 'monai_data' for downloading the raw data and preprocessing. -2) Download the chest X-ray images in PNG format from this [link](https://openi.nlm.nih.gov/imgs/collections/NLMCXR_png.tgz). Copy the downloaded file (NLMCXR_png.tgz) -to 'monai_data' directory and extract it. -3) Download the reports in XML format from this [link](https://openi.nlm.nih.gov/imgs/collections/NLMCXR_reports.tgz). Copy the downloaded file (NLMCXR_reports.tgz) -to 'monai_data' directory and extract it. +2) Download the chest X-ray images in PNG format from this [link](https://openi.nlm.nih.gov/imgs/collections/NLMCXR_png.tgz). Copy the downloaded file (NLMCXR_png.tgz) to the 'monai_data' directory and extract it to 'monai_data/dataset_orig/NLMCXR_png/'. +3) Download the reports in XML format from this [link](https://openi.nlm.nih.gov/imgs/collections/NLMCXR_reports.tgz). Copy the downloaded file (NLMCXR_reports.tgz) to the 'monai_data' directory and extract it to 'monai_data/dataset_orig/NLMCXR_reports/'. 4) Download the splits of train, validation and test datasets from this [link](https://drive.google.com/u/1/uc?id=1jvT0jVl9mgtWy4cS7LYbF43bQE4mrXAY&export=download). Copy the downloaded file (TransChex_openi.zip) -to 'monai_data' directory and extract it. +to the 'monai_data' directory and extract it there (the preprocessing script expects the split files in 'monai_data/TransChex_openi/'). 5) Run 'preprocess_openi.py' to process the images and reports. 
diff --git a/multimodal/openi_multilabel_classification_transchex/preprocess_openi.py b/multimodal/openi_multilabel_classification_transchex/preprocess_openi.py index dcba4b0d05..3204746a7c 100644 --- a/multimodal/openi_multilabel_classification_transchex/preprocess_openi.py +++ b/multimodal/openi_multilabel_classification_transchex/preprocess_openi.py @@ -28,6 +28,8 @@ def create_report(img_names_list_, report_list_, gt_list_, save_add): report_file_add= './monai_data/dataset_orig/NLMCXR_reports/ecgen-radiology' img_file_add= './monai_data/dataset_orig/NLMCXR_png' +npy_add= './monai_data/TransChex_openi/' + img_save_add = './monai_data/dataset_proc/images' report_train_save_add = './monai_data/dataset_proc/train.csv' report_val_save_add = './monai_data/dataset_proc/validation.csv' @@ -37,15 +39,15 @@ def create_report(img_names_list_, report_list_, gt_list_, save_add): os.makedirs(img_save_add) report_files = [f for f in listdir(report_file_add) if isfile(join(report_file_add, f))] -train_data = np.load('./train.npy', allow_pickle=True).item() +train_data = np.load(npy_add+'train.npy', allow_pickle=True).item() train_data_id = train_data['id_GT'] train_data_gt = train_data['label_GT'] -val_data = np.load('./validation.npy', allow_pickle=True).item() +val_data = np.load(npy_add+'validation.npy', allow_pickle=True).item() val_data_id = val_data['id_GT'] val_data_gt = val_data['label_GT'] -test_data = np.load('./test.npy', allow_pickle=True).item() +test_data = np.load(npy_add+'test.npy', allow_pickle=True).item() test_data_id = test_data['id_GT'] test_data_gt = test_data['label_GT']