# Classification Model

# MIMIC-CXR Dataset 

In [13]:
from fastai.basics import *
from fastai.text.all import *
import warnings
warnings.filterwarnings('ignore')
# Read in the train and test sets.
path = Path('/home/jupyter/data/mimic-cxr')
df_lm = pd.read_csv(path/"lm.csv")
df_cl = pd.read_csv(path/"labels.csv")
df_train = pd.read_csv(path/"train.csv")
df_test = pd.read_csv(path/"test.csv")

# Multi-Label Classifier

In [14]:
# fix result 
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
SEED = 42
seed_everything(SEED)

### 1. Data Block

In [15]:
labels = ["Atelectasis", "Cardiomegaly", "Consolidation",
              "Edema", "Enlarged Cardiomediastinum", "Fracture", "Lung Lesion", 
              "Lung Opacity", "No Finding", "Pleural Effusion", "Pleural Other", 
              "Pneumonia", "Pneumothorax", "Support Devices"]

In [16]:
bs_cl = (TextBlock.from_df('reports'), 
          MultiCategoryBlock(encoded=True, vocab=labels))

In [17]:
db_cl = DataBlock(blocks=bs_cl,
                      get_x=ColReader('text'),
                      get_y=ColReader(labels),
                      splitter=ColSplitter('is_valid'))

In [18]:
db_cl.summary(df_cl.iloc[:100])

Setting-up type transforms pipelines
Collecting items from                                         dicom_id  \
0   02aa804e-bde0afdd-112c0b34-7bc16630-4e384014   
1   2a2277a9-b0ded155-c0de8eb9-c124d10e-82c5caab   
2   68b5c4b1-227d0485-9cc38c3f-7b84ab51-4b472714   
3   096052b7-d256dc40-453a102b-fa7d01c6-1b22c6b4   
4   8959e402-2175d68d-edba5a6c-baab51c3-9359f700   
..                                           ...   
95  325f2526-1ea870c1-06d8ff34-1b02764d-9e336cbc   
96  38a433f3-1d000dff-a774352f-35c0d838-353e023f   
97  4a25692b-e596ad27-5bc2eba3-e518093c-623f4d6a   
98  0d24804d-197942ca-7f32a773-b93ba943-40022beb   
99  a664e3c4-97f37598-e008ddb5-674d8b24-8a49114f   

                                                                                                                                                                                                                                                                                                                            

Setting up Pipeline: ColReader -- {'cols': ['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Enlarged Cardiomediastinum', 'Fracture', 'Lung Lesion', 'Lung Opacity', 'No Finding', 'Pleural Effusion', 'Pleural Other', 'Pneumonia', 'Pneumothorax', 'Support Devices'], 'pref': '', 'suff': '', 'label_delim': None} -> EncodedMultiCategorize -- {'vocab': ['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Enlarged Cardiomediastinum', 'Fracture', 'Lung Lesion', 'Lung Opacity', 'No Finding', 'Pleural Effusion', 'Pleural Other', 'Pneumonia', 'Pneumothorax', 'Support Devices'], 'sort': False, 'add_na': False}

Building one sample
  Pipeline: ColReader -- {'cols': 'text', 'pref': '', 'suff': '', 'label_delim': None} -> Tokenizer -> Numericalize
    starting from
      dicom_id                                02aa804e-bde0afdd-112c0b34-7bc16630-4e384014
Atelectasis                                                                        0
Cardiomegaly                                    

Setting up Pipeline: ColReader -- {'cols': ['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Enlarged Cardiomediastinum', 'Fracture', 'Lung Lesion', 'Lung Opacity', 'No Finding', 'Pleural Effusion', 'Pleural Other', 'Pneumonia', 'Pneumothorax', 'Support Devices'], 'pref': '', 'suff': '', 'label_delim': None} -> EncodedMultiCategorize -- {'vocab': ['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Enlarged Cardiomediastinum', 'Fracture', 'Lung Lesion', 'Lung Opacity', 'No Finding', 'Pleural Effusion', 'Pleural Other', 'Pneumonia', 'Pneumothorax', 'Support Devices'], 'sort': False, 'add_na': False}
Setting up after_item: Pipeline: ToTensor
Setting up before_batch: Pipeline: Pad_Chunk -- {'pad_idx': 1, 'pad_first': True, 'seq_len': 72}
Setting up after_batch: Pipeline: 

Building one batch
Applying item_tfms to the first sample:
  Pipeline: ToTensor
    starting from
      (TensorText([ 2,  8, 17, 50, 72, 69,  9]), TensorMultiCategory([0., 0., 0., 0., 0., 0., 0., 0., 1., 

### 2. Data Loader

In [19]:
dl_cl = db_cl.dataloaders(df_cl)

In [20]:
dl_cl.show_batch()

Unnamed: 0,text,None
0,xxbos xxup xxunk and lateral chest xxunk were xxunk with patient in xxunk \n▁ position . xxmaj xxunk is performed in xxunk comparison with the xxunk xxunk \n▁ xxunk xxunk view chest xxunk of xxrep 3 _ . xxmaj xxunk post \n▁ xxunk unchanged . xxmaj no xxunk of xxunk xxunk xxunk . xxmaj unchanged \n▁ appearance of post - xxunk surgical xxunk in mediastinum xxunk xxunk post \n▁ xxmaj xxunk xxunk . xxmaj no increased xxunk of mediastinal xxunk and no pneumothorax \n▁ in the xxunk area . xxmaj right xxunk xxunk a pulmonary xxunk \n▁ without signs of vascular congestion or acute xxunk . xxmaj on the left lung \n▁ base xxunk xxunk in the left lower lobe area is xxunk and xxunk the \n▁ xxunk contours . xxmaj the lateral view xxunk that these xxunk \n▁ xxunk into the xxunk pleural xxunk on the left side xxunk,Atelectasis;Enlarged Cardiomediastinum
1,"xxbos 1 . xxmaj xxunk post xxunk xxunk for xxup xxunk with stable xxunk cardiac \n▁ and mediastinal contours . xxmaj xxunk bilateral chest tubes and mediastinal \n▁ xxunk xxunk in xxunk . xxmaj there is a left xxunk xxunk xxunk - xxunk catheter \n▁ with xxunk tip in the pulmonary xxunk xxunk . a nasogastric tube is seen \n▁ xxunk below the xxunk with the tip not identified . xxmaj there is an \n▁ endotracheal tube with the xxunk xxunk xxunk into the xxunk \n▁ xxunk of the left xxunk xxunk xxunk . xxmaj xxunk , xxunk to a previous \n▁ xxunk with the patient xxunk xxunk , this is a xxunk - xxunk tube , xxunk \n▁ the xxunk xxunk is not xxunk xxunk . xxmaj xxunk clinical \n▁ xxunk is xxunk . \n▁\n▁ 2 . a xxunk xxunk xxunk is again seen xxunk over the left mid",Atelectasis;Edema;Lung Opacity;Pleural Effusion;Pneumothorax;Support Devices
2,"xxbos xxmaj the right xxup picc tip xxunk over the upper xxup svc . \n▁ xxmaj xxunk and hilar contours unchanged from the prior xxunk . \n▁ xxmaj the xxunk tube is noted to be xxunk xxunk with xxunk to the \n▁ xxunk . xxmaj increased opacification at the left lung base likely xxunk \n▁ pleural effusion with adjacent atelectasis xxunk xxunk , pneumonia xxunk not be \n▁ xxunk xxunk in the xxunk clinical setting . xxmaj xxunk , \n▁ retrocardiac opacification may represent xxunk pleural effusion with \n▁ xxunk atelectasis xxunk xxunk , xxunk process xxunk as pneumonia xxunk not \n▁ be xxunk xxunk in the xxunk clinical setting . xxmaj xxunk right - xxunk \n▁ pleural effusion xxunk not be xxunk . xxmaj xxunk opacification of the right lung \n▁ base may represent atelectasis . \n▁\n▁ xxup xxunk xxunk \n▁ 1 . xxmaj the xxunk tube is noted",Atelectasis;Lung Opacity;Pleural Effusion;Pneumonia;Support Devices
3,"xxbos xxup ap chest compared to most xxunk prior chest radiographs , \n▁ xxrep 3 _ : \n▁\n▁ xxmaj left xxunk xxunk xxunk ends in the upper xxup svc . xxmaj heart is xxunk normal \n▁ size . \n▁\n▁ xxmaj lung volumes are xxunk and there is xxunk xxunk opacification at \n▁ both lung xxunk . xxmaj xxunk the xxunk vein is mildly xxunk , the upper lobe \n▁ pulmonary xxunk is not and xxunk xxunk xxunk the patient has \n▁ xxunk which would xxunk the xxunk of edema , pneumonia should be \n▁ the xxunk xxunk xxunk . xxmaj xxunk xxup ct on xxrep 3 _ xxunk xxunk \n▁ xxunk xxunk and xxunk xxunk atelectasis or aspiration or \n▁ post - pneumonia changes at the right base . xxmaj xxunk xxunk xxunk study suggests a \n▁ xxunk size xxunk in the right upper lobe at the level of the",Edema;Lung Lesion;Lung Opacity;Pneumonia
4,"xxbos xxup ap chest compared to xxrep 3 _ : \n▁\n▁ xxmaj there has been xxunk little change in the position of the right pleural tube at \n▁ the base of the right xxunk , as xxunk as xxunk xxunk xxunk on a xxunk frontal \n▁ view xxunk . xxmaj on xxrep 3 _ , xxunk the lateral view xxunk performed , there xxunk \n▁ xxunk xxunk cm of xxunk in the chest , xxunk xxunk lateral to the base \n▁ of the right lung . xxmaj there xxunk not xxunk to have been xxunk xxunk \n▁ since xxunk . xxmaj this xxunk xxunk no xxunk xxunk xxunk xxunk , for \n▁ xxunk , xxunk there is xxunk or xxunk fluid xxunk . xxmaj the xxunk \n▁ of right pleural effusion , which increased xxunk xxrep 3 _ and xxrep 3 _ , has \n▁ xxunk decreased . xxmaj xxunk",Cardiomegaly;Edema;Pleural Effusion;Support Devices
5,"xxbos xxup ap chest compared to xxrep 3 _ : \n▁\n▁ xxmaj xxunk of xxunk has decreased , there may be a xxunk increase in \n▁ fluid in the xxunk xxunk but the overall xxrep 3 _ of the \n▁ xxunk xxunk are stable . xxmaj xxunk in the left lung has \n▁ decreased , xxunk xxunk of atelectasis in both the lower and upper xxunk . \n▁ xxmaj right lung xxunk vascular engorgement and no xxunk edema . xxmaj there is no \n▁ right pleural effusion . xxup et tube has advanced , now below the level of the \n▁ xxunk xxunk and xxunk than 2 cm from the carina . xxmaj some of that is due to change \n▁ in position of the xxunk and xxunk , but xxunk the patient at xxunk of xxunk \n▁ xxunk xxunk xxunk 2 cm . \n▁\n▁ a xxunk tube xxunk into",Atelectasis;Enlarged Cardiomediastinum;Support Devices
6,"xxbos xxup xxunk xxup increase xxup in xxup large xxup xxunk xxup right xxup perihilar xxup consolidation xxup since xxrep 3 _ xxup could xxup be xxup xxunk xxup pneumonia . xxup mild xxup edema xxup in xxup the xxup left xxup lung xxup has xxup increased \n▁ xxup as xxup xxunk . xxup consolidation xxup in xxup the xxup left xxup lower xxup lobe , xxup xxunk xxup xxunk xxup xxunk , xxup has \n▁ xxup not xxup xxunk xxup in xxup could xxup be xxup xxunk xxup xxunk xxup of xxup infection . xxup small xxup right xxup pleural \n▁ xxup effusion xxup has xxup increased . xxup moderate xxup cardiomegaly xxup with xxup xxunk xxup left xxup xxunk \n▁ xxup xxunk xxup is xxup unchanged . \n▁\n▁ xxup et xxup tube xxup and xxup right xxup xxunk xxup line xxup are xxup in xxup standard xxup xxunk",Atelectasis;Cardiomegaly;Consolidation;Edema;Pleural Effusion;Pneumonia;Support Devices
7,"xxbos xxup ap chest compared to xxrep 3 _ : \n▁\n▁ xxmaj atelectasis in the left lung base above the xxunk xxunk xxunk \n▁ xxunk has improved . xxmaj the xxunk xxunk xxunk to the left of the \n▁ cardiac silhouette is in a xxunk xxunk . xxmaj xxunk the left xxunk \n▁ xxunk xxunk xxunk or there is xxunk xxunk xxunk of \n▁ xxunk xxunk into the left chest . xxmaj moderate cardiomegaly is stable , but \n▁ the xxunk of xxunk mediastinal xxunk which xxunk most xxunk prior to \n▁ xxunk xxunk is xxunk xxunk xxunk . xxmaj mediastinal xxunk is xxunk \n▁ and there is a small left pleural effusion , but no pulmonary edema . xxmaj the \n▁ xxunk xxunk ends above the xxunk of xxunk xxunk in the left \n▁ xxunk , unchanged since the xxunk xxunk xxunk . xxmaj right xxunk \n▁ line ends",Atelectasis;Cardiomegaly;Pleural Effusion;Support Devices
8,"xxbos xxmaj patient xxunk xxunk xxunk xxunk in xxunk xxunk - xxunk \n▁ position xxunk xxup ap frontal and left lateral xxunk . xxmaj an xxunk left - xxunk \n▁ xxunk xxunk xxunk xxunk xxunk . xxmaj comparison is made with the xxunk xxunk \n▁ xxunk chest xxunk of xxrep 3 _ . xxmaj the xxunk xxunk and \n▁ xxunk xxunk xxunk in the right xxunk xxunk and represent \n▁ the patient xxunk advanced lung xxunk . xxmaj mild mediastinal xxunk xxunk the right \n▁ xxunk lower lobe xxunk , xxunk unchanged . xxmaj linear basal atelectasis on \n▁ the left base as xxunk and mild xxunk of the left lateral pleural xxunk is \n▁ identified . xxmaj no evidence of new xxunk xxunk in the left \n▁ xxunk . xxmaj the lateral view xxunk that the pleural effusion xxunk \n▁ into the xxunk pleural xxunk , which xxunk xxunk",Atelectasis;Pleural Effusion


### 3. Training

In [21]:
text_classifier_learner??

[0;31mSignature:[0m
[0mtext_classifier_learner[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mdls[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0march[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mseq_len[0m[0;34m=[0m[0;36m72[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mconfig[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mbackwards[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mpretrained[0m[0;34m=[0m[0;32mTrue[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdrop_mult[0m[0;34m=[0m[0;36m0.5[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mn_out[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mlin_ftrs[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mps[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmax_len[0m[0;34m=[0m[0;36m1440[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0my_range[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m   

In [22]:
loss_func = BCEWithLogitsLossFlat(thresh=0.8)
metrics = [partial(accuracy_multi, thresh=0.8), 
           F1ScoreMulti(average='macro'),
           PrecisionMulti (average='macro'),
           RecallMulti (average='macro'),
           RocAucMulti (average='macro')]

In [23]:
learn_cl = text_classifier_learner(dl_cl, AWD_LSTM, metrics=metrics, loss_func=loss_func)

In [24]:
learn_cl.to_fp16()
learn_cl.fine_tune(10)

epoch,train_loss,valid_loss,accuracy_multi,f1_score,precision_score,recall_score,roc_auc_score,time
0,0.298074,0.293275,0.852732,0.329436,0.556032,0.263799,0.807848,01:02


epoch,train_loss,valid_loss,accuracy_multi,f1_score,precision_score,recall_score,roc_auc_score,time
0,0.23256,0.211323,0.895571,0.54059,0.686906,0.471484,0.897348,01:39
1,0.164115,0.132119,0.950082,0.667675,0.767849,0.634463,0.942396,01:39
2,0.122536,0.098486,0.966164,0.759278,0.856039,0.723474,0.965042,01:40
3,0.105556,0.089062,0.97063,0.807966,0.85844,0.782706,0.972874,01:39
4,0.097581,0.080324,0.973734,0.82768,0.870449,0.802926,0.975425,01:38
5,0.088651,0.075611,0.974278,0.83083,0.874848,0.803812,0.977752,01:38
6,0.081026,0.07795,0.973879,0.824182,0.874967,0.797202,0.978859,01:46
7,0.081683,0.072619,0.974968,0.831501,0.880917,0.802782,0.980392,01:39
8,0.076505,0.075065,0.974351,0.830359,0.885278,0.8,0.980693,01:39
9,0.077682,0.072027,0.975821,0.836422,0.876237,0.812376,0.980977,01:39
