In [1]:
import os
import pandas as pd
import numpy as np
import cv2

In [2]:
# Read the CheXpert label tables for the train and validation splits.
train_csv_path = 'CheXpert-v1.0-small/train.csv'
valid_csv_path = 'CheXpert-v1.0-small/valid.csv'

full_train_df = pd.read_csv(train_csv_path)
full_valid_df = pd.read_csv(valid_csv_path)

# Preview the first rows of the training table.
full_train_df.head()

Unnamed: 0,Path,Sex,Age,Frontal/Lateral,AP/PA,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices
0,CheXpert-v1.0-small/train/patient00001/study1/...,Female,68,Frontal,AP,1.0,,,,,,,,,0.0,,,,1.0
1,CheXpert-v1.0-small/train/patient00002/study2/...,Female,87,Frontal,AP,,,-1.0,1.0,,-1.0,-1.0,,-1.0,,-1.0,,1.0,
2,CheXpert-v1.0-small/train/patient00002/study1/...,Female,83,Frontal,AP,,,,1.0,,,-1.0,,,,,,1.0,
3,CheXpert-v1.0-small/train/patient00002/study1/...,Female,83,Lateral,,,,,1.0,,,-1.0,,,,,,1.0,
4,CheXpert-v1.0-small/train/patient00003/study1/...,Male,41,Frontal,AP,,,,,,1.0,,,,0.0,,,,


In [3]:
# The 14 observation columns of the label CSVs, in the same order as the file header.
chexnet_targets = [
    'No Finding',
    'Enlarged Cardiomediastinum',
    'Cardiomegaly',
    'Lung Opacity',
    'Lung Lesion',
    'Edema',
    'Consolidation',
    'Pneumonia',
    'Atelectasis',
    'Pneumothorax',
    'Pleural Effusion',
    'Pleural Other',
    'Fracture',
    'Support Devices',
]

# Five-observation subset of the list above.
chexpert_targets = [
    'Atelectasis',
    'Cardiomegaly',
    'Consolidation',
    'Edema',
    'Pleural Effusion',
]

In [4]:
# Observations for which feature_string counts an uncertain label (-1)
# the same as a positive label (1).
u_one_features = [
    'Atelectasis',
    'Edema',
]

# Observations for which feature_string counts only an explicit positive (1);
# an uncertain label (-1) is not counted.
u_zero_features = [
    'Cardiomegaly',
    'Consolidation',
    'Pleural Effusion',
]

In [5]:
def feature_string(row, u_one=None, u_zero=None):
    """Build a ';'-separated string of the observations counted as positive for a row.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series)
        Must support ``row[name]`` for every name in ``u_one`` and ``u_zero``.
    u_one : iterable of str, optional
        Observations counted when their label is ``1`` or ``-1``.
        Defaults to the notebook-level ``u_one_features``.
    u_zero : iterable of str, optional
        Observations counted only when their label is ``1``.
        Defaults to the notebook-level ``u_zero_features``.

    Returns
    -------
    str
        Counted observation names joined with ';' (``u_one`` names first, then
        ``u_zero`` names, each in list order); ``''`` when nothing is counted.
    """
    # Resolve the defaults at call time so the function follows the current
    # notebook globals, exactly like the original one-argument version.
    if u_one is None:
        u_one = u_one_features
    if u_zero is None:
        u_zero = u_zero_features

    # NaN labels never compare equal to -1 or 1, so they are skipped.
    counted = [name for name in u_one if row[name] in (-1, 1)]
    counted += [name for name in u_zero if row[name] == 1]

    return ';'.join(counted)

In [6]:
# Show every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)  # uncomment to also lift the row limit
# Do not truncate long cell values (e.g. image paths). `None` is the supported
# "no limit" value; the older `-1` is deprecated and rejected by newer pandas.
pd.set_option('display.max_colwidth', None)

In [7]:
# Derive patient and study identifiers from the image path, whose components
# are <dataset>/<split>/<patient>/<study>/<image file>.
# `expand` is passed by keyword: newer pandas no longer accepts it positionally.
for split_df in (full_train_df, full_valid_df):
    path_parts = split_df['Path'].str.split('/', expand=True)
    split_df['patient'] = path_parts[2]
    split_df['study'] = path_parts[3]

# Stack both splits into one frame. The index is not reset, so index labels
# from the two source frames can repeat.
full_df = pd.concat([full_train_df, full_valid_df])

# ';'-joined observation names per row; feature_string always returns a str
# ('' when nothing is counted), so no NaN filling is needed afterwards.
full_df['feature_string'] = full_df.apply(feature_string, axis=1)


In [8]:
# Preview only the first rows instead of rendering the whole concatenated frame.
full_df.head(10)

Unnamed: 0,Path,Sex,Age,Frontal/Lateral,AP/PA,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices,patient,study,feature_string
0,CheXpert-v1.0-small/train/patient00001/study1/view1_frontal.jpg,Female,68,Frontal,AP,1.0,,,,,,,,,0.0,,,,1.0,patient00001,study1,
1,CheXpert-v1.0-small/train/patient00002/study2/view1_frontal.jpg,Female,87,Frontal,AP,,,-1.0,1.0,,-1.0,-1.0,,-1.0,,-1.0,,1.0,,patient00002,study2,Atelectasis;Edema
2,CheXpert-v1.0-small/train/patient00002/study1/view1_frontal.jpg,Female,83,Frontal,AP,,,,1.0,,,-1.0,,,,,,1.0,,patient00002,study1,
3,CheXpert-v1.0-small/train/patient00002/study1/view2_lateral.jpg,Female,83,Lateral,,,,,1.0,,,-1.0,,,,,,1.0,,patient00002,study1,
4,CheXpert-v1.0-small/train/patient00003/study1/view1_frontal.jpg,Male,41,Frontal,AP,,,,,,1.0,,,,0.0,,,,,patient00003,study1,Edema
5,CheXpert-v1.0-small/train/patient00004/study1/view1_frontal.jpg,Female,20,Frontal,PA,1.0,0.0,,,,,0.0,,,,0.0,,,,patient00004,study1,
6,CheXpert-v1.0-small/train/patient00004/study1/view2_lateral.jpg,Female,20,Lateral,,1.0,0.0,,,,,0.0,,,,0.0,,,,patient00004,study1,
7,CheXpert-v1.0-small/train/patient00005/study1/view1_frontal.jpg,Male,33,Frontal,PA,1.0,,0.0,,,,0.0,,,,0.0,,,1.0,patient00005,study1,
8,CheXpert-v1.0-small/train/patient00005/study1/view2_lateral.jpg,Male,33,Lateral,,1.0,,0.0,,,,0.0,,,,0.0,,,1.0,patient00005,study1,
9,CheXpert-v1.0-small/train/patient00005/study2/view1_frontal.jpg,Male,33,Frontal,AP,,,,,,,,,,1.0,,,,,patient00005,study2,
