In [30]:
from functools import partial
import pandas as pd

In [31]:
def add_path(file_, path):
  '''
  prepend the images folder to the file name

  Args:
    file_ (str) --> file that is inside the folder
    path (str) --> folder that contains the files, with or without a
                   trailing '/'

  Returns:
    filepath (str) --> file with destination path
  '''

  # an empty folder, or one that already ends in '/', is concatenated as is;
  # otherwise the missing separator is inserted so that '/content' and
  # 'img.jpg' give '/content/img.jpg' instead of '/contentimg.jpg'
  if not path or path.endswith('/'):
    return path + file_

  return path + '/' + file_

In [1]:
!cp /content/drive/MyDrive/datasets/chexpert-v10-small.zip /content/
!unzip /content/chexpert-v10-small.zip

In [73]:
# Load the CheXpert training table and reduce it in one pass:
#   1. replace every missing value, in every column, with 0
#   2. discard the columns that are not used further on
#   3. keep only rows marked 'Frontal' whose projection is 'AP' or 'PA'
#   4. discard the two view columns once they have served as filters
chexpert_df = (
    pd.read_csv('/content/CheXpert-v1.0-small/train.csv')
    .fillna(0)
    .drop(columns = ['Age', 'Enlarged Cardiomediastinum', 'Cardiomegaly',
                     'Pleural Other', 'Fracture', 'Support Devices'])
    .loc[lambda df: df['Frontal/Lateral'] == 'Frontal']
    .loc[lambda df: df['AP/PA'].isin(['AP', 'PA'])]
    .drop(columns = ['Frontal/Lateral', 'AP/PA'])
)

In [74]:
# Prefix every image path with the folder the archive was extracted into.
partial_add_path = partial(add_path, path = '/content/')
path = [partial_add_path(file_) for file_ in chexpert_df['Path']]
chexpert_df['Path'] = path

# Lower-case the sex labels. The result is assigned back to the column:
# calling replace(..., inplace = True) on chexpert_df['Sex'] is chained
# assignment through an in-place method, which raises a warning on recent
# pandas and leaves chexpert_df unchanged when Copy-on-Write is enabled.
chexpert_df['Sex'] = chexpert_df['Sex'].replace({'Female': 'female',
                                                 'Male': 'male'})

In [75]:
classes = ['No Finding', 'Lung Opacity', 'Lung Lesion', 'Edema', 
           'Consolidation', 'Pneumonia', 'Atelectasis', 'Pneumothorax',
           'Pleural Effusion']

# For each class, collect the rows where that class is exactly 1.0 and every
# other class in `classes` is exactly 0.0. Rows holding any other value in one
# of those columns (e.g. -1.0, presumably CheXpert's "uncertain" label --
# confirm) match no class and are therefore dropped by the concat below.
# The other classes are compared as a group, so the selection keeps working
# if `classes` is lengthened or shortened.
findings = list()
for classe in classes:
  classes_aux = [other for other in classes if other != classe]
  is_only_finding = ((chexpert_df[classe] == 1.0) &
                     (chexpert_df[classes_aux] == 0.0).all(axis = 1))
  findings.append(chexpert_df[is_only_finding])

# The 'No Finding' rows are not kept; locate their sub-frame by name rather
# than by assuming it sits at position 0.
del findings[classes.index('No Finding')]

chexpert_df = pd.concat(findings)

In [76]:
# Final layout of the table: a constant 'Dataset' tag first, then the image
# path, the sex and one column per remaining finding.
columns = ['Dataset', 'Path', 'Sex', 'Lung Opacity', 'Lung Lesion', 'Edema', 
           'Consolidation', 'Pneumonia', 'Atelectasis', 'Pneumothorax',
           'Pleural Effusion']

# Drop the 'No Finding' column, tag every row with the dataset name and put
# the columns in the order listed above.
chexpert_df = (
    chexpert_df
    .drop(columns = 'No Finding')
    .assign(Dataset = 'CheXpert')
    [columns]
)

In [77]:
# Write the selected columns to 'chexpert_df.csv' in the working directory.
# The DataFrame index is written too (pandas' default), as the first,
# unnamed column of the file.
chexpert_df.loc[:, columns].to_csv('chexpert_df.csv')