In [1]:
from google.colab import drive
# Attach Google Drive to the Colab VM; the working directory and every CSV
# written by this notebook live under this mount point.
DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
import ast

In [3]:
# Exploratory: list the current working directory to confirm the Drive mount is visible.
!ls

drive  sample_data


In [4]:
# Switch the kernel's working directory to the project folder on Drive.
# The relative "summary/..." paths passed to pd.read_csv in later cells depend on this.
%cd /content/drive/MyDrive/siim/

/content/drive/MyDrive/siim


In [5]:
# Exploratory: list the project folder (the "summary" directory read below should appear here).
!ls

competition  siim-covid19-detection  summary  tmp  version1


In [11]:
# Load the per-image metadata and derive one (image_id, width, height) table
# per split. `dim1` is exposed as `width` and `dim0` as `height`.
meta = pd.read_csv("summary/meta.csv")

dim_columns = ["image_id", "dim1", "dim0"]
dim_renames = {"dim1": "width", "dim0": "height"}

is_train = meta["split"] == "train"
is_test = meta["split"] == "test"

meta_train = meta.loc[is_train, dim_columns].rename(columns=dim_renames)
meta_test = meta.loc[is_test, dim_columns].rename(columns=dim_renames)

meta_train.head()

Unnamed: 0,image_id,width,height
0,47bdddec0c86,3050,2539
1,ad57016e19dc,3408,2800
2,e32caf657784,2642,2585
3,14bd5c85aea2,3004,3052
4,7317e56f9189,2969,2965


In [12]:
def _without_image_suffix(raw_id):
    """Return `raw_id` with every "_image" occurrence removed."""
    return raw_id.replace("_image", "")


# Image-level annotations: strip the "_image" marker from the ids and switch to
# the column names used by the rest of the notebook.
train_image_level = (
    pd.read_csv("summary/train_image_level.csv")
    .assign(id=lambda frame: frame["id"].map(_without_image_suffix))
    .rename(
        columns={
            "id": "image_id",
            "label": "image_label",
            "StudyInstanceUID": "study_id",
        }
    )
)
train_image_level.head()

Unnamed: 0,image_id,boxes,image_label,study_id
0,000a312787f2,"[{'x': 789.28836, 'y': 582.43035, 'width': 102...",opacity 1 789.28836 582.43035 1815.94498 2499....,5776db0cec75
1,000c3a3f293f,,none 1 0 0 1 1,ff0879eb20ed
2,0012ff7358bc,"[{'x': 677.42216, 'y': 197.97662, 'width': 867...",opacity 1 677.42216 197.97662 1545.21983 1197....,9d514ce429a7
3,001398f4ff4f,"[{'x': 2729, 'y': 2181.33331, 'width': 948.000...",opacity 1 2729 2181.33331 3677.00012 2785.33331,28dddc8559b2
4,001bd15d1891,"[{'x': 623.23328, 'y': 1050, 'width': 714, 'he...",opacity 1 623.23328 1050 1337.23328 2156 opaci...,dfd9fdd85a3e


In [13]:
train_study_level = pd.read_csv("summary/train_study_level.csv")

# Class index -> study-level label. Each label text is also the name of the
# matching indicator column in the CSV.
classes_dict = {
    0: "Negative for Pneumonia",
    1: "Typical Appearance",
    2: "Indeterminate Appearance",
    3: "Atypical Appearance",
}
label_columns = [classes_dict[class_id] for class_id in sorted(classes_dict)]

# Collapse the four indicator columns into a single class index per study:
# the position of the largest indicator in each row (first one wins on ties).
class_index = train_study_level[label_columns].to_numpy().argmax(axis=1)

train_study_level["pneumonia"] = [classes_dict[class_id] for class_id in class_index]
train_study_level["pneumonia_class"] = class_index

# The indicator columns are now redundant; keep only id + derived labels.
train_study_level = train_study_level.drop(columns=label_columns)

# Strip the "_study" marker so the ids line up with `study_id` elsewhere.
train_study_level["id"] = [
    raw_id.replace("_study", "") for raw_id in train_study_level["id"]
]
train_study_level = train_study_level.rename(columns={"id": "study_id"})
train_study_level.head()

Unnamed: 0,study_id,pneumonia,pneumonia_class
0,00086460a852,Typical Appearance,1
1,000c9c05fd14,Atypical Appearance,3
2,00292f8c37bd,Negative for Pneumonia,0
3,005057b3f880,Negative for Pneumonia,0
4,0051d9b12e72,Atypical Appearance,3


In [14]:
# Build the consolidated training table: image-level annotations joined with
# the study-level labels (on study_id) and the image dimensions (on image_id).
# Both merges use pandas' default inner join, so rows without a match on the
# key are dropped.
train_info = pd.merge(train_image_level, train_study_level, on="study_id")
train_info = pd.merge(train_info, meta_train, on="image_id")

# Rows with no annotation have NaN in `boxes`; substitute a 1x1 placeholder box
# at the origin so every row can be parsed below.
# The result is assigned back rather than using fillna(inplace=True) on the
# column selection: that form is a chained in-place update, which newer pandas
# warns about and which does not modify `train_info` under Copy-on-Write,
# leaving NaNs that make ast.literal_eval raise.
train_info["boxes"] = train_info["boxes"].fillna(
    "[{'x':0,'y':0,'width':1,'height':1}]"
)

# `boxes` is stored as the string repr of a list of dicts; parse it into
# real Python objects.
train_info["boxes"] = train_info["boxes"].map(ast.literal_eval)

# Alias to the full merged frame (all columns, parsed boxes), kept for ad-hoc
# inspection; `train_info` is rebound to the column subset below.
temp = train_info

# Fix the column order of the exported table.
columns = [
    "image_id",
    "study_id",
    "pneumonia",
    "pneumonia_class",
    "height",
    "width",
    "boxes",
    "image_label",
]
train_info = train_info[columns]

train_info.to_csv("/content/drive/MyDrive/siim/summary/train_info.csv", index=False)
train_info.head()

Unnamed: 0,image_id,study_id,pneumonia,pneumonia_class,height,width,boxes,image_label
0,000a312787f2,5776db0cec75,Typical Appearance,1,3488,4256,"[{'x': 789.28836, 'y': 582.43035, 'width': 102...",opacity 1 789.28836 582.43035 1815.94498 2499....
1,000c3a3f293f,ff0879eb20ed,Negative for Pneumonia,0,2320,2832,"[{'x': 0, 'y': 0, 'width': 1, 'height': 1}]",none 1 0 0 1 1
2,0012ff7358bc,9d514ce429a7,Typical Appearance,1,2544,3056,"[{'x': 677.42216, 'y': 197.97662, 'width': 867...",opacity 1 677.42216 197.97662 1545.21983 1197....
3,001398f4ff4f,28dddc8559b2,Atypical Appearance,3,3520,4280,"[{'x': 2729, 'y': 2181.33331, 'width': 948.000...",opacity 1 2729 2181.33331 3677.00012 2785.33331
4,001bd15d1891,dfd9fdd85a3e,Typical Appearance,1,2800,3408,"[{'x': 623.23328, 'y': 1050, 'width': 714, 'he...",opacity 1 623.23328 1050 1337.23328 2156 opaci...


In [15]:
# Persist the intermediate tables next to the consolidated one, without the index.
csv_exports = {
    "/content/drive/MyDrive/siim/summary/meta_train.csv": meta_train,
    "/content/drive/MyDrive/siim/summary/meta_test.csv": meta_test,
    "/content/drive/MyDrive/siim/summary/train_image_level_temp.csv": train_image_level,
    "/content/drive/MyDrive/siim/summary/train_study_level_temp.csv": train_study_level,
}
for csv_path, frame in csv_exports.items():
    frame.to_csv(csv_path, index=False)