In [1]:
# define logging and working directory
from ProjectRoot import change_wd_to_project_root 
change_wd_to_project_root()
%matplotlib inline
%reload_ext autoreload
%autoreload 2
from src.utils.notebook_imports import *
from src.utils.utils_io import Console_and_file_logger, ensure_dir
from src.visualization.Visualize import plot_3d_vol, plot_4d_vol, plot_value_histogram, show_2D_or_3D
from src.data.Dataset import get_metadata_maybe, filter_4d_vol, copy_meta_and_save, create_3d_volumes_from_4d_files, describe_sitk, describe_volume, describe_path, get_phase, is_patient_in_df, get_extremas
Console_and_file_logger('Create 3D dataframe for voxelmorph')

search for root_dir and set working directory
Working directory set to: /mnt/data/git/cardio


Using TensorFlow backend.
2020-07-23 15:41:56,112 INFO -------------------- Start --------------------
2020-07-23 15:41:56,115 INFO Working directory: /mnt/data/git/cardio.
2020-07-23 15:41:56,115 INFO Log file: ./logs/Create 3D dataframe for voxelmorph.log
2020-07-23 15:41:56,116 INFO Log level for console: INFO


<src.utils.utils_io.Console_and_file_logger at 0x7f4044c75710>

# Create a 3D dataframe 
## Extract the t position from the filenames
## normalize labeled timesteps to values between 0 and 5
## Filter all patients with less than 5 labeled timesteps
 

In [5]:
img_path = sorted(glob.glob(os.path.join('data/raw/gcn_05_2020_ax_sax_86/AX_3D_ISO/', '*msk.nrrd')))
len(img_path)

162

In [6]:
def get_date_from_columns(row):
    
    d = '{:02}'.format(int(row['DD']))
    m = '{:02}'.format(int(row['MM']))
    y = '{:04}'.format(int(row['YYYY']))
    return '{}-{}-{}'.format(y, m, d)
    
    

In [7]:
def get_date_from_filename(f_name):
    
    import re
    return re.findall(r'\d\d\d\d-\d\d-\d\d',f_name)[0]

In [8]:
def get_volumes(f_name):
    """
    expects a full filename for a nrrd mask
    returns: a dict with the format: {label1 : size1, label2 : size2 ...}
    volume size in ml^3, calculated with # voxels * spacing_x * spacing_y * spacing_z
    """
    # load image, transform to nda
    img = sitk.ReadImage(f_name)
    nda = sitk.GetArrayFromImage(img)
    
    # describe image, get dict of key, values
    descr = describe_volume(f_name)
    
    # helper, calculate the volume in ml for one label
    def calc_vol(index):
        return (nda==index).sum() * descr.get('x-spacing',1) * descr.get('y-spacing',1) * descr.get('z-spacing',1)//1000
    
    # calc volume for each label in ml, return a dict with label-value: volume-size in ml
    volumes = dict([(int(i),calc_vol(i)) for i in np.unique(nda)])
    return volumes

In [16]:
# extract t from the filenames
def extract_t_from_filename(f_name):
    return int(os.path.basename(os.path.normpath(f_name)).split('__')[1].split('_')[0].replace('t',''))

In [13]:
def clean_volume_dataframe(df, timesteps=5):
    """
    Handle Nan fields, convert columns datatypes, calc t_norm, create img and mask file columns
    drop all patients with labeled timesteps != 5
    returns cleaned df
    """
    from collections import Counter
    
    # handle nan
    df = df.fillna(0)
    df['file'] = img_path
    df['patient'] = df.apply(lambda x : os.path.basename(x['file']).split('-')[1], axis=1)
    try: # 4D files
        df['t'] = df.apply(lambda x : os.path.basename(x['file']).split('_msk')[0].split('__')[1].replace('t', ''), axis=1)
    except:
        logging.info('try 3D pattern matching to etract t from the filenames')
        df['t'] = df.file.apply(extract_t_from_filename)
    # convert strings to int, for the 4 labels per patient and the timestep
    df[[0, 1, 2, 3, 't']] = df[[0, 1, 2, 3, 't']].astype(np.int)
    # convert patient id to string, make sure
    df['patient'] = df['patient'].astype(str)
    
    # rename label columns names
    cols = list(df.columns)
    cols[0:4]  = ['background', 'rv', 'myo', 'lv'] 
    df.columns = cols
    
    # find all patients with labeled timesteps != 5
    patients = len(df['patient'].unique())
    print('found {} patients'.format(patients))
    c = Counter(df.patient)
    p_remove = [key for key,value in c.items() if value != timesteps]
    print('patients with labeled timesteps != {}: \n{}'.format(timesteps,p_remove))
    df = df[~df['patient'].isin(p_remove)]
    patients = len(df['patient'].unique())
    print('found {} cleaned patients'.format(patients))
    
    # sort values by patient id and timesteps, 
    # create the normalized time columne, expecting every patient to have 5 timesteps (cleaned before)
    df.sort_values(['patient', 't'], inplace=True)
    patients = len(df['patient'].unique())
    print('found {} patients'.format(patients))
    temp = list(range(timesteps))
    df['t_norm'] = temp * patients
    
    # rename columns to work the same way as the 2D dataframe
    df['y_path'] = df['file']
    df['x_path'] = df['file'].str.replace('msk', 'img')
    df.columns = df.columns.str.replace('file', 'y_path')
    
    return df

    


In [14]:
@interact
def calc_vol_size_interact(f_name=img_path):
    """
    calculate the volume sizes for each label of a 3D volume
    """
    
    logging.info(f_name)
    img = sitk.ReadImage(f_name)
    nda = sitk.GetArrayFromImage(img)

    logging.info('Shape: {}'.format(img.GetSize()))
    logging.info('Spacing: {}'.format(img.GetSpacing()))
    logging.info('{}{}{}'.format('-'*10, ' Volumes sizes in voxels ', '-'*10))
    logging.info('Backround voxels: {}'.format((nda==0).sum()))
    logging.info('RV voxels: {}'.format((nda==1).sum()))
    logging.info('Myo voxels: {}'.format((nda==2).sum()))
    logging.info('LV voxels: {}'.format((nda==3).sum()))
    
    #bsa = Wurzel(Größe [cm] x Gewicht [kg] / 3600)
    vols = get_volumes(f_name)
    logging.info('{}{}{}'.format('-'*10, ' Volume sizes in ml ', '-'*10))
    logging.info('Background in ml: {}'.format(vols[0]))
    logging.info('RV in ml: {}'.format(vols[1]))
    logging.info('MYO in ml: {}'.format(vols[2]))
    logging.info('LV in ml: {}'.format(vols[3]))
    #describe_sitk(img)
    

interactive(children=(Dropdown(description='f_name', options=('data/raw/gcn_05_2020_ax_sax_86/AX_3D_ISO/0000-0…

# Create volume dataframe

In [11]:
pd.set_option('display.max_colwidth', -1)
df_volumes = pd.DataFrame([get_volumes(f) for f in img_path])

# Clean volume dataframe

In [17]:
df_volumes = clean_volume_dataframe(df_volumes)
# check for nan values
df_volumes[df_volumes.isnull().any(axis=1)]
# show top of df
df_volumes.head()

2020-07-06 10:50:20,047 INFO try 3D pattern matching to etract t from the filenames


IndexError: list index out of range

# Create a dicom tag dataframe

In [10]:
# this will search for subdirectories if no partterns match in the parent folder
# filepatterns are defined by the dataset parameter
df2 = describe_path('data/raw/GCN/3D/', dataset='GCN', plot_histogram=False)

# drop masks as own rows, they will be in the same row as the image
df2 = df2[df2['image'] == True]

# create the same patient columns as in the volume df
df2['patient_dicom'] = df2.PatientID
df2['x_path'] = df2['f_name']

# same files have different patient IDs in the dicom tag than in the filename, use the ids from the filesnames, 
# they should be correct according to manual research in metadata.xls and circle
df2['patient'] = df2.apply(lambda x : os.path.basename(x['x_path']).split('-')[1], axis=1)
df2[df2.apply(lambda x : x['patient_dicom'] != x['patient'], axis=1)][['patient_dicom', 'patient']]

2019-11-27 21:18:44,202 INFO Using GCN dataset
2019-11-27 21:18:44,203 INFO search in subfolders ...
2019-11-27 21:18:44,210 INFO describing path: data/raw/GCN/3D/


Unnamed: 0,patient_dicom,patient
135,04NEJQU7,04NEJQUZ
136,04NEJQU7,04NEJQUZ
137,04NEJQU7,04NEJQUZ
138,04NEJQU7,04NEJQUZ
139,04NEJQU7,04NEJQUZ
711,D4PXE75F,KW4MJ3XX
712,D4PXE75F,KW4MJ3XX
713,D4PXE75F,KW4MJ3XX
714,D4PXE75F,KW4MJ3XX
715,D4PXE75F,KW4MJ3XX


# Clean dicom tag dataframe

In [11]:
# find all patients in the second dataframe with labeled timesteps != 5
patients = len(df2['patient'].unique())
print('found {} patients'.format(patients))
from collections import Counter
c = Counter(df2.patient)
p_remove = [key for key,value in c.items() if value != 5]
print('patients with labeled timesteps != 5: \n{}'.format(p_remove))
df2 = df2[~df2['patient'].isin(p_remove)]
print('found {} cleaned patients'.format(patients))

df_volumes.shape
df2.shape

found 209 patients
patients with labeled timesteps != 5: 
['E2HMADJ3', 'F0QP6ZJR', 'GYMP57R6', 'L1ACV3UE', 'RNMQ8VH6', 'TX0L610P']
found 209 cleaned patients


(1015, 10)

(1015, 48)

In [12]:
# check if both dataframes have the same ids
set(df_volumes.patient.unique()) - set(df2.patient.unique())
set(df2.patient.unique()) - set(df_volumes.patient.unique())

set()

set()

In [13]:
# check if both dataframes have the same x_path / unique key
set(df_volumes['x_path'].unique()) - set(df2['x_path'].unique())
set(df2['x_path'].unique()) - set(df_volumes['x_path'].unique())

set()

set()

# Merge volume dataframe and dicom metadata dataframe

In [14]:
# merge by x_path, which is unique for the 3d files, patient id is not unique, because we have 5 files per patient
df = pd.merge(df_volumes, df2, on='x_path')
len(df_volumes.patient.unique())
df.shape
# check if all patient ids have matched
all(df.patient_x == df.patient_y)
# delete double columns
df['patient'] = df['patient_x']
df = df.drop('patient_y', axis =1)
df = df.drop('patient_x', axis =1)

203

(1015, 57)

True

In [15]:
# save to disk
df.to_csv('reports/vae/3d_gcn.csv')

# Read in the Excel metadata and merge into the existing dataframe

In [16]:
# read the joined metadata xls
df_meta = pd.read_excel('data/external/metadata_joined.xls')
df_meta['patient'] = df_meta['PID']

In [17]:
# check if all patient ids of our current df are in this excel sheet
set(df.patient) - set(df_meta.patient)

set()

In [18]:
# merge and save to disk
df = pd.merge(df, df_meta, on='patient', how='left')
df.shape
df.to_csv('reports/vae/3d_gcn.csv')

(1015, 145)

# Read in Excel with corrected outcome sheet and merge into existing one

In [24]:
# read in corrected xls sheet, dwefine the patient columns, delete PID column
df_outcome_corrected = pd.read_excel('data/external/Outcome_TOF_GCN_HL_15072018_for Tarique and Sven.xlsx')
df_outcome_corrected['patient'] = df_outcome_corrected['PID']
df_outcome_corrected = df_outcome_corrected.drop('PID', axis=1)
# minor cleaning is neccessary because the excel sheet had spaces in pid and outcome column
df_outcome_corrected['patient'] = df_outcome_corrected['patient'].apply(lambda x : x.replace(' ', ''))
df_outcome_corrected['Outcome y/n'] = df_outcome_corrected['Outcome y/n'].apply(lambda x : str(x).replace(' ', ''))

# check if all patient ids of our current df are in this excel sheet
len(set(df.patient) - set(df_outcome_corrected.patient))

0

In [33]:
Counter(df_outcome_corrected['Outcome y/n'])

Counter({'y': 25, 'n': 341, 'nan': 44})

In [22]:
# currrent outcome column values
df['Outcome y/n'].unique()
# drop all double columns, keep the columns from the corrected sheet
df = df.drop(['Outcome y/n', 'Date PVR', 'PVR nach MRT y/n', 'Date Outcome'], axis=1, errors='ignore')

array(['n', 'y', nan, '?'], dtype=object)

In [28]:
df = pd.merge(df, df_outcome_corrected, on='patient', how='left')
df.shape
df.columns

(1015, 147)

Index(['background', 'rv', 'myo', 'lv', 'y_path', 't', 't_norm', 'y_path',
       'x_path', '.50-quantle',
       ...
       'Year of the redo surgeries Jahr der Redos',
       'redo after V1 RE OP nach V1',
       'surgery date while enrolled in study OP Datum während Studie',
       'tricuspid insufficiency grades 0-4 tricinsuf', 'Last Report Date_y',
       'PVR nach MRT y/n', 'Date PVR', 'Outcome y/n', 'Type Outcome_y',
       'Date Outcome'],
      dtype='object', length=147)

In [36]:
Counter(df['Outcome y/n'])

Counter({'n': 855, 'y': 60, 'nan': 100})

In [38]:
# save dataframe of 3d files to disk
df.to_csv('reports/vae/3d_gcn.csv')

# Read in Excel with corrected phases, merge into existing dataframe

In [39]:
df_timesteps = pd.read_excel('data/external/GCN Dataset_timesteps.xlsx')

# clean
df_timesteps['Contoured?'] = df_timesteps['Contoured?'].str.replace("Y ", "y").replace('Y', 'y').replace('N ', 'N')
df_timesteps['patient'] = df_timesteps['ID']
df_timesteps = df_timesteps.drop('ID', axis=1, errors='ignore')

# we only need the rows with exported xml files
df_timesteps = df_timesteps[df_timesteps['XML exported']== 'Y' ]

In [40]:
# create a unique identifier to map them
df_timesteps['patient_unique'] = df_timesteps['patient'] + '-' + df_timesteps.apply(get_date_from_columns, axis=1)
df['patient_unique'] = df['patient'] + '-' + df['x_path'].apply(get_date_from_filename)

In [41]:
# check if all patient ids of our current df are in this excel sheet
set(df.patient) - set(df_timesteps.patient)

set()

In [42]:
df_timesteps.head()

Unnamed: 0,Study #,YYYY,MM,DD,Loaded to circle?,Short Axis Stack Present,SA Stack Usable? Y or why not,Axial stack present?,Contoured?,Contoured By?,...,Unnamed: 25,Unnamed: 26,Unnamed: 27,Unnamed: 28,Unnamed: 29,Unnamed: 30,Unnamed: 31,Unnamed: 32,patient,patient_unique
2,3,2007.0,3.0,13.0,Y,Y,Y,Y,y,AP,...,,,,,,,,,04NEJQUZ,04NEJQUZ-2007-03-13
5,6,1900.0,1.0,1.0,Y,Y,Y,N,y,AP,...,,,,,,,,,0AE4R74L,0AE4R74L-1900-01-01
6,7,2007.0,5.0,23.0,Y,Y,Y,Y,y,AP,...,,,,,,,,,0HQQW4ZN,0HQQW4ZN-2007-05-23
7,8,2005.0,6.0,27.0,Y,Y,Y,N,y,AP,...,,,,,,,,,0PTV75MP,0PTV75MP-2005-06-27
9,10,2007.0,2.0,13.0,Y,Y,Y,Y,y,SP,...,,,,,,,,,0RPELLU8,0RPELLU8-2007-02-13


In [45]:
# get the phases for all matched patients
temp = df.apply(get_phase, args=(df_timesteps, 'patient_unique'), axis=1)
c = Counter(temp)
c

Counter({'MS': 203, 'ES': 203, 'MD': 203, 'PF': 203, 'ED': 203})

In [46]:
# add phases to current dataframe
df['phase'] = temp
# transform to categorical values with ordering
df['phase'] = pd.Categorical(df.phase, 
                      categories=['ED','MS','ES','PF','MD'],
                      ordered=True)
# check if there are timesteps we cant match to any phase
df[df['phase'] == 'no_phase_fits'][['patient_unique', 't']]
df.shape

Unnamed: 0,patient_unique,t


(1015, 149)

In [47]:
df.to_csv('reports/vae/3d_gcn.csv')

# Create a motiongenerator from dataframe --> train voxelmorph

In [127]:
from src.data.generators import MotionDataGenerator
config = dict()
config['BATCHSIZE'] = 2
config['ARCHITECTURE'] = '3D' # 2D
config['DIM'] = [16, 224, 224] # [16,244,244]
config['SPACING'] = [7, 1.0,1.0] # used by sitk, opposite order than numpy or tensorflow!

# create a list of z slices with t_n and t_n+1 , not possible for the last timestep
t_1 = np.concatenate([df[df['t_norm'] == 0]['x_path'].values, df[df['t_norm'] == 1]['x_path'].values, df[df['t_norm'] == 2]['x_path'].values, df[df['t_norm'] == 3]['x_path'].values])
t_2 = np.concatenate([df[df['t_norm'] == 1]['x_path'].values, df[df['t_norm'] == 2]['x_path'].values, df[df['t_norm'] == 3]['x_path'].values, df[df['t_norm'] == 4]['x_path'].values])
batch_generator = MotionDataGenerator(t_1, t_2, config)

2019-11-27 16:36:40,672 INFO Create DataGenerator
2019-11-27 16:36:40,674 INFO Datagenerator created with: 
 shape: [16, 224, 224]
 batchsize: 2
 Scaler: MinMax
 Images: 812 
 Augment_grid: False 
 Thread workers: 2
2019-11-27 16:36:40,675 INFO No augmentation


In [128]:
len(t_1)
len(t_2)
len(batch_generator)

812

812

406

In [129]:
logging.getLogger().setLevel(logging.INFO)
from src.visualization.visualize import show_2D_or_3D

In [130]:
# Select batch generator output
x = ''
y = ''
@interact
def select_batch(batch = (0,len(batch_generator), 1)):
    global x, y
    input_ , output_ = batch_generator.__getitem__(batch)
    x = input_[0]
    y = output_[0]
    logging.info(x.shape)
    logging.info(y.shape)

interactive(children=(IntSlider(value=203, description='batch', max=406), Output()), _dom_classes=('widget-int…

In [131]:
@interact
def select_image_in_batch(im = (0,x.shape[0]- 1, 1)):
    
    # define a different logging level to make the generator steps visible
    logging.getLogger().setLevel(logging.INFO)
    show_2D_or_3D(x[im])
    plt.show()
    show_2D_or_3D(y[im])
    plt.show()

interactive(children=(IntSlider(value=0, description='im', max=1), Output()), _dom_classes=('widget-interact',…

# Create a dataframe from all ACDC 3D volumes

In [2]:
# raw images
images = sorted(glob.glob(os.path.join('data/raw/ACDC/original/all/**/','*[0-9][0-9].nii.gz'), recursive=True))
masks = sorted(glob.glob(os.path.join('data/raw/ACDC/original/all/**/','*_gt.nii.gz'), recursive=True))
len(images)
len(masks)

200

In [21]:
os.path.basename(images[0]).split('_')[0]

'patient001'

In [4]:
# images & image stats
from src.data.Dataset import get_acdc_dataset_as_df, create_acdc_dataframe_for_cv
df1 = describe_path('data/raw/ACDC/original/all', dataset='ACDC', plot_histogram=False)
df1 = df1[df1['image'] == True]
df1['patient'] = df1['f_name'].apply(lambda x : os.path.basename(x).split('.')[0].split('_')[0])
df1['x_path'] = df1['f_name']
df1.head()
df1.info()

2020-07-23 15:42:33,601 INFO Using acdc dataset
2020-07-23 15:42:33,609 INFO describing path: data/raw/ACDC/original/all


<class 'pandas.core.frame.DataFrame'>
Int64Index: 200 entries, 0 to 199
Data columns (total 47 columns):
.50-quantle                200 non-null float64
.75-quantile               200 non-null float64
.99-quantile               200 non-null float64
CardiacNumberOfImages      200 non-null object
InstitutionAddress         200 non-null object
InstitutionName            200 non-null object
LargestImagePixelValue     200 non-null int64
MagneticFieldStrength      200 non-null object
Manufacturer               200 non-null object
ManufacturerModelName      200 non-null object
PatientAge                 200 non-null object
PatientBirthDate           200 non-null object
PatientID                  200 non-null object
PatientPosition            200 non-null object
PatientSex                 200 non-null object
PatientSize                200 non-null object
PatientWeight              200 non-null object
ReferringPhysicianName     200 non-null object
SeriesDescription          200 non-null object


In [16]:
Console_and_file_logger('prediction3D', logging.DEBUG)

2020-07-23 16:10:21,542 INFO -------------------- Start --------------------
2020-07-23 16:10:21,542 INFO Working directory: /mnt/data/git/cardio.
2020-07-23 16:10:21,542 INFO Log file: ./logs/prediction3D.log
2020-07-23 16:10:21,543 INFO Log level for console: DEBUG


<src.utils.utils_io.Console_and_file_logger at 0x7f3fb3276390>

In [18]:
# pathology data
df2 = create_acdc_dataframe_for_cv(path_to_data='data/raw/ACDC/2D/all/', img_pattern='*frame[0-9][0-9].nii.gz')

2020-07-23 16:13:21,612 INFO Found: 0 files in data/raw/ACDC/2D/all/
2020-07-23 16:13:22,074 INFO Created a dataframe with shape: (600, 5)
2020-07-23 16:13:22,077 DEBUG 20 Patients found for pathology: DCM
2020-07-23 16:13:22,077 DEBUG Fold: 0, Pathology: DCM train: ['patient003', 'patient004', 'patient005', 'patient006', 'patient007', 'patient008', 'patient010', 'patient011', 'patient012', 'patient013', 'patient014', 'patient015', 'patient017', 'patient019', 'patient020']
2020-07-23 16:13:22,077 DEBUG Fold: 0, Pathology: DCM, test: ['patient001', 'patient002', 'patient009', 'patient016', 'patient018']
2020-07-23 16:13:22,087 DEBUG Files x_train: 0
2020-07-23 16:13:22,088 DEBUG Files x_test: 0
2020-07-23 16:13:22,089 DEBUG Fold: 1, Pathology: DCM train: ['patient001', 'patient002', 'patient003', 'patient005', 'patient007', 'patient008', 'patient009', 'patient010', 'patient011', 'patient013', 'patient014', 'patient015', 'patient016', 'patient018', 'patient020']
2020-07-23 16:13:22,090 D

In [9]:
temp = get_acdc_dataset_as_df('data/raw/ACDC/original/all/')

In [15]:
df2.head()

Unnamed: 0,fold,modality,pathology,patient,x_path,y_path


In [5]:
df2 = df2[df2['fold'] == 0]
df2['y_path'] = df2['y_path'].apply(lambda x : x.replace('.nii.gz', '_gt.nii.gz'))
df2.head()
df2.info()

2020-07-23 15:43:13,611 INFO Found: 200 files in data/raw/ACDC/original/all/


<class 'pandas.core.frame.DataFrame'>
Int64Index: 0 entries
Data columns (total 6 columns):
fold         0 non-null object
modality     0 non-null object
pathology    0 non-null object
patient      0 non-null object
x_path       0 non-null object
y_path       0 non-null object
dtypes: object(6)
memory usage: 0.0+ bytes


In [6]:
df1.shape
df2.shape

(0, 6)

In [52]:
# merge both datasets by patient id
df = pd.merge(df1, df2, on='x_path')

In [53]:
df.describe()
df.shape

Unnamed: 0,.50-quantle,.75-quantile,.99-quantile,LargestImagePixelValue,SmallestImagePixelValue,dimension,max,mean,min,slices,t-axis,t-spacing,x-axis,x-spacing,y-axis,y-spacing,z-axis,z-spacing
count,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0
mean,47.43,94.85,269.29325,0.0,100.0,3.0,601.265,67.78895,8.5,9.51,0.0,0.0,220.12,1.51171,247.14,1.51171,9.51,9.335
std,23.135717,56.16413,196.074704,0.0,0.0,0.0,746.262436,35.019502,9.337879,2.395536,0.0,0.0,33.958674,0.185097,39.343614,0.185097,2.395536,1.668591
min,19.0,52.0,135.0,0.0,100.0,3.0,184.0,42.639797,0.0,6.0,0.0,0.0,154.0,0.703125,154.0,0.703125,6.0,5.0
25%,36.0,68.0,162.75,0.0,100.0,3.0,255.0,51.92994,0.0,8.0,0.0,0.0,208.0,1.36719,224.0,1.36719,8.0,10.0
50%,43.0,75.5,216.0,0.0,100.0,3.0,255.0,56.38789,5.5,9.0,0.0,0.0,216.0,1.5625,256.0,1.5625,9.0,10.0
75%,50.0,88.0,296.0,0.0,100.0,3.0,559.0,65.481128,15.0,10.0,0.0,0.0,234.0,1.5625,256.0,1.5625,10.0,10.0
max,180.0,362.0,1176.0,0.0,100.0,3.0,4031.0,242.061382,32.0,18.0,0.0,0.0,428.0,1.91964,512.0,1.91964,18.0,10.0


(200, 52)

In [54]:
df.to_csv('reports/vae/3d_acdc.csv')