# Cell Profiler - Per Patient
Nucleus rollup per patch.

In [1]:
import numpy as np
import pandas as pd
import os
import csv
from datetime import datetime
print(datetime.now())

2022-08-09 15:48:15.544926


In [2]:
BASE_PATH='/home/jrm/Adjeroh/Glioma/August_Run/CellProfilerPerPatient/' # Alien
BASE_PATH='D:\\Adjeroh\\Glioma\\August_Run\\CellProfilerPerPatient\\'   # Windows
NUCLEUS_INPUT='Process100_Nucleus.csv'
NUCLEUS_OUTPUT='NucleusToPatchRollup.csv'
IMAGE_COL='ImageNumber'

In [3]:
def save_patient(patient,df):
    if patient is not None:
        filename = BASE_PATH + patient + '/' + NUCLEUS_OUTPUT
        #print('Save',filename)
        #df = pd.DataFrame(one_row_as_dict)
        #df = df.replace('nan',np.nan)
        df.to_csv(filename)  # Now ImageNumber is the index. Do not say index=False! 

In [4]:
def drop_columns(df):
    cols = df.columns
    # For patch-level rollup, need to leave column ImageNumber
    bad_cols=[c for c in cols if c in
              ['Children_Cells_Count','Number_Object_Number','ObjectNumber']]
    # These fields specify the nucleus location relative to the patch, which is irrelevant.
    loc_cols=[c for c in cols 
              if c.startswith('Location_')
              or c.startswith('AreaShape_BoundingBoxM')
              or c.startswith('AreaShape_Center')]
    df = df.drop(columns=bad_cols)
    df = df.drop(columns=loc_cols)
    return df

In [5]:
def rollup(df):
    rollup = df.groupby([IMAGE_COL]).describe(include=[np.number]) ## this is slow: 3min/patient
    rollup.columns=rollup.columns.map('_'.join)  ## helps random forest code
    return rollup

In [6]:
print(datetime.now())
for patient in os.listdir(BASE_PATH):
    if patient.startswith('TCGA-'):
        print(datetime.now())
        print('Processing',patient)
        filename = BASE_PATH + patient +'/' + NUCLEUS_INPUT
        bigdf = pd.read_csv(filename)
        shape1 = bigdf.shape
        bigdf = drop_columns(bigdf)
        shape2 = bigdf.shape
        bigdf = rollup(bigdf)
        shape3 = bigdf.shape
        print('Shape initial, reduced, described:',shape1,shape2,shape3)
        save_patient(patient,bigdf)

2022-08-09 15:48:15.637553
2022-08-09 15:48:15.655655
Processing TCGA-02-0004-01Z-00
Shape initial, reduced, described: (6979, 650) (6979, 632) (396, 5048)
2022-08-09 15:52:42.002998
Processing TCGA-02-0010-01Z-00
Shape initial, reduced, described: (18537, 650) (18537, 632) (1568, 5048)
2022-08-09 16:10:18.445483
Processing TCGA-02-0025-01Z-00
Shape initial, reduced, described: (18540, 650) (18540, 632) (790, 5048)
2022-08-09 16:19:09.154774
Processing TCGA-02-0033-01Z-00
Shape initial, reduced, described: (10982, 650) (10982, 632) (395, 5048)
2022-08-09 16:23:32.937742
Processing TCGA-02-0285-01Z-00
Shape initial, reduced, described: (14667, 650) (14667, 632) (404, 5048)
2022-08-09 16:28:03.453113
Processing TCGA-02-0338-01Z-00
Shape initial, reduced, described: (17891, 650) (17891, 632) (398, 5048)
2022-08-09 16:32:31.237738
Processing TCGA-02-0430-01Z-00
Shape initial, reduced, described: (11338, 650) (11338, 632) (395, 5048)
2022-08-09 16:36:56.041849
Processing TCGA-02-0439-01Z-00

2022-08-10 01:33:13.116857
Processing TCGA-DU-6408-01Z-00
Shape initial, reduced, described: (6213, 650) (6213, 632) (401, 5048)
2022-08-10 01:37:29.350121
Processing TCGA-DU-7009-01Z-00
Shape initial, reduced, described: (7310, 650) (7310, 632) (401, 5048)
2022-08-10 01:41:46.091150
Processing TCGA-DU-7014-01Z-00
Shape initial, reduced, described: (8273, 650) (8273, 632) (406, 5048)
2022-08-10 01:46:07.510594
Processing TCGA-DU-7015-01Z-00
Shape initial, reduced, described: (7525, 650) (7525, 632) (381, 5048)
2022-08-10 01:50:11.645101
Processing TCGA-DU-7299-01Z-00
Shape initial, reduced, described: (6398, 650) (6398, 632) (396, 5048)
2022-08-10 01:54:25.061826
Processing TCGA-DU-8164-01Z-00
Shape initial, reduced, described: (7464, 650) (7464, 632) (377, 5048)
2022-08-10 01:58:26.614468
Processing TCGA-DU-8165-01Z-00
Shape initial, reduced, described: (7463, 650) (7463, 632) (381, 5048)
2022-08-10 02:02:31.339710
Processing TCGA-DU-8167-01Z-00
Shape initial, reduced, described: (573

FileNotFoundError: [Errno 2] No such file or directory: 'D:\\Adjeroh\\Glioma\\August_Run\\CellProfilerPerPatient\\TCGA-HW-7495-01Z/Process100_Nucleus.csv'

In [None]:
print(datetime.now())
