# Filter
Notebook Image.05 filtered high-RBC patches, but also removed the patch filenames.
Here, for reporting purposes, we list the patch filenames that were filtered.

In [1]:
from datetime import datetime
print(datetime.now())
from platform import python_version
print('Python',python_version())
import csv
import numpy as np
import pandas as pd
import matplotlib as plt

2022-07-23 10:44:57.552305
Python 3.8.10


In [2]:
BASE_PATH_IN='/home/jrm/Adjeroh/Naved/CP_80K/'
FILENAMES=[
'Process100_Image.csv',
'Process100_Cells.csv',
'Process100_ExpandCells.csv',
'Process100_Experiment.csv',
'Process100_MergeRBC.csv',
'Process100_Nucleus.csv',
'Process100_RBC.csv',
'Process100_ShrinkRBC.csv',
'Process100_Tissue.csv']
INPUT_DIRS=[
'Output0/',
'Output1/',
'Output2/',
'Output3/',
'Output4/',
'Output5/'
]
FILE_COL='FileName_Tumor'
NEW_COL='WSI'
NUM_CLASSES = len(INPUT_DIRS)

In [3]:
def get_bad_rows(df):
    df = df[df.AreaOccupied_AreaOccupied_MergeRBC>=5000]    
    return df
def get_good_rows(df):
    df = df[df.AreaOccupied_AreaOccupied_MergeRBC<5000]    
    return df

In [4]:
LEN_CASE_ID=19
LEN_WSI_ID=23
def add_column(df):
    # Patch filename format: TCGA-06-0129-01Z-00-DX1_5400_5100.png
    # For WSI ID, use first 23 letters.
    # For patient or case ID, use first 19 letters. 
    new_column = df[FILE_COL].astype(str).str[:LEN_WSI_ID]
    df[NEW_COL] = new_column
    return df

In [5]:
infile = FILENAMES[0]  # only process Image.csv
print(datetime.now())
num_filtered = np.zeros(NUM_CLASSES)
num_originals = np.zeros(NUM_CLASSES)
lost_wsi = 0
for cls in range(NUM_CLASSES):
    directory = INPUT_DIRS[cls]
    input =  BASE_PATH_IN+ directory+infile
    print()
    print('Cancer class',cls)
    print('Reading from',input)
    df = pd.read_csv(input)
    df = add_column(df)
    num_rows = len(df)
    num_originals[cls] = num_rows
    print('Number of original patches=',num_rows)
    num_wsi_1 = df[NEW_COL].nunique()
    print('Number of original WSI=',num_wsi_1)

    good = get_good_rows(df)
    num_wsi_2 = good[NEW_COL].nunique()
    print('Number of remaining WSI=',num_wsi_2)
    lost_wsi += (num_wsi_2 - num_wsi_1)
    good = None
    
    bad = get_bad_rows(df)
    num_bad_rows = len(bad)
    num_filtered[cls] = num_bad_rows
    num_originals[cls] = num_rows
    print('Number of filtered patches=',num_bad_rows)
    print()
    for i in range(num_bad_rows):
        print(df.iloc[i][FILE_COL])
    bad = None


2022-07-23 10:44:57.820580

Cancer class 0
Reading from /home/jrm/Adjeroh/Naved/CP_80K/Output0/Process100_Image.csv
Number of original patches= 44966
Number of original WSI= 117
Number of remaining WSI= 117
Number of filtered patches= 3677

TCGA-02-0004-01Z-00-DX1_10200_24300.png
TCGA-02-0004-01Z-00-DX1_10200_28500.png
TCGA-02-0004-01Z-00-DX1_10200_3900.png
TCGA-02-0004-01Z-00-DX1_10200_4500.png
TCGA-02-0004-01Z-00-DX1_10200_6000.png
TCGA-02-0004-01Z-00-DX1_10500_27600.png
TCGA-02-0004-01Z-00-DX1_10500_3900.png
TCGA-02-0004-01Z-00-DX1_10500_5700.png
TCGA-02-0004-01Z-00-DX1_10500_8700.png
TCGA-02-0004-01Z-00-DX1_10800_10500.png
TCGA-02-0004-01Z-00-DX1_10800_21600.png
TCGA-02-0004-01Z-00-DX1_10800_27900.png
TCGA-02-0004-01Z-00-DX1_11100_12900.png
TCGA-02-0004-01Z-00-DX1_11100_14100.png
TCGA-02-0004-01Z-00-DX1_11100_21300.png
TCGA-02-0004-01Z-00-DX1_11100_25800.png
TCGA-02-0004-01Z-00-DX1_11100_5700.png
TCGA-02-0004-01Z-00-DX1_11400_14100.png
TCGA-02-0004-01Z-00-DX1_11400_21900.png
TCGA-0

TCGA-02-0010-01Z-00-DX1_21300_22200.png
TCGA-02-0010-01Z-00-DX1_21300_23400.png
TCGA-02-0010-01Z-00-DX1_21300_25500.png
TCGA-02-0010-01Z-00-DX1_21600_12600.png
TCGA-02-0010-01Z-00-DX1_21600_1500.png
TCGA-02-0010-01Z-00-DX1_21600_18300.png
TCGA-02-0010-01Z-00-DX1_21600_24900.png
TCGA-02-0010-01Z-00-DX1_21600_6600.png
TCGA-02-0010-01Z-00-DX1_21900_20700.png
TCGA-02-0010-01Z-00-DX1_21900_22500.png
TCGA-02-0010-01Z-00-DX1_21900_4500.png
TCGA-02-0010-01Z-00-DX1_22200_11700.png
TCGA-02-0010-01Z-00-DX1_22200_19200.png
TCGA-02-0010-01Z-00-DX1_22200_22500.png
TCGA-02-0010-01Z-00-DX1_22200_5700.png
TCGA-02-0010-01Z-00-DX1_22500_19500.png
TCGA-02-0010-01Z-00-DX1_22500_22200.png
TCGA-02-0010-01Z-00-DX1_22500_25800.png
TCGA-02-0010-01Z-00-DX1_22500_3000.png
TCGA-02-0010-01Z-00-DX1_22800_12000.png
TCGA-02-0010-01Z-00-DX1_22800_15600.png
TCGA-02-0010-01Z-00-DX1_22800_21000.png
TCGA-02-0010-01Z-00-DX1_22800_22800.png
TCGA-02-0010-01Z-00-DX1_22800_23700.png
TCGA-02-0010-01Z-00-DX1_22800_2400.png
TCGA-0

TCGA-02-0010-01Z-00-DX2_25800_18900.png
TCGA-02-0010-01Z-00-DX2_25800_24300.png
TCGA-02-0010-01Z-00-DX2_25800_36600.png
TCGA-02-0010-01Z-00-DX2_25800_3900.png
TCGA-02-0010-01Z-00-DX2_25800_39900.png
TCGA-02-0010-01Z-00-DX2_26100_20100.png
TCGA-02-0010-01Z-00-DX2_26100_2700.png
TCGA-02-0010-01Z-00-DX2_26100_28200.png
TCGA-02-0010-01Z-00-DX2_26100_29700.png
TCGA-02-0010-01Z-00-DX2_26100_31500.png
TCGA-02-0010-01Z-00-DX2_26100_33300.png
TCGA-02-0010-01Z-00-DX2_26100_33900.png
TCGA-02-0010-01Z-00-DX2_26400_14400.png
TCGA-02-0010-01Z-00-DX2_26400_24900.png
TCGA-02-0010-01Z-00-DX2_26400_27000.png
TCGA-02-0010-01Z-00-DX2_26400_36600.png
TCGA-02-0010-01Z-00-DX2_26400_5100.png
TCGA-02-0010-01Z-00-DX2_26700_10200.png
TCGA-02-0010-01Z-00-DX2_26700_17100.png
TCGA-02-0010-01Z-00-DX2_26700_36600.png
TCGA-02-0010-01Z-00-DX2_26700_5700.png
TCGA-02-0010-01Z-00-DX2_27000_14700.png
TCGA-02-0010-01Z-00-DX2_27000_26400.png
TCGA-02-0010-01Z-00-DX2_27000_36000.png
TCGA-02-0010-01Z-00-DX2_27300_10500.png
TCGA

TCGA-02-0010-01Z-00-DX3_10500_36300.png
TCGA-02-0010-01Z-00-DX3_10500_36600.png
TCGA-02-0010-01Z-00-DX3_10500_9300.png
TCGA-02-0010-01Z-00-DX3_10800_10200.png
TCGA-02-0010-01Z-00-DX3_10800_12000.png
TCGA-02-0010-01Z-00-DX3_10800_27000.png
TCGA-02-0010-01Z-00-DX3_10800_35400.png
TCGA-02-0010-01Z-00-DX3_10800_5400.png
TCGA-02-0010-01Z-00-DX3_11100_10200.png
TCGA-02-0010-01Z-00-DX3_11100_27900.png
TCGA-02-0010-01Z-00-DX3_11100_5100.png
TCGA-02-0010-01Z-00-DX3_11400_13500.png
TCGA-02-0010-01Z-00-DX3_11400_21600.png
TCGA-02-0010-01Z-00-DX3_11400_33900.png
TCGA-02-0010-01Z-00-DX3_11400_36900.png
TCGA-02-0010-01Z-00-DX3_11700_6300.png
TCGA-02-0010-01Z-00-DX3_12000_11700.png
TCGA-02-0010-01Z-00-DX3_12000_18600.png
TCGA-02-0010-01Z-00-DX3_12000_22500.png
TCGA-02-0010-01Z-00-DX3_12000_37200.png
TCGA-02-0010-01Z-00-DX3_1200_14700.png
TCGA-02-0010-01Z-00-DX3_1200_33600.png
TCGA-02-0010-01Z-00-DX3_12300_10500.png
TCGA-02-0010-01Z-00-DX3_12300_22800.png
TCGA-02-0010-01Z-00-DX3_12300_7500.png
TCGA-02

TCGA-02-0010-01Z-00-DX3_38100_10800.png
TCGA-02-0010-01Z-00-DX3_38100_14100.png
TCGA-02-0010-01Z-00-DX3_38100_36300.png
TCGA-02-0010-01Z-00-DX3_38400_25800.png
TCGA-02-0010-01Z-00-DX3_38700_10500.png
TCGA-02-0010-01Z-00-DX3_3900_24300.png
TCGA-02-0010-01Z-00-DX3_3900_27900.png
TCGA-02-0010-01Z-00-DX3_3900_7500.png
TCGA-02-0010-01Z-00-DX3_4200_15900.png
TCGA-02-0010-01Z-00-DX3_4500_13800.png
TCGA-02-0010-01Z-00-DX3_4500_35400.png
TCGA-02-0010-01Z-00-DX3_4500_3900.png
TCGA-02-0010-01Z-00-DX3_4800_18900.png
TCGA-02-0010-01Z-00-DX3_4800_30300.png
TCGA-02-0010-01Z-00-DX3_4800_34200.png
TCGA-02-0010-01Z-00-DX3_4800_34500.png
TCGA-02-0010-01Z-00-DX3_5100_33600.png
TCGA-02-0010-01Z-00-DX3_5100_33900.png
TCGA-02-0010-01Z-00-DX3_5100_6300.png
TCGA-02-0010-01Z-00-DX3_5400_26700.png
TCGA-02-0010-01Z-00-DX3_5400_34500.png
TCGA-02-0010-01Z-00-DX3_5400_7500.png
TCGA-02-0010-01Z-00-DX3_5700_16800.png
TCGA-02-0010-01Z-00-DX3_5700_33900.png
TCGA-02-0010-01Z-00-DX3_5700_34200.png
TCGA-02-0010-01Z-00-DX3_

TCGA-02-0010-01Z-00-DX4_26100_21600.png
TCGA-02-0010-01Z-00-DX4_26100_25500.png
TCGA-02-0010-01Z-00-DX4_26100_26700.png
TCGA-02-0010-01Z-00-DX4_26400_12000.png
TCGA-02-0010-01Z-00-DX4_26400_14100.png
TCGA-02-0010-01Z-00-DX4_26400_15000.png
TCGA-02-0010-01Z-00-DX4_26400_17400.png
TCGA-02-0010-01Z-00-DX4_26400_23100.png
TCGA-02-0010-01Z-00-DX4_26400_5700.png
TCGA-02-0010-01Z-00-DX4_26700_24900.png
TCGA-02-0010-01Z-00-DX4_26700_5400.png
TCGA-02-0010-01Z-00-DX4_27000_15900.png
TCGA-02-0010-01Z-00-DX4_27000_21300.png
TCGA-02-0010-01Z-00-DX4_27000_21900.png
TCGA-02-0010-01Z-00-DX4_27000_5700.png
TCGA-02-0010-01Z-00-DX4_2700_4800.png
TCGA-02-0010-01Z-00-DX4_2700_7200.png
TCGA-02-0010-01Z-00-DX4_27300_17100.png
TCGA-02-0010-01Z-00-DX4_27300_5400.png
TCGA-02-0010-01Z-00-DX4_27300_5700.png
TCGA-02-0010-01Z-00-DX4_27600_21300.png
TCGA-02-0010-01Z-00-DX4_27600_23400.png
TCGA-02-0010-01Z-00-DX4_27600_6300.png
TCGA-02-0010-01Z-00-DX4_27900_14700.png
TCGA-02-0010-01Z-00-DX4_27900_16800.png
TCGA-02-00

TCGA-02-0025-01Z-00-DX1_22500_29700.png
TCGA-02-0025-01Z-00-DX1_22500_32700.png
TCGA-02-0025-01Z-00-DX1_22500_35100.png
TCGA-02-0025-01Z-00-DX1_22500_36900.png
TCGA-02-0025-01Z-00-DX1_22500_6300.png
TCGA-02-0025-01Z-00-DX1_22500_8100.png
TCGA-02-0025-01Z-00-DX1_22800_12900.png
TCGA-02-0025-01Z-00-DX1_22800_15300.png
TCGA-02-0025-01Z-00-DX1_22800_17400.png
TCGA-02-0025-01Z-00-DX1_22800_18600.png
TCGA-02-0025-01Z-00-DX1_22800_34800.png
TCGA-02-0025-01Z-00-DX1_22800_3600.png
TCGA-02-0025-01Z-00-DX1_22800_4200.png
TCGA-02-0025-01Z-00-DX1_22800_6300.png
TCGA-02-0025-01Z-00-DX1_22800_6600.png
TCGA-02-0025-01Z-00-DX1_22800_6900.png
TCGA-02-0025-01Z-00-DX1_22800_8100.png
TCGA-02-0025-01Z-00-DX1_22800_9000.png
TCGA-02-0025-01Z-00-DX1_22800_9900.png
TCGA-02-0025-01Z-00-DX1_23100_32400.png
TCGA-02-0025-01Z-00-DX1_23100_35400.png
TCGA-02-0025-01Z-00-DX1_23100_8700.png
TCGA-02-0025-01Z-00-DX1_23400_15900.png
TCGA-02-0025-01Z-00-DX1_23400_18600.png
TCGA-02-0025-01Z-00-DX1_23400_27000.png
TCGA-02-002

TCGA-02-0025-01Z-00-DX1_8100_17700.png
TCGA-02-0025-01Z-00-DX1_8100_25200.png
TCGA-02-0025-01Z-00-DX1_8100_25500.png
TCGA-02-0025-01Z-00-DX1_8100_25800.png
TCGA-02-0025-01Z-00-DX1_8400_17700.png
TCGA-02-0025-01Z-00-DX1_8400_21000.png
TCGA-02-0025-01Z-00-DX1_8400_21600.png
TCGA-02-0025-01Z-00-DX1_8400_25500.png
TCGA-02-0025-01Z-00-DX1_8400_26100.png
TCGA-02-0025-01Z-00-DX1_8400_26400.png
TCGA-02-0025-01Z-00-DX1_8400_27000.png
TCGA-02-0025-01Z-00-DX1_8400_7200.png
TCGA-02-0025-01Z-00-DX1_8700_11100.png
TCGA-02-0025-01Z-00-DX1_8700_20400.png
TCGA-02-0025-01Z-00-DX1_8700_21300.png
TCGA-02-0025-01Z-00-DX1_8700_25800.png
TCGA-02-0025-01Z-00-DX1_8700_26700.png
TCGA-02-0025-01Z-00-DX1_8700_27000.png
TCGA-02-0025-01Z-00-DX1_8700_5700.png
TCGA-02-0025-01Z-00-DX1_8700_6300.png
TCGA-02-0025-01Z-00-DX1_8700_6600.png
TCGA-02-0025-01Z-00-DX1_8700_6900.png
TCGA-02-0025-01Z-00-DX1_8700_9900.png
TCGA-02-0025-01Z-00-DX1_9000_24900.png
TCGA-02-0025-01Z-00-DX1_9000_25200.png
TCGA-02-0025-01Z-00-DX1_9000_26

TCGA-02-0025-01Z-00-DX2_26400_35700.png
TCGA-02-0025-01Z-00-DX2_26700_29700.png
TCGA-02-0025-01Z-00-DX2_26700_30900.png
TCGA-02-0025-01Z-00-DX2_26700_34800.png
TCGA-02-0025-01Z-00-DX2_27000_28800.png
TCGA-02-0025-01Z-00-DX2_2700_21900.png
TCGA-02-0025-01Z-00-DX2_2700_23400.png
TCGA-02-0025-01Z-00-DX2_27300_38100.png
TCGA-02-0025-01Z-00-DX2_27600_14400.png
TCGA-02-0025-01Z-00-DX2_27600_27000.png
TCGA-02-0025-01Z-00-DX2_27900_15600.png
TCGA-02-0025-01Z-00-DX2_28200_15300.png
TCGA-02-0025-01Z-00-DX2_28200_26700.png
TCGA-02-0025-01Z-00-DX2_28200_27000.png
TCGA-02-0025-01Z-00-DX2_28200_28500.png
TCGA-02-0025-01Z-00-DX2_28200_30000.png
TCGA-02-0025-01Z-00-DX2_28200_31800.png
TCGA-02-0025-01Z-00-DX2_28200_34200.png
TCGA-02-0025-01Z-00-DX2_28500_12900.png
TCGA-02-0025-01Z-00-DX2_28500_33000.png
TCGA-02-0025-01Z-00-DX2_28800_29700.png
TCGA-02-0025-01Z-00-DX2_28800_30000.png
TCGA-02-0025-01Z-00-DX2_29100_11700.png
TCGA-02-0025-01Z-00-DX2_29100_12900.png
TCGA-02-0025-01Z-00-DX2_29100_14700.png
TC

TCGA-02-0033-01Z-00-DX1_16500_21000.png
TCGA-02-0033-01Z-00-DX1_16500_36000.png
TCGA-02-0033-01Z-00-DX1_16500_36300.png
TCGA-02-0033-01Z-00-DX1_16500_36600.png
TCGA-02-0033-01Z-00-DX1_16500_36900.png
TCGA-02-0033-01Z-00-DX1_16500_6900.png
TCGA-02-0033-01Z-00-DX1_16800_20700.png
TCGA-02-0033-01Z-00-DX1_16800_31200.png
TCGA-02-0033-01Z-00-DX1_16800_36000.png
TCGA-02-0033-01Z-00-DX1_16800_36300.png
TCGA-02-0033-01Z-00-DX1_16800_36600.png
TCGA-02-0033-01Z-00-DX1_17100_20100.png
TCGA-02-0033-01Z-00-DX1_17100_36600.png
TCGA-02-0033-01Z-00-DX1_17400_17400.png
TCGA-02-0033-01Z-00-DX1_17400_36600.png
TCGA-02-0033-01Z-00-DX1_17400_36900.png
TCGA-02-0033-01Z-00-DX1_17700_15900.png
TCGA-02-0033-01Z-00-DX1_17700_36600.png
TCGA-02-0033-01Z-00-DX1_17700_36900.png
TCGA-02-0033-01Z-00-DX1_18000_16500.png
TCGA-02-0033-01Z-00-DX1_18000_17100.png
TCGA-02-0033-01Z-00-DX1_18000_18000.png
TCGA-02-0033-01Z-00-DX1_18000_33000.png
TCGA-02-0033-01Z-00-DX1_18000_36000.png
TCGA-02-0033-01Z-00-DX1_18000_36300.png
T

TCGA-02-0033-01Z-00-DX1_5400_29100.png
TCGA-02-0033-01Z-00-DX1_5400_32400.png
TCGA-02-0033-01Z-00-DX1_5400_9000.png
TCGA-02-0033-01Z-00-DX1_5700_34800.png
TCGA-02-0033-01Z-00-DX1_5700_36000.png
TCGA-02-0033-01Z-00-DX1_5700_9300.png
TCGA-02-0033-01Z-00-DX1_5700_9600.png
TCGA-02-0033-01Z-00-DX1_6000_10800.png
TCGA-02-0033-01Z-00-DX1_6000_17100.png
TCGA-02-0033-01Z-00-DX1_6000_25200.png
TCGA-02-0033-01Z-00-DX1_6300_27300.png
TCGA-02-0033-01Z-00-DX1_6300_31200.png
TCGA-02-0033-01Z-00-DX1_6300_37500.png
TCGA-02-0033-01Z-00-DX1_6300_39300.png
TCGA-02-0033-01Z-00-DX1_6300_39900.png
TCGA-02-0033-01Z-00-DX1_6300_40500.png
TCGA-02-0033-01Z-00-DX1_6600_11400.png
TCGA-02-0033-01Z-00-DX1_6600_27000.png
TCGA-02-0033-01Z-00-DX1_6600_36900.png
TCGA-02-0033-01Z-00-DX1_6600_38700.png
TCGA-02-0033-01Z-00-DX1_6600_39600.png
TCGA-02-0033-01Z-00-DX1_6600_40800.png
TCGA-02-0033-01Z-00-DX1_6900_18900.png
TCGA-02-0033-01Z-00-DX1_6900_21600.png
TCGA-02-0033-01Z-00-DX1_6900_33900.png
TCGA-02-0033-01Z-00-DX1_6900

TCGA-02-0285-01Z-00-DX1_21000_3600.png
TCGA-02-0285-01Z-00-DX1_21000_8700.png
TCGA-02-0285-01Z-00-DX1_21000_9600.png
TCGA-02-0285-01Z-00-DX1_2100_10500.png
TCGA-02-0285-01Z-00-DX1_2100_18600.png
TCGA-02-0285-01Z-00-DX1_2100_21900.png
TCGA-02-0285-01Z-00-DX1_2100_24300.png
TCGA-02-0285-01Z-00-DX1_2100_28800.png
TCGA-02-0285-01Z-00-DX1_21300_12000.png
TCGA-02-0285-01Z-00-DX1_21300_6000.png
TCGA-02-0285-01Z-00-DX1_21600_11100.png
TCGA-02-0285-01Z-00-DX1_21600_14400.png
TCGA-02-0285-01Z-00-DX1_21600_17700.png
TCGA-02-0285-01Z-00-DX1_21600_18900.png
TCGA-02-0285-01Z-00-DX1_21600_25800.png
TCGA-02-0285-01Z-00-DX1_21600_7200.png
TCGA-02-0285-01Z-00-DX1_21900_11700.png
TCGA-02-0285-01Z-00-DX1_21900_1200.png
TCGA-02-0285-01Z-00-DX1_21900_16800.png
TCGA-02-0285-01Z-00-DX1_21900_24000.png
TCGA-02-0285-01Z-00-DX1_21900_25800.png
TCGA-02-0285-01Z-00-DX1_21900_9000.png
TCGA-02-0285-01Z-00-DX1_22200_15000.png
TCGA-02-0285-01Z-00-DX1_22200_16500.png
TCGA-02-0285-01Z-00-DX1_22200_18600.png
TCGA-02-0285

TCGA-02-0338-01Z-00-DX1_10500_15000.png
TCGA-02-0338-01Z-00-DX1_10500_16800.png
TCGA-02-0338-01Z-00-DX1_10500_17400.png
TCGA-02-0338-01Z-00-DX1_10500_18300.png
TCGA-02-0338-01Z-00-DX1_10500_2700.png
TCGA-02-0338-01Z-00-DX1_10500_7500.png
TCGA-02-0338-01Z-00-DX1_10500_9900.png
TCGA-02-0338-01Z-00-DX1_10800_12600.png
TCGA-02-0338-01Z-00-DX1_10800_16200.png
TCGA-02-0338-01Z-00-DX1_10800_18000.png
TCGA-02-0338-01Z-00-DX1_10800_4200.png
TCGA-02-0338-01Z-00-DX1_10800_4800.png
TCGA-02-0338-01Z-00-DX1_10800_9300.png
TCGA-02-0338-01Z-00-DX1_11100_15300.png
TCGA-02-0338-01Z-00-DX1_11100_17400.png
TCGA-02-0338-01Z-00-DX1_11100_18300.png
TCGA-02-0338-01Z-00-DX1_11100_3600.png
TCGA-02-0338-01Z-00-DX1_11100_5700.png
TCGA-02-0338-01Z-00-DX1_11100_9000.png
TCGA-02-0338-01Z-00-DX1_11100_9900.png
TCGA-02-0338-01Z-00-DX1_11400_12300.png
TCGA-02-0338-01Z-00-DX1_11400_17700.png
TCGA-02-0338-01Z-00-DX1_11400_18300.png
TCGA-02-0338-01Z-00-DX1_11400_7800.png
TCGA-02-0338-01Z-00-DX1_11400_9000.png
TCGA-02-0338

TCGA-DB-A64L-01Z-00-DX1_27000_33900.png
TCGA-DB-A64L-01Z-00-DX1_27000_36300.png
TCGA-DB-A64L-01Z-00-DX1_27000_8700.png
TCGA-DB-A64L-01Z-00-DX1_27300_53100.png
TCGA-DB-A64L-01Z-00-DX1_27600_4800.png
TCGA-DB-A64L-01Z-00-DX1_27900_28800.png
TCGA-DB-A64L-01Z-00-DX1_27900_4500.png
TCGA-DB-A64L-01Z-00-DX1_27900_52800.png
TCGA-DB-A64L-01Z-00-DX1_28200_30300.png
TCGA-DB-A64L-01Z-00-DX1_28200_56700.png
TCGA-DB-A64L-01Z-00-DX1_28500_25200.png
TCGA-DB-A64L-01Z-00-DX1_28500_8400.png
TCGA-DB-A64L-01Z-00-DX1_28800_54600.png
TCGA-DB-A64L-01Z-00-DX1_29100_13800.png
TCGA-DB-A64L-01Z-00-DX1_29700_19800.png
TCGA-DB-A64L-01Z-00-DX1_29700_52500.png
TCGA-DB-A64L-01Z-00-DX1_30000_22800.png
TCGA-DB-A64L-01Z-00-DX1_30000_33300.png
TCGA-DB-A64L-01Z-00-DX1_30300_10500.png
TCGA-DB-A64L-01Z-00-DX1_30300_24000.png
TCGA-DB-A64L-01Z-00-DX1_30600_17400.png
TCGA-DB-A64L-01Z-00-DX1_30600_6300.png
TCGA-DB-A64L-01Z-00-DX1_30900_51900.png
TCGA-DB-A64L-01Z-00-DX1_31500_17100.png
TCGA-DB-A64L-01Z-00-DX1_32100_13800.png
TCGA-

TCGA-DU-5874-01Z-00-DX1_7500_12600.png
TCGA-DU-5874-01Z-00-DX1_7500_15300.png
TCGA-DU-5874-01Z-00-DX1_7500_19200.png
TCGA-DU-5874-01Z-00-DX1_7500_25200.png
TCGA-DU-5874-01Z-00-DX1_7500_29100.png
TCGA-DU-5874-01Z-00-DX1_7500_7200.png
TCGA-DU-5874-01Z-00-DX1_7500_7800.png
TCGA-DU-5874-01Z-00-DX1_7800_22500.png
TCGA-DU-5874-01Z-00-DX1_7800_25500.png
TCGA-DU-5874-01Z-00-DX1_7800_28200.png
TCGA-DU-5874-01Z-00-DX1_7800_9600.png
TCGA-DU-5874-01Z-00-DX1_7800_9900.png
TCGA-DU-5874-01Z-00-DX1_8100_11700.png
TCGA-DU-5874-01Z-00-DX1_8100_15300.png
TCGA-DU-5874-01Z-00-DX1_8100_24900.png
TCGA-DU-5874-01Z-00-DX1_8100_25200.png
TCGA-DU-5874-01Z-00-DX1_8100_28500.png
TCGA-DU-5874-01Z-00-DX1_8100_8100.png
TCGA-DU-5874-01Z-00-DX1_8400_20400.png
TCGA-DU-5874-01Z-00-DX1_8400_21000.png
TCGA-DU-5874-01Z-00-DX1_8400_21900.png
TCGA-DU-5874-01Z-00-DX1_8400_22800.png
TCGA-DU-5874-01Z-00-DX1_8400_23700.png
TCGA-DU-5874-01Z-00-DX1_8400_24300.png
TCGA-DU-5874-01Z-00-DX1_8400_28200.png
TCGA-DU-5874-01Z-00-DX1_8400_2

TCGA-DB-5270-01Z-00-DX2_59700_35100.png
TCGA-DB-5270-01Z-00-DX2_60000_48900.png
TCGA-DB-5270-01Z-00-DX2_60000_71700.png
TCGA-DB-5270-01Z-00-DX2_60600_48600.png
TCGA-DB-5270-01Z-00-DX2_60600_63900.png
TCGA-DB-5270-01Z-00-DX2_60900_22800.png
TCGA-DB-5270-01Z-00-DX2_60900_56700.png
TCGA-DB-5270-01Z-00-DX2_61500_59700.png
TCGA-DB-5270-01Z-00-DX2_61800_58500.png
TCGA-DB-5270-01Z-00-DX2_62700_29700.png
TCGA-DB-5270-01Z-00-DX2_62700_52500.png
TCGA-DB-5270-01Z-00-DX2_62700_74700.png
TCGA-DB-5270-01Z-00-DX2_63000_68700.png
TCGA-DB-5270-01Z-00-DX2_6300_18000.png
TCGA-DB-5270-01Z-00-DX2_63300_13500.png
TCGA-DB-5270-01Z-00-DX2_63300_22200.png
TCGA-DB-5270-01Z-00-DX2_63600_13500.png
TCGA-DB-5270-01Z-00-DX2_63900_72600.png
TCGA-DB-5270-01Z-00-DX2_64200_22200.png
TCGA-DB-5270-01Z-00-DX2_64200_70500.png
TCGA-DB-5270-01Z-00-DX2_64200_70800.png
TCGA-DB-5270-01Z-00-DX2_64500_11100.png
TCGA-DB-5270-01Z-00-DX2_64500_72600.png
TCGA-DB-5270-01Z-00-DX2_64800_15300.png
TCGA-DB-5270-01Z-00-DX2_64800_70800.png
T

TCGA-DB-5274-01Z-00-DX1_40500_37500.png
TCGA-DB-5274-01Z-00-DX1_40800_27600.png
TCGA-DB-5274-01Z-00-DX1_40800_42000.png
TCGA-DB-5274-01Z-00-DX1_40800_46200.png
TCGA-DB-5274-01Z-00-DX1_40800_59100.png
TCGA-DB-5274-01Z-00-DX1_40800_63300.png
TCGA-DB-5274-01Z-00-DX1_41100_54000.png
TCGA-DB-5274-01Z-00-DX1_41400_16800.png
TCGA-DB-5274-01Z-00-DX1_41700_17400.png
TCGA-DB-5274-01Z-00-DX1_41700_22500.png
TCGA-DB-5274-01Z-00-DX1_41700_43500.png
TCGA-DB-5274-01Z-00-DX1_41700_63900.png
TCGA-DB-5274-01Z-00-DX1_42600_15900.png
TCGA-DB-5274-01Z-00-DX1_42600_28800.png
TCGA-DB-5274-01Z-00-DX1_42900_40800.png
TCGA-DB-5274-01Z-00-DX1_43200_28500.png
TCGA-DB-5274-01Z-00-DX1_43200_29400.png
TCGA-DB-5274-01Z-00-DX1_43500_35100.png
TCGA-DB-5274-01Z-00-DX1_43800_49200.png
TCGA-DB-5274-01Z-00-DX1_43800_51900.png
TCGA-DB-5274-01Z-00-DX1_43800_58200.png
TCGA-DB-5274-01Z-00-DX1_44100_19500.png
TCGA-DB-5274-01Z-00-DX1_44400_16200.png
TCGA-DB-5274-01Z-00-DX1_44400_47100.png
TCGA-DB-5274-01Z-00-DX1_44400_50100.png


Number of original patches= 6364
Number of original WSI= 17
Number of remaining WSI= 17
Number of filtered patches= 589

TCGA-CS-4943-01Z-00-DX1_10200_30300.png
TCGA-CS-4943-01Z-00-DX1_10200_30600.png
TCGA-CS-4943-01Z-00-DX1_10200_30900.png
TCGA-CS-4943-01Z-00-DX1_10200_31200.png
TCGA-CS-4943-01Z-00-DX1_10200_31500.png
TCGA-CS-4943-01Z-00-DX1_10200_31800.png
TCGA-CS-4943-01Z-00-DX1_10200_32100.png
TCGA-CS-4943-01Z-00-DX1_10200_32400.png
TCGA-CS-4943-01Z-00-DX1_10500_24300.png
TCGA-CS-4943-01Z-00-DX1_10500_30600.png
TCGA-CS-4943-01Z-00-DX1_10500_30900.png
TCGA-CS-4943-01Z-00-DX1_10500_31200.png
TCGA-CS-4943-01Z-00-DX1_10500_31500.png
TCGA-CS-4943-01Z-00-DX1_10500_31800.png
TCGA-CS-4943-01Z-00-DX1_10500_32400.png
TCGA-CS-4943-01Z-00-DX1_10500_32700.png
TCGA-CS-4943-01Z-00-DX1_10500_4500.png
TCGA-CS-4943-01Z-00-DX1_10500_6600.png
TCGA-CS-4943-01Z-00-DX1_10800_15300.png
TCGA-CS-4943-01Z-00-DX1_10800_30600.png
TCGA-CS-4943-01Z-00-DX1_10800_30900.png
TCGA-CS-4943-01Z-00-DX1_10800_31500.png
T

TCGA-CS-5397-01Z-00-DX1_29100_12300.png
TCGA-CS-5397-01Z-00-DX1_29100_55500.png
TCGA-CS-5397-01Z-00-DX1_29100_56700.png
TCGA-CS-5397-01Z-00-DX1_29400_13200.png
TCGA-CS-5397-01Z-00-DX1_29400_41400.png
TCGA-CS-5397-01Z-00-DX1_29400_43800.png
TCGA-CS-5397-01Z-00-DX1_29400_57600.png
TCGA-CS-5397-01Z-00-DX1_29400_62100.png
TCGA-CS-5397-01Z-00-DX1_29400_63300.png
TCGA-CS-5397-01Z-00-DX1_29400_63900.png
TCGA-CS-5397-01Z-00-DX1_29700_34500.png
TCGA-CS-5397-01Z-00-DX1_29700_53700.png
TCGA-CS-5397-01Z-00-DX1_29700_54000.png
TCGA-CS-5397-01Z-00-DX1_29700_60300.png
TCGA-CS-5397-01Z-00-DX1_29700_67800.png
TCGA-CS-5397-01Z-00-DX1_30000_13500.png
TCGA-CS-5397-01Z-00-DX1_30000_43800.png
TCGA-CS-5397-01Z-00-DX1_30000_61500.png
TCGA-CS-5397-01Z-00-DX1_30000_65100.png
TCGA-CS-5397-01Z-00-DX1_30000_65400.png
TCGA-CS-5397-01Z-00-DX1_30300_16200.png
TCGA-CS-5397-01Z-00-DX1_30300_35100.png
TCGA-CS-5397-01Z-00-DX1_30600_12300.png
TCGA-CS-5397-01Z-00-DX1_30600_18300.png
TCGA-CS-5397-01Z-00-DX1_30600_67500.png


TCGA-DB-A4XF-01Z-00-DX1_35100_37200.png
TCGA-DB-A4XF-01Z-00-DX1_35100_45600.png
TCGA-DB-A4XF-01Z-00-DX1_35400_42600.png
TCGA-DB-A4XF-01Z-00-DX1_35400_44100.png
TCGA-DB-A4XF-01Z-00-DX1_35400_63600.png
TCGA-DB-A4XF-01Z-00-DX1_35400_63900.png
TCGA-DB-A4XF-01Z-00-DX1_35400_64800.png
TCGA-DB-A4XF-01Z-00-DX1_35700_64200.png
TCGA-DB-A4XF-01Z-00-DX1_35700_65400.png
TCGA-DB-A4XF-01Z-00-DX1_36000_23400.png
TCGA-DB-A4XF-01Z-00-DX1_36000_64200.png
TCGA-DB-A4XF-01Z-00-DX1_36000_64800.png
TCGA-DB-A4XF-01Z-00-DX1_36300_58200.png
TCGA-DB-A4XF-01Z-00-DX1_36300_63900.png
TCGA-DB-A4XF-01Z-00-DX1_36300_9900.png
TCGA-DB-A4XF-01Z-00-DX1_36600_63300.png
TCGA-DB-A4XF-01Z-00-DX1_36900_19200.png
TCGA-DB-A4XF-01Z-00-DX1_36900_26100.png
TCGA-DB-A4XF-01Z-00-DX1_36900_9600.png
TCGA-DB-A4XF-01Z-00-DX1_37200_64800.png
TCGA-DB-A4XF-01Z-00-DX1_37500_24000.png
TCGA-DB-A4XF-01Z-00-DX1_37500_41100.png
TCGA-DB-A4XF-01Z-00-DX1_37500_42900.png
TCGA-DB-A4XF-01Z-00-DX1_37500_49800.png
TCGA-DB-A4XF-01Z-00-DX1_37800_64200.png
TC

In [6]:
print(datetime.now())
print('Number of WSI lost due to filtering:',lost_wsi)
print()
print('Totals per cancer class')
print('Original:',num_originals.astype(int))
print('Filtered:',num_filtered.astype(int))
percents = 100.0 * num_filtered / num_originals
print('Percent:',percents.astype(int))
print('Overall total')
print('Original',sum(num_originals))
print('Filtered',sum(num_filtered))
print('Percent:',100.0 * sum(num_filtered) / sum(num_originals))

2022-07-23 10:46:14.830573
Number of WSI lost due to filtering: 0

Totals per cancer class
Original: [44966 12902 13950  6364  3197  1591]
Filtered: [3677  778 1741  589  347   73]
Percent: [ 8  6 12  9 10  4]
Overall total
Original 82970.0
Filtered 7205.0
Percent: 8.68386163673617
