In [91]:
! pip install -r requirements.txt -q

You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m


In [92]:
import glob
import matplotlib.pyplot
import os
import pandas as pd
import re

In [93]:
# Dataset root folder and the doctors whose masks are counted.
root = '/data/Datasets/stroke/ISBI_FOSCAL/'
doctors = ['Daniel', 'Andres']

# Counting criteria: each key maps to the list of substrings looked for in a path.
count_by = {
    # 'doctor': ['Daniel', 'Andres'],  (criterion currently disabled)
    'type': ['NCCT', 'CT', 'ADC', 'DWI']
}


# Search by doctor
def search_pattern_per_doc(doc):
    """Return the recursive glob pattern for the masks stored under doctor 'doc'."""
    return '**/Masks/{}/**/*.nii.gz'.format(doc)


def paths_per_doc(doc):
    """Return the list of paths under 'root' that match the pattern for doctor 'doc'."""
    full_pattern = os.path.join(root, search_pattern_per_doc(doc))
    return glob.glob(full_pattern, recursive=True)


# Search by patient
def search_pattern_per_pat(pat):
    """Return the recursive glob pattern for the masks stored under patient 'pat'."""
    return '**/{}/**/Masks/**/*.nii.gz'.format(pat)


def paths_per_pat(pat):
    """Return the list of paths under 'root' that match the pattern for patient 'pat'."""
    full_pattern = os.path.join(root, search_pattern_per_pat(pat))
    return glob.glob(full_pattern, recursive=True)

In [94]:
def count_paths(paths, count_by):
    """
    Count how many paths match each value of every 'count_by' criterion.

    Matching is a case-insensitive substring test against the whole path.
    For each criterion a path is credited to the first value of the list that
    it contains, so list order matters when one value is a substring of
    another (e.g. 'NCCT' has to come before 'CT'). Paths containing none of
    the values are tallied under 'NO_MATCH'.

    Parameters
    ----------
    paths : list of str
        Paths to classify.
    count_by : dict
        Maps each criterion name to the list of values to look for.

    Returns
    -------
    dict or None
        {criterion: {value: count, ..., 'NO_MATCH': count}}, or None when
        'paths' is empty.
    """
    if len(paths) == 0:
        return None

    no_match_keyword = 'NO_MATCH'

    # Create every counter, including the NO_MATCH bucket, once up front.
    # Resetting NO_MATCH inside the per-path loop would throw away the misses
    # recorded for all previous paths.
    count = {}
    for key, value_list in count_by.items():
        count[key] = {value: 0 for value in value_list}
        count[key][no_match_keyword] = 0

    for path in paths:
        path_upper = path.upper()

        for key, value_list in count_by.items():
            for value in value_list:
                # First matching value wins; stop looking for this criterion.
                if value.upper() in path_upper:
                    count[key][value] += 1
                    break
            else:
                # The path matched none of the values of this criterion.
                count[key][no_match_keyword] += 1

    return count

def count_by_doc():
    """
    Build a table with the number of masks of each image type per doctor.

    Reads the module-level 'doctors', 'paths_per_doc' and 'count_by'. A doctor
    without any mask path gets a column of zeros: count_paths returns None for
    an empty path list, and indexing that result would raise a TypeError.

    Returns
    -------
    pandas.DataFrame
        One column per doctor; one row per value of count_by['type'] plus a
        'NO_MATCH' row.
    """
    # Zero-filled fallback with the same keys that count_paths produces.
    empty_count = {value: 0 for value in count_by['type']}
    empty_count['NO_MATCH'] = 0

    count = {}
    for doc in doctors:
        result = count_paths(paths_per_doc(doc), count_by)
        count[doc] = dict(empty_count) if result is None else result['type']

    return pd.DataFrame(count)

def count_by_patient():
    """
    Build a table with the number of masks of each image type per patient.

    Patient names follow the standard ACV-NNN, where NNN is a zero-padded
    number from 000 to 999. Patients for which count_paths returns None
    (no mask path found) are left out of the table.
    """
    per_patient = {}
    for number in range(0, 1000):
        p_name = 'ACV-{:03d}'.format(number)
        p_count = count_paths(paths_per_pat(p_name), count_by)

        if p_count is None:
            continue
        per_patient[p_name] = p_count['type']

    rows = list(per_patient.values())
    names = list(per_patient.keys())
    return pd.DataFrame(rows, index=names)
    

# Conteo de Máscaras por Doctor

In [95]:
# Mask counts per image type (rows) for each doctor (columns).
doc_count = count_by_doc()
# Row totals (axis 1): sum over every doctor column instead of naming each
# doctor, so the total stays correct if the list of doctors changes.
doc_count['Total'] = doc_count.sum(axis=1)
# Column totals (axis 0), appended as a final 'Total' row.
totales_row = doc_count.sum(axis=0).to_frame('Total').T
doc_count = pd.concat([doc_count, totales_row])
doc_count

Unnamed: 0,Daniel,Andres,Total
NCCT,25,20,45
CT,14,1,15
ADC,67,69,136
DWI,66,68,134
NO_MATCH,0,0,0
Total,172,158,330


# Conteo de Máscaras por Paciente

In [96]:
# Per-patient mask counts. count_by_patient tries every name from ACV-000 to
# ACV-999 and runs one recursive glob per name.
pat_count = count_by_patient()

In [106]:
# Display the per-patient table (one row per patient, one column per image type plus NO_MATCH).
pat_count

Unnamed: 0,NCCT,CT,ADC,DWI,NO_MATCH
ACV-002,1,0,2,1,0
ACV-005,2,0,2,2,0
ACV-006,2,0,2,2,0
ACV-014,2,0,2,2,0
ACV-019,2,0,2,2,0
...,...,...,...,...,...
ACV-215,0,0,2,2,0
ACV-218,0,1,2,2,0
ACV-219,0,0,1,1,0
ACV-221,0,0,2,2,0


In [None]:
# Boolean masks are combined with '&' (element-wise logical AND) rather than
# arithmetic '*'.

# Patients with exactly two annotations for both NCCT and ADC
has_two_ncct_and_adc = (pat_count.NCCT == 2) & (pat_count.ADC == 2)
both_two = pat_count[has_two_ncct_and_adc]
print(f'Cantidad de pacientes con dos marcaciones para NCCT y ADC: {both_two.shape[0]}')
print(f'Los pacientes con dos marcaciones para NCCT y ADC son: {list(both_two.index)}')
print('\n')

# Patients with at least one annotation for both NCCT and ADC
has_ncct_and_adc = (pat_count.NCCT >= 1) & (pat_count.ADC >= 1)
at_least_one = pat_count[has_ncct_and_adc]
print(f'Cantidad de pacientes con al menos 1 marcacion para NCCT y ADC: {at_least_one.shape[0]}')
print(f'Los pacientes con al menos 1 marcacion para NCCT y ADC son: {list(at_least_one.index)}')
print('\n')

# Patients with at least one annotation for both DWI and ADC
has_adc_and_dwi = (pat_count.ADC >= 1) & (pat_count.DWI >= 1)
alo_dwi_adc = pat_count[has_adc_and_dwi]
print(f'Cantidad de pacientes con al menos 1 marcacion para DWI y ADC: {alo_dwi_adc.shape[0]}')
print(f'Los pacientes con al menos 1 marcación para DWI y ADC son: {list(alo_dwi_adc.index)}')
