# Table data

## 0. Imports

In [1]:
import pandas as pd
import numpy as np
import os, csv

def to_csv(df, name):
    """Write the DataFrame `df` to the CSV file `name`, leaving out the index column."""
    df.to_csv(path_or_buf = name, index = False)

## 1. Generate `all.csv`

The `all.csv` dataset contains all the tables from the `full_1634313571272.xlsx` Excel file joined together. Some columns in different tables of this Excel file have the same name but different meanings. To distinguish them, every column of `all.csv` has a number appended to its name, representing the number of its corresponding table.

In [2]:
def get_all(csv_dir = "../table-data/data/CSVs", skip = ("data-merged.csv",)):
    """Join every CSV found in `csv_dir` into one wide DataFrame.

    Directory entries are visited in sorted order. Each column is copied
    into the result under the name `<column>-<i>`, where `i` is the 1-based
    position of its file in that sorted listing. Entries named in `skip`
    are not read, but they still occupy a position, so the numbering of
    the remaining files does not depend on `skip`.

    Parameters
    ----------
    csv_dir : str
        Directory holding the per-table CSV files.
    skip : collection of str
        File names inside `csv_dir` that must not be read.

    Returns
    -------
    pandas.DataFrame
        One column per (table, column) pair, in file order.
    """
    all_tables = pd.DataFrame({})
    for i, file in enumerate(sorted(os.listdir(csv_dir)), start = 1):
        if file in skip:
            continue
        table = pd.read_csv(os.path.join(csv_dir, file))
        # Columns are attached one at a time, so pandas aligns each of them
        # on the row index already present in `all_tables`.
        for col in table.columns:
            all_tables[f"{col}-{i}"] = table[col]
    return all_tables

# Build the merged table once; the filtering cell below selects columns from `all_tables`.
all_tables = get_all()
# Bare last expression so the notebook renders the frame.
all_tables

Unnamed: 0,idEstudo-1,idProcessoLocal-1,numRegistoGeral-1,numRegistoHospital-1,iniciaisNome-1,dataRegInicial-1,dataNascimento-1,altura-1,peso-1,profissao-1,...,ret-32,atCal-32,evo-32,ligA-32,ligB-32,ligC-32,ligD-32,ligE-32,hab-32,total-32
0,1.10,1876470,1,1,CHSP,2017-10-11 14:16:08.014000,1938-01-30 00:00:00,167,74,Reformado - Guarda prisional,...,3,5,3,2,1,3,1,1,1,30
1,1.20,2415798,2,2,NLMMF,2017-10-16 17:38:28.873000,1959-04-27 22:59:59.997000,185,86,Professor universitário,...,3,5,3,2,1,3,1,1,1,30
2,6.10,17044593,3,1,MFSMF,2017-11-06 11:14:46.824000,1938-05-25 22:59:59.997000,150,45,,...,0,0,0,0,0,0,0,0,0,0
3,6.20,17044640,4,2,NRM,2017-11-06 14:43:07.634000,1932-10-13 22:59:59.997000,160,60,reformada,...,,,,,,,,,,
4,6.30,17044934,5,3,JVDN,2017-11-08 11:54:00.602000,1946-11-04 00:00:00,165,80,psicólogo,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
835,6.16,5032101,836,16,FMFL,2021-02-09 15:10:33.390000,1952-06-13 22:59:59.997000,160,62,médico,...,0,0,0,0,0,0,0,0,0,0
836,6.17,18027449,837,17,MJBPFG,2021-02-09 15:28:57.858000,1938-11-22 00:00:00,161,63,reformada,...,,,,,,,,,,
837,6.18,18027709,838,18,BJE,2021-02-09 16:04:47.076000,1968-04-15 22:59:59.997000,170,95,biologo marinho,...,0,0,0,0,0,0,0,0,0,0
838,6.19,18027953,839,19,LAPHS,2021-02-10 12:25:16,1934-03-09 00:00:00,178,90,reformado,...,,,,,,,,,,


## 2. Filter `all.csv`

We are only interested in some of the variables from `all.csv`. Since the target variable is the 3-month Rankin score, all the variables collected after this moment are discarded. Additionally, variables that directly identify the patient (such as the initials of their name) are discarded.

The following is the list of variables we want to keep for each of the tables whose content was recorded before the 3 month mark.

In [3]:
# Column stems shared by the three NIHSS tables (5, 15 and 19); the selected
# columns of those tables differ only in the numeric suffix.
_NIHSS_FIELDS = ["afasia", "loc", "com", "ling", "paresia", "face",
                 "memSE", "memSD", "memIE", "memID", "altCons",
                 "altMovOc", "altVis", "atax", "hipos", "disartia",
                 "inatencao", "totalNIHSS", "disf", "incUri",
                 "preArtSis", "preArtDia", "ritmoCardECG"]


def _with_suffix(fields, table_number):
    """Return `fields` with `-<table_number>` appended to every name."""
    return [f"{field}-{table_number}" for field in fields]


# Table name -> columns of `all_tables` to keep, in the order they are exported.
tables = {
    "01-GERAL": [
        "numRegistoGeral-1", "idProcessoLocal-1", "dataNascimento-1",
        "altura-1", "peso-1", "genero-1", "etnia-1", "viveSozinho-1",
        "escola-1",
    ],
    "02-ANTECEDENTES-PESSOAIS": [
        "hiperTen-2", "diabetes-2", "dislip-2", "avcIsqPre-2",
        "avcHemPre-2", "aitPre-2", "apnSono-2", "insCard-2",
        "cardIsq-2", "doeArt-2", "infecao-2", "histFam-2",
        "estMenP-2", "tabag-2", "alcool-2", "rankin-2",
    ],
    "04-CARACTERIZACAO-AVC": [
        "dataAVC-4", "avcAcordar-4", "instAVCpre-4", "defMaxIn-4",
        "melSintIn-4", "cefaleia-4", "criseEpi-4", "hemoAd-4",
        "hemat-4", "inrAd-4", "gliceAd-4",
    ],
    "05-CARACTERIZACAO-AVC - NIHSS": _with_suffix(_NIHSS_FIELDS, 5),
    "07-IMAGEM INICIAL - TCCE": [
        "aspects-7", "ouTerrIsq-7", "ouTerrIsqL-7", "ouTerrIsqOutro-7",
        "sinCorda-7", "lacAnt-7", "lacAntL-7", "leucoa-7", "enfAnt-7",
        "enfAntL-7", "enfAntOutro-7", "data-7",
    ],
    "08-IMAGEM INICIAL - AngioTCCE": ["colaCTA1-8", "colaCTA2a-8", "colaCTA2b-8"],
    "09-IMAGEM INICIAL - AngioTCCE - Estenose": ["ocEst-9", "localiz-9", "lado-9"],
    "10-IMAGEM INICIAL - AngioTCCE - Oclusao": ["ocEst-10", "localiz-10", "lado-10"],
    "11-TRATAMENTO AGUDO AVC": ["rtPA-11", "tromb-11", "recaTIC-11"],
    "15-AVALIACAO CLINICA 24H - NIHSS": _with_suffix(_NIHSS_FIELDS, 15),
    "18-ALTA": ["destino-18", "rankin-18"],
    "19-ALTA - NIHSS": _with_suffix(_NIHSS_FIELDS, 19),
    "20-ALTA - MOCA": [
        "capVis-20", "nomea-20", "atencao-20", "ling-20", "abst-20",
        "evoDif-20", "orien-20", "total-20",
    ],
    # Fourteen question columns followed by the two totals.
    "21-ALTA - HADS": (
        [f"valorP{question}-21" for question in range(1, 15)]
        + ["totalA-21", "totalD-21"]
    ),
    "22-ALTA - MINI MENTAL STATE": [
        "ori1-22", "ori2-22", "ret-22", "atCal-22", "evo-22",
        "ligA-22", "ligB-22", "ligC-22", "ligD-22", "ligE-22",
        "hab-22", "total-22",
    ],
    "23-AVALIACAO 3 MESES": ["rankin-23"],
}

Now we can filter `all.csv` using this list of variables:

In [4]:
# Flatten the per-table lists into a single list, keeping the table order.
variables = []
for table_variables in tables.values():
    variables.extend(table_variables)

# Keep only the selected columns of the merged table.
tables_filtered = pd.DataFrame(all_tables[variables])
print(f"Using {len(variables)} variables")
tables_filtered

Using 168 variables


Unnamed: 0,numRegistoGeral-1,idProcessoLocal-1,dataNascimento-1,altura-1,peso-1,genero-1,etnia-1,viveSozinho-1,escola-1,hiperTen-2,...,atCal-22,evo-22,ligA-22,ligB-22,ligC-22,ligD-22,ligE-22,hab-22,total-22,rankin-23
0,1,1876470,1938-01-30 00:00:00,167,74,2,1,1,2,1,...,5,2,2,1,0,1,0,0,24,2
1,2,2415798,1959-04-27 22:59:59.997000,185,86,2,1,0,4,1,...,5,3,2,1,3,1,1,1,30,0
2,3,17044593,1938-05-25 22:59:59.997000,150,45,1,1,0,1,0,...,0,0,0,0,0,0,0,0,0,6
3,4,17044640,1932-10-13 22:59:59.997000,160,60,1,1,0,2,1,...,0,0,0,0,0,0,0,0,0,
4,5,17044934,1946-11-04 00:00:00,165,80,2,1,0,4,1,...,0,0,0,0,0,0,0,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
835,836,5032101,1952-06-13 22:59:59.997000,160,62,2,1,1,4,0,...,0,0,0,0,0,0,0,0,0,0
836,837,18027449,1938-11-22 00:00:00,161,63,1,1,0,2,0,...,0,0,0,0,0,0,0,0,0,0
837,838,18027709,1968-04-15 22:59:59.997000,170,95,2,1,0,4,1,...,0,0,0,0,0,0,0,0,0,
838,839,18027953,1934-03-09 00:00:00,178,90,2,1,1,3,1,...,0,0,0,0,0,0,0,0,0,2


## 3. Add `NCCT` and `CTA` columns

Add these two columns. For each patient, a column holds the value of the matching column in `gravo.csv` when a `.nii` file named after the patient exists in the corresponding scan folder, and `missing` otherwise.

In [16]:
def _nii_stems(directory):
    """List the `.nii` files of `directory`, each without its last four characters (the extension)."""
    stems = []
    for entry in os.listdir(directory):
        if entry.endswith(".nii"):
            stems.append(entry[:-4])
    return stems


def _scan_value(patient_id, available, column):
    """Return `scans[column]` for `patient_id`, or "missing" when the id is not in `available`."""
    if patient_id not in available:
        return "missing"
    same_patient = scans["idProcessoLocal"] == int(patient_id)
    # First matching row of gravo.csv; raises IndexError if there is none.
    return scans[same_patient][column].values[0]


nccts = _nii_stems("../../data/gravo/NCCT")
ctas  = _nii_stems("../../data/gravo/CTA")
scans = pd.read_csv("../../data/gravo/gravo.csv")

has_ncct, has_cta = [], []
for _, row in tables_filtered.iterrows():
    patient_id = row["idProcessoLocal-1"]
    has_ncct.append(_scan_value(patient_id, nccts, "NCCT"))
    has_cta.append(_scan_value(patient_id, ctas, "CTA"))

tables_filtered["NCCT"] = has_ncct
tables_filtered["CTA"]  = has_cta
tables_filtered

Unnamed: 0,numRegistoGeral-1,idProcessoLocal-1,dataNascimento-1,altura-1,peso-1,genero-1,etnia-1,viveSozinho-1,escola-1,hiperTen-2,...,ligA-22,ligB-22,ligC-22,ligD-22,ligE-22,hab-22,total-22,rankin-23,NCCT,CTA
0,1,1876470,1938-01-30 00:00:00,167,74,2,1,1,2,1,...,2,1,0,1,0,0,24,2,000007F2,0000D43F
1,2,2415798,1959-04-27 22:59:59.997000,185,86,2,1,0,4,1,...,2,1,3,1,1,1,30,0,00008DA1,000001B3
2,3,17044593,1938-05-25 22:59:59.997000,150,45,1,1,0,1,0,...,0,0,0,0,0,0,0,6,missing,missing
3,4,17044640,1932-10-13 22:59:59.997000,160,60,1,1,0,2,1,...,0,0,0,0,0,0,0,,missing,missing
4,5,17044934,1946-11-04 00:00:00,165,80,2,1,0,4,1,...,0,0,0,0,0,0,0,,missing,missing
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
835,836,5032101,1952-06-13 22:59:59.997000,160,62,2,1,1,4,0,...,0,0,0,0,0,0,0,0,missing,missing
836,837,18027449,1938-11-22 00:00:00,161,63,1,1,0,2,0,...,0,0,0,0,0,0,0,0,missing,missing
837,838,18027709,1968-04-15 22:59:59.997000,170,95,2,1,0,4,1,...,0,0,0,0,0,0,0,,missing,missing
838,839,18027953,1934-03-09 00:00:00,178,90,2,1,1,3,1,...,0,0,0,0,0,0,0,2,missing,missing


# REMOVE

In [41]:
import datetime, numpy


def _scan_date_to_timestamp(date):
    """Convert one `data-7` entry into a sortable number.

    - "None" and "0" become `numpy.inf`, so unknown dates sort last.
    - Other strings are cut to their first 10 characters, parsed as
      `%Y-%m-%d` and returned as a timestamp (local time, as produced by
      `datetime.timestamp` on a naive datetime).
    - Non-string values are returned unchanged; this is what the column
      contains once this cell has already been run, so re-running the
      cell no longer fails on `len()` of a float.
    """
    if not isinstance(date, str):
        return date
    if date in ("None", "0"):
        return numpy.inf
    return datetime.datetime.strptime(date[:10], "%Y-%m-%d").timestamp()


dates = [_scan_date_to_timestamp(date) for date in tables_filtered["data-7"].values]
tables_filtered["data-7"] = dates

# `NCCT` is filled in section 3 with either a value from gravo.csv or the
# string "missing", so it cannot be negated directly (unary minus on a
# string raises TypeError). Build the sort key from the presence of a scan
# instead: -1 when a scan exists, 0 otherwise.
tables_filtered["to_sort"] = -(tables_filtered["NCCT"] != "missing").astype(int)

In [67]:
# Rows ordered by the converted scan date (`data-7`); unknown dates were
# mapped to infinity and therefore come last.
a = tables_filtered.sort_values(by = "data-7")
# Keep only patients with an NCCT scan. The `NCCT` column holds a scan value
# or the string "missing" (see section 3), so comparing it to 1 never
# matches; this is the same test the statistics in section 4 use.
a = a[a["NCCT"] != "missing"]
# Columns of interest for the listing. The parentheses keep both lines in a
# single tuple, and the comma after "aspects-7" stops Python from silently
# concatenating it with "ouTerrIsq-7".
vars_ = ("idProcessoLocal-1", "data-7", "aspects-7", "ouTerrIsq-7", "ouTerrIsqOutro-7",
         "sinCorda-7", "lacAnt-7", "leucoa-7", "enfAnt-7", "enfAntOutro-7")

id	data	 aspects	ouTerrIsq	ouTerrIsqOutro	sinCorda		lacAnt	leucoa	enfAnt	enfAntOutro


In [102]:
def _as_bits(flags):
    """Rewrite a comma-separated true/false string using 1/0."""
    return flags.replace("true","1").replace("false","0")


header = "i\tid\tdata\t\taspects\touTerrIsq\tsinCorda\tlacAnt\tleucoa\tenfAnt"
print(header)

i = 0  # stays 0 when `a` has no rows
for i, (_, row) in enumerate(a.iterrows(), start = 1):
    # Repeat the header every tenth row so long listings stay readable.
    if i % 10 == 0:
        print(header)
    patient = str(row["idProcessoLocal-1"])
    if row["data-7"] == numpy.inf:
        # Unknown date: "?" plus an extra tab to keep the columns aligned.
        scan_date = "?" + "\t\t"
    else:
        scan_date = str(datetime.datetime.fromtimestamp(row["data-7"]))[:10] + "\t"
    line = "".join([
        str(i), "\t",
        patient, "\t",
        scan_date,
        str(row["aspects-7"]), "\t",
        str(_as_bits(row["ouTerrIsq-7"])), "\t\t",
        str(row["sinCorda-7"]), "\t\t",
        str(row["lacAnt-7"]), "\t",
        str(row["leucoa-7"]), "\t",
        str(_as_bits(row["enfAnt-7"])), "\t",
    ])
    print(line)

i	id	data		aspects	ouTerrIsq	sinCorda	lacAnt	leucoa	enfAnt
1	2275415	2017-01-06	10	1,0,0,0		0		0	2	0,0,0,0,0,0,0,0,0	
2	1876470	2017-01-08	10	1,0,0,0		0		1	2	0,0,0,0,0,0,0,0,0	
3	2415798	2017-01-10	8	1,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
4	2415910	2017-01-11	10	1,0,0,0		0		1	2	0,0,0,0,0,0,0,0,0	
5	229088	2017-01-14	6	0,0,0,0		3		1	2	0,0,0,0,0,0,0,0,0	
6	1080577	2017-01-14	9	1,0,0,0		1		1	2	0,0,0,0,0,0,0,0,0	
7	318553	2017-01-14	10	0,0,0,0		3		0	2	0,0,0,0,0,0,0,0,0	
8	253915	2017-01-14	10	1,0,0,0		6		1	1	0,0,0,0,0,0,0,0,0	
9	2417839	2017-01-19	7	1,0,0,0		0		0	1	0,0,0,0,1,0,0,0,0	
i	id	data		aspects	ouTerrIsq	sinCorda	lacAnt	leucoa	enfAnt
10	1954138	2017-01-22	9	1,0,0,0		0		1	1	0,0,0,0,0,0,0,0,0	
11	1847676	2017-01-24	10	0,0,0,0		0		1	2	0,0,0,0,0,0,0,0,0	
12	2419194	2017-01-25	7	1,0,0,0		0		0	1	0,0,0,0,0,0,0,0,0	
13	1208846	2017-01-27	10	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
14	86823	2017-01-28	10	0,0,1,0		0		0	0	0,0,0,0,0,0,0,0,0	
15	1846749	2017-01-28	10	1,0,0,0		0		1	1	0,0,0,0,0,0,0,0,0	
16

156	806176	2018-05-07	10	1,0,0,0		0		0	1	0,0,0,0,0,0,0,0,0	
157	1064640	2018-05-13	10	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
158	823	2018-05-28	10	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
159	624440	2018-07-30	11	0,0,1,1		0		0	1	0,0,0,0,0,0,0,0,0	
i	id	data		aspects	ouTerrIsq	sinCorda	lacAnt	leucoa	enfAnt
160	2552040	2018-08-02	9	0,0,0,0		3		0	1	0,0,0,0,0,0,0,0,0	
161	11	2018-08-16	10	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
162	340907	2018-08-28	8	0,0,1,1		6		0	1	0,0,0,0,0,0,0,0,0	
163	2563407	2018-09-15	10	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
164	13	2018-09-16	10	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
165	655771	2018-09-23	10	0,0,0,0		3		0	0	0,0,0,0,0,0,0,0,0	
166	2510110	2018-09-30	11	0,0,1,0		0		0	0	0,0,0,0,0,1,0,0,0	
167	896704	2018-10-11	11	1,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
168	1856315	2018-11-06	11	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
169	535704	2018-11-09	11	0,0,0,0		0		1	1	0,0,0,0,0,0,0,0,0	
i	id	data		aspects	ouTerrIsq	sinCorda	lacAnt	leucoa	enfAnt
170	210117	2018-11-20	10	0,0,0,0		0		0	0	0,0,0,0,0

312	2473173	?		None	None		None		None	None	None	
313	2637591	?		10	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
314	450468	?		9	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
315	2209090	?		10	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
316	2637963	?		None	None		None		None	None	None	
317	2503602	?		9	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
318	2265022	?		None	None		None		None	None	None	
319	523872	?		10	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
i	id	data		aspects	ouTerrIsq	sinCorda	lacAnt	leucoa	enfAnt
320	465558	?		None	None		None		None	None	None	
321	2393419	?		None	None		None		None	None	None	
322	2636144	?		10	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
323	1972885	?		10	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
324	2366619	?		10	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
325	64230	?		7	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
326	2243971	?		8	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
327	1914780	?		10	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
328	2633103	?		10	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
329	172575	?		10	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
i	id	data		aspects	ouT

468	2538003	?		10	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
469	2537706	?		10	0,0,0,0		3		0	1	0,0,0,0,0,0,0,0,0	
i	id	data		aspects	ouTerrIsq	sinCorda	lacAnt	leucoa	enfAnt
470	2517563	?		None	None		None		None	None	None	
471	2537119	?		11	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
472	1306239	?		10	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
473	1381354	?		9	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
474	1473870	?		10	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
475	2534926	?		7	1,0,0,0		1		0	1	0,0,0,0,0,0,0,0,0	
476	459293	?		10	0,0,0,0		3		0	0	1,0,0,0,0,0,0,0,0	
477	511615	?		4	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
478	678373	?		10	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
479	1848736	?		None	None		None		None	None	None	
i	id	data		aspects	ouTerrIsq	sinCorda	lacAnt	leucoa	enfAnt
480	1750147	?		10	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
481	2300360	?		None	None		None		None	None	None	
482	861925	?		11	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
483	1322475	?		10	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
484	943895	?		7	0,0,0,0		0		0	0	0,0,0,0,0,0,0,0,0	
485	2

## 4. Add `visible` column

[*Cryptogenic stroke accounts for 30% to 40% of ischemic stroke.*](https://www.ahajournals.org/doi/10.1161/circresaha.116.308447) Cryptogenic strokes are strokes whose cause is unknown. It is not reasonable to ask the model to predict the clinical outcome of the patient if there are no anatomical changes to the patient's brain.

For this reason, this section adds the `visible` column which is 1 if the stroke is *visible* in the CT scan and 0 otherwise.

### 4.1. Fix column `ouTerrIsq-7`

*São 4 booleans, os que só têm 3 vêm de uma versão antiga e podem ser convertidos para a nova adicionando um boolean a "false" ao início. O array de booleans tem a seguinte legenda: ['Não', 'ACA', 'ACP', 'VB']*

*Sendo que se o "Não" estiver seleccionado nenhum dos outros pode estar e vice-versa.*

In [19]:
def _pad_ou_terr_isq(value):
    """Upgrade an old 3-flag `ouTerrIsq-7` entry to the 4-flag layout.

    The 4-flag layout is ['Não', 'ACA', 'ACP', 'VB']; old entries lack the
    leading 'Não' flag. It is set to "false" when any of the three existing
    flags is "true", and to "true" otherwise. Entries equal to "None",
    entries that do not have exactly three flags, and non-string entries
    are returned unchanged.
    """
    if not isinstance(value, str) or value == "None":
        return value
    flags = value.split(",")
    if len(flags) != 3:
        return value
    leading = "false" if "true" in flags else "true"
    return leading + "," + value


# Build the corrected column in one pass and assign it back. Writing into
# the Series element by element (`col[i] = ...`) is what triggered the
# SettingWithCopyWarning and also depended on the index being 0..n-1.
col = tables_filtered["ouTerrIsq-7"].map(_pad_ou_terr_isq)
tables_filtered["ouTerrIsq-7"] = col

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


### 4.2. Add the new column to `tables_filtered`

In [46]:
def is_visible(row):
    """Tell whether the initial-imaging columns of `row` record any finding.

    Returns 1 as soon as one column holds a value other than "None" and
    other than the values listed as neutral for that column. Returns None
    when the six main columns are all "None" (nothing was recorded), and 0
    in every remaining case.
    """
    # (column, values that do not count as a finding), checked in this order.
    neutral_values = (
        ("aspects-7",        ("None", "10")),
        ("ouTerrIsq-7",      ("None", "true,false,false,false", "false,false,false,false")),
        ("ouTerrIsqOutro-7", ("None",)),
        ("sinCorda-7",       ("None", "0")),
        ("lacAnt-7",         ("None", "0")),
        ("enfAnt-7",         ("None", "false,false,false,false,false,false,false,false,false")),
        ("leucoa-7",         ("None", "0", "1")),
        ("enfAntOutro-7",    ("None",)),
    )
    for column, neutral in neutral_values:
        if all(row[column] != value for value in neutral):
            return 1

    # No finding: distinguish "nothing recorded" from "recorded as normal".
    main_columns = ("aspects-7", "ouTerrIsq-7", "sinCorda-7",
                    "lacAnt-7", "enfAnt-7", "leucoa-7")
    if all(row[column] == "None" for column in main_columns):
        return None
    return 0

# One entry per row: the value returned by `is_visible`, with None spelled as "missing".
visible = [
    "missing" if verdict is None else verdict
    for verdict in (is_visible(row) for _, row in tables_filtered.iterrows())
]
tables_filtered["visible"] = visible

Interpretation of the results

In [47]:
# Count the rows in each `visible` category (1, 0 and "missing").
visible_column = tables_filtered["visible"]
ones    = int((visible_column == 1).sum())
zeros   = int((visible_column == 0).sum())
missing = int((visible_column == "missing").sum())
# Display the counter defined above; the previous final expression used the
# undefined name `nones`, which raises NameError on a fresh kernel.
ones, zeros, missing

(486, 208, 146)

In [48]:
# Patients that have an NCCT scan, and among them those with no recorded finding.
with_ncct_mask = tables_filtered["NCCT"] != "missing"
ncct_patients = tables_filtered[with_ncct_mask]
ncct_no_visible_stroke = ncct_patients[ncct_patients["visible"] == 0]

n_no_evidence = len(ncct_no_visible_stroke)
share = round(n_no_evidence * 100 / len(ncct_patients), 2)
print("There are", n_no_evidence, " NCCT scans where no stroke evidence is visible")
print("This corresponds to", share,
      "% of the patients that have a NCCT exam")

# Histogram of `rankin-23` for those patients; "None" is counted in bin 11.
rankin = []
for r in ncct_no_visible_stroke["rankin-23"].values:
    rankin.append(11 if r == "None" else int(r))
np.bincount(rankin)

There are 134  NCCT scans where no stroke evidence is visible
This corresponds to 27.07 % of the patients that have a NCCT exam


array([53, 20, 18, 12,  5,  2,  5,  0,  0,  0,  0, 19])

## 5. Export csv

In [49]:
# Drop the table-number suffix from the patient id column, then write the result.
exported_names = {"idProcessoLocal-1": "idProcessoLocal"}
tables_filtered.rename(columns = exported_names, inplace = True)
to_csv(tables_filtered, "table_data.csv")