# Filtro de dados

Notebook criado com o objetivo de selecionar os dados interessantes da base Parkinson’s Progression Markers Initiative (PPMI) a serem utilizados no trabalho da disciplina Introdução à Ciência dos Dados (ICD), ministrada pelo Departamento de Ciência da Computação (DCC) da Universidade Federal de Minas Gerais (UFMG).

In [1]:
import pickle as pkl

## Representação intermediária

Criação do dicionário utilizado para representar as tabelas do banco consideradas interessantes para o trabalho, isto é, as tabelas que contêm dados utilizáveis para previsão e classificação da doença de Parkinson.

In [2]:
# Intermediate representation: maps a table key to the list of column names kept for it.
interest = dict()

## Adição das colunas

Adição das colunas de cada arquivo à representação intermediária do banco.

In [3]:
# Columns kept for the "biospecimen" entry.
interest["biospecimen"] = [
    "PATNO", "CLINICAL_EVENT",
    "TYPE", "TESTNAME", "TESTVALUE", "UNITS",
]

In [4]:
# Columns kept for the "pd_features" entry.
interest["pd_features"] = ["PATNO", "SXMO", "SXYEAR", "PDDXDT"]

In [5]:
# Columns kept for the "pd_medication" entry.
interest["pd_medication"] = [
    "PATNO", "EVENT_ID",
    "PDMEDYN", "ONLDOPA", "ONDOPAG", "ONOTHER",
]

In [6]:
# Columns kept for the "vital_sign" entry.
interest["vital_sign"] = ["PATNO", "EVENT_ID", "WGTKG", "HTCM"]

In [7]:
# Columns kept for the "neuro_cranial" entry: the two identifier columns
# followed by one "CN<id>RSP" column per identifier listed below.
interest["neuro_cranial"] = ["PATNO", "EVENT_ID"] + [
    "CN{}RSP".format(nerve_id)
    for nerve_id in ("1", "2", "346", "5", "7", "8", "910", "11", "12")
]

In [8]:
# Columns kept for the "updrs1" entry.
interest["updrs1"] = [
    "PATNO", "EVENT_ID", "INFODT",
    "NP1COG", "NP1HALL", "NP1DPRS",
    "NP1ANXS", "NP1APAT", "NP1DDS",
]

In [9]:
# Columns kept for the "updrs1pq" entry.
interest["updrs1pq"] = [
    "PATNO", "EVENT_ID",
    "NP1SLPN", "NP1SLPD", "NP1PAIN", "NP1URIN",
    "NP1CNST", "NP1LTHD", "NP1FATG",
]

In [10]:
# Columns kept for the "updrs2pq" entry.
interest["updrs2pq"] = [
    "PATNO", "EVENT_ID",
    "NP2SPCH", "NP2SALV", "NP2SWAL", "NP2EAT", "NP2DRES",
    "NP2HYGN", "NP2HWRT", "NP2HOBB", "NP2TURN", "NP2TRMR",
    "NP2RISE", "NP2WALK", "NP2FREZ",
]

In [11]:
# Columns kept for the "updrs3_temp" entry.
# NOTE(review): "PN3RIGRL" breaks the "NP3" prefix used by its neighbours;
# presumably it mirrors the header of the source table -- confirm before renaming.
interest["updrs3_temp"] = [
    "PATNO", "EVENT_ID", "PAG_NAME", "CMEDTM", "EXAMTM",
    "NP3SPCH", "NP3FACXP",
    "NP3RIGN", "NP3RIGRU", "NP3RIGLU", "PN3RIGRL", "NP3RIGLL",
    "NP3FTAPR", "NP3FTAPL",
    "NP3HMOVR", "NP3HMOVL",
    "NP3PRSPR", "NP3PRSPL",
    "NP3TTAPR", "NP3TTAPL",
    "NP3LGAGR", "NP3LGAGL",
    "NP3RISNG", "NP3GAIT", "NP3FRZGT", "NP3PSTBL", "NP3POSTR", "NP3BRADY",
    "NP3PTRMR", "NP3PTRML",
    "NP3KTRMR", "NP3KTRML",
    "NP3RTARU", "NP3RTALU", "NP3RTARL", "NP3RTALL", "NP3RTALJ", "NP3RTCON",
    "DYSKPRES", "DYSKIRAT", "NHY",
    "ANNUAL_TIME_BTW_DOSE_NUPDRS", "ON_OFF_DOSE", "PD_MED_USE",
]

In [12]:
# Columns kept for the "updrs3" entry.
# NOTE(review): "PN3RIGRL" breaks the "NP3" prefix used by its neighbours;
# presumably it mirrors the header of the source table -- confirm before renaming.
interest["updrs3"] = [
    "PATNO", "EVENT_ID",
    "NP3SPCH", "NP3FACXP",
    "NP3RIGN", "NP3RIGRU", "NP3RIGLU", "PN3RIGRL", "NP3RIGLL",
    "NP3FTAPR", "NP3FTAPL",
    "NP3HMOVR", "NP3HMOVL",
    "NP3PRSPR", "NP3PRSPL",
    "NP3TTAPR", "NP3TTAPL",
    "NP3LGAGR", "NP3LGAGL",
    "NP3RISNG", "NP3GAIT", "NP3FRZGT", "NP3PSTBL", "NP3POSTR", "NP3BRADY",
    "NP3PTRMR", "NP3PTRML",
    "NP3KTRMR", "NP3KTRML",
    "NP3RTARU", "NP3RTALU", "NP3RTARL", "NP3RTALL", "NP3RTALJ", "NP3RTCON",
]

In [13]:
# Columns kept for the "updrs4" entry.
interest["updrs4"] = [
    "PATNO", "EVENT_ID",
    "NP4WDYSK", "NP4DYSKI", "NP4OFF",
    "NP4FLCTI", "NP4FLCTX", "NP4DYSTN",
]

In [14]:
# Columns kept for the "schwab" entry.
interest["schwab"] = ["PATNO", "EVENT_ID", "MSEADLG"]

In [15]:
# Columns kept for the "pase_house" entry.
interest["pase_house"] = [
    "PATNO", "EVENT_ID",
    "LTHSWRK", "HVYHSWRK", "HMREPR", "LAWNWRK", "OUTGARDN", "CAREGVR",
    "WRKVL", "WRKVLHR", "WRKVLACT",
]

In [16]:
# Columns kept for the "benton" entry.
interest["benton"] = ["PATNO", "EVENT_ID", "JLO_TOTRAW"]

In [17]:
# Columns kept for the "cog_catg" entry.
interest["cog_catg"] = [
    "PATNO", "EVENT_ID",
    "COGDECLN", "FNCDTCOG", "COGSTATE",
]

In [18]:
# Columns kept for the "epworth" entry: identifiers plus ESS1 .. ESS8.
interest["epworth"] = ["PATNO", "EVENT_ID"] + [
    "ESS{}".format(item) for item in range(1, 9)
]

In [19]:
# Columns kept for the "geriatric" entry.
interest["geriatric"] = [
    "PATNO", "EVENT_ID",
    "GDSSATIS", "GDSDROPD", "GDSEMPTY", "GDSBORED", "GDSGSPIR",
    "GDSAFRAD", "GDSHAPPY", "GDSHLPLS", "GDSHOME", "GDSMEMRY",
    "GDSALIVE", "GDSWRTLS", "GDSENRGY", "GDSHOPLS", "GDSBETER",
]

In [20]:
# Item-only group (no identifier columns) stored under "geriatric_pos".
# NOTE(review): presumably the items scored in the positive direction -- confirm.
interest["geriatric_pos"] = [
    "GDSDROPD", "GDSEMPTY", "GDSBORED", "GDSAFRAD", "GDSHLPLS",
    "GDSHOME", "GDSMEMRY", "GDSWRTLS", "GDSHOPLS", "GDSBETER",
]

In [21]:
# Item-only group (no identifier columns) stored under "geriatric_neg".
# NOTE(review): presumably the reverse-scored items -- confirm.
interest["geriatric_neg"] = [
    "GDSSATIS", "GDSGSPIR", "GDSHAPPY", "GDSALIVE", "GDSENRGY",
]

In [22]:
# Columns kept for the "hopkins_verbal" entry.
interest["hopkins_verbal"] = [
    "PATNO", "EVENT_ID",
    "HVLTRT1", "HVLTRT2", "HVLTRT3",
    "HVLTRDLY", "HVLTREC", "HVLTFPRL", "HVLTFPUN",
]

In [23]:
# Columns kept for the "letter_seq" entry.
interest["letter_seq"] = ["PATNO", "EVENT_ID", "LNS_TOTRAW"]

In [24]:
# Columns kept for the "moca" entry.
interest["moca"] = [
    "PATNO", "EVENT_ID",
    "MCAALTTM", "MCACUBE", "MCACLCKC", "MCACLCKN", "MCACLCKH",
    "MCALION", "MCARHINO", "MCACAMEL",
    "MCAFDS", "MCABDS", "MCAVIGIL", "MCASER7",
    "MCASNTNC", "MCAVFNUM", "MCAVF",
    "MCAABSTR",
    "MCAREC1", "MCAREC2", "MCAREC3", "MCAREC4", "MCAREC5",
    "MCADATE", "MCAMONTH", "MCAYR", "MCADAY", "MCAPLACE", "MCACITY",
    "MCATOT",
]

In [25]:
# Item-only group (no identifier columns) stored under "moca_visuospatial".
interest["moca_visuospatial"] = [
    "MCAALTTM", "MCACUBE",
    "MCACLCKC", "MCACLCKN", "MCACLCKH",
]

In [26]:
# Item-only group (no identifier columns) stored under "moca_naming".
interest["moca_naming"] = ["MCALION", "MCARHINO", "MCACAMEL"]

In [27]:
# Item-only group (no identifier columns) stored under "moca_attention".
interest["moca_attention"] = ["MCAFDS", "MCABDS", "MCAVIGIL", "MCASER7"]

In [28]:
# Item-only group (no identifier columns) stored under "moca_language".
interest["moca_language"] = ["MCASNTNC", "MCAVF"]

In [29]:
# Item-only group stored under "moca_delayed_recall": MCAREC1 .. MCAREC5.
interest["moca_delayed_recall"] = [
    "MCAREC{}".format(word) for word in range(1, 6)
]

In [30]:
# Item-only group (no identifier columns) stored under "moca_orientation".
interest["moca_orientation"] = [
    "MCADATE", "MCAMONTH", "MCAYR",
    "MCADAY", "MCAPLACE", "MCACITY",
]

In [31]:
# Columns kept for the "upsit" entry.
# Layout: "SUBJECT_ID", then the 40 "SCENT_NN_RESPONSE" columns, then the 40
# "SCENT_NN_CORRECT" columns, then "TOTAL_CORRECT". Within each suffix the scent
# numbers run in descending order inside each group of ten
# (10..01, 20..11, 30..21, 40..31), which is the order the list was written in.
interest["upsit"] = (
    ["SUBJECT_ID"]
    + [
        "SCENT_{:02d}_{}".format(block_end - step, field)
        for field in ("RESPONSE", "CORRECT")
        for block_end in (10, 20, 30, 40)
        for step in range(10)
    ]
    + ["TOTAL_CORRECT"]
)

In [32]:
# Columns kept for the "quip" entry.
interest["quip"] = [
    "PATNO", "EVENT_ID",
    "TMGAMBLE", "CNTRLGMB",
    "TMSEX", "CNTRLSEX",
    "TMBUY", "CNTRLBUY",
    "TMEAT", "CNTRLEAT",
    "TMTORACT", "TMTMTACT", "TMTRWD",
]

In [33]:
# Columns kept for the "rem" entry.
interest["rem"] = [
    "PATNO", "EVENT_ID",
    "DRMVIVID", "DRMAGRAC", "DRMNOCTB", "SLPLMBMV", "SLPINJUR",
    "DRMVERBL", "DRMFIGHT", "DRMUMV", "DRMOBJFL", "MVAWAKEN",
    "DRMREMEM", "SLPDSTRB",
    "STROKE", "HETRA", "RLS", "NARCLPSY", "DEPRS", "EPILEPSY", "BRNINFM",
]

In [34]:
# Columns kept for the "aut" entry: identifiers, SCAU1 .. SCAU23, the
# irregularly named SCAU23A/SCAU23AT/SCAU24/SCAU25, and finally
# SCAU26A .. SCAU26D, each immediately followed by its "T"-suffixed companion.
interest["aut"] = (
    ["PATNO", "EVENT_ID"]
    + ["SCAU{}".format(item) for item in range(1, 24)]
    + ["SCAU23A", "SCAU23AT", "SCAU24", "SCAU25"]
    + [
        "SCAU26{}{}".format(letter, tail)
        for letter in "ABCD"
        for tail in ("", "T")
    ]
)

In [35]:
# Item-only group stored under "aut_gastrointestinal_up": SCAU1 .. SCAU3.
interest["aut_gastrointestinal_up"] = [
    "SCAU{}".format(item) for item in range(1, 4)
]

In [36]:
# Item-only group stored under "aut_gastrointestinal_down": SCAU4 .. SCAU7.
interest["aut_gastrointestinal_down"] = [
    "SCAU{}".format(item) for item in range(4, 8)
]

In [37]:
# Item-only group stored under "aut_urinary": SCAU8 .. SCAU13.
interest["aut_urinary"] = [
    "SCAU{}".format(item) for item in range(8, 14)
]

In [38]:
# Item-only group stored under "aut_cardiovascular": SCAU14 .. SCAU16.
interest["aut_cardiovascular"] = [
    "SCAU{}".format(item) for item in range(14, 17)
]

In [39]:
# Item-only group stored under "aut_thermoregulatory".
interest["aut_thermoregulatory"] = ["SCAU17", "SCAU18"]

In [40]:
# Item-only group stored under "aut_pupillomotor" (a single column).
interest["aut_pupillomotor"] = ["SCAU19"]

In [41]:
# Item-only group stored under "aut_skin".
interest["aut_skin"] = ["SCAU20", "SCAU21"]

In [42]:
# Item-only group stored under "aut_sexual": SCAU22 .. SCAU25.
interest["aut_sexual"] = [
    "SCAU{}".format(item) for item in range(22, 26)
]

In [43]:
# Columns kept for the "semantic" entry.
interest["semantic"] = [
    "PATNO", "EVENT_ID",
    "VLTANIM", "VLTVEG", "VLTFRUIT",
]

In [44]:
# Columns kept for the "stai" entry: identifiers plus STAIAD1 .. STAIAD40.
interest["stai"] = ["PATNO", "EVENT_ID"] + [
    "STAIAD{}".format(item) for item in range(1, 41)
]

In [45]:
# Item-only group stored under "stai_a_state_pos", built from item numbers.
# NOTE(review): presumably the directly scored state items -- confirm.
interest["stai_a_state_pos"] = [
    "STAIAD{}".format(item)
    for item in (3, 4, 6, 7, 9, 12, 13, 14, 17, 18)
]

In [46]:
# Item-only group stored under "stai_a_state_neg", built from item numbers.
# NOTE(review): presumably the reverse-scored state items -- confirm.
interest["stai_a_state_neg"] = [
    "STAIAD{}".format(item)
    for item in (1, 2, 5, 8, 10, 11, 15, 16, 19, 20)
]

In [47]:
# Item-only group stored under "stai_a_trait_pos", built from item numbers.
# NOTE(review): presumably the directly scored trait items -- confirm.
interest["stai_a_trait_pos"] = [
    "STAIAD{}".format(item)
    for item in (22, 24, 25, 28, 29, 31, 32, 35, 37, 38, 40)
]

In [48]:
# Item-only group stored under "stai_a_trait_neg", built from item numbers.
# NOTE(review): presumably the reverse-scored trait items -- confirm.
interest["stai_a_trait_neg"] = [
    "STAIAD{}".format(item)
    for item in (21, 23, 26, 27, 30, 33, 34, 36, 39)
]

In [49]:
# Columns kept for the "sdm" entry.
interest["sdm"] = ["PATNO", "EVENT_ID", "SDMTOTAL"]

In [50]:
# Columns kept for the "upsit_booklet" entry: identifiers plus UPSITBK1 .. UPSITBK4.
interest["upsit_booklet"] = ["PATNO", "EVENT_ID"] + [
    "UPSITBK{}".format(booklet) for booklet in range(1, 5)
]

In [51]:
# Columns kept for the "family_history" entry; after PATNO the names come in
# pairs of "<NAME>" and "<NAME>PD", ending with KIDSNUM / KIDSPD.
interest["family_history"] = [
    "PATNO",
    "BIOMOM", "BIOMOMPD",
    "BIODAD", "BIODADPD",
    "FULSIB", "FULSIBPD",
    "HAFSIB", "HAFSIBPD",
    "MAGPAR", "MAGPARPD",
    "PAGPAR", "PAGPARPD",
    "MATAU", "MATAUPD",
    "PATAU", "PATAUPD",
    "KIDSNUM", "KIDSPD",
]

In [52]:
# Columns kept for the "status" entry.
interest["status"] = [
    "PATNO",
    "RECRUITMENT_CAT", "IMAGING_CAT",
    "ENROLL_DATE", "ENROLL_CAT",
]

In [53]:
# Columns kept for the "screening" entry.
interest["screening"] = [
    "PATNO", "BIRTHDT", "GENDER",
    "APPRDX", "CURRENT_APPRDX",
    "HISPLAT",
    "RAINDALS", "RAASIAN", "RABLACK", "RAHAWOPI", "RAWHITE", "RANOS",
]

In [54]:
# Columns kept for the "socio" entry.
interest["socio"] = ["PATNO", "EDUCYRS", "HANDED"]

In [55]:
# Columns kept for the "primary_diag" entry.
interest["primary_diag"] = ["PATNO", "PRIMDIAG"]

## Exportação de representação intermediária

Salva a representação intermediária (o dicionário de interesses) em um arquivo pickle (pkl), para ser utilizada em outros `jupyter notebooks`.

In [56]:
# Serialize the column-selection dictionary with the highest pickle protocol
# available, writing it in binary mode to a path relative to the working directory.
with open('../ir/data_ppmi.pkl', mode='wb') as pickle_file:
    pkl.dump(interest, pickle_file, protocol=pkl.HIGHEST_PROTOCOL)