In [2]:
import seaborn as sns
import pandas as pd
import numpy as np



# Dicionário de Dados - fMRI

### O dataset fmri contém um conjunto de dados sobre ressonância funcinal ou ressonância magnética funcional. 

In [8]:
# Carregando o dataset
fmri = sns.load_dataset('fmri')
# Retorno das 10 primeiras linhas do dataset
fmri.head(10)

Unnamed: 0,subject,timepoint,event,region,signal
0,s13,18,stim,parietal,-0.017552
1,s5,14,stim,parietal,-0.080883
2,s12,18,stim,parietal,-0.081033
3,s11,18,stim,parietal,-0.046134
4,s10,18,stim,parietal,-0.03797
5,s9,18,stim,parietal,-0.103513
6,s8,18,stim,parietal,-0.064408
7,s7,18,stim,parietal,-0.060526
8,s6,18,stim,parietal,-0.007029
9,s5,18,stim,parietal,-0.040557


In [16]:
#retorno das 10 últimas linhas do dataframe
fmri.tail(10)

Unnamed: 0,subject,timepoint,event,region,signal
1054,s5,8,cue,frontal,-0.028292
1055,s4,8,cue,frontal,-0.160821
1056,s3,8,cue,frontal,-0.033848
1057,s2,8,cue,frontal,-0.069666
1058,s1,8,cue,frontal,-0.136059
1059,s0,8,cue,frontal,0.018165
1060,s13,7,cue,frontal,-0.02913
1061,s12,7,cue,frontal,-0.004939
1062,s11,7,cue,frontal,-0.025367
1063,s0,0,cue,parietal,-0.006899


#### Analisando os valores diferentes que cada feature pode receber:

In [18]:
fmri["subject"].unique()

array(['s13', 's5', 's12', 's11', 's10', 's9', 's8', 's7', 's6', 's4',
       's3', 's2', 's1', 's0'], dtype=object)

In [19]:
fmri["timepoint"].unique()

array([18, 14, 17,  9, 16, 15,  0, 13, 12, 11, 10,  3,  7,  8,  2,  6,  5,
        4,  1])

In [21]:
fmri["event"].unique()

array(['stim', 'cue'], dtype=object)

In [23]:
fmri["region"].unique()

array(['parietal', 'frontal'], dtype=object)

In [25]:
fmri["signal"].nunique()

1064

In [9]:
fmri.describe()

Unnamed: 0,timepoint,signal
count,1064.0,1064.0
mean,9.0,0.00354
std,5.479801,0.09393
min,0.0,-0.255486
25%,4.0,-0.04607
50%,9.0,-0.013653
75%,14.0,0.024293
max,18.0,0.564985


In [12]:
fmri.mean()

  fmri.mean()


timepoint    9.00000
signal       0.00354
dtype: float64

In [13]:
fmri.median()

  fmri.median()


timepoint    9.000000
signal      -0.013653
dtype: float64

In [19]:
dicionario = pd.DataFrame([
   { "variavel": "subject",
        "descricao": "classificação da ressonância",
        "tipo":"qualitativa",
        "subtipo":"nominal"
    },{
        "variavel": "timepoint",
        "descricao": "-",
        "tipo":"quantitativa",
        "subtipo":"ordinal"
    },{
        "variavel": "event",
        "descricao": "tipo de estimulação",
        "tipo":"qualitativa",
        "subtipo":"nominal"
    },{
        "variavel": "region",
        "descricao": "região do cerébro em que a ressonância foi feita",
        "tipo":"qualitativa",
        "subtipo":"nominal"
    },{
        "variavel": "signal",
        "descricao": "atividade cerebral",
        "tipo":"quantitativa",
        "subtipo":"continua"
    }
]
)

#### Descricão de cada uma das features do dataset

In [20]:
pd.DataFrame(dicionario)

Unnamed: 0,variavel,descricao,tipo,subtipo
0,subject,classificação da ressonância,qualitativa,nominal
1,timepoint,-,quantitativa,ordinal
2,event,tipo de estimulação,qualitativa,nominal
3,region,região do cerébro em que a ressonância foi feita,qualitativa,nominal
4,signal,atividade cerebral,quantitativa,continua


In [23]:
features_qualitativas = dicionario[dicionario['tipo'] == 'qualitativa'].variavel.tolist()
features_quantitativas = dicionario[dicionario['tipo'] == 'quantitativa'].variavel.tolist()

In [24]:
fmri[features_qualitativas].mode


<bound method DataFrame.mode of      subject event    region
0        s13  stim  parietal
1         s5  stim  parietal
2        s12  stim  parietal
3        s11  stim  parietal
4        s10  stim  parietal
...      ...   ...       ...
1059      s0   cue   frontal
1060     s13   cue   frontal
1061     s12   cue   frontal
1062     s11   cue   frontal
1063      s0   cue  parietal

[1064 rows x 3 columns]>

In [25]:
fmri[features_quantitativas].mode

<bound method DataFrame.mode of       timepoint    signal
0            18 -0.017552
1            14 -0.080883
2            18 -0.081033
3            18 -0.046134
4            18 -0.037970
...         ...       ...
1059          8  0.018165
1060          7 -0.029130
1061          7 -0.004939
1062          7 -0.025367
1063          0 -0.006899

[1064 rows x 2 columns]>

#### média e mediana da feature 'signal':

In [28]:
#média
fmri[features_quantitativas].mean()

timepoint    9.00000
signal       0.00354
dtype: float64

In [27]:
#mediana
fmri[features_quantitativas].median()

timepoint    9.000000
signal      -0.013653
dtype: float64

In [26]:
fmri.groupby('region')[features_quantitativas].agg(['mean','median','std'])

Unnamed: 0_level_0,timepoint,timepoint,timepoint,signal,signal,signal
Unnamed: 0_level_1,mean,median,std,mean,median,std
region,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
frontal,9.0,9.0,5.482381,0.001298,-0.008032,0.075967
parietal,9.0,9.0,5.482381,0.005781,-0.019959,0.109001
