# **Dementia Data Visualization II**

## **A: Import Data Set**

In [None]:
import pandas as pd
import altair as alt
import numpy as np

# Load the OASIS longitudinal CSV straight from the project site.
url = "http://www.oasis-brains.org/pdf/oasis_longitudinal.csv"
df = pd.read_csv(url)

# Tidy the raw table: plain 0..n-1 index, friendlier gender column name,
# 'Converted' subjects folded into 'Demented', unused columns removed.
df = df.reset_index(drop=True)
df = df.rename({'M/F':'Gender'}, axis=1)
df['Group'] = df['Group'].replace(['Converted'], ['Demented'])
df = df.drop(['MRI ID', 'Visit', 'Hand', 'MR Delay'], axis=1)

# Helper columns used by the charts below.
# Count: constant 1 per row, so sum(Count) yields a row count.
df['Count'] = 1
df['Group_Gender'] = df['Group'] + '_' + df['Gender']
# Numeric flags: Group_01 is 1 for 'Demented', Gender_01 is 0 for 'F'.
df['Group_01'] = (df['Group'] == 'Demented').astype('int64')
df['Gender_01'] = (df['Gender'] != 'F').astype('int64')

df.head(5)

Unnamed: 0,Subject ID,Group,Gender,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF,Count,Group_Gender,Group_01,Gender_01
0,OAS2_0001,Nondemented,M,87,14,2.0,27.0,0.0,1987,0.696,0.883,1,Nondemented_M,0,1
1,OAS2_0001,Nondemented,M,88,14,2.0,30.0,0.0,2004,0.681,0.876,1,Nondemented_M,0,1
2,OAS2_0002,Demented,M,75,12,,23.0,0.5,1678,0.736,1.046,1,Demented_M,1,1
3,OAS2_0002,Demented,M,76,12,,28.0,0.5,1738,0.713,1.01,1,Demented_M,1,1
4,OAS2_0002,Demented,M,80,12,,22.0,0.5,1698,0.701,1.034,1,Demented_M,1,1


##**B: MMSE**

### MMSE: Mini Mental State Examination

### **MMSE: Dementia**


In [None]:
def ex_b(selector):
    """Return a bubble chart of row counts per (Group, MMSE).

    Bubble area encodes the number of rows in ``df`` sharing a group and
    MMSE score.  Points matched by ``selector`` keep their group colour;
    all other points are drawn light gray.  The selection is attached to
    the returned chart.
    """
    counts = df.groupby(['Group', 'MMSE']).size().reset_index()
    counts.columns = ['Group', 'MMSE', 'Number of patients']

    group_palette = alt.Scale(
        domain=['Demented', 'Nondemented'],
        range=['royalblue', 'lightseagreen'],
    )
    bubble_size = alt.Size(
        'Number of patients:Q',
        scale=alt.Scale(range=[0, 2000]),
        legend=alt.Legend(orient="right", values=[10, 20, 30]),
    )
    bubble_color = alt.condition(
        selector,
        'Group:N',
        alt.value('lightgray'),
        scale=group_palette,
        legend=None,
    )

    chart = alt.Chart(counts).mark_circle(opacity=0.6).encode(
        x=alt.X('MMSE:O', axis=alt.Axis(labelAngle=0)),
        y=alt.Y('Group:N'),
        size=bubble_size,
        color=bubble_color,
    )
    chart = chart.properties(width=400, height=100)
    return chart.add_selection(selector)

# Interval selection handed to ex_b, which attaches it to the chart and
# uses it to decide which bubbles keep their group colour.
interval = alt.selection_interval()
ex_b(interval)

MMSE               | Prediction 
-------------------|------------------
0-25               | Demented
26-27              | Likely Demented
29-30              | Likely Nondemented

#### ในช่วงคะแนน MMSE 26-30 มีปัจจัยอะไรบ้างที่สามารถแยกระหว่างกลุ่ม demented กับ nondemented

### **Gender by MMSE: Dementia**

In [None]:
# Mean of the 0/1 dementia flag for every (MMSE, Gender) pair, i.e. the
# fraction of rows in that cell that belong to the 'Demented' group.
df_a3 = (
    df[['MMSE', 'Gender', 'Group_01', 'Count']]
    .groupby(by=['MMSE', 'Gender'])
    .agg({'Group_01': 'mean'})
    .rename(columns={'Group_01': 'Dementia Ratio'})
    .reset_index()
    .dropna()
)

# One heat-map row per gender, one column per MMSE score.
alt.Chart(df_a3).mark_rect(opacity=0.75).encode(
    alt.X('MMSE:O'),
    alt.Y('Gender'),
    alt.Color('Dementia Ratio', scale=alt.Scale(scheme='greenblue')),
).properties(width=600, height=60)

#### ที่คะแนน 28-30 เพศหญิงมีโอกาสเป็นโรคน้อยกว่าเพศชาย

### **Age & Gender by MMSE: Dementia**

In [None]:
def heat_point(gender, selector):
  """Build the two-panel MMSE chart for the rows of one gender.

  Top panel: heat map of the mean ``Group_01`` flag per (MMSE, 5-year age
  bin) cell, overlaid with grey points sized by the number of rows in the
  cell.  Bottom panel: bubbles sized by the number of rows per
  (Group, MMSE).

  Parameters
  ----------
  gender : value matched against ``df['Gender']``; also used as the title
      of the top panel.
  selector : Altair selection added to the concatenated chart.

  Returns
  -------
  The vertically concatenated Altair chart.
  """
  df_a1 = df[df['Gender']==gender].loc[:, ['MMSE', 'Age', 'Group_01', 'Count']]
  # NOTE(review): pd.cut bins are right-closed by default, so an age of
  # exactly 60 falls outside (60, 65] and is left out of the heat map --
  # confirm this is intended.
  df_a1 = df_a1.groupby(by=['MMSE', pd.cut(df_a1["Age"], np.arange(60,105,5))])\
  .agg({'Count':'size',
        'Group_01':'mean'})\
  .rename({'Count':'Number of patients',
          'Group_01':'Dementia Ratio'}, axis=1)\
  .reset_index().dropna()
  # Label each bin from its bounds.  Slicing str(interval) at fixed
  # positions truncated "(95, 100]" to "95- 10" and left a stray space in
  # every other label.
  df_a1['Age'] = df_a1['Age'].map(
      lambda iv: '{}-{}'.format(int(iv.left), int(iv.right)))

  # Row counts per (Group, MMSE) for the bubble strip under the heat map.
  df_a2 = df[df['Gender']==gender]
  df_a2 = df_a2.groupby(['Group','MMSE']).size().reset_index()
  df_a2.columns = ['Group', 'MMSE', 'Number of patients']

  rect = alt.Chart(df_a1).mark_rect().encode(
      alt.X('MMSE:O', title=None),
      alt.Y('Age:N',
          sort='descending'),
      alt.Color('Dementia Ratio:Q',
          scale=alt.Scale(scheme='greenblue'),
          legend=alt.Legend(gradientLength=150)),
  ).properties(width=400, height=250,
               title=gender
  )

  # Same data and x/y encoding as the heat map, drawn as grey points
  # whose size shows how many rows back each cell.
  circ = rect.mark_point().encode(
      alt.ColorValue('grey'),
      alt.Size('Number of patients', legend=alt.Legend(
          title='Number of patients',
          values=[2, 8, 14]))
  )

  bubble = alt.Chart(df_a2).mark_circle(
      opacity=0.75
  ).encode(
      alt.X('MMSE:O', axis=alt.Axis(labelAngle=0)),
      alt.Y('Group:N'),
      alt.Size('Number of patients:Q',
          scale=alt.Scale(range=[0, 1000]),
          legend=None),
      color = alt.Color('Group:N', scale=alt.Scale(
          domain = ['Demented', 'Nondemented'],
          range = ['royalblue', 'lightseagreen']),
          legend=None)
  ).properties(width=400, height=40
  )

  # Colour and size scales/legends are kept independent between the two
  # panels: the heat map uses a quantitative colour scale, the bubble
  # strip a categorical one.
  return alt.vconcat(
      rect + circ,
      bubble
  ).resolve_legend(
      color="independent",
      size="independent"
  ).resolve_scale(
      color='independent'
  ).add_selection(selector)

# A fresh interval selection shared by both gender panels.
interval = alt.selection_interval()

# Male and female panels side by side, with the colour scale resolved as
# shared at the top level and a common title.
alt.hconcat(heat_point('M', interval), 
            heat_point('F', interval)).resolve_scale(color='shared')\
            .properties(title='MMSE, Age & Gender: Dementia')

## **C: ASF**

### ASF: Atlas Scaling Factor

### **ASF: Group**

In [None]:
# ASF values of each diagnosis group, re-indexed from 0 so the two series
# can be placed side by side (the shorter one is padded with NaN).
AGD = df.loc[df['Group'] == 'Demented', 'ASF'].reset_index(drop=True)
AGN = df.loc[df['Group'] == 'Nondemented', 'ASF'].reset_index(drop=True)
# One row per group, one column per position.
AG = pd.DataFrame({'Demented': AGD, 'Nondemented': AGN}).T

group_colors = alt.Scale(
    domain=['Demented', 'Nondemented'],
    range=['red', 'dodgerblue'],
)

# Fold the two group columns into long form, then draw one overlapping
# (unstacked) density curve per group.
alt.Chart(AG.T).transform_fold(
    ['Demented', 'Nondemented'],
    as_=['Group', 'value'],
).transform_density(
    density='value',
    groupby=['Group'],
    bandwidth=.3,
    extent=[0.2, 2],
    steps=200,
    counts=True,
).mark_area().encode(
    x=alt.X('value:Q', title='ASF'),
    y=alt.Y('density:Q', title='Number of patients', stack=None),
    color=alt.Color('Group:N', scale=group_colors),
    opacity=alt.value(0.6),
).properties(width=300, height=250)

#### ASF ไม่สามารถนำมาใช้แยกกลุ่ม demented และ nondemented ออกจากกันได้

### **MMSE by ASF: Group**

In [None]:
# Work on an explicit copy so the column assignment below cannot touch df
# or trigger a chained-assignment warning.
df_d = df.loc[:, ['MMSE', 'ASF', 'Group_01']].copy()
# Snap ASF to a 0.02 grid so nearby values share a heat-map cell.
df_d['ASF'] = df_d['ASF'].map(lambda i: 2*round(i/2, 2))
# Mean of the 0/1 dementia flag per (MMSE, ASF) cell.  The column is
# selected explicitly: the first positional argument of GroupBy.mean is
# numeric_only, not a column name.
df_d = df_d.groupby(['MMSE','ASF'])['Group_01'].mean().reset_index() \
  .rename({'Group_01':'Probability of Demented'}, axis=1)

alt.Chart(df_d).mark_rect().encode(
    alt.X('MMSE', type='ordinal'),
    alt.Y('ASF', type='ordinal', sort='descending', bin=alt.Bin(maxbins=10)),
    alt.Color('Probability of Demented:Q',
        scale=alt.Scale(scheme='greenblue'))
)

## **D: nWBV**

### nWBV: Normalized Whole Brain Volume

### **Age & Group: nWBV**

In [None]:
# Scatter of normalised whole-brain volume against age, coloured by group.
base = alt.Chart(df).mark_point().encode(
    alt.X('Age', scale=alt.Scale(domain=(50, 100))),
    alt.Y('nWBV', scale=alt.Scale(domain=(0.65, 0.85))),
    alt.Color('Group:N'),
).properties(width=400, height=200)

# One regression line of nWBV on Age per group, layered over the points.
trend = base.transform_regression(
    'Age', 'nWBV', groupby=['Group']
).mark_line(size=3)

alt.layer(base, trend)

### **Age & Group_Gender: nWBV**

In [None]:
# Multi-selection keyed on Group_Gender; it is attached to the bar chart
# below and referenced by the shared colour condition.
selection = alt.selection_multi(fields=['Group_Gender'])
# Selected Group_Gender values keep their palette colour; everything else
# is drawn light gray.  The same condition colours the scatter and bars.
color = alt.condition(selection, 
                      alt.Color('Group_Gender:N', scale=alt.Scale(
                          domain = ['Demented_M', 'Nondemented_M', 
                                    'Demented_F', 'Nondemented_F'], 
                          range = ['darkblue', 'royalblue', 
                                   'darkorchid', '#e377c2']), 
                          legend=None),
                      alt.value('lightgray')
                     )

# Scatter of nWBV against Age using the conditional colour.
base = alt.Chart(df).mark_point(opacity=0.5).encode(
    x = alt.X('Age', scale=alt.Scale(domain=(55,100))),
    y = alt.Y('nWBV', scale=alt.Scale(domain=(0.62, 0.85))),
    color = color
).properties(width=650, height=300
)

# Points plus one regression line per Group_Gender.
# NOTE(review): method calls bind tighter than `+`, so .properties(title=...)
# is applied to the regression-line layer, not to the combined chart --
# confirm the title renders as intended.
scatter = base + base.transform_regression('Age', 'nWBV', groupby=['Group_Gender']).mark_line(size=3)\
.properties(title='Age & Group_Gender: nWBV')

# Bars of summed Count per gender, coloured by Group_Gender; clicking a
# bar segment drives the selection.
bars = alt.Chart(df).mark_bar(
    opacity=0.75
).encode(
    x = alt.X('sum(Count)', title='Number of patients'),
    y = alt.Y('Gender'),
    color = color
).properties(width=650, height=100).add_selection(selection)

# Group_Gender text labels positioned with the same stacked sum(Count) x
# encoding, right-aligned.
text = alt.Chart(df).mark_text(align='right', baseline='middle').encode(
    y = alt.Y('Gender', title=None),
    x = alt.X('sum(Count)', stack=True),
    text = 'Group_Gender')

# Scatter on top, labelled bar chart (acting as an interactive legend) below.
alt.vconcat(scatter, alt.layer(bars, text))

## **E: Feature MI Scores**

In [None]:
# Columns kept for the mutual-information analysis; 'Group' is the target.
features = [
    'Group', 'Gender', 'Age', 'EDUC', 'SES',
    'MMSE', 'eTIV', 'nWBV', 'ASF',
]

# Independent copy restricted to those columns, without incomplete rows.
X = df.loc[:, features].dropna().copy()

# Replace every text column with integer codes (order of first appearance).
for colname in X.select_dtypes("object").columns:
    codes, _uniques = pd.factorize(X[colname])
    X[colname] = codes

# Split the target off the feature matrix.
y = X.pop("Group")

In [None]:
from sklearn.feature_selection import mutual_info_classif, mutual_info_regression

def make_mi_scores(X, y, discrete_features,
                   estimator=mutual_info_regression, random_state=None):
    """Return the mutual-information score of every column of X against y.

    Parameters
    ----------
    X : DataFrame of numeric features.
    y : target values aligned with X.
    discrete_features : forwarded unchanged to the estimator.
    estimator : scoring function called as
        ``estimator(X, y, discrete_features=..., random_state=...)``.
        Defaults to ``mutual_info_regression`` (continuous target); pass
        ``mutual_info_classif`` when the target is a class label.
    random_state : seed forwarded to the estimator; None (the default)
        leaves the scores unseeded, as before.

    Returns
    -------
    Series named "MI Scores", indexed by column name, sorted descending.
    """
    mi_scores = estimator(X, y, discrete_features=discrete_features,
                          random_state=random_state)
    mi_scores = pd.Series(mi_scores, name="MI Scores", index=X.columns)
    mi_scores = mi_scores.sort_values(ascending=False)
    return mi_scores

# y holds the factorized 'Group' label, a discrete class, so it is scored
# with the classification estimator; the fixed seed makes the ranking
# reproducible between runs.
mi_scores = make_mi_scores(X, y, 'auto',
                           estimator=mutual_info_classif, random_state=0)
mi_scores

MMSE      0.278738
eTIV      0.082153
nWBV      0.072480
Gender    0.063961
ASF       0.063181
EDUC      0.057350
Age       0.005106
SES       0.002627
Name: MI Scores, dtype: float64