In [None]:
import pandas as pd
import altair as alt

# Download the OASIS longitudinal table and keep only the rows recorded at visit 1.
url = "http://www.oasis-brains.org/pdf/oasis_longitudinal.csv"

df = (
    pd.read_csv(url)
    .loc[lambda visits: visits['Visit'] == 1]
    .reset_index(drop=True)
    # 'Converted' subjects are relabelled so that only two groups remain.
    .assign(Group=lambda visits: visits['Group'].replace(['Converted'], ['Demented']))
    # Columns that the charts below never read.
    .drop(columns=['MRI ID', 'Visit', 'Hand', 'MR Delay'])
)
df

Unnamed: 0,Subject ID,Group,M/F,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,OAS2_0001,Nondemented,M,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,OAS2_0002,Demented,M,75,12,,23.0,0.5,1678,0.736,1.046
2,OAS2_0004,Nondemented,F,88,18,3.0,28.0,0.0,1215,0.710,1.444
3,OAS2_0005,Nondemented,M,80,12,4.0,28.0,0.0,1689,0.712,1.039
4,OAS2_0007,Demented,M,71,16,,28.0,0.5,1357,0.748,1.293
...,...,...,...,...,...,...,...,...,...,...,...
145,OAS2_0182,Demented,M,73,12,,23.0,0.5,1661,0.698,1.056
146,OAS2_0183,Nondemented,F,66,13,2.0,30.0,0.0,1495,0.746,1.174
147,OAS2_0184,Demented,F,72,16,3.0,24.0,0.5,1354,0.733,1.296
148,OAS2_0185,Demented,M,80,16,1.0,28.0,0.5,1704,0.711,1.030


In [None]:
# Gender vs Group 1
# One row per patient carrying a constant weight of 1, so that sum(GG_count)
# in the chart equals the number of patients in each bar segment.
# assign() broadcasts the scalar to every row of a new frame, so the column is
# filled regardless of how df is indexed (a separately built pd.Series would be
# aligned on index labels and leave NaN wherever the labels do not match).
GG = df[['M/F', 'Group']].assign(GG_count=1)

# Horizontal stacked bars: one bar per group, segments coloured by gender.
alt.Chart(GG).mark_bar(
    opacity=0.75
).encode(
    y = 'Group:N',
    x = alt.X('sum(GG_count)', title='Number of patients'),
    color = alt.Color('M/F', scale=alt.Scale(
        domain = ['M', 'F'], 
        range = ['steelblue', 'pink']), title='Gender')
).properties(width=400, height=200)

In [None]:
# Gender vs Group 2
# Reuses GG: one horizontal bar per gender, with segments coloured by group.
group_palette = alt.Scale(
    domain=['Demented', 'Nondemented'],
    range=['salmon', 'mediumturquoise'],
)

alt.Chart(GG).mark_bar(opacity=0.75).encode(
    alt.X('sum(GG_count)', title='Number of patients'),
    alt.Y('M/F', title='Gender'),
    alt.Color('Group', scale=group_palette, title='Group'),
).properties(width=400, height=200)

In [None]:
# Group vs Age
# Smoothed age distribution for each group.
# The chart is fed long-form data (one row per patient) and the density
# transform splits it by Group itself. Building one column per group instead
# pads the shorter column with NaN whenever the groups differ in size, and
# those missing entries are then passed on to the density transform.
age_groups = ['Demented', 'Nondemented']
age_by_group = (
    df.loc[df['Group'].isin(age_groups), ['Group', 'Age']]
    .dropna()                               # only real measurements are smoothed
    .sort_values('Group', kind='stable')    # rows arrive in the same order as the colour domain
)

alt.Chart(age_by_group).transform_density(
    density='Age',
    bandwidth=3,
    groupby=['Group'],
    extent=[50, 100],
    counts=True,                  # smoothed counts rather than probability densities
    steps=200,
    as_=['value', 'density']      # output field names used by the encodings below
).mark_area().encode(
    alt.X('value:Q', title='Age'),
    alt.Y('density:Q', title='Number of patients', stack=None),
    alt.Color('Group:N', scale=alt.Scale(
        domain=age_groups,
        range=['yellow', 'skyblue'])),
    opacity=alt.value(0.6)
).properties(width=400, height=200)

In [None]:
# Group vs MMSE
# Number of patients for every (Group, MMSE) combination present in df.
GM = df.groupby(['Group', 'MMSE']).size().reset_index(name='Number of patients')

# Circle size encodes the patient count; the size legend shows three reference values.
mmse_size = alt.Size(
    'Number of patients:Q',
    scale=alt.Scale(range=[0, 2555]),
    legend=alt.Legend(orient="right", values=[10, 20, 30]),
)
# The y axis already names the group, so the colour legend is switched off.
mmse_color = alt.Color(
    'Group:N',
    scale=alt.Scale(domain=['Demented', 'Nondemented'], range=['orchid', 'lightcoral']),
    legend=None,
)

alt.Chart(GM).mark_circle(opacity=0.6).encode(
    x=alt.X('MMSE:O', axis=alt.Axis(labelAngle=0)),
    y=alt.Y('Group:N'),
    size=mmse_size,
    color=mmse_color,
).properties(width=400, height=100)

In [None]:
# EDUC vs Group 1
# Number of patients for every (Group, EDUC) combination present in df.
EG = df.groupby(['Group', 'EDUC']).size().reset_index(name='Number of patients')

educ_color = alt.Color(
    'Group:N',
    scale=alt.Scale(domain=['Demented', 'Nondemented'], range=['yellow', 'skyblue']),
    legend=None,
)

# stack=None overlays the two groups' bars instead of stacking them.
alt.Chart(EG).mark_bar(opacity=0.6).encode(
    x=alt.X('EDUC:O', axis=alt.Axis(labelAngle=0)),
    y=alt.Y('Number of patients:Q', stack=None),
    color=educ_color,
).properties(width=400, height=100)

In [None]:
# EDUC vs Group 2
# Smoothed EDUC distribution for each group.
# The chart is fed long-form data (one row per patient) and the density
# transform splits it by Group itself. Building one column per group instead
# pads the shorter column with NaN whenever the groups differ in size, and
# those missing entries are then passed on to the density transform.
educ_groups = ['Demented', 'Nondemented']
educ_by_group = (
    df.loc[df['Group'].isin(educ_groups), ['Group', 'EDUC']]
    .dropna()                               # only real measurements are smoothed
    .sort_values('Group', kind='stable')    # rows arrive in the same order as the colour domain
)

alt.Chart(educ_by_group).transform_density(
    density='EDUC',
    bandwidth=3,
    groupby=['Group'],
    extent=[0, 30],
    counts=True,                  # smoothed counts rather than probability densities
    steps=200,
    as_=['value', 'density']      # output field names used by the encodings below
).mark_area().encode(
    alt.X('value:Q', title='EDUC'),
    alt.Y('density:Q', title='Number of patients', stack=None),
    alt.Color('Group:N', scale=alt.Scale(
        domain=educ_groups,
        range=['red', 'royalblue'])),
    opacity=alt.value(0.6)
).properties(width=400, height=200)

In [None]:
# SES vs Group
# Smoothed SES distribution for each group.
# SES has missing entries in df, and a one-column-per-group frame adds more
# NaN padding whenever the groups differ in size. The chart is therefore fed
# long-form data with the missing rows dropped, and the density transform
# splits it by Group itself.
ses_groups = ['Demented', 'Nondemented']
ses_by_group = (
    df.loc[df['Group'].isin(ses_groups), ['Group', 'SES']]
    .dropna()                               # only real measurements are smoothed
    .sort_values('Group', kind='stable')    # rows arrive in the same order as the colour domain
)

# NOTE(review): the SES values visible in the data preview are small (1-4), so
# bandwidth=3 and extent=[0, 16] look carried over from another chart — confirm.
alt.Chart(ses_by_group).transform_density(
    density='SES',
    bandwidth=3,
    groupby=['Group'],
    extent=[0, 16],
    counts=True,                  # smoothed counts rather than probability densities
    steps=200,
    as_=['value', 'density']      # output field names used by the encodings below
).mark_area().encode(
    alt.X('value:Q', title='SES'),
    alt.Y('density:Q', title='Number of patients', stack=None),
    alt.Color('Group:N', scale=alt.Scale(
        domain=ses_groups,
        range=['yellow', 'skyblue'])),
    opacity=alt.value(0.6)
).properties(width=400, height=200)

In [None]:
# nWBV vs Age vs Group
# Scatter of nWBV against Age, one point per row of df, coloured by group.
age_axis = alt.X('Age', scale=alt.Scale(domain=(50, 100)))
nwbv_axis = alt.Y('nWBV', scale=alt.Scale(domain=(0.65, 0.85)))

base = (
    alt.Chart(df)
    .mark_point()
    .encode(x=age_axis, y=nwbv_axis, color='Group:N')
    .properties(width=400, height=200)
)

# One regression line of nWBV on Age per group, layered over the points.
trend = base.transform_regression('Age', 'nWBV', groupby=['Group']).mark_line(size=3)

base + trend

In [None]:
# ASF vs Group
# Smoothed ASF distribution for each group.
# The chart is fed long-form data (one row per patient) and the density
# transform splits it by Group itself. Building one column per group instead
# pads the shorter column with NaN whenever the groups differ in size, and
# those missing entries are then passed on to the density transform.
asf_groups = ['Demented', 'Nondemented']
asf_by_group = (
    df.loc[df['Group'].isin(asf_groups), ['Group', 'ASF']]
    .dropna()                               # only real measurements are smoothed
    .sort_values('Group', kind='stable')    # rows arrive in the same order as the colour domain
)

alt.Chart(asf_by_group).transform_density(
    density='ASF',
    bandwidth=.3,
    groupby=['Group'],
    extent=[0.2, 2],
    counts=True,                  # smoothed counts rather than probability densities
    steps=200,
    as_=['value', 'density']      # output field names used by the encodings below
).mark_area().encode(
    alt.X('value:Q', title='ASF'),
    alt.Y('density:Q', title='Number of patients', stack=None),
    alt.Color('Group:N', scale=alt.Scale(
        domain=asf_groups,
        range=['red', 'dodgerblue'])),
    opacity=alt.value(0.6)
).properties(width=300, height=250)

In [None]:
# eTIV vs Group
# Smoothed eTIV distribution for each group.
# The chart is fed long-form data (one row per patient) and the density
# transform splits it by Group itself. Building one column per group instead
# pads the shorter column with NaN whenever the groups differ in size, and
# those missing entries are then passed on to the density transform.
# A dedicated name is used so the EG table from the EDUC bar chart is not overwritten.
etiv_groups = ['Demented', 'Nondemented']
etiv_by_group = (
    df.loc[df['Group'].isin(etiv_groups), ['Group', 'eTIV']]
    .dropna()                               # only real measurements are smoothed
    .sort_values('Group', kind='stable')    # rows arrive in the same order as the colour domain
)

alt.Chart(etiv_by_group).transform_density(
    density='eTIV',
    bandwidth=100,
    groupby=['Group'],
    extent=[900, 2100],
    counts=True,                  # smoothed counts rather than probability densities
    steps=200,
    as_=['value', 'density']      # output field names used by the encodings below
).mark_area().encode(
    alt.X('value:Q', title='eTIV'),
    alt.Y('density:Q', title='Number of patients', stack=None),
    alt.Color('Group:N', scale=alt.Scale(
        domain=etiv_groups,
        range=['red', 'dodgerblue'])),
    opacity=alt.value(0.6)
).properties(width=300, height=250)