In [1]:
import altair as alt
import pandas as pd

In [2]:
# The first column of data2.csv has no header and holds 0, 1, 2, ... (pandas
# shows it as "Unnamed: 0"), i.e. it looks like a saved row index. Read it as
# the index so it does not travel along as a spurious data column.
dataset = pd.read_csv("data2.csv", index_col=0)

In [3]:
# Preview the first five rows to check column names and typical values.
dataset.head(n=5)

Unnamed: 0,Age,BMI,Glucose,Insulin,HOMA,Leptin,Adiponectin,Resistin,MCP,Classification
0,48,23.5,70,2.707,0.467409,8.8071,9.7024,7.99585,417.114,Healthy
1,83,20.690495,92,3.115,0.706897,8.8438,5.429285,4.06405,468.786,Healthy
2,82,23.12467,91,4.498,1.009651,17.9393,22.43204,9.27715,554.697,Healthy
3,68,21.367521,77,3.226,0.612725,9.8827,7.16956,12.766,928.22,Healthy
4,86,21.111111,92,3.549,0.805386,6.6994,4.81924,10.57635,773.92,Healthy


In [4]:
import altair as alt
from vega_datasets import data

# Seattle weather sample data shipped with vega_datasets.
df = data.seattle_weather()
print(df)

# Fold the two temperature columns into long form (variable / value) so both
# share one y-axis, then draw the monthly maximum of each as its own line.
alt.Chart(df).mark_line().transform_fold(
    fold=['temp_max', 'temp_min'],
    as_=['variable', 'value']
).encode(
    x='yearmonth(date):T',
    y='max(value):Q',
    color='variable:N'
)

           date  precipitation  temp_max  temp_min  wind  weather
0    2012-01-01            0.0      12.8       5.0   4.7  drizzle
1    2012-01-02           10.9      10.6       2.8   4.5     rain
2    2012-01-03            0.8      11.7       7.2   2.3     rain
3    2012-01-04           20.3      12.2       5.6   4.7     rain
4    2012-01-05            1.3       8.9       2.8   6.1     rain
...         ...            ...       ...       ...   ...      ...
1456 2015-12-27            8.6       4.4       1.7   2.9      fog
1457 2015-12-28            1.5       5.0       1.7   1.3      fog
1458 2015-12-29            0.0       7.2       0.6   2.6      fog
1459 2015-12-30            0.0       5.6      -1.0   3.4      sun
1460 2015-12-31            0.0       5.6      -2.1   3.5      sun

[1461 rows x 6 columns]


In [5]:

# Dropdown bound to the Classification column; None is the "no filter" entry.
# NOTE(review): dataset.head() shows 'Healthy' as a Classification value —
# confirm that 'Patients' / 'Missing' actually occur in the data.
input_dropdown = alt.binding_select(options=[None,'Patients', 'Missing'])
highlight = alt.selection(type='single',bind=input_dropdown, name='Classification',fields=['Classification'])
brush = alt.selection(type='interval')


# Insulin vs. HOMA, one circle per row of `dataset`. The encodings read the
# original columns directly, so no fold transform is applied: folding on names
# that are not columns of the frame would only repeat every row once per name.
# NOTE(review): an empty selection matches every row by default, so
# `highlight | brush` only dims points once both controls are in use —
# confirm `|` (not `&`) is intended.
scatter = alt.Chart(dataset).mark_circle(size=60).encode(
    x='Insulin',
    y='HOMA',
    color=alt.Color('Classification:N'),
    opacity=alt.condition(highlight | brush, alt.value(1.0), alt.value(0.1)),
).properties(
    width=350,
    height=260,
).add_selection(
    highlight,
    brush
)

scatter

In [6]:
# Dropdown filter on Classification; the None option (labelled "All") clears it.
# NOTE(review): dataset.head() shows the label 'Healthy' — confirm these option
# strings match the values actually stored in the Classification column.
input_dropdown = alt.binding_select(options=[None,'Healthy Control','Patient'],labels=['All','Healthy Control','Patient'])
highlight = alt.selection(type='single',bind=input_dropdown, name='Classification',fields=['Classification'])
brush = alt.selection(type='interval')

# Encodings shared by every panel, defined once so the panels cannot drift apart.
class_color = alt.Color('Classification:N', scale=alt.Scale(scheme='tableau10'))
# NOTE(review): an empty selection matches every row by default, so
# `highlight | brush` only dims points once both controls are in use —
# confirm `|` (not `&`) is intended.
focus_opacity = alt.condition(highlight | brush, alt.value(1.0), alt.value(0.1))

tick_axis = alt.Axis()
class_rows = alt.Y('Classification:N', title='Classification', axis=tick_axis)

# Base for all panels: same data, colour, opacity rule and both selections
# (dropdown first, then brush), registered through add_selection.
linked = alt.Chart(dataset).encode(
    color=class_color,
    opacity=focus_opacity,
).add_selection(
    highlight,
    brush
)

scatter = linked.mark_circle(size=60).encode(
    x='Insulin',
    y='HOMA',
).properties(
    width=350,
    height=260,
)

# One-dimensional strips: a single measurement on x, one row per class on y.
strip = linked.encode(class_rows).properties(height=100, width=350)

x_ticks1 = strip.mark_tick().encode(
    alt.X('Glucose:Q', axis=tick_axis, scale=alt.Scale(domain=[50, 210])),
)
x_ticks2 = strip.mark_point().encode(alt.X('Resistin:Q', axis=tick_axis))
x_ticks3 = strip.mark_tick().encode(alt.X('HOMA:Q', axis=tick_axis))
x_ticks4 = strip.mark_point().encode(alt.X('Insulin:Q', axis=tick_axis))

# `&` binds tighter than `|`: the left column stacks scatter over x_ticks1,
# the right column stacks the remaining three strips.
scatter & x_ticks1 | x_ticks2 & x_ticks3 & x_ticks4



In [7]:
# Dropdown filter on Classification; the None option (labelled "All") clears it.
# NOTE(review): dataset.head() shows the label 'Healthy' — confirm these option
# strings match the values actually stored in the Classification column.
input_dropdown = alt.binding_select(options=[None,'Healthy Control','Patient'],labels=['All','Healthy Control','Patient'])
highlight = alt.selection(type='single',bind=input_dropdown, name='Classification',fields=['Classification'])
brush = alt.selection(type='interval')

# NOTE(review): an empty selection matches every row by default, so
# `highlight | brush` only dims points once both controls are in use —
# confirm `|` (not `&`) is intended.
focus_opacity = alt.condition(highlight | brush, alt.value(1.0), alt.value(0.1))

tick_axis = alt.Axis()
class_rows = alt.Y('Classification:N', title='Classification', axis=tick_axis)

# Every panel is coloured by class; the brushed panels add the shared opacity
# rule and both selections (dropdown first, then brush).
by_class = alt.Chart(dataset).encode(color='Classification:N')
linked = by_class.encode(opacity=focus_opacity).add_selection(highlight, brush)

# Parallel coordinates: number the rows, fold the measurement columns into
# key/value pairs and draw one line per original row (detail='index:N').
# Only the dropdown drives this panel.
parallel = by_class.transform_window(
    index='count()'
).transform_fold(
    ['Age','BMI','Glucose','Insulin','HOMA','Leptin','Adiponectin','Resistin','MCP']
).mark_line().encode(
    x='key:N',
    y='value:Q',
    detail='index:N',
    opacity=alt.condition(highlight, alt.value(1.0), alt.value(0.1)),
).properties(
    width=600,
    height=300,
).add_selection(
    highlight
)

x_points = linked.mark_point().encode(
    alt.X('Glucose:Q', axis=tick_axis),
    class_rows,
).properties(
    height=90,
    width=400,
)

scatter = linked.mark_circle(size=60).encode(
    x='Insulin',
    y='HOMA',
).properties(
    width=400,
    height=200,
)

# The box plot reads the brush in its opacity rule but only registers the
# dropdown selection itself.
box = by_class.mark_boxplot().encode(
    y=alt.Y('Classification:N'),
    x=alt.X('Age:Q', scale=alt.Scale(domain=[20, 90])),
    opacity=focus_opacity,
).properties(
    width=600,
    height=100,
).add_selection(
    highlight
)

x_ticks = linked.mark_tick().encode(
    alt.X('Resistin:Q', axis=tick_axis),
    class_rows,
).properties(
    height=100,
    width=400,
)

# `&` binds tighter than `|`: left column is parallel over box, right column
# stacks the three brushed panels.
parallel & box | x_points & scatter & x_ticks


## Final Prototypes

In [8]:

# Dropdown filter on Classification; the None option (labelled "All") clears it.
# NOTE(review): dataset.head() shows the label 'Healthy' — confirm these option
# strings match the values actually stored in the Classification column.
input_dropdown = alt.binding_select(options=[None,'Healthy Control','Patient'],labels=['All','Healthy Control','Patient'])
highlight = alt.selection(type='single',bind=input_dropdown, name='Classification',fields=['Classification'])
brush = alt.selection(type='interval')

# Encodings shared across panels, defined once so the panels cannot drift apart.
class_color = alt.Color('Classification:N', scale=alt.Scale(scheme='tableau10'))
# Opacity rule for panels driven by the dropdown alone.
class_only_opacity = alt.condition(highlight, alt.value(1.0), alt.value(0.1))
# NOTE(review): an empty selection matches every row by default, so
# `highlight | brush` only dims points once both controls are in use —
# confirm `|` (not `&`) is intended.
focus_opacity = alt.condition(highlight | brush, alt.value(1.0), alt.value(0.1))

tick_axis = alt.Axis()
class_rows = alt.Y('Classification:N', title='Classification', axis=tick_axis)

# Every panel is coloured by class; the brushed panels add the combined opacity
# rule and both selections (dropdown first, then brush).
by_class = alt.Chart(dataset).encode(color=class_color)
linked = by_class.encode(opacity=focus_opacity).add_selection(highlight, brush)

# Parallel coordinates: number the rows, fold the measurement columns into
# key/value pairs and draw one line per original row (detail='index:N').
parallel = by_class.transform_window(
    index='count()'
).transform_fold(
    ['Age','BMI','Glucose','Insulin','HOMA','Leptin','Adiponectin','Resistin','MCP']
).mark_line().encode(
    x='key:N',
    y='value:Q',
    detail='index:N',
    opacity=class_only_opacity,
).properties(
    width=450,
    height=300,
).add_selection(
    highlight
)

x_points = linked.mark_point().encode(
    alt.X('Age:Q', axis=tick_axis,scale=alt.Scale(domain=[20, 100])),
    class_rows,
).properties(
    height=90,
    width=350,
)

scatter = linked.mark_circle(size=60).encode(
    x='Insulin',
    y='HOMA',
).properties(
    width=350,
    height=200,
)

box = by_class.mark_boxplot().encode(
    y=alt.Y('Classification:N'),
    x=alt.X('BMI:Q', scale=alt.Scale(domain=[16, 40])),
    opacity=class_only_opacity,
).properties(
    width=450,
    height=100,
).add_selection(
    highlight
)

x_ticks = linked.mark_tick().encode(
    alt.X('MCP:Q', axis=tick_axis),
    class_rows,
).properties(
    height=100,
    width=350,
)

# `&` binds tighter than `|`: left column is parallel over box, right column
# stacks the three brushed panels.
parallel & box | x_points & scatter & x_ticks



In [9]:

# Dropdown filter on Classification; the None option (labelled "All") clears it.
# NOTE(review): the options here are the integers 1 and 2, while dataset.head()
# shows string labels such as 'Healthy' — confirm they match the stored values.
input_dropdown = alt.binding_select(options=[None,1,2],labels=['All','1. Healthy Control','2. Patients'])
highlight = alt.selection(type='single',bind=input_dropdown, name='Classification',fields=['Classification'])
brush = alt.selection(type='interval')

# Encodings shared by every panel, defined once so the panels cannot drift apart.
class_color = alt.Color('Classification:N', scale=alt.Scale(scheme='tableau10'))
# NOTE(review): an empty selection matches every row by default, so
# `highlight | brush` only dims points once both controls are in use —
# confirm `|` (not `&`) is intended.
focus_opacity = alt.condition(highlight | brush, alt.value(1.0), alt.value(0.1))

tick_axis = alt.Axis()
class_rows = alt.Y('Classification:N', title='Classification', axis=tick_axis)

# All panels share data, colour and the opacity rule; the brushed panels also
# register both selections (dropdown first, then brush).
styled = alt.Chart(dataset).encode(color=class_color, opacity=focus_opacity)
linked = styled.add_selection(highlight, brush)

scatter1 = linked.mark_circle(size=60).encode(
    x='Insulin',
    y='HOMA',
).properties(
    width=430,
    height=350,
)

x_points = linked.mark_point().encode(
    alt.X('Age:Q', axis=tick_axis,scale=alt.Scale(domain=[20, 100])),
    class_rows,
).properties(
    height=90,
    width=330,
)

scatter2 = linked.mark_circle(size=60).encode(
    x='Leptin',
    y='Adiponectin',
).properties(
    width=330,
    height=200,
)

# The box plot reads the brush in its opacity rule but only registers the
# dropdown selection itself.
box = styled.mark_boxplot().encode(
    y=alt.Y('Classification:N'),
    x=alt.X('BMI:Q', scale=alt.Scale(domain=[16, 40])),
).properties(
    width=430,
    height=100,
).add_selection(
    highlight
)

x_ticks = linked.mark_tick().encode(
    alt.X('MCP:Q', axis=tick_axis),
    class_rows,
).properties(
    height=100,
    width=330,
)

# `&` binds tighter than `|`: left column is scatter1 over box, right column
# stacks x_points, scatter2 and x_ticks.
scatter1 & box | x_points & scatter2 & x_ticks

In [10]:

# Dropdown filter on Classification; the None option (labelled "All") clears it.
# NOTE(review): dataset.head() shows the label 'Healthy' — confirm these option
# strings match the values actually stored in the Classification column.
input_dropdown = alt.binding_select(options=[None,'Healthy Control','Patient'],labels=['All','Healthy Control','Patient'])
highlight = alt.selection(type='single',bind=input_dropdown, name='Classification',fields=['Classification'])
brush = alt.selection(type='interval')

# Encodings shared by every panel, defined once so the panels cannot drift apart.
class_color = alt.Color('Classification:N', scale=alt.Scale(scheme='tableau10'))
# NOTE(review): an empty selection matches every row by default, so
# `highlight | brush` only dims points once both controls are in use —
# confirm `|` (not `&`) is intended.
focus_opacity = alt.condition(highlight | brush, alt.value(1.0), alt.value(0.1))

tick_axis = alt.Axis()
class_rows = alt.Y('Classification:N', title='Classification', axis=tick_axis)

# Every panel is coloured by class with the same explicit scheme; the brushed
# panels add the opacity rule and both selections (dropdown first, then brush).
by_class = alt.Chart(dataset).encode(color=class_color)
linked = by_class.encode(opacity=focus_opacity).add_selection(highlight, brush)

scatter1 = linked.mark_circle(size=60).encode(
    x='Insulin',
    y='HOMA',
).properties(
    width=390,
    height=250,
)

x_points = linked.mark_point().encode(
    alt.X('Age:Q', axis=tick_axis,scale=alt.Scale(domain=[20, 100])),
    class_rows,
).properties(
    height=100,
    width=390,
)

scatter2 = linked.mark_circle(size=60).encode(
    x='Glucose',
    y='Resistin',
).properties(
    width=390,
    height=250,
)

# The box plot reads the brush in its opacity rule but only registers the
# dropdown selection itself.
box = by_class.mark_boxplot().encode(
    y=alt.Y('Classification:N'),
    x=alt.X('BMI:Q', scale=alt.Scale(domain=[16, 40])),
    opacity=focus_opacity,
).properties(
    width=390,
    height=100,
).add_selection(
    highlight
)

x_ticks = linked.mark_tick().encode(
    alt.X('MCP:Q', axis=tick_axis),
    class_rows,
).properties(
    height=100,
    width=390,
)

# Row counts per class, restricted to the rows inside the brush. Unselected
# bars are dimmed to 0.5 here rather than the 0.1 used by the other panels.
bars = by_class.mark_bar().encode(
    y='Classification:N',
    x='count(Classification):Q',
    opacity=alt.condition(highlight | brush, alt.value(1.0), alt.value(0.5))
).properties(
    width=390,
    height=100,
).transform_filter(
    brush
).add_selection(
    highlight
)

# `&` binds tighter than `|`: two columns of three stacked panels each.
scatter1 & x_ticks & box | scatter2 & x_points & bars

In [11]:
# Measurement columns used for both the rows and the columns of the matrix.
measures = ['Age','BMI','Glucose','Insulin','HOMA','Leptin','Adiponectin','Resistin','MCP']

# Template panel: x and y are filled in by the repeat operator below.
pair_panel = alt.Chart(dataset).mark_circle().encode(
    x=alt.X(alt.repeat("column"), type='quantitative'),
    y=alt.Y(alt.repeat("row"), type='quantitative'),
    color=alt.Color('Classification:N', scale=alt.Scale(scheme='set2')),
).properties(width=150, height=150)

# One 150x150 scatter plot for every pair of measurements.
pair_panel.repeat(row=measures, column=measures)

In [12]:
# 2-D histogram: bin both measurements and colour each cell by its row count.
resistin_bins = alt.X('Resistin:Q', bin=alt.Bin(maxbins=60))
glucose_bins = alt.Y('Glucose:Q', bin=alt.Bin(maxbins=40))
cell_count = alt.Color('count():Q', scale=alt.Scale(scheme='greenblue'))

alt.Chart(dataset).mark_rect().encode(resistin_bins, glucose_bins, cell_count)


In [13]:
# Dropdown bound to the Classification field.
# NOTE(review): the options are the integers 1 and 2, while dataset.head()
# shows string labels such as 'Healthy' — confirm they match the stored values.
input_dropdown = alt.binding_select(options=[1,2])
highlight = alt.selection(type='single',bind=input_dropdown, name='Classification',fields=['Classification'])

# Glucose against Resistin as one line per class; the class chosen in the
# dropdown stays opaque while the other is dimmed.
class_lines = alt.Chart(dataset).mark_line().encode(
    alt.X('Resistin:Q',scale=alt.Scale(domain=[0, 60])),
    alt.Y('Glucose:Q'),
    alt.Color('Classification:N'),
    opacity=alt.condition(highlight, alt.value(1.0), alt.value(0.1)),
).properties(
    selection=highlight,
    width=300,
    height=200,
)

# interactive() adds scale-bound pan and zoom on top of the dropdown.
class_lines.interactive()

In [14]:

# Age against BMI drawn with rect marks, coloured by Classification. The
# frame is passed to Altair as-is; no reshaping happens in this cell.
# NOTE(review): neither axis is binned or discrete — confirm rect (rather than
# a point mark or binned axes) is the intended mark here.
alt.Chart(dataset).mark_rect().encode(
    alt.X('Age:Q'),
    alt.Y('BMI:Q'),
    alt.Color('Classification:N'),
)

In [15]:
# Distribution of BMI per class: one horizontal box per Classification value.
bmi_by_class = alt.Chart(dataset).mark_boxplot().encode(
    x=alt.X('BMI:Q'),
    y=alt.Y('Classification:N'),
    color='Classification:N',
)

bmi_by_class