# Visualizations for Patient Data

In [1]:
import altair as alt
import pandas as pd

## Data Set
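This is an artificial dataset that contains, for each patient, an id, age, sex, state, race, smoking history (ever_smoker, current_smoker, smoke_quit_date, pack_years), pregnancy status, cancer history, and a COVID-19 test result with its test date.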

In [2]:
# Read the artificial patient records; row 0 of the CSV supplies the column names.
patient = pd.read_csv(
    "../../artificial_patient_data.csv",
    header=[0],
)
# Column names as a plain Python list.
cols = list(patient.columns)

# Preview data
patient.head()

Unnamed: 0,patent_id,age,sex,state,race,ever_smoker,current_smoker,smoke_quit_date,pack_years,pregnant,hx_cancer,hx_cancer_type,test_result,test_date
0,1,29,female,HI,Hispanic and Latino Americans,no,no,2/19/18,0,no,no,,negative,1/6/20
1,2,53,female,NE,white,no,no,7/5/16,0,no,no,,presumptive positive,1/14/20
2,3,21,male,MT,white,no,no,9/12/19,0,no,no,,confirmed,1/17/20
3,4,54,male,AK,white,no,no,11/30/17,0,no,no,,confirmed,2/18/20
4,5,23,male,AR,Black or African American,no,no,11/23/17,0,no,no,,negative,2/11/20


In [3]:
# Set up the renderer for the classic Jupyter Notebook (not needed for JupyterLab)
# alt.renderers.enable('notebook')

## Demographics

### Simple Histograms & Bar Charts
These charts show simple demographic information of people. An orange vertical line in a histogram indicates the mean value.

In [5]:
# Colour scale shared by every chart that colours marks by sex.
sex_scale = alt.Scale(
    domain=('male', 'female'),
    range=["steelblue", "salmon"]
)

# NOTE(review): `slider` and `select_year` are never attached to a chart in this
# notebook, and the 1900-2000 range does not match the 2020 test dates shown in the
# data preview. They are kept only so existing names stay defined -- consider removing.
slider = alt.binding_range(min=1900, max=2000, step=10)

select_year = alt.selection_single(name="test_date", fields=['test_date'],
                                   bind=slider, init={'test_date': 2000})

# Grouped bars: one facet column per age bin, one bar per sex.
# Only rows whose test_result is "confirmed" are counted, so the bars agree with
# the chart title and the y-axis label.
confirmed_by_age = alt.Chart(patient).transform_filter(
    alt.FieldEqualPredicate(field="test_result", equal="confirmed")
).mark_bar().encode(
    x=alt.X('sex:N', title=None),
    # The y domain is left automatic so it fits the filtered counts.
    y=alt.Y('count()', title="Number of Confirmed Cases"),
    color=alt.Color('sex:N', scale=sex_scale, title="Sex"),
    column=alt.Column('age:O', bin=alt.Bin(maxbins=10), title="Age")
).properties(
    title="Confirmed Cases By Age",
    width=60
)

confirmed_by_age.configure_title(fontSize=20,fontWeight=300)

In [6]:
# Fixed colour per test outcome; later cells reuse this scale.
scale_test_result = alt.Scale(
    domain=["confirmed", "presumptive positive", "negative"],
    range=["#E05759", "#F28E2B", "#4E79A7"],
)

# Number of patients per test outcome.
viz_test = (
    alt.Chart(patient)
    .mark_bar()
    .encode(
        x=alt.X("test_result:N"),
        y="count()",
        color=alt.Color("test_result:N", scale=scale_test_result),
    )
    .properties(title="Distribution of COVID-19 Test Results", width=300)
)

# Histogram of age on a fixed 0-100 axis.
viz_age = (
    alt.Chart(patient)
    .mark_bar()
    .encode(
        x=alt.X("age:Q", bin=True, scale=alt.Scale(domain=[0, 100])),
        y="count()",
    )
    .properties(title="Age Distribution", width=300)
)

# Orange vertical line at the mean age, layered on top of the age histogram.
rule = (
    alt.Chart(patient)
    .mark_rule(color="orange")
    .encode(x="mean(age):Q", size=alt.value(3))
)

# Number of patients per sex.
viz_sex = (
    alt.Chart(patient)
    .mark_bar()
    .encode(x="sex:N", y="count()")
    .properties(title="Sex Distribution", width=150)
)

# Number of patients per race.
viz_race = (
    alt.Chart(patient)
    .mark_bar()
    .encode(x=alt.X("race:N"), y="count()")
    .properties(title="Race Distribution", width=250, height=300)
)

# Number of patients per pregnancy status, tallest bar first.
viz_pregnant = (
    alt.Chart(patient)
    .mark_bar()
    .encode(x=alt.X("pregnant:N", sort="-y"), y="count()")
    .properties(title="Pregnancy Distribution", width=150, height=300)
)

# Top row: test results and age (with mean line); bottom row: sex, race, pregnancy.
(
    (viz_test | (viz_age + rule))
    & (viz_sex | viz_race | viz_pregnant)
).configure_title(fontSize=20, fontWeight=300)

### Interactive Crossfilter
Drag the mouse horizontally in any chart to draw a rectangular brush; the other charts then show only the data that falls inside the brushed range. For example, brushing "confirmed" in the test-result chart shows the age, sex, and race distributions of the patients with confirmed cases. The orange vertical line marking the mean value in the histogram moves as the crossfilter is adjusted.

In [8]:
# A single interval selection along the x axis, shared by all charts below.
brush_x = alt.selection(type='interval', encodings=['x'])


def _layer_with_brush(chart):
    """Layer a light-gray copy of `chart` under a copy that is filtered by `brush_x`.

    The filtered copy is also the one that carries the `brush_x` selection.
    """
    backdrop = chart.encode(color=alt.value("lightgray"))
    highlighted = chart.transform_filter(brush_x).add_selection(brush_x)
    return alt.layer(backdrop, highlighted)


# Add brush to each chart
viz_test_with_brush = _layer_with_brush(viz_test)
viz_age_with_brush = _layer_with_brush(viz_age)
viz_sex_with_brush = _layer_with_brush(viz_sex)
viz_race_with_brush = _layer_with_brush(viz_race)
viz_pregnant_with_brush = _layer_with_brush(viz_pregnant)

# The mean-age line is computed from the brushed rows only.
rule_with_brush = rule.transform_filter(brush_x)

(
    (viz_test_with_brush | (viz_age_with_brush + rule_with_brush))
    & (viz_sex_with_brush | viz_race_with_brush | viz_pregnant_with_brush)
).configure_title(fontSize=20, fontWeight=300)

In [9]:
# Two semi-transparent age histograms, one per sex, drawn on top of each other.
#
# * Colour is encoded from the `sex` field through the shared `sex_scale` (same
#   steelblue/salmon pair as before), so the chart gets a legend that says which
#   colour is which sex.
# * The bin extent is pinned to the 0-100 axis range so both layers use the same
#   bin edges; with `bin=True` each layer derives its bins from its own filtered
#   rows, which can leave the overlaid bars misaligned.
viz_age_male = alt.Chart(patient).transform_filter(
    alt.FieldEqualPredicate(field="sex", equal="male")
).mark_bar(opacity=0.5).encode(
    alt.X("age:Q", bin=alt.Bin(extent=[0, 100]), scale=alt.Scale(domain=[0,100])),
    y='count()',
    color=alt.Color("sex:N", scale=sex_scale, title="Sex")
)

viz_age_female = alt.Chart(patient).transform_filter(
    alt.FieldEqualPredicate(field="sex", equal="female")
).mark_bar(opacity=0.5).encode(
    alt.X("age:Q", bin=alt.Bin(extent=[0, 100]), scale=alt.Scale(domain=[0,100])),
    y='count()',
    color=alt.Color("sex:N", scale=sex_scale, title="Sex")
)

(viz_age_male + viz_age_female).properties(
    title="Age Distribution by Sex"
).configure_title(fontSize=20,fontWeight=300)

In [10]:
### Confirmed Cases Over Time

In [13]:
# The CSV stores test_date as m/d/yy text (e.g. "1/6/20" in the data preview).
# Parse it into real timestamps up front so that the cumulative-count window below
# is ordered chronologically and the browser does not have to interpret a non-ISO
# date string. Raises if a value does not match the m/d/yy format.
patient_dated = patient.assign(
    test_date=pd.to_datetime(patient["test_date"], format="%m/%d/%y")
)

# Running total of all tests, ordered by test date (steelblue line).
test_over_time = alt.Chart(patient_dated).transform_window(
    cumulative_count="count()",
    sort=[{"field": "test_date"}],
).mark_line(size=3).encode(
    x='test_date:T',
    y=alt.Y('cumulative_count:Q', title=None),
    color=alt.value("steelblue")
)

test_over_time_point = test_over_time.mark_point(size=8)

# Running total of tests whose result is "confirmed" (salmon line).
confirm_over_time = alt.Chart(patient_dated).transform_filter(
    alt.FieldEqualPredicate(field="test_result", equal="confirmed")
).transform_window(
    cumulative_count="count()",
    sort=[{"field": "test_date"}],
).mark_line(size=3).encode(
    x=alt.X('test_date:T', title="Date"),
    y='cumulative_count:Q',
    color=alt.value("salmon")
)

confirm_over_time_point = confirm_over_time.mark_point(size=8)

# Selection that picks the test_date nearest to the cursor along the x axis;
# empty='none' means nothing is highlighted until the mouse is over the chart.
nearest = alt.selection(type='single', nearest=True, on='mouseover',
                        fields=['test_date'], empty='none')

# Invisible points that carry the selection, so the cursor's x position is tracked
selectors = alt.Chart(patient_dated).mark_point().encode(
    x='test_date:T',
    opacity=alt.value(0),
).add_selection(
    nearest
)

# Show points upon selection
points1 = test_over_time.mark_point(size=30).encode(
    opacity=alt.condition(nearest, alt.value(1), alt.value(0))
)
points2 = confirm_over_time.mark_point(size=30).encode(
    opacity=alt.condition(nearest, alt.value(1), alt.value(0))
)

# Show text labels upon selection (dy offsets place one label below and one above its line)
text1 = test_over_time.mark_text(align='left', fontSize=20, fontWeight=200, dx=5, dy=+15).encode(
    text=alt.condition(nearest, 'cumulative_count:Q', alt.value(' '))
)
text2 = confirm_over_time.mark_text(align='left', fontSize=20, fontWeight=200, dx=5, dy=-25).encode(
    text=alt.condition(nearest, 'cumulative_count:Q', alt.value(' '))
)

# Draw a vertical guideline at the selected date
rules = alt.Chart(patient_dated).mark_rule(color='black').encode(
    x='test_date:T',
).transform_filter(
    nearest
)

(
    test_over_time + test_over_time_point + confirm_over_time + confirm_over_time_point +
    points1 + points2 + rules + selectors + text1 + text2

).properties(
    title="Confirmed Cases Over Time",
    height=500, width=800
).configure_title(fontSize=20,fontWeight=300)

## Relationship Between Test Results And Preconditions
In our artificial data, we have patients' preconditions, such as smoking history and cancer history.

### Test Results vs. Smoking History w/ Crossfilter

In [14]:
# Reuses `viz_test_with_brush` and `brush_x` from the crossfilter cell, so brushing
# the test-result chart filters the smoking charts below.
# (The former `viz_test_with_brush.properties(width=300)` statement was dropped: its
# result was discarded, so it had no effect.)

# Number of patients per ever_smoker value.
ever_smoker = alt.Chart(patient).mark_bar().encode(
    x=("ever_smoker:N"),
    y='count()'
).properties(
    title="Smoker Distribution",
    width=100
)

# Histogram of pack years.
pack_years = alt.Chart(patient).mark_bar().encode(
    x=alt.X("pack_years:Q", bin=True),
    y=alt.Y('count()')
).properties(
    title="Pack Years Distribution",
    width=300
)

# Orange vertical line at the mean pack years. It has its own name so that it does
# not overwrite the mean-age `rule` that the crossfilter cell depends on.
pack_years_rule = alt.Chart(patient).mark_rule(color='orange').encode(
    x='mean(pack_years):Q',
    size=alt.value(3)
)

# Brushing
ever_smoker_with_brush = alt.layer(
    ever_smoker.encode(color=alt.value("lightgray")),
    ever_smoker.transform_filter(brush_x).add_selection(brush_x)
)
pack_years_with_brush = alt.layer(
    pack_years.encode(color=alt.value("lightgray")),
    pack_years.transform_filter(brush_x).add_selection(brush_x)
)
# The mean line is computed from the brushed rows only.
pack_years_rule_with_brush = pack_years_rule.transform_filter(brush_x)

(
    viz_test_with_brush
    | (ever_smoker_with_brush | pack_years_with_brush + pack_years_rule_with_brush)
).configure_title(fontSize=20,fontWeight=300)

### Pack Years by Test Results
Bar chart w/ error bars

In [15]:
# Horizontal bars: mean pack years for each test outcome, coloured by outcome.
viz_test_by_pack = (
    alt.Chart(patient)
    .mark_bar()
    .encode(
        x=alt.X('mean(pack_years)'),
        y=alt.Y('test_result:N'),
        color=alt.Color('test_result:N', scale=scale_test_result),
    )
    .properties(
        title="COVID-19 Test Results by Smoking Pack Years w/ Error Bars",
        width=400,
        height=200,
    )
)

# Black dot at the mean of each bar.
points = (
    alt.Chart(patient)
    .mark_point(filled=True, color='black')
    .encode(
        x=alt.X('mean(pack_years)'),
        y=alt.Y('test_result:N'),
    )
)

# Error bars with extent='stdev', computed from the raw pack_years values per outcome.
error_bars = (
    alt.Chart(patient)
    .mark_errorbar(extent='stdev')
    .encode(
        x=alt.X('pack_years'),
        y=alt.Y('test_result:N'),
    )
)

# Layer order: bars at the bottom, then error bars, then the mean dots on top.
(viz_test_by_pack + error_bars + points).configure_title(
    fontSize=20,
    fontWeight=300,
)

### Test Results by Smoking History
Stacked bar chart

In [16]:
# One bar per ever_smoker value, split into coloured segments by test outcome.
stacked_bar_chart = (
    alt.Chart(patient)
    .mark_bar()
    .encode(
        x=alt.X('ever_smoker:N'),
        y=alt.Y('count()'),
        color=alt.Color("test_result:N", scale=scale_test_result),
    )
    .properties(
        title="Test Results by Smoking History",
        height=300,
        width=200,
    )
)

stacked_bar_chart.configure_title(fontSize=20, fontWeight=300)

### Test Results by Cancer History

In [17]:
# Number of patients per hx_cancer value.
cancer = (
    alt.Chart(patient)
    .mark_bar()
    .encode(
        x=alt.X("hx_cancer:N"),
        y=alt.Y('count()'),
    )
    .properties(title="Cancer History", width=100, height=250)
)

# Restricted to rows with hx_cancer == "yes": count per cancer type (tallest bar
# first), with one facet column per test result.
cancer_type = (
    alt.Chart(patient)
    .mark_bar()
    .encode(
        x=alt.X("hx_cancer_type:N", sort="-y"),
        y=alt.Y('count()'),
        color=alt.Color('hx_cancer_type:N'),
        column=alt.Column('test_result'),
    )
    .transform_filter(
        alt.FieldEqualPredicate(field="hx_cancer", equal="yes")
    )
    .properties(title="Test Results by Cancer Types", height=250)
)

(cancer | cancer_type).configure_title(
    fontSize=20,
    fontWeight=300,
    anchor="middle",
)

In [18]:
# Heatmap restricted to rows with hx_cancer == "yes": cancer type on x, test result
# on y, cell colour is the number of matching rows.
heatmap = (
    alt.Chart(patient)
    .transform_filter(alt.FieldEqualPredicate(field="hx_cancer", equal="yes"))
    .mark_rect()
    .encode(
        x=alt.X('hx_cancer_type:N', title="Previous Cancer History"),
        y=alt.Y('test_result:N'),
        color=alt.Color(
            'count()',
            legend=alt.Legend(direction='horizontal', title="Number of People"),
        ),
    )
    .properties(
        title="Test Result vs. Cancer History",
        width=300,
        height=200,
    )
)

# Same cells drawn as text marks: the count is printed in black inside each cell.
text = heatmap.mark_text(
    baseline='middle',
    fontSize=15,
    fontWeight=100,
).encode(
    text=alt.Text('count()'),
    color=alt.value("black"),
)

(heatmap + text).configure_title(
    fontSize=20,
    fontWeight=300,
    anchor="middle",
)