In [98]:
import altair as alt
import pandas as pd

# Data
## Input File Format

- Columns should be `attribute 1, attribute 2, set 1, set 2, ..., set M` where `set` columns contain either `1` or `0`

- `1` means the specimen belongs to that set and is drawn as a filled dot ('⬤') in the UpSet matrix view; `0` means it does not (a miss)

- Each row is one specimen

In [99]:
# Read the per-specimen table from disk (attribute columns followed by 0/1 set columns).
source = pd.read_csv(filepath_or_buffer="exampleSize.csv")

# `df` is the working frame that the chart cells read from.
df = pd.DataFrame(data=source)

In [100]:
from enum import Enum

# Set columns of the input table.
# Each member's value is the exact column header; within such a column a
# value of 0 means the tool missed (in this example).
# TODO: find a better name for this enum class.
class Tools(Enum):
    """Column headers of the hit/miss set columns."""

    DELUCSHIT = "DeLUCS hit"
    PHYLOFLASHHIT = "phyloFlash hit"

# Visualization
#### UpSet variation Altair visualizations contain three main views: 

(1) **vertical bar chart** on the top showing each element's attribute 1

(2) **line chart** overlaid on the vertical bar chart, showing each element's attribute 2

(3) **matrix view** on the bottom showing each element's set membership

# Custom upset plot

In [101]:
# ---- Overall canvas ----
width = 2000
height = 800
height_ratio = 0.6  # fraction of `height` given to the vertical bar chart

# ---- Top view: vertical bar chart ----
vertical_bar_x_axis_field = "Species"
vertical_bar_y_axis_field = "Whole Genome"
vertical_bar_text = "Whole Genome"  # field used for the bar labels and the tooltip
vertical_bar_tooltip_title = "Size"
vertical_bar_label_legend = "Genome Size"
vertical_bar_y_scaling = alt.ScaleType("log")
vertical_bar_thickness = 40
vertical_bar_label_size = 14
vertical_bar_tick_font_size = 14
vertical_bar_title_font_size = 35

# ---- Top view: line chart layered over the bars ----
line_chart_y_axis_field = "16S:Q"
line_chart_text = line_chart_y_axis_field
line_chart_label_legend = '16S'
line_chart_font_size = 15

# ---- Bottom view: matrix ----
matrix_title = "Species"
matrix_x_axis_field = vertical_bar_x_axis_field
matrix_set_columns = [Tools.PHYLOFLASHHIT.value, Tools.DELUCSHIT.value]
matrix_zebra_strip_field = Tools.DELUCSHIT.value
matrix_title_size = 23
matrix_label_size = 13
glyph_size = 200
dot_line_connection_size = 2

# Species labels (x axis of the matrix view)
bottom_x_axis_labels_size = 16
bottom_x_axis_labels_weight = alt.FontWeight("normal")

# ---- Dimensions derived from the canvas settings ----
vertical_bar_chart_height = height * height_ratio
matrix_height = (height - vertical_bar_chart_height) * 0.4
matrix_width = width

# ---- Colours, chart title, interaction and sort fields ----
main_color = "#303030"
highlight_color = "#09c372"
chart_title = "Genome Size"
mouse_hover_field_selection = "Species"
matrix_view_field_selection = mouse_hover_field_selection
sort_data_by_field = vertical_bar_y_axis_field



##### End tuning parameters #####
# adjust the rest as needed



# Tooltip shown on mouse hover: the bar-label field, titled with the configured tooltip title.
tooltip = [alt.Tooltip(vertical_bar_text, title=vertical_bar_tooltip_title)]

# Selection driven by hovering over a mark, keyed on the configured hover field (Species).
mouse_hover_selection = alt.selection_multi(
    on="mouseover",
    fields=[mouse_hover_field_selection],
)

# Colour rule used for highlighting.
# Note the `~`: marks NOT in the hover selection get main_color,
# marks in the selection get highlight_color.
brush_color = alt.condition(
    ~mouse_hover_selection,
    alt.value(main_color),
    alt.value(highlight_color),
)

# Sort spec handed to the x encodings of the upset plot (descending by the sort field).
# no dot in a hit row = tool misses
upset_plot_sorting = alt.SortField(field=sort_data_by_field, order='descending')

# The frame itself is sorted the same way (descending by the sort field).
df = df.sort_values(by=sort_data_by_field, ascending=False)

In [102]:
# vertical bar chart
# One bar per `vertical_bar_x_axis_field` value, ordered by `upset_plot_sorting`.
# The x-axis labels are switched off here (labels=False, ticks=False).
vertical_bar = alt.Chart(df.copy(), title=chart_title).mark_bar(color=main_color, size=vertical_bar_thickness).encode(
    x=alt.X(
        vertical_bar_x_axis_field, 
        axis=alt.Axis(grid=False, labels=False, ticks=False, domain=True),
        title=None,
        sort=upset_plot_sorting,
    ),
    y=alt.Y(
        vertical_bar_y_axis_field,
        axis=alt.Axis(
            grid=False,
            orient='left',
            labelFontSize=vertical_bar_tick_font_size,
            ),
        title=None,
        scale=alt.Scale(type=vertical_bar_y_scaling)
    ),
    tooltip=tooltip,
    color=brush_color
).properties(
    width=matrix_width,
    height=vertical_bar_chart_height
)

# A bare `vertical_bar.configure_title(fontSize=10000)` call was removed from here:
# Altair's configure_* methods return a new chart instead of modifying the
# receiver, and the result was discarded, so the call had no effect.

# Value labels drawn just above each bar.
# Bound to `vertical_bar_labels` rather than `vertical_bar_text`, so the string
# setting `vertical_bar_text` (the label field name) is not overwritten by a
# Chart and this cell can be re-run on its own.
vertical_bar_labels = vertical_bar.mark_text(
    color=main_color, 
    dy=-10,
    size=vertical_bar_label_size,
    fontWeight='bold',
).encode(
    text=alt.Text(vertical_bar_text)
)

# line chart
# Reuses the bar chart's data and x encoding; only the y field and colour change.
vertical_bar_line = vertical_bar.mark_line(color=highlight_color, opacity=1).encode(
    color=alt.value(highlight_color),
    y=alt.Y(
            line_chart_y_axis_field,
           )
    )

# Value labels for the line, shifted upwards (dy=-19).
line_text = vertical_bar_line.mark_text(
    color=main_color,
    align='center',
    baseline='middle',
    dy=-19,
    fontWeight='bold',
    fontSize=line_chart_font_size,
).encode(
    text=alt.Text(line_chart_text)
)

# Combines the separate components to create the vertical bar chart
# mouse interaction added
vertical_bar_chart = (vertical_bar + vertical_bar_labels + vertical_bar_line + line_text).add_selection(
    mouse_hover_selection
)


In [103]:
# create custom bar and line charts legend
def _legend_axis():
    """Return a new right-oriented axis spec; both legend entries use the same settings."""
    return alt.Axis(
        orient='right',
        titleFontSize=0,
        labelFontSize=18,
        titleX=80,
        titleY=-5,
        titleAngle=0,
    )


# Single-row frame: column 'y' holds the line legend label, column 'x' the bar legend label.
bardata = pd.DataFrame({
    'y': [line_chart_label_legend],
    'x': [vertical_bar_label_legend],
})

# Legend entry for the line chart: a short horizontal tick in the highlight colour.
legendline = (
    alt.Chart(bardata)
    .mark_tick(size=15, thickness=3, orient='horizontal', color='black', opacity=1)
    .encode(
        color=alt.value(highlight_color),
        y=alt.Y('y:N', axis=_legend_axis()),
    )
)

# Legend entry for the bar chart: a square in the main colour.
legendbar = (
    alt.Chart(bardata)
    .mark_square(size=200, color=main_color)
    .encode(
        y=alt.Y('x:N', axis=_legend_axis()),
    )
)


In [104]:
# matrix view selections
# Hover selection keyed on the configured matrix field together with the "value" field.
selection2 = alt.selection_multi(
    on="mouseover",
    fields=[matrix_view_field_selection, "value"],
)

# Colour scale applied to marks outside the hover selection:
# value 0 -> light grey, value 1 -> main_color; no legend is drawn.
matrix_value_color = alt.Color(
    "value:N",
    scale=alt.Scale(domain=[0, 1], range=["#E6E6E6", main_color]),
    legend=None,
)

# Marks inside the hover selection get highlight_color instead (note the `~`).
circle_color = alt.condition(~selection2, matrix_value_color, alt.value(highlight_color))

In [105]:
# UpSet glyph view (matrix view) of hits
# value of 0 = miss

# x axis: rotated species labels, no grid/ticks/domain line.
species_axis = alt.Axis(
    grid=False,
    ticks=False,
    domain=False,
    labelFontSize=bottom_x_axis_labels_size,
    labelFontWeight=bottom_x_axis_labels_weight,
    labelLimit=10000,
    labelAngle=-45,
)

# y axis: one row per folded set column, labelled on the right.
set_axis = alt.Axis(grid=False, ticks=False, domain=False, orient='right', labelFontWeight="bold")

# Base layer: the set columns are folded and drawn as one circle per
# (x field, key) pair, coloured by `circle_color`.
matrix_base = (
    alt.Chart(df.copy())
    .mark_circle(size=glyph_size, opacity=1)
    .transform_fold(matrix_set_columns)
    .encode(
        x=alt.X(
            matrix_x_axis_field,
            axis=species_axis,
            title=matrix_title,
            sort=upset_plot_sorting,
        ),
        y=alt.Y("key:N", axis=set_axis, title=None),
        color=circle_color,
    )
    .properties(height=matrix_height, width=matrix_width)
)

# keeps miss dots from being highlighted on mouse hover:
# slightly larger constant-grey circles drawn for the rows where value == 0
grey_glyph_size = glyph_size + 30
circle_grey_miss = (
    matrix_base
    .mark_circle(size=grey_glyph_size, opacity=1)
    .transform_filter(alt.datum["value"] == 0)
    .encode(color=alt.value("#E6E6E6"))
)

# TODO: connecting the dots was broken by later changes and might no longer be needed.
# upset plots usually connect the dots in matrix view
# line_connect_dots = matrix_base.mark_bar(size=dot_line_connection_size, color=main_color).transform_filter(
#     alt.datum["value"] == 1
# ).encode(
#     y=alt.Y("min(key):N"),
#     y2=alt.Y2("max(key):N"),
#     color=alt.value(main_color)
# )

# highlights every other row of matrix view to distinguish hit and miss rows
zebra_strip_matrix_view = (
    matrix_base
    .mark_rect()
    .transform_filter(alt.datum.key == matrix_zebra_strip_field)
    .encode(color=alt.value("#F7F7F7"))
)

# Combines the separate components to create the matrix view.
# Layer order seems to matter; `matrix_base` appears twice on purpose so that
# tooltips and the on-hover colour highlighting show properly.
matrix_layers = matrix_base + zebra_strip_matrix_view + matrix_base + circle_grey_miss

# mouse interaction added
matrix_view = matrix_layers.add_selection(mouse_hover_selection, selection2)


In [106]:
# combine all charts to make final upset plot
# Top row: the bar/line chart with the hand-built legend layered to its right.
legend = legendline + legendbar
top_row = alt.hconcat(vertical_bar_chart, legend)

# The matrix view is stacked underneath the top row.
upset_plot = alt.vconcat(top_row, matrix_view)

# final graph settings tuning
# This chained expression is the last statement of the cell and its result is
# not assigned, so `upset_plot` keeps referring to the chart built above.
(
    upset_plot
    .configure_view(stroke=None)
    .configure_axis(titleFontSize=matrix_title_size, labelFontSize=matrix_label_size)
    .configure_title(fontSize=vertical_bar_title_font_size)
)
