In [1]:
import pandas as pd
import altair as alt

In [2]:
# Lift Altair's 5_000-row limit so the full leaderboard can be charted.
alt.data_transformers.disable_max_rows()

# Leaderboard export; adjust the relative path to match your checkout.
data = pd.read_csv("../leaderboard_code4rena.csv")

In [3]:
# Preview the loaded leaderboard table.
data

Unnamed: 0,period,handle,prize_money,total_reports,high_all,high_solo,med_all,med_solo,gas_all
0,2022,WatchPug,365004.16,409,68,24,119,44,154
1,2022,cmichel,235136.67,135,30,5,60,19,3
2,2022,hyh,176095.70,200,23,1,66,19,20
3,2022,IllIllI,167108.96,174,17,2,69,14,47
4,2022,leastwood,163097.64,109,37,9,53,25,1
...,...,...,...,...,...,...,...,...,...
1302,ALL TIME,solgryn,5.77,4,0,0,0,0,4
1303,ALL TIME,aez121,0.10,1,0,0,1,0,0
1304,ALL TIME,ad3sh_,0.00,2,0,0,0,0,0
1305,ALL TIME,Kumpirmafyas,0.00,2,0,0,0,0,0


In [4]:
# Work on an independent copy: a plain `plt_data = data` would only alias the
# frame, so columns added for plotting (such as `qa_all`) would also be written
# into the raw `data` frame.
plt_data = data.copy()

### qa_all
Assumes that any reports counted in `total_reports` but not covered by `high_all`, `med_all`, or `gas_all` are QA reports.

In [5]:
# QA count = total reports minus the high, medium and gas report counts.
classified_reports = plt_data["high_all"] + plt_data["med_all"] + plt_data["gas_all"]
plt_data["qa_all"] = plt_data["total_reports"] - classified_reports

In [6]:
# Reshape to long format: one row per original row and report column, keeping
# `period` and `prize_money` as identifiers.
# The report columns are listed by name instead of sliced by position
# (`columns[3:]`): with the column layout shown in the preview output, that
# slice also picks up `prize_money`, which is already an id column, and it
# silently selects the wrong columns if the cell is re-run on the melted frame.
report_columns = [
    "total_reports",
    "high_all",
    "high_solo",
    "med_all",
    "med_solo",
    "gas_all",
    "qa_all",
]
plt_data = plt_data.melt(
    id_vars=["period", "prize_money"],
    value_vars=report_columns,
    var_name="type",
)

In [7]:
# Human-readable label for each raw report column name.
type_labels = {
    'total_reports': 'Total reports',
    'qa_all': 'QA',
    'gas_all': 'Gas optimization',
    'med_all': 'Common medium',
    'med_solo': 'Unique medium',
    'high_all': 'Common high',
    'high_solo': 'Unique high',
}
# Values without an entry in the mapping become NaN (pandas `Series.map`).
plt_data["type"] = plt_data["type"].map(type_labels)

### Period dropdown
Selects the period from which the data is taken. The options match the period entries of the [leaderboard](https://code4rena.com/leaderboard/) on the Code4rena website.

In [8]:
# Dropdown options: every distinct period present in the data.
period_options = list(plt_data["period"].unique())
period_dropdown = alt.binding_select(name='Select a period:', options=period_options)

# Single-value selection on `period`, bound to the dropdown and initialised
# to the 'ALL TIME' entry.
period_selector = alt.selection_single(
    name="period_selector",
    fields=['period'],
    init={'period':'ALL TIME'},
    bind=period_dropdown,
)

In [9]:
# Legend entries in display order, each paired with its colour.
_legend_palette = {
    'Unique high': "#FE266D",
    'Common high': "#FA6C44",
    'Unique medium': "#F2E713",
    'Common medium': "#D1D811",
    'Gas optimization': "#0AB6F8",
    'QA': "#5688C1",
    'Total reports': "#AA3678",
}
# Dicts preserve insertion order, so labels and colours stay aligned.
ordered_legend_reports_labels = list(_legend_palette.keys())
label_colors = list(_legend_palette.values())

In [10]:
# Base scatter chart: number of reports (x, symlog scale) against prize money (y),
# coloured by report type and filtered to the period chosen in the dropdown.
#
# The calculated `order` field ranks each report-type label and feeds the
# `order` encoding. The lookup key must be `datum.type`: the label column
# produced by the melt is named "type". The data has no `risk` field, so
# indexing by `datum.risk` left `order` undefined for every row.
base = alt.Chart(plt_data).transform_calculate(
    order="{ \
    'Total reports': 0, \
    'QA': 1, \
    'Gas optimization': 2, \
    'Common medium': 3, \
    'Unique medium': 4, \
    'Common high': 5, \
    'Unique high': 6}[datum.type]"
).mark_circle().encode(
    x=alt.X('value:Q', scale=alt.Scale(type='symlog'), title='Number of reports'),
    y=alt.Y('prize_money:Q', title='Prize money ($USD)'),
    color=alt.Color(
        'type:N', 
        title='Report type',
        # Fixed domain/range keeps the label-to-colour pairing stable.
        scale=alt.Scale(domain=ordered_legend_reports_labels, range=label_colors),
        legend=alt.Legend(
            title='Reports type', 
            values=ordered_legend_reports_labels, 
            orient="top",
            labelFontSize=14,
            symbolStrokeWidth=10
        )
    ),
    order='order:O'
).properties(
    width=850,
    height=400
).add_selection(
    # Attach the dropdown-bound selection to the chart...
    period_selector
).transform_filter(
    # ...and keep only the rows matching the selected period.
    period_selector
)

### Multi-line regression plot
For a given period, shows the correlation (via polynomial regression) between the number of reports and the prize money of a warden for each report category.

*Note that the x-axis uses a symmetric logarithmic (`symlog`) scale.*

In [11]:
# One polynomial regression line per report type, drawn from the base chart.
(
    base
    .transform_regression(
        'value',
        'prize_money',
        groupby=['type'],
        method='poly',
    )
    .mark_line()
)