In [1]:
import pandas as pd
import altair as alt

In [2]:
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [3]:
data = pd.read_csv("../github_code4rena.csv")
data["date"] = pd.to_datetime(data["date"])
plt_data = pd.DataFrame()

### Active wardens
Represents the number of wardens who participated in at least one contest during the month

In [4]:
plt_data["active_wardens"] = data.groupby("date")["handle"].nunique()

### Inactive wardens
Represents wardens who have only been active for one month maximum 

In [5]:
one_timers = data.groupby(["handle", "date"]).size().groupby(level=0).size().to_frame()[lambda x: x.iloc[:, [0]] <= 1].dropna().reset_index()["handle"]
plt_data["inactive_wardens"] = data[data["handle"].isin(one_timers)].groupby("date")["handle"].nunique().shift(1, fill_value=0)

### New wardens
Represents wardens that made their first contest appearance during the month

In [6]:
plt_data["new_wardens"] = (data.groupby("date")["handle"].unique().map(lambda x: set(x)) - data.groupby("date")["handle"].unique().map(lambda x: set(x)).shift(1)).map(lambda x: len(x) if isinstance(x, set) else 0)
plt_data.iat[0, 2] = plt_data.iat[0, 0] # Active wardens = New wardens for first contest

### Non-participating wardens
Wardens who have been active at different times and have not participated for this particular month (doesn't include inactive wardens)

In [7]:
plt_data["total_inactive_wardens"] = plt_data["inactive_wardens"].cumsum()
plt_data["total_wardens"] = plt_data["new_wardens"].cumsum()
plt_data["non_participating_wardens"] = plt_data["total_wardens"] - plt_data["active_wardens"] - plt_data["total_inactive_wardens"]
plt_data.reset_index(inplace=True)

In [8]:
plt_data

Unnamed: 0,date,active_wardens,inactive_wardens,new_wardens,total_inactive_wardens,total_wardens,non_participating_wardens
0,2021-04-01,17,0,17,0,17,0
1,2021-05-01,22,3,8,3,25,0
2,2021-06-01,19,5,7,8,32,5
3,2021-07-01,28,2,14,10,46,8
4,2021-08-01,30,6,15,16,61,15
5,2021-09-01,38,7,22,23,83,22
6,2021-10-01,37,7,17,30,100,33
7,2021-11-01,80,5,53,35,153,38
8,2021-12-01,65,14,29,49,182,68
9,2022-01-01,119,8,71,57,253,77


In [9]:
base = alt.Chart(plt_data).transform_fold(
    ['non_participating_wardens', 'total_inactive_wardens', 'active_wardens'],
    as_=['column', 'value']
).encode(
    x=alt.X('date:T', axis=alt.Axis(format='%m/%y', title='', grid=False)),
    y=alt.Y('value:Q', axis=alt.Axis(title='Total warden count')),
)

In [10]:
bars = base.mark_bar(size=30).encode(
    color=alt.Color('column:N', title='Warden categories'),
)

In [11]:
active_labels = base.mark_text(
    dy=10,
    color='white'
).transform_calculate(
    percentActive="datum.active_wardens / datum.total_wardens",
    percentNonParticipating="datum.non_participating_wardens / datum.total_wardens",
    percentInactive="datum.total_inactive_wardens / datum.total_wardens"
).encode(
    y='total_wardens:Q',
    text=alt.Text('percentActive:Q', format='.0%')
)

In [12]:
non_p_labels = base.mark_text(
    dy=10,
    color='white'
).transform_calculate(
    dy='datum.non_participating_wardens + datum.total_inactive_wardens',
    percentNonParticipating="datum.non_participating_wardens / datum.total_wardens"
).encode(
    y='dy:Q',
    text=alt.Text('percentNonParticipating:Q', format='.0%'),
    opacity=alt.condition('datum.non_participating_wardens > 10', alt.value(1), alt.value(0))
)

In [13]:
inactive_labels = base.mark_text(
    dy=10,
    color='white'
).transform_calculate(
    percentInactive="datum.total_inactive_wardens / datum.total_wardens"
).encode(
    y='total_inactive_wardens:Q',
    text=alt.Text('percentInactive:Q', format='.0%'),
    opacity=alt.condition('datum.total_inactive_wardens > 10', alt.value(1), alt.value(0))
)

In [14]:
(bars + active_labels + non_p_labels + inactive_labels).properties(width=700,height=400)