In [1]:
import pandas as pd
import altair as alt

In [2]:
alt.data_transformers.disable_max_rows()
brush = alt.selection(type='interval')

In [3]:
data = pd.read_csv("../code4rena.csv")
data["date"] = pd.to_datetime(data["date"])
plt_data = pd.DataFrame()

### Active wardens
Represents the number of wardens who participated in at least one contest during the month

In [4]:
plt_data["Active wardens"] = data.groupby("date")["handle"].nunique()

### Inactive wardens
Represents wardens who have only been active for one month maximum 

In [5]:
one_timers = data.groupby(["handle", "date"]).size().groupby(level=0).size().to_frame()[lambda x: x.iloc[:, [0]] <= 1].dropna().reset_index()["handle"]
plt_data["Inactive wardens"] = data[data["handle"].isin(one_timers)].groupby("date")["handle"].nunique().shift(1, fill_value=0)

### New wardens
Represents wardens that made their first contest appearance during the month

In [6]:
plt_data["New wardens"] = (data.groupby("date")["handle"].unique().map(lambda x: set(x)) - data.groupby("date")["handle"].unique().map(lambda x: set(x)).shift(1)).map(lambda x: len(x) if isinstance(x, set) else 0)
plt_data.iat[0, 2] = plt_data.iat[0, 0] # Active wardens = New wardens for first contest

### Non-participating wardens
Wardens who have been active at different times and have not participated for this particular month (doesn't include inactive wardens)

In [15]:
plt_data["Total inactive wardens"] = plt_data["Inactive wardens"].cumsum()
plt_data["Total wardens"] = plt_data["New wardens"].cumsum()
plt_data["Non-participating wardens"] = plt_data["Total wardens"] - plt_data["Active wardens"] - plt_data["Total inactive wardens"]
plt_data.reset_index(inplace=True)

In [16]:
plt_data

Unnamed: 0,index,date,Active wardens,Inactive wardens,New wardens,Total inactive wardens,Total wardens,Non-participating wardens
0,0,2021-04-01,17,0,17,0,17,0
1,1,2021-05-01,22,3,8,3,25,0
2,2,2021-06-01,19,5,7,8,32,5
3,3,2021-07-01,28,2,14,10,46,8
4,4,2021-08-01,30,6,15,16,61,15
5,5,2021-09-01,38,7,22,23,83,22
6,6,2021-10-01,37,7,17,30,100,33
7,7,2021-11-01,80,5,53,35,153,38
8,8,2021-12-01,65,14,29,49,182,68
9,9,2022-01-01,119,8,71,57,253,77


In [26]:
alt.Chart(plt_data).transform_fold(
  ['Non-participating wardens', 'Total inactive wardens', 'Active wardens'],
  as_=['column', 'value']
).mark_bar().encode(
  x=alt.X('date:T', axis=alt.Axis(format='%m/%y', title='')),
  y=alt.Y('value:Q', axis=alt.Axis(title='Total warden count')),
  color=alt.Color('column:N', title='Warden categories')
)