In [None]:
import os
import sys
import math
import logging
import structlog
from pathlib import Path
import json
from copy import deepcopy
import pickle

import tomli
import numpy as np

%load_ext autoreload
%autoreload 2

import matplotlib as mpl
import matplotlib.pyplot as plt

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import seaborn as sns

# Global plot styling for all figures in this notebook.
# NOTE(review): `sns.set` is an alias of `sns.set_theme`, whose default context is 'notebook',
# so it presumably overrides the 'poster' context set on the line before — confirm which one is wanted.
sns.set_context('poster')
sns.set(rc={'figure.figsize': (12, 6.0)})
sns.set_style('whitegrid')

import pandas as pd

# Display up to 120 rows/columns when a DataFrame is rendered in a cell output.
pd.set_option('display.max_rows', 120)
pd.set_option('display.max_columns', 120)

In [None]:
# Send log records to stdout so that they show up in the cell output. With level WARNING the
# `logging.info` messages emitted by the assignment code further below are suppressed.
logging.basicConfig(level=logging.WARNING, stream=sys.stdout)

In [None]:
import pytanis
from pytanis import GSheetsClient, PretalxClient
from pytanis.pretalx import subs_as_df
from pytanis.review import read_assignment_as_df, save_assignments_as_json, Col

In [None]:
# Display the installed pytanis version: be aware that this notebook might only run with this version.
pytanis.__version__

In [None]:
# Load the event-specific settings from `config.toml` so that they are not hard-coded in this notebook
with open('config.toml', 'rb') as fh:
    cfg = tomli.load(fh)

TARGET_REVIEWS = 3  # We want at least 3 reviews per proposal
BUFFER_REVIEWS = 2  # Additionally assign 2 more as long as TARGET_REVIEWS is not fulfilled.
RND_STATE = 1729  # Random state or None for really random
# (long track name, short track name): the long name itself contains ', ' and is replaced by the
# short one before the track preferences are split on ', ' further below.
community_map = (
    'General: Community, Diversity, Career, Life and everything else',
    'General: Community',
)  # to make splitting easier in GSheet

# Read Reviews and all Submissions

In [None]:
# Fetch all submissions and reviews of the configured event from Pretalx.
pretalx_client = PretalxClient(blocking=True)
subs_count, subs = pretalx_client.submissions(cfg['event_name'])
revs_count, revs = pretalx_client.reviews(cfg['event_name'])
# Materialize both results as lists because they are iterated more than once below.
subs = list(subs)
revs = list(revs)

In [None]:
# Count reviews that actually have scores, per submission.
# `rename_axis` + `reset_index(name=...)` name both columns explicitly instead of renaming the
# default 'index'/'count' labels, which differ between pandas versions.
revs_vcounts = (
    pd.Series([r.submission for r in revs if r.score is not None])
    .value_counts()
    .rename_axis(Col.submission)
    .reset_index(name=Col.nreviews)
)
# keep track of all reviews, i.e. proposals people interacted with
revs_df_raw = pd.DataFrame([
    {'created': r.created, 'updated': r.updated, Col.pretalx_user: r.user, 'score': r.score, 'review': r.submission}
    for r in revs
])
# One row per Pretalx user; every other column becomes the list of that user's values.
revs_df = revs_df_raw.groupby([Col.pretalx_user]).agg(lambda x: x.tolist()).reset_index()

In [None]:
subs_df = subs_as_df([sub for sub in subs if sub.state.value == 'submitted'])  # Take only submitted ones
# Shorten the long community track name. The result is assigned back instead of calling
# `replace(..., inplace=True)` on the selected column: with pandas Copy-on-Write the selection is a
# temporary object, so the in-place variant would leave `subs_df` unchanged.
subs_df[Col.track] = subs_df[Col.track].replace(dict([community_map]))
subs_df[Col.target_nreviews] = TARGET_REVIEWS
# Left join keeps submissions without any scored review; their count is filled with 0.
subs_df = pd.merge(subs_df, revs_vcounts, on=Col.submission, how='left')
subs_df[Col.nreviews] = subs_df[Col.nreviews].fillna(0).astype(int)
# Remaining reviews can never be negative, even if a proposal got more reviews than targeted.
subs_df[Col.rem_nreviews] = (subs_df[Col.target_nreviews] - subs_df[Col.nreviews]).clip(lower=0)

In [None]:
# Remember the codes of all submitted proposals for later use.
all_sub_codes = subs_df[Col.submission].tolist()

## Some Statistics about the current Review Process

In [None]:
# Number of proposals per number of received reviews. Both columns are named explicitly instead of
# renaming the default 'count' label, which only exists in newer pandas versions.
sns_df = subs_df[Col.nreviews].value_counts().rename_axis('#Reviews').reset_index(name='#Proposal')
bp = sns.barplot(
    data=sns_df,
    x='#Reviews',
    y='#Proposal',
)
# Scale the y-axis to the total number of proposals to make the bars comparable between runs.
bp.set(ylim=(0, len(subs_df.index)));

In [None]:
# Overall review progress: reviews done so far (capped per proposal) relative to all targeted reviews.
capped_nreviews = subs_df[Col.nreviews].clip(upper=TARGET_REVIEWS)  # map more than 3 reviews to 3
progress_df = (
    subs_df.assign(**{Col.nreviews: capped_nreviews})[[Col.target_nreviews, Col.nreviews]].sum().to_frame().T
)
totals_df = subs_df[[Col.target_nreviews, Col.nreviews]].sum().to_frame().T

f, ax = plt.subplots(figsize=(15, 1))
# Background bar: total number of targeted reviews.
sns.set_color_codes('pastel')
sns.barplot(data=totals_df, x=Col.target_nreviews, color='b')
# Foreground bar: number of (capped) reviews already done, labelled with the percentage.
sns.set_color_codes('muted')
ax = sns.barplot(data=progress_df, x=Col.nreviews, color='b')
done_ratio = progress_df.loc[0, Col.nreviews] / progress_df.loc[0, Col.target_nreviews]
ax.bar_label(ax.containers[1], labels=[f'{done_ratio:.1%}']);

# Get spreadsheet with reviewers and preferences

In [None]:
# Client used below to read the reviewer spreadsheet.
gsheet_client = GSheetsClient()

In [None]:
# Mapping from the column titles of the reviewer sheet to our column name convention
col_map = {
    'Topics you want to review': Col.track_prefs,
    'Email address': 'Invitation email',  # e-mail address we sent the pretalx invitation to
    'Pretalx Mail': Col.email,  # e-mail people used to register.
    'Name': Col.speaker_name,
    'Affiliation': Col.affiliation,
    'Who do you know from the Committee?': Col.committee_contact,
    'Availability during the Review Period': Col.availability,
    'Additional comments regarding your availability during the review period.': Col.availability_comment,
    'Activated in Pretalx': Col.pretalx_activated,
    'Do you want your name to be listed as a reviewer on the conference website?': Col.public,
    'Wants all proposals': Col.all_proposals,
    'Any additional comments for the Program Committee': Col.comment,
    'Pretalx Name': Col.pretalx_user,
}
# Read the reviewer worksheet and directly apply the renaming
gsheet_df = gsheet_client.gsheet_as_df(cfg['reviewer_spread_id'], cfg['reviewer_work_name']).rename(
    columns=col_map
)

### Do some transformations to handle the GSheet

In [None]:
# parse preferences
def _parse_track_prefs(prefs):
    """Split the comma-separated track preferences of one reviewer into a list of track names.

    The long community track name contains ', ' itself and is therefore replaced by its short form
    before splitting. Lists are returned unchanged so that re-running this cell is harmless, and
    anything that is not a string (e.g. a missing answer) yields an empty list.
    """
    if isinstance(prefs, list):
        return prefs
    if not isinstance(prefs, str):
        return []
    return prefs.replace(community_map[0], community_map[1]).split(', ')


gsheet_df[Col.track_prefs] = gsheet_df[Col.track_prefs].apply(_parse_track_prefs)
# Keep only rows with a value in the "activated in Pretalx" column; copy to get an independent frame.
gsheet_df = gsheet_df.loc[gsheet_df[Col.pretalx_activated].notna()].copy()
# # save people that want all proposals for later
# assign_all_emails = gsheet_df[Col.email].loc[gsheet_df[Col.all_proposals] == 'x'].tolist()
# gsheet_df = gsheet_df.loc[gsheet_df[Col.all_proposals] != 'x']

In [None]:
# Attach the review activity of every reviewer; reviewers without any review keep NaN after the left join.
reviewers_df = pd.merge(gsheet_df, revs_df, on=Col.pretalx_user, how='left')
# Normalize the missing values to empty lists.
reviewers_df['review'] = reviewers_df['review'].map(lambda subs: subs if isinstance(subs, list) else [])
# The current assignments start as a separate copy of each reviewer's list of reviewed submissions.
reviewers_df[Col.curr_assignments] = reviewers_df['review'].map(lambda subs: list(subs))

In [None]:
# Number of reviews with an actual score per reviewer. `pd.notna` treats both NaN and None as
# missing, whereas `np.isnan` raises a TypeError for None.
reviewers_df = reviewers_df.assign(**{
    Col.done_nreviews: reviewers_df['score'].map(
        lambda scores: sum(1 for s in scores if pd.notna(s)) if isinstance(scores, list) else 0
    )
})

In [None]:
# Number of reviewers per number of finished reviews. Both columns are named explicitly instead of
# renaming the default 'count' label, which only exists in newer pandas versions.
sns_df = (
    reviewers_df[Col.done_nreviews]
    .value_counts()
    .rename_axis('Done #Reviews')
    .reset_index(name='#Reviewers')
)
sns.barplot(data=sns_df, y='#Reviewers', x='Done #Reviews');

In [None]:
# Reviewers (with a name) that have finished at least one review vs. all reviewers. Selecting the
# started reviewers with a boolean mask yields 0 if nobody has started yet, whereas looking up the
# `True` group of a groupby raises a KeyError in that case.
active_rev = pd.DataFrame({
    'Active Reviewers': [reviewers_df.loc[reviewers_df[Col.done_nreviews] > 0, Col.speaker_name].count()],
    'all': [len(reviewers_df)],
})

In [None]:
f, ax = plt.subplots(figsize=(15, 1))
# Background bar: all reviewers.
sns.set_color_codes('pastel')
sns.barplot(data=active_rev, x='all', color='g')
# Foreground bar: active reviewers, labelled with their share of all reviewers.
sns.set_color_codes('muted')
ax = sns.barplot(data=active_rev, x='Active Reviewers', color='g')
active_ratio = active_rev.loc[0, 'Active Reviewers'] / active_rev.loc[0, 'all']
ax.bar_label(ax.containers[1], labels=[f'{active_ratio:.1%}']);

In [None]:

# Number of reviewers per preferred track: one row per (reviewer, track), one-hot encoded and summed up.
track_pref_counts = pd.get_dummies(
    reviewers_df[[Col.track_prefs]].explode(Col.track_prefs), prefix='', prefix_sep=''
).sum()
ax = sns.barplot(track_pref_counts)
plt.xticks(rotation=90)
ax.set_ylabel('#Reviewers');

# Assign proposals to reviewers

The main idea is to assign each proposal/submission to as many reviewers as it still needs reviews, plus a buffer. A proposal is assigned:
* not to a person who already has this submission assigned (no duplicates),
* to a person who has a preference for the track of the proposal and currently the least amount of work,
* if no person has a preference for the track of the proposal, to someone with not much work.

(It might happen that someone accidentally gets assigned their own proposal, but Pretalx takes care of that if the same user e-mail was used.)

This is done initially. Then, whenever this script is rerun, we remove all proposals from the review for which the target number of reviews has been reached.
We keep the current state, so that the initial number of proposals for review will only get smaller. From last year we learnt that reviewers
hate it when we start assigning more and more work... who would have thought.




In [None]:
def find_reviewer(df, is_preference, is_already_assigned, sub):
    """Pick the reviewer with the fewest new assignments for submission `sub`.

    Reviewers with a matching preference that do not have the submission yet are preferred. If there
    is none, a warning is logged and the least loaded reviewer without the submission is chosen.

    Args:
        df: reviewers with a numeric 'New Assignments' column.
        is_preference: boolean Series aligned with `df`, True where the reviewer prefers the track.
        is_already_assigned: boolean Series aligned with `df`, True where the reviewer already has `sub`.
        sub: identifier of the submission, only used for messages.

    Returns:
        The index label of the chosen reviewer in `df`.

    Raises:
        ValueError: if every reviewer already has the submission assigned.
    """
    is_available = ~is_already_assigned
    if not is_available.any():
        # Previously this surfaced as an opaque ValueError from `idxmin` on an empty selection.
        msg = f'Every reviewer is already assigned to submission {sub}, no reviewer left!'
        raise ValueError(msg)

    is_preferred = is_preference & is_available
    if is_preferred.any():
        return df.loc[is_preferred, 'New Assignments'].idxmin()

    logging.warning(f'No suitable reviewer found for submission {sub}!')
    return df.loc[is_available, 'New Assignments'].idxmin()

def copy_df(df):
    """Return an independent copy of `df` by serializing it with pickle and loading it back."""
    serialized = pickle.dumps(df)
    return pickle.loads(serialized)

def assign_proposals(subs_df, reviewers_df, buffer: int):
    """Assign submissions to reviewers until every open submission has enough assignments.

    Every submission that still needs reviews is assigned to additional reviewers until
    `Col.rem_nreviews + buffer` reviewers have it in `Col.curr_assignments`; assignments that
    already exist are counted and never increased beyond that. Submissions that need no more
    reviews are removed from the assignments of all reviewers.

    Args:
        subs_df: submissions with the columns `Col.submission`, `Col.track` and `Col.rem_nreviews`.
        reviewers_df: reviewers with the columns `Col.track_prefs` (list of tracks),
            `Col.curr_assignments` (list of submissions) and `Col.pretalx_user`.
        buffer: number of assignments per open submission on top of the remaining reviews.

    Returns:
        A deep copy of `reviewers_df` with updated `Col.curr_assignments` and a 'New Assignments'
        column holding the net change of assignments per reviewer. The inputs are not modified.

    Raises:
        RuntimeError: if the tracks preferred by the reviewers and the tracks of the submissions differ.
        ValueError: propagated from `find_reviewer` if no reviewer without the submission is left.
    """
    # make a real deep copy via pickle so that the lists stored in the cells are not shared with the caller
    subs_df, reviewers_df = copy_df(subs_df), copy_df(reviewers_df)

    col_rem_assign, col_new_assign, col_n_assigned = 'Remaining Assignments', 'New Assignments', 'Current #Assignments'
    # sanity check if we cover all preferences of the submissions
    reviewer_prefs = {track for prefs in reviewers_df[Col.track_prefs].to_list() for track in prefs}
    sub_prefs = set(subs_df[Col.track].to_list())
    if reviewer_prefs != sub_prefs:
        # implicit string concatenation (no commas!) so that the message is a single string, not a tuple
        msg = (
            'Preferences of reviewers and submission do not align:\n'
            f'Reviewer\\Submission: {reviewer_prefs - sub_prefs}\n'
            f'Submission\\Reviewer: {sub_prefs - reviewer_prefs}'
        )
        raise RuntimeError(msg)

    subs_df = subs_df.sort_values(Col.rem_nreviews, ascending=False)
    # calculate how often each submission has been already assigned in reviewers_df.
    # `reindex(..., fill_value=0)` gives 0 instead of NaN for submissions nobody has assigned yet.
    n_assigned = reviewers_df[Col.curr_assignments].explode().value_counts()
    subs_df.set_index(Col.submission, inplace=True)
    subs_df[col_n_assigned] = n_assigned.reindex(subs_df.index, fill_value=0).astype(int)
    subs_df.reset_index(inplace=True)

    is_done = subs_df[Col.rem_nreviews] < 1
    # Submissions that already have more assignments than needed must not get any more, thus clip at 0.
    subs_df[col_rem_assign] = (subs_df[Col.rem_nreviews] + buffer - subs_df[col_n_assigned]).clip(lower=0)
    subs_df.loc[is_done, col_rem_assign] = 0
    reviewers_df[col_new_assign] = 0

    # First free the reviewers from all submissions that need no more reviews. This happens
    # independently of the remaining assignments, so no finished submission is left over.
    for curr_sub in subs_df.loc[is_done, Col.submission]:
        is_already_assigned = reviewers_df[Col.curr_assignments].map(lambda x: curr_sub in x)
        if not is_already_assigned.any():
            continue
        logging.info(f'No more reviews needed for submission {curr_sub} thus removing from all reviewers.')
        reviewers_df.loc[is_already_assigned, Col.curr_assignments] = reviewers_df.loc[
            is_already_assigned, Col.curr_assignments
        ].apply(lambda subs: [s for s in subs if s != curr_sub])
        reviewers_df.loc[is_already_assigned, col_new_assign] -= 1

    # Then hand out one assignment per open submission and pass, starting with the submissions
    # that need the most reviews, until no submission has remaining assignments.
    while (subs_df[col_rem_assign] > 0).any():
        for row_idx, row in subs_df.iterrows():
            if row[col_rem_assign] <= 0:
                continue

            curr_sub = row[Col.submission]
            is_preference = reviewers_df[Col.track_prefs].map(lambda x: row[Col.track] in x)
            is_already_assigned = reviewers_df[Col.curr_assignments].map(lambda x: curr_sub in x)

            reviewer_idx = find_reviewer(reviewers_df, is_preference, is_already_assigned, curr_sub)
            logging.info(
                f'Assigning submission {curr_sub} to reviewer {reviewers_df.loc[reviewer_idx, Col.pretalx_user]}'
            )
            # the cell holds a list object, so appending modifies the assignment in place
            reviewers_df.loc[reviewer_idx, Col.curr_assignments].append(curr_sub)
            reviewers_df.loc[reviewer_idx, col_new_assign] += 1
            subs_df.loc[row_idx, col_rem_assign] -= 1

    return reviewers_df

In [None]:
# new becomes old...
# i.e. for every re-run, the assignment file written by the previous run is read as the old one
# and the new assignment is written to a fresh file name.
OLD_ASSIGNMENT_FILE = 'assignments_20231228_1.json'
NEW_ASSIGNMENT_FILE = 'assignments_20231228_2.json'

In [None]:
# Let's give everyone only 10 assignments at first for checking out
# the Pretalx UI and before the actual reviewing phase starts
initial_assign_df = assign_proposals(subs_df, reviewers_df, buffer=BUFFER_REVIEWS)
initial_assign_df[Col.curr_assignments] = initial_assign_df[Col.curr_assignments].apply(lambda x: x[:10])
# Only write the initial assignment if the file does not exist yet. On later runs the old assignment
# file holds the state we want to keep, and overwriting it here would silently throw that state away.
if Path(OLD_ASSIGNMENT_FILE).exists():
    logging.warning(f'{OLD_ASSIGNMENT_FILE} already exists, not overwriting it with the initial assignment!')
else:
    save_assignments_as_json(initial_assign_df, OLD_ASSIGNMENT_FILE)

In [None]:
# We re-run after the official review phase has started. Later always replace old with new assignment file etc.
curr_assign_df = read_assignment_as_df(OLD_ASSIGNMENT_FILE).set_index(Col.email)
reviewers_with_curr_assign_df = copy_df(reviewers_df).set_index(Col.email)
# Take over the assignments from the old file, aligned on the e-mail index.
reviewers_with_curr_assign_df[Col.curr_assignments] = curr_assign_df
# Reviewers that are not part of the old assignment file end up with NaN after the alignment.
# Give them an empty list instead, since the assignment code expects a list for every reviewer.
reviewers_with_curr_assign_df[Col.curr_assignments] = reviewers_with_curr_assign_df[Col.curr_assignments].apply(
    lambda x: x if isinstance(x, list) else []
)
reviewers_with_curr_assign_df.reset_index(inplace=True)
new_assign_df = assign_proposals(subs_df, reviewers_with_curr_assign_df, buffer=BUFFER_REVIEWS)
save_assignments_as_json(new_assign_df, NEW_ASSIGNMENT_FILE)