In [1]:
import os
import sys
import math
import logging
import structlog
from pathlib import Path
import json

import numpy as np

%load_ext autoreload
%autoreload 2

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import seaborn as sns
# Configure the plotting theme in a single call. `sns.set()` applies its own default
# context, so calling it *after* `sns.set_context("poster")` silently dropped the
# poster scaling; passing context, style and rc together keeps all three settings.
sns.set(context="poster", style="whitegrid", rc={"figure.figsize": (16, 9.)})

import pandas as pd
# Let pandas show up to 120 rows and 120 columns before truncating the display
for _display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(_display_option, 120)

In [2]:
# Send log records of level INFO and above to stdout so they appear in the cell output
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

In [3]:
import pytanis
from pytanis import GoogleAPI, PretalxAPI
from pytanis.review import read_assignment_as_df, sub_tracks_as_df, save_assignments_as_json, Col

In [4]:
# Be aware that this notebook might only run with the pytanis version displayed below
pytanis.__version__ 

'0.0.2.post1.dev5+g72b5045'

In [5]:
# General settings
SPREADSHEET_ID = "16iuVRM7V3p3FgfU-8WjwPlMAYJB-DvwRhYCi_heiftA"  # Google Sheet read via gsheet_as_df
RANGE = "Form responses 1"  # range of that sheet passed to gsheet_as_df
EVENT_NAME = "pyconde-pydata-berlin-2023"  # event queried in Pretalx
# (long track name, short replacement): the long name contains ", " itself,
# so it is shortened to make splitting easier in GSheet
community_map = (
    "General: Community, Diversity, Career, Life and everything else",
    "General: Community",
)

# Read current assignments and get all submissions from Pretalx

In [6]:
# Load the existing reviewer assignments from a local JSON file
curr_assign_df = read_assignment_as_df("assignments_20230112_1.json")

# Query Pretalx for the submissions of the event; the call returns a pair that is
# unpacked into `subs_count` and `subs`
papi = PretalxAPI()
subs_count, subs = papi.submissions(EVENT_NAME)
# Tabular view of the submissions; later cells use its index as the submission codes
# and read the track from its Col.track column
subs_df = sub_tracks_as_df(subs)

In [7]:
# Replace the long community track name by its short form. The result is assigned back
# to the column instead of using `replace(..., inplace=True)` on the selected column:
# that in-place call operates on an intermediate object and leaves `subs_df` unchanged
# when pandas Copy-on-Write is active (pandas 2.2 already warns about it).
subs_df[Col.track] = subs_df[Col.track].replace(dict([community_map]))

In [8]:
# keep the codes of all submissions (the index of subs_df) around for later cells
all_sub_codes = subs_df.index.tolist()

In [9]:
# set the number of reviews we want for every submission; further down, the number of
# reviews that are already assigned is subtracted from this on a copy of the frame
subs_df[Col.needed_nreviews] = 3

# Get spreadsheet with reviewers and preferences

In [10]:
# Client used below to read the reviewer spreadsheet
gapi = GoogleAPI()
# NOTE(review): presumably obtains or refreshes the Google access token — confirm in pytanis
gapi.init_token()

In [11]:
# Column headers of the spreadsheet mapped onto our own column naming convention
col_map = {
    "Topics you want to review": Col.track_prefs,
    "Email address": Col.email,
    "Name": Col.name,
    "Affiliation": Col.affiliation,
    "Who do you know from the Committee?": Col.committee_contact,
    "Availability during the Review Period": Col.availability,
    "Additional comments regarding your availability during the review period.": Col.availability_comment,
    "Activated in Pretalx": Col.pretalx_activated,
    "Do you want your name to be listed as a reviewer on the conference website?": Col.public,
    "Wants all proposals": Col.all_proposals,
    "Any additional comments for the Program Committee": Col.comment,
}
# Read the sheet and rename its columns in one chained expression
gsheet_df = gapi.gsheet_as_df(SPREADSHEET_ID, RANGE).rename(columns=col_map)

INFO:googleapiclient.discovery_cache:file_cache is only supported with oauth2client<4.0.0


### Do some transformations to handle the GSheet

In [12]:
def _split_track_prefs(raw_prefs):
    """Turn the comma-separated preference string of one reviewer into a list of track names."""
    long_name, short_name = community_map
    # shorten the community track first: its long name contains ", " itself
    return raw_prefs.replace(long_name, short_name).split(', ')


# parse preferences
gsheet_df[Col.track_prefs] = gsheet_df[Col.track_prefs].map(_split_track_prefs)

# keep only rows whose "activated in Pretalx" cell is not empty
is_activated = gsheet_df[Col.pretalx_activated] != ''
gsheet_df = gsheet_df.loc[is_activated]

# save people that want all proposals for later and take them out of the frame
wants_all = gsheet_df[Col.all_proposals] == 'x'
assign_all_emails = gsheet_df.loc[wants_all, Col.email].tolist()
gsheet_df = gsheet_df.loc[~wants_all]

In [13]:
def _as_assignment_list(value):
    """Return ``value`` if it is a list, otherwise a new empty list."""
    if isinstance(value, list):
        return value
    return []


# attach the current assignments to each reviewer (matched by email); reviewers without
# a match end up with an empty assignment list
gsheet_df = gsheet_df.merge(curr_assign_df, how='left', on=Col.email)
gsheet_df[Col.curr_assignments] = gsheet_df[Col.curr_assignments].map(_as_assignment_list)

In [14]:
def _keep_known_codes(sub_lst):
    """Return the entries of ``sub_lst`` that are contained in ``all_sub_codes``, in order."""
    return [sub for sub in sub_lst if sub in all_sub_codes]


# Remove assignments that are no longer available
gsheet_df[Col.curr_assignments] = gsheet_df[Col.curr_assignments].map(_keep_known_codes)

In [15]:
# current workload of each reviewer = length of the assignment list
gsheet_df[Col.num_assignments] = gsheet_df[Col.curr_assignments].apply(len)

# Determine which proposals need to be assigned

In [16]:
# find out what we still have to assign
# Count how often each submission code occurs in the reviewers' current assignment lists.
# Reviewers that want all proposals were removed from gsheet_df above and are not counted.
curr_assignment_counts = gsheet_df[Col.curr_assignments].explode().value_counts()
curr_assignment_counts = curr_assignment_counts.loc[curr_assignment_counts.index.isin(all_sub_codes)] # drop assignments that are no longer in Pretalx

# Work on a copy so that subs_df keeps the originally requested number of reviews
dist_df = subs_df.copy()
# For every submission that already has reviewers, reduce the needed reviews by the number
# already assigned (aligned on the submission code). The result is zero or negative when a
# submission already has at least as many reviewers as requested.
dist_df.loc[curr_assignment_counts.index, Col.needed_nreviews] = subs_df[Col.needed_nreviews].loc[curr_assignment_counts.index] - curr_assignment_counts

# Assign remaining proposals to reviewers

The main idea is to assign each submission as many times as it still needs reviews, each time to the reviewer who has a preference for the submission's track and currently has the least amount of work. 
If no reviewer has a preference for the track of the proposal, assign it to the reviewer with the least amount of work overall.

In [17]:
def find_user(df, mask):
    """Return the index label of the row with the smallest number of assignments.

    Only the rows selected by the boolean ``mask`` are considered; if the mask
    selects no row at all, every row of ``df`` is a candidate.
    """
    candidates = df.loc[mask]
    if candidates.empty:
        candidates = df
    return candidates[Col.num_assignments].idxmin()

def assign_proposals(dist_df, curr_df):
    """Distribute the outstanding reviews of all submissions among the reviewers.

    Args:
        dist_df: one row per submission (index = submission code) with the columns
            ``Col.track`` and ``Col.needed_nreviews`` (reviews still to be assigned).
        curr_df: one row per reviewer with the columns ``Col.track_prefs`` (list of
            tracks), ``Col.curr_assignments`` (list of submission codes) and
            ``Col.num_assignments`` (current workload).

    Returns:
        A copy of ``curr_df`` with updated assignment lists and counts. ``curr_df``
        itself, including the lists stored in its cells, is left unchanged.

    Each outstanding review goes to the least-loaded reviewer who prefers the track of
    the submission; if there is none, to the least-loaded reviewer overall. A reviewer
    never receives a submission they already hold; if every reviewer holds it, the
    remaining reviews of that submission stay unassigned.
    """
    curr_df = curr_df.copy()
    # DataFrame.copy() does not copy the Python lists stored in the cells. Give the
    # working frame its own lists so that appending below cannot alter the caller's data.
    curr_df[Col.curr_assignments] = curr_df[Col.curr_assignments].map(
        lambda subs: list(subs) if isinstance(subs, list) else []
    )
    for sub, row in dist_df.iterrows():
        track = row[Col.track]
        prefers_track = curr_df[Col.track_prefs].map(lambda prefs: track in prefs)
        # reviewers that do not hold this submission yet
        eligible = curr_df[Col.curr_assignments].map(lambda subs: sub not in subs)
        for _ in range(int(row[Col.needed_nreviews])):
            if not eligible.any():
                break  # nobody left who could take this submission
            preferred = prefers_track & eligible
            # the mask handed over always selects at least one row
            idx = find_user(curr_df, preferred if preferred.any() else eligible)
            curr_df.loc[idx, Col.curr_assignments].append(sub)
            curr_df.loc[idx, Col.num_assignments] += 1
            eligible.loc[idx] = False
    return curr_df

In [18]:
# Distribute the reviews that are still needed (dist_df) among the reviewers (gsheet_df);
# the result is a copy of the reviewer frame with updated assignment lists and counts
assign_df = assign_proposals(dist_df, gsheet_df)

In [19]:
# add people that want all proposals assigned again
all_subs_df = pd.DataFrame(
    {
        Col.email: assign_all_emails,
        # one independent copy of the code list per reviewer instead of one shared list object
        Col.curr_assignments: [list(all_sub_codes) for _ in assign_all_emails],
    }
)
# ignore_index renumbers the rows 0..n-1; a plain reset_index() would additionally
# insert the old, duplicated row labels as a spurious "index" column
assign_df = pd.concat([assign_df, all_subs_df], ignore_index=True)

# Save it as JSON

In [20]:
# Write the assignments of all reviewers to a new JSON file
save_assignments_as_json(assign_df, "assignments_20230114_1.json")