From 8d9c20de372f41133f55a5e8b1f94619f0c8f261 Mon Sep 17 00:00:00 2001
From: Florian Wilhelm <florian.wilhelm@gmail.com>
Date: Thu, 11 Jan 2024 19:57:33 +0100
Subject: [PATCH] Update assignment algorithm

---
 .../20_mail_to_reviewers_v1.ipynb             | 143 ++++++++++++++++--
 .../30_reviewer-assignment_v1.ipynb           | 125 ++++++---------
 2 files changed, 174 insertions(+), 94 deletions(-)

diff --git a/notebooks/pyconde-pydata-berlin-2024/20_mail_to_reviewers_v1.ipynb b/notebooks/pyconde-pydata-berlin-2024/20_mail_to_reviewers_v1.ipynb
index 76d937c..1dcee87 100644
--- a/notebooks/pyconde-pydata-berlin-2024/20_mail_to_reviewers_v1.ipynb
+++ b/notebooks/pyconde-pydata-berlin-2024/20_mail_to_reviewers_v1.ipynb
@@ -150,7 +150,7 @@
    "outputs": [],
    "source": [
     "# for activated reviewers we take the e-mail address of their pretalx account\n",
-    "reviewers_activated = gsheet_df.loc[gsheet_df[Col.pretalx_activated].notnull() & gsheet_df[Col.committee_member].isna(), [Col.speaker_name, 'Pretalx Mail', Col.address_as]]\n",
+    "reviewers_activated = gsheet_df.loc[gsheet_df[Col.pretalx_activated].notnull(), [Col.speaker_name, 'Pretalx Mail', Col.address_as]]\n",
     "reviewers_activated = reviewers_activated.apply(lambda x: Recipient(name=x[Col.speaker_name], email=x['Pretalx Mail'], address_as=x[Col.address_as]), axis=1).to_list()"
    ]
   },
@@ -407,19 +407,10 @@
     "    team_id=cfg[\"team_id\"],\n",
     "    agent_id=cfg[\"agent_id\"],\n",
     "    status=\"solved\",\n",
-    "    recipients=reviewers_activated[53:]\n",
+    "    recipients=reviewers_activated\n",
     ")"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "reviewers_activated[53:]"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -427,7 +418,7 @@
    "outputs": [],
    "source": [
     "mail_client = MailClient()\n",
-    "responses, errors = mail_client.send(mail, dry_run=False)\n",
+    "responses, errors = mail_client.send(mail, dry_run=True)\n",
     "assert not errors"
    ]
   },
@@ -498,7 +489,7 @@
     "    team_id=cfg[\"team_id\"],\n",
     "    agent_id=cfg[\"agent_id\"],\n",
     "    status=\"solved\",\n",
-    "    recipients=reviewers\n",
+    "    recipients=reviewers_activated\n",
     ")"
    ]
   },
@@ -513,12 +504,82 @@
     "assert not errors"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Mail regarding the start of the Review Phase"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mail_body = \"\"\"\n",
+    "Hi {recipient.address_as},\n",
+    "\n",
+    "les jeux sont faits! The chips are down! and our Call for Proposals is now finally closed :-)\n",
+    "We are very happy to announce that we have now a total of 484 proposals! That's even 57 more than last year.\n",
+    "\n",
+    "So let's all begin with the review process and make this year's PyConDE & PyData Berlin 2024 a great success.\n",
+    "For this year's review process will try a new approach, simplified approach to last year's process.\n",
+    "We will assign each reviewer about 33 proposals but expect everyone to review only at least 22 proposals until\n",
+    "the end of January. So if you have time and want to review more proposals than 22, please feel free to do so.\n",
+    "This helps out another reviewer who couldn't complete 22 proposals for unforeseen reasons.\n",
+    "If you want to know more, read the PS text of this mail.\n",
+    "\n",
+    "If you have any questions, need help, don't hesitate contacting us at program24@pycon.de.\n",
+    "We appreciate your help very much and want to make sure that you are having a good time.\n",
+    "\n",
+    "Thank you very much {recipient.address_as} for your support!\n",
+    "\n",
+    "Summary:\n",
+    "* Review period: 9. January 2024 - 31 January, 00:00 CET\n",
+    "* Pretalx: https://pretalx.com/orga/event/pyconde-pydata-2024/reviews/\n",
+    "* Reviewer Guidelines (http://bit.ly/pyconde24-reviewer-guidelines)\n",
+    "* [Nonobligatory] PyCon 2024 - Reviewer's Get Together, Meet & Greet and your Questions:\n",
+    "  - Thursday, 11. January 2024 · 5:00 bis 5:40PM CET, video call: https://meet.google.com/zsn-avdq-yhy\n",
+    "* Contact program24@pycon.de for support if needed\n",
+    "\n",
+    "All the best,\n",
+    "Program Committee\n",
+    "PyCon DE & PyData Berlin 2024\n",
+    "\n",
+    "PS: In total, we are looking like last year for 3 reviews per proposal to get a fair evaluation of each proposal.\n",
+    "To make sure we really have 3 reviews per proposal in the end, we will initially assign each proposal to 5 reviewers.\n",
+    "Whenever a proposal has 3 reviews, it will be removed from the list of proposals to review for the remaining reviewers.\n",
+    "This is the reason why we define a minimum number of proposals to review for each reviewer but assign more proposals.\n",
+    "\"\"\""
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "mail = Mail(\n",
+    "    subject=\"[PyConDE/PyData 2024] Our Review Phase starts TODAY\",\n",
+    "    text=mail_body,\n",
+    "    team_id=cfg[\"team_id\"],\n",
+    "    agent_id=cfg[\"agent_id\"],\n",
+    "    status=\"solved\",\n",
+    "    recipients=reviewers_activated\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mail_client = MailClient()\n",
+    "responses, errors = mail_client.send(mail, dry_run=False)\n",
+    "assert not errors"
+   ]
   },
   {
    "cell_type": "code",
@@ -747,6 +808,60 @@
     ")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Mail about start of the review period"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mail_body = \"\"\"\n",
+    "Hi {recipient.address_as}!\n",
+    "\n",
+    "The review of proposals for the PyConDE / PyData is already in full swing and we are happy to have you on board!\n",
+    "{recipient.data.feedback}\n",
+    "\n",
+    "Please check back frequently into Pretalx as we will soon assign proposals more dynamically. \n",
+    "PyConDE / PyData is completely community driven by volunteers like you and we highly appreciate your support!\n",
+    "We will keep you updated.\n",
+    "\n",
+    "Information from our past e-mails:\n",
+    "* Reviewer Guidelines (https://bit.ly/pyconde23-reviewer-guidelines)\n",
+    "* [Nonobligatory] 2nd live “Meet and Greet” session: 17 January, 17:00 (CET) in Gather Town (https://bit.ly/pyconde23-meet-reviewers)\n",
+    "* Review to be finished by: 31 January, 00:00 (CET)\n",
+    "* Contact program23@pycon.de for support if needed\n",
+    "\n",
+    "Thank you very much {recipient.address_as} for your support!\n",
+    "\n",
+    "\n",
+    "All the best,\n",
+    "Program Committee\n",
+    "PyCon DE & PyData Berlin 2023\n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mail = Mail(\n",
+    "    subject=\"Update for you on the review process for PyCon DE / PyData!\",\n",
+    "    text=mail_body,\n",
+    "    team_id=cfg[\"team_id\"],\n",
+    "    agent_id=cfg[\"agent_id\"],\n",
+    "    status=\"solved\",\n",
+    "    recipients=active_recipients\n",
+    ")"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/notebooks/pyconde-pydata-berlin-2024/30_reviewer-assignment_v1.ipynb b/notebooks/pyconde-pydata-berlin-2024/30_reviewer-assignment_v1.ipynb
index 3b1fb68..d8db490 100644
--- a/notebooks/pyconde-pydata-berlin-2024/30_reviewer-assignment_v1.ipynb
+++ b/notebooks/pyconde-pydata-berlin-2024/30_reviewer-assignment_v1.ipynb
@@ -137,7 +137,7 @@
     "    {'created': r.created, 'updated': r.updated, Col.pretalx_user: r.user, 'score': r.score, 'review': r.submission}\n",
     "    for r in revs\n",
     "])\n",
-    "revs_df = revs_df_raw.groupby([Col.pretalx_user]).agg(lambda x: x.tolist()).reset_index()"
+    "revs_user_df = revs_df_raw.groupby([Col.pretalx_user]).agg(lambda x: x.tolist()).reset_index()"
    ]
   },
   {
@@ -182,12 +182,13 @@
     "sns_df = (\n",
     "    subs_df[Col.nreviews].value_counts().reset_index().rename(columns={'count': '#Proposal', Col.nreviews: '#Reviews'})\n",
     ")\n",
-    "bp = sns.barplot(\n",
+    "ax = sns.barplot(\n",
     "    sns_df,\n",
     "    x='#Reviews',\n",
     "    y='#Proposal',\n",
     ")\n",
-    "bp.set(ylim=(0, len(subs_df.index)));"
+    "ax.set_title('Number of reviews per proposal')\n",
+    "ax.set(ylim=(0, len(subs_df.index)));"
    ]
   },
   {
@@ -206,6 +207,7 @@
     "sns.barplot(data=subs_df[[Col.target_nreviews, Col.nreviews]].sum().to_frame().T, x=Col.target_nreviews, color='b')\n",
     "sns.set_color_codes('muted')\n",
     "ax = sns.barplot(data=progress_df, x=Col.nreviews, color='b')\n",
+    "ax.set_title('Review Progress')\n",
     "ax.bar_label(\n",
     "    ax.containers[1], labels=[f'{progress_df.loc[0, Col.nreviews] / progress_df.loc[0, Col.target_nreviews]:.1%}']\n",
     ");"
@@ -273,9 +275,9 @@
     "    lambda x: x.replace(community_map[0], community_map[1]).split(', ')\n",
     ")\n",
     "gsheet_df = gsheet_df.loc[~gsheet_df[Col.pretalx_activated].isna()]\n",
-    "# # save people that want all proposals for later\n",
-    "# assign_all_emails = gsheet_df[Col.email].loc[gsheet_df[Col.all_proposals] == 'x'].tolist()\n",
-    "# gsheet_df = gsheet_df.loc[gsheet_df[Col.all_proposals] != 'x']"
+    "# save people that want all proposals for later\n",
+    "assign_all_emails = gsheet_df[Col.email].loc[gsheet_df[Col.all_proposals] == 'x'].tolist()\n",
+    "gsheet_df = gsheet_df.loc[gsheet_df[Col.all_proposals] != 'x']"
    ]
   },
   {
@@ -284,7 +286,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "reviewers_df = pd.merge(gsheet_df, revs_df, on=Col.pretalx_user, how='left')\n",
+    "reviewers_df = pd.merge(gsheet_df, revs_user_df, on=Col.pretalx_user, how='left')\n",
     "reviewers_df['review'] = reviewers_df['review'].apply(lambda x: x if isinstance(x, list) else [])\n",
     "reviewers_df[Col.curr_assignments] = reviewers_df['review'].map(lambda x: x[:])"
    ]
@@ -314,7 +316,8 @@
     "    .reset_index()\n",
     "    .rename(columns={'count': '#Reviewers', Col.done_nreviews: 'Done #Reviews'})\n",
     ")\n",
-    "sns.barplot(sns_df, y='#Reviewers', x='Done #Reviews');"
+    "ax = sns.barplot(sns_df, y='#Reviewers', x='Done #Reviews')\n",
+    "ax.set_title('Reviews done per reviewer');"
    ]
   },
   {
@@ -345,6 +348,7 @@
     "sns.barplot(data=active_rev, x='all', color='g')\n",
     "sns.set_color_codes('muted')\n",
     "ax = sns.barplot(data=active_rev, x='Active Reviewers', color='g')\n",
+    "ax.set_title('Active Reviewers')\n",
     "ax.bar_label(\n",
     "    ax.containers[1], labels=['{:.1%}'.format(active_rev.loc[0, 'Active Reviewers'] / active_rev.loc[0, 'all'])]\n",
     ");"
@@ -359,6 +363,7 @@
     "\n",
     "ax = sns.barplot(pd.get_dummies(reviewers_df[[Col.track_prefs]].explode(Col.track_prefs), prefix='', prefix_sep='').sum())\n",
     "plt.xticks(rotation=90)\n",
+    "ax.set_title('Track Preferences of Reviewers')\n",
     "ax.set_ylabel('#Reviewers');"
    ]
   },
@@ -378,6 +383,10 @@
     "We keep the current state, so that the initial number of proposals for review will only get smaller. From last year we learnt that reviewers\n",
     "hate it when we start assigning more and more work... who would have thought.\n",
     "\n",
+    "Other considerations for the algorithm:\n",
+    "* Don't keep state, i.e. do not depend on a former assignment JSON file; been there, tried it, and it always causes trouble in the end.\n",
+    "* Make sure reviewers can revisit submissions they have already reviewed, i.e. never remove already reviewed submissions from a new assignment.\n",
+    "* Keep in mind that there are always reviewers who sign up late to the party.\n",
     "\n"
    ]
   },
@@ -387,22 +396,26 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def find_reviewer(df, is_preference, is_already_assigned, sub):\n",
-    "    mask = is_preference & ~is_already_assigned\n",
-    "    if df.loc[mask].empty:\n",
-    "        logging.warning(f'No suitable reviewer found for submission {sub}!')\n",
-    "        return df.loc[~is_already_assigned, 'New Assignments'].idxmin()\n",
-    "    else:\n",
-    "        return df.loc[mask, 'New Assignments'].idxmin()\n",
-    "\n",
     "def copy_df(df):\n",
     "    return pickle.loads(pickle.dumps(df))\n",
     "\n",
     "def assign_proposals(subs_df, reviewers_df, buffer: int):\n",
+    "    col_rem_assign, col_n_assigned = 'Remaining Assignments', 'Current #Assignments'\n",
+    "\n",
+    "    def find_reviewer(df, is_preference, is_already_assigned, sub):\n",
+    "        mask = is_preference & ~is_already_assigned\n",
+    "        if df.loc[mask].empty:\n",
+    "            logging.warning(f'No suitable reviewer found for submission {sub}!')\n",
+    "            return df.loc[~is_already_assigned, col_n_assigned].idxmin()\n",
+    "        else:\n",
+    "            return df.loc[mask, col_n_assigned].idxmin()\n",
+    "\n",
     "    # make a real deep copy. Pandas sucks and even deepcopy doesn't work!\n",
     "    subs_df, reviewers_df = copy_df(subs_df), copy_df(reviewers_df)\n",
     "\n",
-    "    col_rem_assign, col_new_assign, col_n_assigned = 'Remaining Assignments', 'New Assignments', 'Current #Assignments'\n",
+    "    # make sure each reviewer is assigned the reviews already done by him/her\n",
+    "    reviewers_df[Col.curr_assignments] = reviewers_df['review'].map(lambda x: x[:])\n",
+    "\n",
     "    # sanity check if we cover all preferenes of the submissions\n",
     "    reviewer_prefs = {e for l in reviewers_df[Col.track_prefs].to_list() for e in l}\n",
     "    sub_prefs = set(subs_df[Col.track].to_list())\n",
@@ -417,12 +430,12 @@
     "    subs_df = subs_df.sort_values(Col.rem_nreviews, ascending=False)\n",
     "    # calculate which submissions have been already assigned in reviewers_df\n",
     "    subs_df.set_index(Col.submission, inplace=True)\n",
-    "    subs_df[col_n_assigned] = 0\n",
-    "    subs_df[col_n_assigned] += reviewers_df[Col.curr_assignments].explode(Col.curr_assignments).value_counts()\n",
+    "    subs_df[col_n_assigned] = reviewers_df[Col.curr_assignments].explode(Col.curr_assignments).value_counts()\n",
+    "    subs_df[col_n_assigned] = subs_df[col_n_assigned].fillna(0)\n",
     "    subs_df.reset_index(inplace=True)\n",
     "\n",
-    "    subs_df[col_rem_assign] = subs_df[Col.rem_nreviews] + buffer - subs_df[col_n_assigned]\n",
-    "    reviewers_df[col_new_assign] = 0\n",
+    "    subs_df[col_rem_assign] = subs_df[Col.rem_nreviews].where(subs_df[Col.rem_nreviews] == 0, subs_df[Col.rem_nreviews] + buffer - subs_df[col_n_assigned])\n",
+    "    reviewers_df[col_n_assigned] = reviewers_df[Col.curr_assignments].apply(len)\n",
     "\n",
     "    while subs_df[col_rem_assign].sum() > 0:\n",
     "        for row_idx, row in subs_df.iterrows():\n",
@@ -430,73 +443,25 @@
     "            is_preference = reviewers_df[Col.track_prefs].map(lambda x: row[Col.track] in x)\n",
     "            is_already_assigned = reviewers_df[Col.curr_assignments].map(lambda x: curr_sub in x)\n",
     "\n",
-    "            if row[col_rem_assign] == 0:\n",
-    "                continue\n",
-    "\n",
-    "            if row[Col.rem_nreviews] < 1:\n",
-    "                logging.info(f'No more reviews needed for submission {curr_sub} thus removing from all reviewers.')\n",
-    "                reviewers_df.loc[is_already_assigned, Col.curr_assignments] = reviewers_df.loc[\n",
-    "                    is_already_assigned, Col.curr_assignments\n",
-    "                ].apply(lambda subs: [s for s in subs if s != curr_sub])\n",
-    "                reviewers_df.loc[is_already_assigned, col_new_assign] -= 1\n",
-    "                subs_df.loc[row_idx, col_rem_assign] = 0\n",
-    "            else:\n",
+    "            if row[col_rem_assign] > 0:\n",
     "                reviewer_idx = find_reviewer(reviewers_df, is_preference, is_already_assigned, curr_sub)\n",
     "                logging.info(\n",
     "                    f'Assigning submission {curr_sub} to reviewer {reviewers_df.loc[reviewer_idx, Col.pretalx_user]}'\n",
     "                )\n",
     "                reviewers_df.loc[reviewer_idx, Col.curr_assignments].append(curr_sub)\n",
-    "                reviewers_df.loc[reviewer_idx, col_new_assign] += 1\n",
+    "                reviewers_df.loc[reviewer_idx, col_n_assigned] += 1\n",
     "                subs_df.loc[row_idx, col_rem_assign] -= 1\n",
     "\n",
-    "    return reviewers_df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# new becomes old...\n",
-    "OLD_ASSIGNMENT_FILE = 'assignments_20231228_1.json'\n",
-    "NEW_ASSIGNMENT_FILE = 'assignments_20231228_2.json'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Let's give everyone only 10 assignments at first for checking out\n",
-    "# the Pretalx UI and before the actual reviewing phase starts\n",
-    "initial_assign_df = assign_proposals(subs_df, reviewers_df, buffer=BUFFER_REVIEWS)\n",
-    "initial_assign_df[Col.curr_assignments] = initial_assign_df[Col.curr_assignments].apply(lambda x: x[:10])\n",
-    "save_assignments_as_json(initial_assign_df, OLD_ASSIGNMENT_FILE)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# We re-run after the official review phase has started. Later always replace old with new assignment file etc.\n",
-    "curr_assign_df = read_assignment_as_df(OLD_ASSIGNMENT_FILE).set_index(Col.email)\n",
-    "reviewers_with_curr_assign_df = copy_df(reviewers_df).set_index(Col.email)\n",
-    "reviewers_with_curr_assign_df[Col.curr_assignments] = curr_assign_df\n",
-    "reviewers_with_curr_assign_df.reset_index(inplace=True)\n",
-    "new_assign_df = assign_proposals(subs_df, reviewers_with_curr_assign_df, buffer=BUFFER_REVIEWS)\n",
-    "save_assignments_as_json(new_assign_df, NEW_ASSIGNMENT_FILE)"
+    "    return reviewers_df\n",
+    "\n",
+    "new_assign_df = assign_proposals(subs_df, reviewers_df, buffer=BUFFER_REVIEWS)\n",
+    "\n",
+    "# add people that want all proposals assigned again\n",
+    "all_subs_df = pd.DataFrame({Col.email: assign_all_emails, Col.curr_assignments: [all_sub_codes] * len(assign_all_emails)})\n",
+    "new_assign_df = pd.concat([new_assign_df, all_subs_df]).reset_index()\n",
+    "\n",
+    "save_assignments_as_json(new_assign_df, 'assignments_20240112_1.json')"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {