In [None]:
import os, pandas as pd, numpy as np
from pathlib import Path

# --- Configuration -----------------------------------------------------------
root = Path('..')/'Screening_1'/'outputs'
need = ['paper_id','title','doi','reviewer','decision']
REVIEWERS = ['Olav','Ulrik','Trine']
out_dir = Path('.')
# Set SEED to an integer to make the shuffle and the reviewer picks repeatable
# (re-running this cell overwrites the assignment files). None draws fresh
# entropy on every run, which matches the previous behaviour.
SEED = None
rng = np.random.default_rng(SEED)

# --- Load every reviewer's screening-1 decisions -----------------------------
# Only the columns listed in `need` are read; a missing reviewer file is skipped.
frames = []
for r in REVIEWERS:
    decisions_path = root/f'{r}_decisions.csv'
    if decisions_path.exists():
        frames.append(pd.read_csv(decisions_path, usecols=lambda c: c in need))

# Resolved "maybe" papers count as accepted only when their final decision is 'Y';
# their 'original_reviewer' column plays the role of 'reviewer'.
resolved_path = root/'resolved_maybes.csv'
if resolved_path.exists():
    resolved_cols = ['paper_id','title','doi','original_reviewer','decision']
    m = pd.read_csv(resolved_path, usecols=lambda c: c in resolved_cols)
    frames.append(m[m['decision']=='Y'].rename(columns={'original_reviewer':'reviewer'})[need])

if not frames:
    # pd.concat([]) would fail with an unhelpful "No objects to concatenate".
    raise FileNotFoundError(f'No decision files found in {root.resolve()}')

# Keep accepted papers only, one row per paper (the last occurrence wins).
all_df = pd.concat(frames, ignore_index=True)
all_df = all_df[all_df['decision']=='Y'].drop_duplicates('paper_id', keep='last')

# --- Balanced assignment ------------------------------------------------------
# Every reviewer gets len(all_df) // len(REVIEWERS) papers; the remainder is
# handed out one each to the first reviewers in the list.
base, extra = divmod(len(all_df), len(REVIEWERS))
quota = {r: base + (i < extra) for i, r in enumerate(REVIEWERS)}

# Shuffle so the greedy pass below is not biased by the order of the input files.
shuf = all_df.iloc[rng.permutation(len(all_df))].reset_index(drop=True)

assignees = []
for original in shuf['reviewer']:
    # Prefer someone other than the original reviewer; fall back to anyone with
    # quota left when that is impossible.
    opts = [r for r in REVIEWERS if r != original and quota[r] > 0] or [r for r in REVIEWERS if quota[r] > 0]
    pick = str(rng.choice(opts))
    assignees.append(pick)
    quota[pick] -= 1
out = shuf.assign(assignee=assignees)

# --- Write one assignment file per reviewer ----------------------------------
for r in REVIEWERS:
    df = out[out['assignee']==r][['title','doi','reviewer','assignee']].rename(columns={'doi':'DOI','reviewer':'original reviewer','assignee':'new reviewer'})
    # Let pandas open the file itself: pushing the to_csv() string through
    # write_text() doubles the carriage return on Windows (blank line after every row).
    df.to_csv(out_dir/f'{r}_screening2.csv', index=False, encoding='utf-8')
print({r: int((out['assignee']==r).sum()) for r in REVIEWERS})


Generate new assignments: distribute the papers accepted in initial screening 2 that do not yet appear in the existing screening-2 assignment files among the reviewers:

In [None]:
import os, pandas as pd, numpy as np
from pathlib import Path
import pandas as pd
# Folder holding each reviewer's initial screening-2 decisions.
initial_screening_2_path = Path('..')/'Screening_1'/'outputs'/'initial_screening_2'
# NOTE(review): `root` is not used in the visible part of this cell; kept in case
# later code relies on it.
root = Path('..')/'Screening_1'/'outputs'/'Screening_2_initial_screening_2'
out_dir = Path('.')/'Screening_2_initial_screening_2'


REVIEWERS = ['Olav','Ulrik','Trine']

# reviewer name -> that reviewer's accepted ('Y') papers, one row per paper_id
initial_screening_2 = {}
# existing screening-2 assignment files, one frame per reviewer
assignment_frames = []

for reviewer in REVIEWERS:
    df = pd.read_csv(initial_screening_2_path/f'{reviewer}_decisions.csv')
    # keep='last' so the most recent row for a paper is the one that counts
    df = df[df['decision']=='Y'].drop_duplicates('paper_id', keep='last')
    initial_screening_2[reviewer] = df

    # Assignment files are read from the current working directory.
    assignment_frames.append(pd.read_csv(f'{reviewer}_screening2.csv'))

# ignore_index gives both combined frames a clean 0..n-1 index instead of
# repeating each file's own row labels.
screening_2_merged = pd.concat(assignment_frames, ignore_index=True)

initial_combined = pd.concat(list(initial_screening_2.values()), ignore_index=True)

def find_title_col(df):
    """Return the label of the column in ``df`` that holds paper titles.

    A column whose name is exactly ``title`` (ignoring case and surrounding
    whitespace) wins; otherwise the first column whose name merely contains
    ``title`` is returned. Non-string column labels are compared through
    ``str()`` instead of raising.

    Parameters
    ----------
    df : object with a ``columns`` attribute (e.g. ``pandas.DataFrame``)

    Returns
    -------
    The matching column label, exactly as it appears in ``df.columns``.

    Raises
    ------
    KeyError
        If no column name contains ``title``.
    """
    first_partial = None
    for c in df.columns:
        name = str(c).strip().lower()
        if name == 'title':
            return c
        # Remember the first partial hit, but keep scanning for an exact one so
        # that e.g. 'subtitle' cannot shadow a later 'title' column.
        if first_partial is None and 'title' in name:
            first_partial = c
    if first_partial is not None:
        return first_partial
    raise KeyError("No title-like column found")

# --- Papers accepted in initial screening 2 that have no assignment yet -------
title_col_init = find_title_col(initial_combined)
title_col_merged = find_title_col(screening_2_merged)

# Match on a normalised title (lower-cased, stripped). The normalised values
# live in standalone Series, so neither input frame is mutated and no helper
# column can leak into the result.
init_title_norm = initial_combined[title_col_init].astype(str).str.lower().str.strip()
merged_title_norm = screening_2_merged[title_col_merged].astype(str).str.lower().str.strip()
diff_df = initial_combined[~init_title_norm.isin(merged_title_norm)].copy()

# A paper accepted by more than one reviewer must be handed out only once
# (same rule as the first assignment round: last occurrence per paper_id wins).
if 'paper_id' in diff_df.columns:
    diff_df = diff_df.drop_duplicates('paper_id', keep='last')

# --- Bring column names to the output convention -----------------------------
# The assignment files use 'original reviewer' and 'DOI'. Rename here, BEFORE
# selecting columns: filtering by the merged frame's column names first would
# silently drop a lower-case 'doi' column and write an empty DOI for every paper.
if 'original reviewer' not in diff_df.columns:
    reviewer_src = next((c for c in ('reviewer', 'original_reviewer') if c in diff_df.columns), None)
    if reviewer_src is not None:
        diff_df = diff_df.rename(columns={reviewer_src: 'original reviewer'})
orig_rev_col = 'original reviewer' if 'original reviewer' in diff_df.columns else None

if 'DOI' not in diff_df.columns:
    doi_src = next((c for c in diff_df.columns if str(c).strip().lower() == 'doi'), None)
    if doi_src is not None:
        diff_df = diff_df.rename(columns={doi_src: 'DOI'})

keep_cols = [title_col_init] + [c for c in ('DOI', 'original reviewer') if c in diff_df.columns]
result = diff_df[keep_cols]

# Shuffle before the greedy pass (as the first assignment round does). Without
# it the rows stay grouped by original reviewer, and the last group is the one
# most likely to run out of alternatives and fall back to its own reviewer.
result = result.iloc[np.random.permutation(len(result))].reset_index(drop=True)

# --- Balanced assignment of a new reviewer -----------------------------------
# Every reviewer gets N // len(REVIEWERS) papers; the remainder goes one each
# to the first reviewers in the list.
N = len(result)
base, extra = divmod(N, len(REVIEWERS))
quota = {r: base + (i < extra) for i, r in enumerate(REVIEWERS)}

originals = result[orig_rev_col] if orig_rev_col else [None] * N
new_assignees = []
for orig in originals:
    # Prefer someone other than the original reviewer; fall back to anyone with
    # quota left when that is impossible.
    opts = [r for r in REVIEWERS if r != orig and quota[r] > 0]
    if not opts:
        opts = [r for r in REVIEWERS if quota[r] > 0]
    pick = str(np.random.choice(opts))
    new_assignees.append(pick)
    quota[pick] -= 1

result = result.assign(**{'new reviewer': new_assignees})

# Fixed output layout: title, DOI, original reviewer, new reviewer.
# Columns the input did not provide are written as empty strings.
cols_order = [title_col_init, 'DOI', 'original reviewer', 'new reviewer']
result = result.reindex(columns=cols_order, fill_value='')

# --- Write one file per reviewer ---------------------------------------------
out_dir = Path('.') / 'Screening_2_initial_screening_2'
out_dir.mkdir(parents=True, exist_ok=True)

for r in REVIEWERS:
    df_out = result[result['new reviewer'] == r]
    # write CSV directly (avoid write_text to prevent extra blank lines)
    fp = out_dir / f'{r}_screening2.csv'
    df_out.to_csv(fp, index=False, encoding='utf-8')

# report and list files
print(f"Wrote: {[str(f.name) for f in out_dir.glob('*_screening2.csv')]}")

Wrote: ['Olav_screening2.csv', 'Trine_screening2.csv', 'Ulrik_screening2.csv']
