In [None]:
pip install mip pandas openpyxl

In [None]:
# tkinter supplies the file-open dialogs used by the input cells.
from tkinter import filedialog
from tkinter import *
# Create the Tk root window and hide it (withdraw) so only the dialogs are shown.
root = Tk()
root.withdraw()

import pandas as pd

import os
import openpyxl

#from mip import Model, xsum, BINARY, OptimizationStatus
# python-mip names used to build and solve the allocation models below.
from mip import Model, xsum, BINARY, INTEGER, MINIMIZE, OptimizationStatus


# Input

In [None]:
# Ask the user for the workbook that lists the thesis topics (projects).
filepath_projects = filedialog.askopenfilename(
    title="Select file listing Topics (projects)"
)
print(filepath_projects)

# Sheet 'Topics' -> df_projects. A failure is reported and the cell carries on.
sheet_name = 'Topics'
try:
    df_projects = pd.read_excel(filepath_projects, sheet_name=sheet_name)
except Exception as exc:
    print(f"An error occurred: {exc}")
else:
    print("'Projects' Data loaded successfully!")

# Sheet 'Subject Areas' (same workbook) -> df_subject_areas.
sheet_name = 'Subject Areas'
try:
    df_subject_areas = pd.read_excel(filepath_projects, sheet_name=sheet_name)
except Exception as exc:
    print(f"An error occurred: {exc}")
else:
    print("'Subject Areas' Data loaded successfully!")

In [None]:
# Ask the user for the workbook that lists the supervisors.
filepath_supervisors = filedialog.askopenfilename(title="Select file listing Supervisors")
print(filepath_supervisors)

# Sheet 'Supervisors' -> df_supervisors.
# The supervisor list built later in the notebook reads this frame, so a failed
# load is reported and then re-raised: carrying on would either hit a NameError
# further down or silently reuse a frame left over from an earlier run.
sheet_name = 'Supervisors'
try:
    df_supervisors = pd.read_excel(filepath_supervisors, sheet_name=sheet_name)
except Exception as e:
    print(f"An error occurred: {e}")
    raise
else:
    print("'Supervisors' Data loaded successfully!")


In [None]:
# Ask the user for the workbook holding the (internal) topic allocation results.
filepath_project_allocations = filedialog.askopenfilename(title="Select file listing the Topic Allocation Results (Internal)")
print(filepath_project_allocations)

# Both sheets below feed the second-reader allocation, so a failed load is
# reported and then re-raised instead of leaving the frame undefined (NameError
# later) or stale from a previous run of this cell.

# Read the 'Final Allocations' sheet (one row per allocated project) into a DataFrame
sheet_name = 'Final Allocations'
try:
    df_project_allocations = pd.read_excel(filepath_project_allocations, sheet_name=sheet_name)
except Exception as e:
    print(f"An error occurred while loading {sheet_name} data: {e}")
    raise
else:
    print(f"{sheet_name} data loaded successfully!")


# Read the 'Supervisor Popularity' sheet (supervision counts per supervisor) into a DataFrame
sheet_name = 'Supervisor Popularity'
try:
    df_supervisor_allocations = pd.read_excel(filepath_project_allocations, sheet_name=sheet_name)
except Exception as e:
    print(f"An error occurred while loading {sheet_name} data: {e}")
    raise
else:
    print(f"{sheet_name} data loaded successfully!")

# 2nd Reader Allocation Optimisation

In [None]:
def _split_subject_areas(raw_value, separator):
    """Split a delimited subject-area cell into a list of clean names.

    Args:
        raw_value: Cell value read from Excel; a string, or NaN/None when the
            cell is blank.
        separator: Delimiter placed between subject areas in the cell.

    Returns:
        List of whitespace-stripped, non-empty names. A blank cell gives an
        empty list instead of failing on ``.split``.
    """
    if pd.isnull(raw_value):
        return []
    fragments = (fragment.strip() for fragment in str(raw_value).split(separator))
    return [fragment for fragment in fragments if fragment]


# =========================================================
# Create List of Allocated Projects (one dict per row of the allocation results)
projects_allocated = []
for _, row in df_project_allocations.iterrows():
    projects_allocated.append({
        "Code": row['Code'],
        # Project subject areas are delimited with ";#" in the allocation file
        'Subject Area': _split_subject_areas(row['Subject Area'], ";#"),
        "Supervisor": row['Supervisor'],
        "Student": row['Student'],
    })

#print(projects_allocated)


# =========================================================
# Create List of Supervisors and their existing Supervision Load and 2nd Reader Allowance

supervisors = []

for _, row in df_supervisors.iterrows():
    supervisor_name = row['Supervisor']
    # NOTE: Not sure this is necessary
    if supervisor_name == "TOPIC AREA":
        continue

    # Existing supervision load: first matching "Count Allocated" entry, or 0
    # when the supervisor does not appear in the allocation results.
    load_matches = df_supervisor_allocations.loc[
        df_supervisor_allocations["Supervisor"] == supervisor_name, "Count Allocated"]
    supervisor_load = 0 if load_matches.empty else load_matches.iloc[0]

    supervisors.append({'Supervisor': supervisor_name,
                        'Supervisor_short': row['Supervisor_short'],
                        # Supervisor subject areas are delimited with "; "
                        'Subject Area': _split_subject_areas(row['Subject_area'], "; "),
                        'Load': supervisor_load,
                        '2nd Reader': row['2nd_reader'],
                        # Only an explicit 1 marks a supervisor as eligible to second-read
                        'Eligibility': bool(row['2nd_reader'] == 1)})

#print(supervisors)

In [None]:

"""
academics: list of dicts with keys:
    - 'name': str
    - 'subjects': list or set of subject area names
    - 'eligible': bool
projects: list of dicts with keys:
    - 'id': unique id (int or str)
    - 'supervisor': academic name
    - 'subjects': list or set of subject area names
"""

# ---------------------------------------------------------------
# Model inputs, derived from `supervisors` and `projects_allocated`.
# Subject areas are stored as sets so overlaps can be tested directly.
academics = [
    {'name': supervisor['Supervisor'],
     'subjects': set(supervisor['Subject Area']),
     'eligible': supervisor['Eligibility']}
    for supervisor in supervisors
]

projects = [
    {'id': project_id,
     'code': project['Code'],
     'supervisor': project['Supervisor'],
     'subjects': set(project['Subject Area'])}
    for project_id, project in enumerate(projects_allocated)
]

name_to_idx = {a['name']: i for i, a in enumerate(academics)}
A = len(academics)
P = len(projects)

# Count supervision: number of allocated projects each academic supervises
supervised_count = [0]*A
for p in projects:
    sup = p['supervisor']
    if sup not in name_to_idx:
        raise ValueError(f"Supervisor '{sup}' not found in academics list.")
    supervised_count[name_to_idx[sup]] += 1

# Eligible pairings: academic is eligible, is not the project's supervisor,
# and shares at least one subject area with the project.
can_be_second = [[False]*P for _ in range(A)]
for a_idx, a in enumerate(academics):
    for p_idx, p in enumerate(projects):
        if (a['eligible']
            and a['name'] != p['supervisor']
            and len(a['subjects'] & p['subjects']) > 0):
            can_be_second[a_idx][p_idx] = True

# Fail early, naming the projects, when the "exactly one second reader"
# constraint cannot be met; otherwise the model is simply infeasible.
projects_without_reader = [
    p['code'] for p_idx, p in enumerate(projects)
    if not any(can_be_second[a_idx][p_idx] for a_idx in range(A))
]
if projects_without_reader:
    raise ValueError(
        "No eligible second reader shares a subject area with project(s): "
        f"{projects_without_reader}")

# Build model
m = Model(sense=MINIMIZE)

# Decision vars: x[a,p] = 1 if academic a is second reader for project p
# (only created for allowed pairings; other entries stay None)
x = [[None]*P for _ in range(A)]
for a in range(A):
    for p in range(P):
        if can_be_second[a][p]:
            x[a][p] = m.add_var(var_type=BINARY, name=f"x_{a}_{p}")

# Each project exactly one second reader
for p in range(P):
    m += xsum(x[a][p] for a in range(A) if x[a][p] is not None) == 1

# Total load per academic = supervised projects + second readings
total_load = [m.add_var(var_type=INTEGER, lb=0, name=f"load_{a}") for a in range(A)]
for a in range(A):
    m += total_load[a] == supervised_count[a] + xsum(
        x[a][p] for p in range(P) if x[a][p] is not None
    )

# Fairness objective: minimize sum of deviations from mean (eligible academics only)
eligible_idxs = [a for a in range(A) if academics[a]['eligible']]
total_supervised = sum(supervised_count[a] for a in eligible_idxs)
# With no eligible academics there is nothing to balance (and, given the check
# above, no projects either), so avoid dividing by zero.
avg_load = (total_supervised + P) / len(eligible_idxs) if eligible_idxs else 0

deviations = []
for a in eligible_idxs:
    d = m.add_var(var_type=INTEGER, lb=0, name=f"dev_{a}")
    deviations.append(d)
    # |total_load[a] - avg_load| <= d
    m += total_load[a] - avg_load <= d
    m += avg_load - total_load[a] <= d

m.objective = xsum(deviations)

# Variable values are only available when the solver found a solution, so
# check the status before reading any `.x`.
status = m.optimize()
if status not in (OptimizationStatus.OPTIMAL, OptimizationStatus.FEASIBLE):
    raise RuntimeError(f"2nd reader allocation failed to find a solution (status: {status})")

# Extract assignment: project id -> name of the academic whose x[a,p] is 1
assignment = {}
for p_idx, p in enumerate(projects):
    assigned = None
    for a_idx in range(A):
        if x[a_idx][p_idx] is not None and x[a_idx][p_idx].x >= 0.5:
            assigned = academics[a_idx]['name']
            break
    assignment[p['id']] = assigned

# Solver values are floats (e.g. 2.9999999); round before converting so a load
# is not truncated down by one.
loads = {academics[a]['name']: int(round(total_load[a].x)) for a in range(A)}
supervised = {academics[a]['name']: supervised_count[a] for a in range(A)}

results = {
    'assignment': assignment,
    'supervised': supervised,
    'loads': loads,
    'model': m}

print('2nd Reader Assignments:')
print(results['assignment'])
print('Supervision Loads:')
print(results['supervised'])
print('Total Loads:')
print(results['loads'])


# Inert example kept for reference: the string below is never executed.
# NOTE(review): it calls `allocate_second_readers_balanced`, which is not
# defined in the visible cells - confirm before reviving it.
"""
# ----------------------------
# Example usage
if __name__ == "__main__":
academics = [
    {'name': 'Alice', 'subjects': ['A','B'], 'eligible': True},
    {'name': 'Bob',   'subjects': ['B','C'], 'eligible': True},
    {'name': 'Carol', 'subjects': ['C'],     'eligible': False},
    {'name': 'Dave',  'subjects': ['A','C'], 'eligible': True},
]

projects = [
    {'id': 'P1', 'supervisor': 'Alice', 'subjects': ['A']},
    {'id': 'P2', 'supervisor': 'Bob',   'subjects': ['B']},
    {'id': 'P3', 'supervisor': 'Carol', 'subjects': ['C']},
    {'id': 'P4', 'supervisor': 'Alice', 'subjects': ['A','B']},
]

res = allocate_second_readers_balanced(academics, projects)
print("Assignments (project -> second reader):")
for pid, reader in res['assignment'].items():
    print(f"  {pid} -> {reader}")
print("\nSupervised counts:")
for name, c in res['supervised'].items():
    print(f"  {name}: {c}")
print("\nTotal loads (supervised + second-read):")
for name, load in res['loads'].items():
    print(f"  {name}: {load}")
"""


In [None]:
#===============================
# Create List of Project Markers (supervisor + assigned 2nd reader per project)
projects_markers = [
    {'Code': project['code'],
     'Subject Area': list(project['subjects']),
     'Supervisor': project['supervisor'],
     '2nd Reader': results['assignment'][project['id']]}
    for project in projects
]

print("Project Markers:")
print("   ", projects_markers)



#=========================
# Test results are logical

# Check Second Reader is not the same as Supervisor
errorIDs = [
    index for index, project_markers in enumerate(projects_markers)
    if project_markers["Supervisor"] == project_markers["2nd Reader"]
]

if not errorIDs:
    print (f"All 2nd readers are different from the Supervisors")
else:
    for errorID in errorIDs:
        # Look the offending entry up in the list (not in the last loop dict)
        project = projects_markers[errorID]
        print(f"Project {project['Code']} has the same person as Supervisor and 2nd Reader ({project['Supervisor']})")


#  Check Second Reader has the project's subject Area:
# Subject areas per supervisor name; the first entry wins for duplicate names.
subject_areas_by_supervisor = {}
for supervisor in supervisors:
    subject_areas_by_supervisor.setdefault(supervisor['Supervisor'], set(supervisor['Subject Area']))

errorIDs = []
for index, project_markers in enumerate(projects_markers):
    second_reader = project_markers["2nd Reader"]
    if pd.isnull(second_reader):
        # No second reader assigned at all
        errorIDs.append(index)
        continue
    reader_subject_areas = subject_areas_by_supervisor.get(second_reader, set())
    # An error is the ABSENCE of any shared subject area
    if not reader_subject_areas.intersection(project_markers['Subject Area']):
        errorIDs.append(index)

#print(errorIDs)

if not errorIDs:
    print (f"All 2nd readers have interest in the allocated projects' subject areas.")
else:
    for errorID in errorIDs:
        project = projects_markers[errorID]
        if pd.isnull(project['2nd Reader']):
            print(f"Project {project['Code']} has the 2nd Reader {project['2nd Reader']}")
        else:
            # Report this project's own subject areas, not those of the last one checked
            print(f"Project {project['Code']} has the 2nd Reader ({project['2nd Reader']}) who is not interested in {project['Subject Area']}")




# Save Results

In [None]:
# ===========================================
# CREATE DATAFRAME OF READER ALLOCATION

# One row per project; subject areas are joined into a single "; "-separated cell
reader_allocation_columns = ['Project Code', 'Subject Area', 'Supervisor', '2nd Reader']
print(reader_allocation_columns)

# Build all rows first and create the frame once (no row-by-row concat)
reader_allocation_rows = [
    {'Project Code': project_markers['Code'],
     'Subject Area': "; ".join(project_markers['Subject Area']),
     'Supervisor': project_markers['Supervisor'],
     '2nd Reader': project_markers['2nd Reader']}
    for project_markers in projects_markers
]
reader_allocation_df = pd.DataFrame(reader_allocation_rows, columns=reader_allocation_columns)

print(reader_allocation_df)



# ===========================================
# CREATE DATAFRAME OF SUMMARY STATISTICS

# One row per supervisor: how often they appear as supervisor and as 2nd reader
results_columns = ['Staff Name', 'Count 1st Reader', 'Count 2nd Reader', 'Count Total']
print(results_columns)

results_rows = []
for supervisor in supervisors:

    supervisor_name = supervisor["Supervisor"]
    count_supervisor = int((reader_allocation_df["Supervisor"] == supervisor_name).sum())
    count_2nd_reader = int((reader_allocation_df["2nd Reader"] == supervisor_name).sum())

    results_rows.append({'Staff Name': supervisor_name,
                         'Count 1st Reader': count_supervisor,
                         'Count 2nd Reader': count_2nd_reader,
                         'Count Total': count_supervisor + count_2nd_reader})

results_df = pd.DataFrame(results_rows, columns=results_columns)

# NOTE(review): the columns are fixed above, so this never triggers as written;
# it may have been meant to drop a 'NONE' staff row - confirm the intent.
if 'NONE' in results_df.columns:
    results_df = results_df.drop(columns=['NONE'])

results_df = results_df.sort_values(by='Staff Name')

print(results_df)


In [None]:
from openpyxl.worksheet.table import Table, TableStyleInfo

# The results workbook is written next to the allocation-results input file.
file_name_readers = 'CEE_Thesis_25-26_Final_Reader_Allocation.xlsx'
print(file_name_readers)

folder_results = os.path.dirname(os.path.abspath(filepath_project_allocations))
file_path_readers = os.path.join(folder_results, file_name_readers)
print (file_path_readers)

# Save a fresh, empty workbook first so the writer below can open it in append mode.
wb = openpyxl.Workbook()
wb.save(file_path_readers)

# (data frame, worksheet title, Excel table name), in writing order
sheets_to_write = (
    (reader_allocation_df, 'Final Allocations', "Table1"),
    (results_df, 'Statistics', "Table2"),
)

# Use ExcelWriter to write each frame to its sheet and wrap it in a styled table
with pd.ExcelWriter(file_path_readers, mode='a', engine='openpyxl', if_sheet_exists='replace') as writer:
    for sheet_df, sheet_title, table_name in sheets_to_write:
        sheet_df.to_excel(writer, sheet_name=sheet_title, index=False)

        # Worksheet that was just written
        wb = writer.book
        ws = wb[sheet_title]

        # Table spans everything written to the sheet
        table = Table(displayName=table_name, ref=ws.dimensions)
        style = TableStyleInfo(name="TableStyleMedium9", showFirstColumn=False,
                               showLastColumn=False, showRowStripes=True, showColumnStripes=False)
        table.tableStyleInfo = style
        ws.add_table(table)


print(f"Sheet 'Final Allocations' has been successfully written to {file_path_readers}")

# Reopen the saved file and remove the default 'Sheet' left from the empty workbook
wb = openpyxl.load_workbook(file_path_readers)
print(wb.sheetnames)
defaultSheet = wb['Sheet']
wb.remove(defaultSheet)
wb.save(file_path_readers)

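# Old Stuff

Older variants of the allocation model, kept for reference. Note that these cells reassign `x`, `projects_markers` and `results`, so running them replaces the values produced by the cells above.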

In [None]:
'''
# --- Example INPUT data--- 
# Projects indices (0..P-1)
num_projects = 10
projects_ids = list(range(10))

# Supervisor ID for each project
project_supervisorIDs = [0, 1, 2, 3, 4, 0, 1, 2, 3, 4]

# Subject Area ID of each project
project_areas = [0, 1, 2, 0, 1, 2, 1, 0, 2, 0]

# Supervisors who CANNOT second read and are EXCLUDED from balancing
no_second_reading = {1, 4}
'''

# List of Indices of Allocated Projects
num_projects = len(projects_allocated)
projects_ids = list(range(num_projects))

print("Project Indices:")
print(projects_ids)


# =========================================
# Create List of Supervisor IDs of Allocated Projects.
# An ID is the supervisor's position in the `supervisors` list, because the
# model indexes its variables and loads by that list. Row labels of
# `df_supervisors` drift out of step whenever a "TOPIC AREA" row was skipped.
supervisor_position = {}
for position, supervisor in enumerate(supervisors):
    # first occurrence wins for duplicate names
    supervisor_position.setdefault(supervisor['Supervisor'], position)

project_supervisorIDs = []
for project in projects_allocated:
    project_supervisor = project['Supervisor']
    if project_supervisor not in supervisor_position:
        raise ValueError(
            f"Supervisor '{project_supervisor}' of project {project['Code']} not found in the supervisors list.")
    project_supervisorIDs.append(supervisor_position[project_supervisor])

print("Project Supervisor IDs:")
print(project_supervisorIDs)


# =========================================
# Create List of Subject Areas (needed before project areas can be mapped to IDs)
subject_areas = df_subject_areas['Subject Areas'].tolist()

#print(subject_areas)

# Subject area name -> position in `subject_areas` (first occurrence, like list.index)
subject_area_id = {}
for area_id, subject_area in enumerate(subject_areas):
    subject_area_id.setdefault(subject_area, area_id)


# =========================================
# Create List of Subject Area IDs of Allocated Projects.
# Only the first subject area of each project is used. IDs (not names) are
# stored so they can be compared with the supervisors' sets of area IDs.
project_areas = []
for project in projects_allocated:
    project_code = project['Code']
    project_subject_areas = project['Subject Area']

    # Get first Topic Area in list (None when the project lists none)
    project_subject_area = project_subject_areas[0] if project_subject_areas else None
    if isinstance(project_subject_area, str):
        project_subject_area = project_subject_area.strip()

    project_area_id = subject_area_id.get(project_subject_area)
    if project_area_id is None:
        # Kept as None: it matches no supervisor's areas, so the project is
        # always treated as "out of interest".
        print(f"Project with Code {project_code} has no recognised Subject Area ({project_subject_area})")
    project_areas.append(project_area_id)

print("Project Subject Area IDs:")
print(project_areas)





In [None]:
'''
# --- Example INPUT data ---
# Supervisors indexed 0..S-1
supervisors_ids = list(range(num_supervisors))

# Existing supervision load (already assigned)
supervisors_loads = [2, 3, 1, 0, 4]

# Each supervisor's subject areas of interest (set of area IDs)
supervisors_subject_areas_ids = [
    {0, 1},  # supervisor 0
    {1, 2},  # supervisor 1
    {0, 2},  # supervisor 2
    {0, 1},  # supervisor 3
    {1, 2}   # supervisor 4
]

# Supervisors who CANNOT second read and are EXCLUDED from balancing
no_second_reading = {1, 4}

'''

# Supervisors indexed 0..S-1 (position in the `supervisors` list)
num_supervisors = len(supervisors)
supervisors_ids = list(range(num_supervisors))

print("Supervisor IDs:")
print(supervisors_ids)


# Existing supervision load (already assigned)
supervisors_loads = [supervisor["Load"] for supervisor in supervisors]

print("Supervisor Supervision Loads:")
print(supervisors_loads)


# Each supervisor's subject areas of interest (set of positions in `subject_areas`)
supervisors_subject_areas_ids = []

for supervisor in supervisors:
    indices = set()
    for subject in supervisor["Subject Area"]:
        if subject in subject_areas:
            indices.add(subject_areas.index(subject))
        else:
            # An area missing from the subject-area list can never match a
            # project area ID, so it is reported and skipped instead of
            # aborting the cell with a bare ValueError.
            print(f"Supervisor {supervisor['Supervisor']} lists unknown Subject Area '{subject}'")
    supervisors_subject_areas_ids.append(indices)

print("Supervisor Subject Areas:")
print(supervisors_subject_areas_ids)


# Supervisors who CANNOT second read and are EXCLUDED from balancing.
# Uses the same 'Eligibility' flag as the rest of the notebook, so a value
# other than 0 or 1 is treated the same way everywhere.
no_second_reading = [
    i for i, supervisor in enumerate(supervisors)
    if not supervisor['Eligibility']
]

print("Supervisor No Second Reading:")
print(no_second_reading)


In [None]:
# -----------------------------------------------------
# Stage 1: minimise max load among eligible supervisors
# -----------------------------------------------------
# Two-stage approach: stage 1 finds the smallest achievable maximum load (L),
# stage 2 re-solves with that maximum fixed and minimises out-of-interest
# assignments. This cell assigns the notebook-level names S, P, x, L,
# total_load, status, L_star, x2 and penalty.

m1 = Model()

S = len(supervisors_ids)
P = len(projects_ids)

# Decision vars: x[s][p] = 1 if s is second reader for p
x = [[m1.add_var(var_type=BINARY) for p in projects_ids] for s in supervisors_ids]

# Max load variable (only applies to eligible supervisors)
L = m1.add_var()

# Each project gets exactly one second reader
for p in projects_ids:
    m1 += xsum(x[s][p] for s in supervisors_ids) == 1

# No self-reading + no-second-reading supervisors
# NOTE(review): project_supervisorIDs[p] is used as a row index of x, so it must
# be a position within supervisors_ids - confirm against the cell that builds it.
for p in projects_ids:
    m1 += x[project_supervisorIDs[p]][p] == 0
    for s in no_second_reading:
        m1 += x[s][p] == 0

# Load constraints - only for eligible supervisors
# (existing supervision load + number of second readings must not exceed L)
for s in supervisors_ids:
    if s not in no_second_reading:
        total_load = supervisors_loads[s] + xsum(x[s][p] for p in projects_ids)
        m1 += total_load <= L

# Stage 1 objective: minimise L
m1.objective = L

m1.verbose = 0
status = m1.optimize()

# Stop here when stage 1 has no solution; stage 2 needs L.x
if status not in (OptimizationStatus.OPTIMAL, OptimizationStatus.FEASIBLE):
    raise RuntimeError("Stage 1 failed to find a solution")

# NOTE(review): L.x is a solver float and is reused below as a hard bound;
# consider rounding it to guard against tiny numerical error.
L_star = L.x
print(f"Stage 1: Minimum possible max load (eligible supervisors only) = {L_star}")



# --------------------------------------------
# Stage 2: fix L and minimise subject area of interest penalty
# --------------------------------------------

m2 = Model()

# Fresh assignment variables for the second model (same layout as x)
x2 = [[m2.add_var(var_type=BINARY) for p in projects_ids] for s in supervisors_ids]

# Each project exactly one second reader
for p in projects_ids:
    m2 += xsum(x2[s][p] for s in supervisors_ids) == 1

# No self-reading + no-second-reading supervisors
for p in projects_ids:
    m2 += x2[project_supervisorIDs[p]][p] == 0
    for s in no_second_reading:
        m2 += x2[s][p] == 0

# Load constraints with fixed L_star (only for eligible supervisors)
for s in supervisors_ids:
    if s not in no_second_reading:
        total_load = supervisors_loads[s] + xsum(x2[s][p] for p in projects_ids)
        m2 += total_load <= L_star

# Penalty for out-of-interest assignments
# NOTE(review): the membership test only works if project_areas holds the same
# kind of value as the sets in supervisors_subject_areas_ids - confirm against
# the cells that build them.
penalty_weight = 1
penalty = xsum(
    penalty_weight * x2[s][p]
    for s in supervisors_ids
    for p in projects_ids
    if project_areas[p] not in supervisors_subject_areas_ids[s]
)

# Stage 2 objective: minimise penalty
m2.objective = penalty

m2.verbose = 0
status = m2.optimize()

# Report the stage 2 assignment; a failure is only printed, not raised
if status in (OptimizationStatus.OPTIMAL, OptimizationStatus.FEASIBLE):
    print(f"\nStage 2: Interest penalty = {penalty.x}")
    print(f"Max load (eligible supervisors only) = {L_star}")
    for p in projects_ids:
        for s in supervisors_ids:
            if x2[s][p].x >= 0.99:
                in_interest = project_areas[p] in supervisors_subject_areas_ids[s]
                tag = "(in interest)" if in_interest else "(out of interest)"
                print(f"Project {p} -> Second reader {s} {tag}")
else:
    print("Stage 2 failed to find a solution")

In [None]:
#===============================
# Create List of Project Markers from the Stage 2 solution.
# Stage 2's variables are `x2`; `x` belongs to the Stage 1 model, whose
# solution ignores subject-area interest.
projects_markers = []

for p in range(num_projects):
    project = projects_allocated[p]
    project_code = project['Code']
    project_subject_areas = project['Subject Area']
    project_supervisor = project['Supervisor']

    for s in range(num_supervisors):
        assigned_value = x2[s][p].x
        # The value is None when the solver produced no solution
        if assigned_value is not None and assigned_value >= 0.99:
            project_second_reader = supervisors[s]['Supervisor']

            project_markers = {'Code':project_code, 'Subject Area': project_subject_areas, 'Supervisor':project_supervisor,'2nd Reader':project_second_reader}
            projects_markers.append(project_markers)

print(projects_markers)



#=========================
# Test results are logical

# Check Second Reader is not the same as Supervisor
errorIDs = [
    index for index, project_markers in enumerate(projects_markers)
    if project_markers["Supervisor"] == project_markers["2nd Reader"]
]

if not errorIDs:
    print (f"All 2nd readers are different from the Supervisors")
else:
    for errorID in errorIDs:
        # Look the offending entry up in the list (not in the last loop dict)
        project = projects_markers[errorID]
        print(f"Project {project['Code']} has the same person as Supervisor and 2nd Reader ({project['Supervisor']})")


#  Check Second Reader has the project's subject Area:
# Subject areas per supervisor name; the first entry wins for duplicate names.
subject_areas_by_supervisor = {}
for supervisor in supervisors:
    subject_areas_by_supervisor.setdefault(supervisor['Supervisor'], set(supervisor['Subject Area']))

errorIDs = []
for index, project_markers in enumerate(projects_markers):
    second_reader = project_markers["2nd Reader"]
    reader_subject_areas = subject_areas_by_supervisor.get(second_reader, set())
    # An error is the ABSENCE of any shared subject area
    if not reader_subject_areas.intersection(project_markers['Subject Area']):
        errorIDs.append(index)

if not errorIDs:
    print (f"All 2nd readers have interest in the allocated projects' subject areas.")
else:
    for errorID in errorIDs:
        project = projects_markers[errorID]
        # Report this project's own subject areas, not those of the last one checked
        print(f"Project {project['Code']} has the 2nd Reader ({project['2nd Reader']}) who is not interested in {project['Subject Area']}")



In [None]:
"""
    academics: list of dicts with keys:
        - 'name': str
        - 'subjects': set or list of subject area names
        - 'eligible': bool (eligible to be second reader)
    projects: list of dicts with keys:
        - 'id': unique id (int or str) -- if missing the index is used
        - 'supervisor': academic name (must match an academics['name'])
        - 'subjects': set or list of subject area names

    Returns:
        dict with project_id -> second_reader_name, and load info.
    """
# Model inputs, derived from `supervisors` and `projects_allocated`.
# Subject areas are stored as sets so overlaps can be tested directly.
academics = [
    {'name': supervisor['Supervisor'],
     'subjects': set(supervisor['Subject Area']),
     'eligible': supervisor['Eligibility']}
    for supervisor in supervisors
]

projects = [
    {'id': project_id,
     'code': project['Code'],
     'supervisor': project['Supervisor'],
     'subjects': set(project['Subject Area'])}
    for project_id, project in enumerate(projects_allocated)
]

# Map names -> index
name_to_idx = {a['name']: i for i, a in enumerate(academics)}
A = len(academics)
P = len(projects)

# Precompute supervised counts per academic
supervised_count = [0]*A
for p in projects:
    sup = p['supervisor']
    if sup not in name_to_idx:
        raise ValueError(f"Supervisor '{sup}' not found in academics list.")
    supervised_count[name_to_idx[sup]] += 1

# Build eligible matrix: can academic a be second reader for project p?
# Conditions: academic eligible, shares >=1 subject area with project, and not the supervisor
can_be_second = [[False]*P for _ in range(A)]
for a_idx, a in enumerate(academics):
    for p_idx, p in enumerate(projects):
        if (a['eligible']
            and (a['name'] != p['supervisor'])
            and (len(a['subjects'].intersection(p['subjects'])) > 0)):
            can_be_second[a_idx][p_idx] = True

# Build model
m = Model(sense=MINIMIZE)

# Variables x[a,p] in {0,1} assign project p to academic a as second reader
# (only created for allowed pairings; other entries stay None)
x = [[None]*P for _ in range(A)]
for a in range(A):
    for p in range(P):
        if can_be_second[a][p]:
            x[a][p] = m.add_var(var_type=BINARY, name=f"x_{a}_{p}")

# Each project assigned exactly once
for p in range(P):
    m += xsum(x[a][p] for a in range(A) if x[a][p] is not None) == 1, f"assign_proj_{p}"

# Define total load per academic: supervised_count + sum_p x[a,p]
# We'll model them as integer variables (they are integer by construction), and
# use M (max) and m_var (min) to minimize the range
total_load = [m.add_var(var_type=INTEGER, lb=0, name=f"load_{a}") for a in range(A)]
for a in range(A):
    assigned_sum = xsum(x[a][p] for p in range(P) if x[a][p] is not None)
    # total_load[a] == supervised_count[a] + assigned_sum
    m += total_load[a] == supervised_count[a] + assigned_sum, f"load_def_{a}"

# Max and min load (bounds apply to eligible academics only)
M = m.add_var(var_type=INTEGER, lb=0, name="M")
m_var = m.add_var(var_type=INTEGER, lb=0, name="m_var")

for a in range(A):
    if academics[a]['eligible']:
        m += total_load[a] <= M
        m += total_load[a] >= m_var

# Objective: minimize range (M - m_var)
m.objective = M - m_var

# Optionally: you can set a time limit or mip gap here:
# m.max_gap = 0.01   # 1% optimality gap allowed
# m.max_seconds = 30

# Variable values are only available when the solver found a solution, so
# check the status before reading any `.x`.
status = m.optimize()
if status not in (OptimizationStatus.OPTIMAL, OptimizationStatus.FEASIBLE):
    raise RuntimeError(f"2nd reader allocation failed to find a solution (status: {status})")

# Extract assignment: project id -> name of the academic whose x[a,p] is 1
assignment = {}
for p_idx, p in enumerate(projects):
    assigned = None
    for a_idx in range(A):
        var = x[a_idx][p_idx]
        if var is not None and var.x >= 0.5:
            assigned = academics[a_idx]['name']
            break
    assignment[p['id']] = assigned

# Load info. Solver values are floats (e.g. 2.9999999); round before converting
# so a load is not truncated down by one.
loads = {academics[a]['name']: int(round(total_load[a].x)) for a in range(A)}
supervised = {academics[a]['name']: supervised_count[a] for a in range(A)}

results = {
    'assignment': assignment,
    'supervised': supervised,
    'loads': loads,
    'model': m}

print('2nd Reader Assignments:')
print(results['assignment'])
print('Supervision Loads:')
print(results['supervised'])
print('Total Loads:')
print(results['loads'])



# Inert example kept for reference: the string below is never executed.
# NOTE(review): it calls `allocate_second_readers`, which is not defined in the
# visible cells - confirm before reviving it.
'''
# ----------------------------
# Example usage:
if __name__ == "__main__":
    academics = [
        {'name': 'Alice', 'subjects': ['A','B'], 'eligible': True},
        {'name': 'Bob',   'subjects': ['B','C'], 'eligible': True},
        {'name': 'Carol', 'subjects': ['C'],     'eligible': False},
        {'name': 'Dave',  'subjects': ['A','C'], 'eligible': True},
    ]

    projects = [
        {'id': 'P1', 'supervisor': 'Alice', 'subjects': ['A']},
        {'id': 'P2', 'supervisor': 'Bob',   'subjects': ['B']},
        {'id': 'P3', 'supervisor': 'Carol', 'subjects': ['C']},
        {'id': 'P4', 'supervisor': 'Alice', 'subjects': ['A','B']},
    ]

    res = allocate_second_readers(academics, projects)
    print("Assignments (project -> second reader):")
    for pid, reader in res['assignment'].items():
        print(f"  {pid} -> {reader}")
    print("\nSupervised counts:")
    for name, c in res['supervised'].items():
        print(f"  {name}: {c}")
    print("\nTotal loads (supervised + second-read):")
    for name, load in res['loads'].items():
        print(f"  {name}: {load}")
'''