In [1]:
import pandas as pd
import pulp
import re
from rapidfuzz import process, fuzz
import csv

In [2]:
df = pd.read_csv('/home/dutchr/python_code/scripts/parent_camp_info/camp_info/Year 10 Outdoor Experience 2025 - Group Selection Form (Responses).csv').astype('str')

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 107 entries, 0 to 106
Data columns (total 9 columns):
 #   Column                        Non-Null Count  Dtype 
---  ------                        --------------  ----- 
 0   Timestamp                     107 non-null    object
 1   Email address                 107 non-null    object
 2   Your First Name               107 non-null    object
 3   Your Surname                  107 non-null    object
 4   Choice 1 (First and Surname)  107 non-null    object
 5   Choice 2 (First and Surname)  107 non-null    object
 6   Choice 3 (First and Surname)  107 non-null    object
 7   Choice 4 (First and Surname)  107 non-null    object
 8   Choice 5 (First and Surname)  107 non-null    object
dtypes: object(9)
memory usage: 7.7+ KB


In [4]:
def clean_text(text):
    """
    Converts text to lowercase, strips leading/trailing whitespace,
    and replaces multiple whitespaces with a single space.
    """
    text = text.title().strip()
    text = re.sub(r'\s+', ' ', text)  # Replace multiple spaces with a single space
    return text

In [5]:

student_preferences = {}
for index, row in df.iterrows():
    # Combine the first name and surname into a full name key
    full_name = f"{row['Your First Name']} {row['Your Surname']}".title().strip()
    # Collect all group choices into a list
    preferences = [
        clean_text(row['Choice 1 (First and Surname)']),
        clean_text(row['Choice 2 (First and Surname)']),
        clean_text(row['Choice 3 (First and Surname)']),
        clean_text(row['Choice 4 (First and Surname)']),
        clean_text(row['Choice 5 (First and Surname)'])
    ]
    preferences = [pref for pref in preferences if pref not in ('nan', '', 'Nan')]
    # Assign the list of preferences to the dictionary under the full name key
    student_preferences[full_name] = preferences

In [6]:
def correct_names(preferences, main_students):
    # Dictionary to store corrected preferences
    corrected_preferences = {}
    
    # Iterate through each student and their preferences
    for student, prefs in preferences.items():
        corrected_prefs = []
        for pref in prefs:
            # Find the best match for the misspelt name in the main_students list
            best_match = process.extractOne(pref, main_students, scorer=fuzz.WRatio)
            if best_match and best_match[1] > 80:  # You can adjust the threshold score
                corrected_prefs.append(best_match[0])
            else:
                corrected_prefs.append(pref)  # If no good match, keep the original
        corrected_preferences[student] = corrected_prefs
    
    return corrected_preferences

In [7]:
main_students = set(student_preferences.keys())

corrected_preferences = correct_names(student_preferences, main_students)

In [None]:
# Check if there are any students who are in preferences but not in the main student list
# Gather all preferences into a single set
all_preferences = set()
for preferences in corrected_preferences.values():
    all_preferences.update(preferences)

# Students who are in preferences but not in the main student list
students_only_in_preferences = all_preferences - main_students

students_only_in_preferences

In [15]:
not_attending = pd.read_csv('/home/dutchr/python_code/scripts/parent_camp_info/tent_pairing/empty not attending.csv').astype('str')

In [16]:
not_attending = not_attending['Absent'].str.title().str.strip().tolist()

In [17]:
students_only_in_preferences = [student for student in students_only_in_preferences if student not in not_attending]

In [18]:
students_only_in_preferences.sort()
students_only_in_preferences

['Alex Mathers',
 'Amarinder Singh',
 'Aria Ripley',
 'Beau Hogarth',
 'Beue Hogath',
 'Charli Kleeman',
 'Charlotte Nairn',
 'Charlotte Narin',
 'Connor Leverett',
 'Emalia Basic',
 'Hana Parry',
 'Imogen Brown',
 'Indiana Stacy',
 'Leonidas Sioutus',
 'Maddy Mckay',
 'Madeline Weeks',
 'Nick Kosmala',
 'Oluwatimilehin Bamise',
 'Shrays Butnagger',
 'Sophie Arozollo',
 'Sophie Arruzzolo',
 'Sophie Aruzzolo',
 'Timi Bamise',
 'Tyson Soraghan',
 'Venkata Kilari']

In [19]:
#remove students who are not attending from all preferences
for student, prefs in corrected_preferences.items():
    corrected_preferences[student] = [pref for pref in prefs if pref not in not_attending]

In [20]:
#add students_only_in_preferences to corrected_preferences dict as keys with empty lists as values
for student in students_only_in_preferences:
    corrected_preferences[student] = []


In [21]:
corrected_preferences

{'Elani  Stevens': ['Lily  Brain',
  'Alexia Moraitis',
  'Chanel Surace',
  'Kasia Nagy',
  'Imogen Brown'],
 'Grace Terrington': ['Jersi Jensen-Wallace',
  'Ekam Jammu',
  'Gurnaaz Kaur  Grewal'],
 'Ekam Jammu': ['Grace Terrington', 'Jersi Jensen-Wallace', 'Ella Field'],
 'Jersi Jensen-Wallace': ['Grace Terrington',
  'Gurnaaz Kaur  Grewal',
  'Ekam Jammu'],
 'Krish Tewar': ['Ryan Fritsch',
  'Gabriel Truscott',
  'Bailey Clark',
  'Joshua Doak',
  'Kiano Colebrook'],
 'Kiano Colebrook': ['Luke Mathers',
  'Joshua Doak',
  'Gabriel Truscott',
  'Bailey Clark',
  'Saxon Hall'],
 'Aliza  Patterson': ['Ella  Forster', 'Tahlia Waanders', 'Emalia Basic'],
 'Sienna Jaeschke': ['Indie Stacey',
  'Taylah Ross',
  'Sophia Chinca',
  'Cara Twigge',
  'Annalee Loechel'],
 'Luke Mathers': ['Kiano Colebrook',
  'Cameron Harrald',
  'Joshua Doak',
  'Bailey Clark',
  'Zoe Parcell'],
 'Xavier Trusz': ['Owen Doyle',
  'Daniel Cox',
  'Nicolas Dinevski',
  'Oluwatimilehin Bamise'],
 'Owen Doyle': ['X

In [22]:
def solve_pairing(preferences):
    # Create a problem variable:
    prob = pulp.LpProblem("Student_Pairing", pulp.LpMaximize)

    # Create variables for each possible pair
    pairs = {}
    all_students = set(preferences.keys()) | set(pref for prefs in preferences.values() for pref in prefs)
    for student in all_students:
        for preferred in all_students:
            if student != preferred:  # Avoid pairing students with themselves
                pair_name = f"pair_{student}|{preferred}"
                pairs[(student, preferred)] = pulp.LpVariable(pair_name, 0, 1, pulp.LpBinary)

    # Objective function: maximize sum of mutual preferences
    prob += pulp.lpSum(pairs[(s, p)] * ((len(preferences[s]) +1 - preferences[s].index(p) if p in preferences[s] else 0.5) +
                                        (len(preferences[s]) +1 - preferences[p].index(s) if s in preferences[p] else 0.5))
                       for s in preferences for p in all_students if (s, p) in pairs)

    # Constraint: Each student can be in at most one pair
    for s in all_students:
        prob += pulp.lpSum(pairs[(s, p)] for p in all_students if (s, p) in pairs) + \
                pulp.lpSum(pairs[(p, s)] for p in all_students if (p, s) in pairs) <= 1

    # Solve the problem
    prob.solve()

    # Extract the pairs
    results = []
    seen = set()
    for (s, p), var in pairs.items():
        if pulp.value(var) == 1 and s not in seen and p not in seen:
            results.append((s, p))
            seen.update([s, p])

    return results

In [23]:
resulting_pairs = solve_pairing(corrected_preferences)

Welcome to the CBC MILP Solver 
Version: 2.10.10 
Build Date: Sep 26 2023 

command line - /home/dutchr/miniconda3/envs/school/lib/python3.13/site-packages/pulp/solverdir/cbc/linux/arm64/cbc /tmp/145a3221facf4ac29eb37ba77ff15228-pulp.mps -max -timeMode elapsed -branch -printingOptions all -solution /tmp/145a3221facf4ac29eb37ba77ff15228-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 137 COLUMNS
At line 86598 RHS
At line 86731 BOUNDS
At line 104024 ENDATA
Problem MODEL has 132 rows, 17292 columns and 34584 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 550.5 - 0.01 seconds
Cgl0004I processed model has 132 rows, 8646 columns (8646 integer (8646 of which binary)) and 17292 elements
Cutoff increment increased from 1e-05 to 0.4999
Cbc0038I Initial state - 12 integers unsatisfied sum - 6
Cbc0038I Solution found of 502.5
Cbc0038I Before mini branch and bound, 8634 integers at bound fi

In [None]:
# Specify the filename
filename = "output_camp2.csv"

# Writing to the csv file
with open(filename, mode='w', newline='') as file:
    writer = csv.writer(file)
    # Optionally write a header
    writer.writerow(['Name1', 'Name2'])
    # Write data
    writer.writerows(resulting_pairs)

print(f"Data successfully written to {filename}")

In [24]:
all_students = set(corrected_preferences.keys())

In [25]:
paired = set()
for first_name, second_name in resulting_pairs:
    paired.add(first_name)
    paired.add(second_name)

In [26]:
all_students - paired

set()

In [27]:
resulting_pairs

[('Nicholas  Kosmala', 'Zade Bader'),
 ('Harrison  Connelly', 'Leo Sioutis'),
 ('Madison Mckay', 'Hannah  Thompson'),
 ('Fena Rakholiya', 'Charlotte Narin'),
 ('Cameron Harrald', 'Ryan Fritsch'),
 ('Alex Mathers', 'Imogen Brown'),
 ('Harry Tamblyn', 'Jonah Podlewski'),
 ('Andrzej Czechuara', 'Nectarios Georgiou'),
 ('Joshua Lam-Huynh', 'William  Mayne'),
 ('Cara Twigge', 'Indiana Stacy'),
 ('Ella  Forster', 'Aliza  Patterson'),
 ('Nicholas Walker', 'Oluwatimilehin Bamise'),
 ('Dylan Rands', 'Elijah Cockburn'),
 ('Ashton  Fenn', 'Connor Lengs'),
 ('Hanna  Abimosleh', 'Madeline Weeks'),
 ('Owen Doyle', 'Xavier Trusz'),
 ('Sake Demetri', 'Sophie Aruzzolo'),
 ('Hridya  Soni', 'Luke Schulz'),
 ('Samaira  Bath', 'Peyton Shaw'),
 ('Lily  Brain', 'Elani  Stevens'),
 ('Jordan  Zollo', 'Cooper Bache'),
 ('Grace Watts', 'Zara Walding'),
 ('Alexis Corbin', 'Ruby  Dolejs'),
 ('Ariana Loizou', 'Lucy   Haberfield'),
 ('Shreyas Bhatnagar', 'Venkata Kilari'),
 ('Jake  Do', 'Nick Kosmala'),
 ('Vaishnavi