In [15]:
import pandas as pd
import numpy as np
import time
import tracemalloc
from numba import njit, typed, types

# Define a tuple type for a pair of int64 values.
pair_type = types.Tuple((types.int64, types.int64))

# ============================================================
# Numba-accelerated helper functions
# ============================================================

@njit(cache=True)
def compute_upper_bound(mask, comp, n):
    """
    Given a bitmask 'mask' indicating which students are unmatched,
    compute an upper bound on the additional compatibility score achievable
    from the remaining students.
    """
    additional = 0.0
    for i in range(n):
        if mask & (1 << i):
            best_val = 0.0
            for j in range(n):
                if (mask & (1 << j)) and (i != j):
                    if comp[i, j] > best_val:
                        best_val = comp[i, j]
            additional += best_val
    return additional / 2.0

@njit(cache=True)
def clear_typed_list(typed_list):
    """
    Clears a Numba-typed list by popping all its elements.
    """
    count = len(typed_list)
    for _ in range(count):
        typed_list.pop()

@njit(cache=True)
def search(mask, current_score, comp, n, best_score, best_pairs, current_pairs, memo, allocations):
    """
    Recursive branch and bound search.
    
    Parameters:
      mask: Bitmask (np.int64) representing unmatched students.
      current_score: Accumulated compatibility score.
      comp: n x n NumPy array of compatibility scores.
      n: Total number of students.
      best_score: A 1-element NumPy array (mutable container) for the best score.
      best_pairs: A Numba-typed list (of 2-element tuples of int64) storing the best pairing.
      current_pairs: A Numba-typed list (of 2-element tuples of int64) storing the current pairing.
      memo: A Numba-typed dictionary mapping masks (int64) to float64 upper bounds.
      allocations: A 1-element NumPy array (of int64) used as a counter.
    """
    # Base case: all students paired.
    if mask == 0:
        if current_score > best_score[0]:
            best_score[0] = current_score
            clear_typed_list(best_pairs)
            for idx in range(len(current_pairs)):
                best_pairs.append(current_pairs[idx])
        return

    # Use memoization to prune.
    if mask in memo:
        if current_score + memo[mask] <= best_score[0]:
            return
    else:
        ub_val = compute_upper_bound(mask, comp, n)
        memo[mask] = ub_val
        if current_score + ub_val <= best_score[0]:
            return

    # Choose the first unmatched student (lowest-index bit in mask).
    i = 0
    while i < n:
        if mask & (1 << i):
            break
        i += 1

    new_mask = mask & ~(1 << i)
    
    # Build a NumPy array of candidate partner indices from new_mask.
    candidate_count = 0
    candidate_arr = np.empty(n, dtype=np.int64)
    for j in range(n):
        if new_mask & (1 << j):
            candidate_arr[candidate_count] = j
            candidate_count += 1
    candidate_arr = candidate_arr[:candidate_count]
    
    # Sort candidate indices in descending order of compatibility with student i.
    order_temp = np.argsort(comp[i, candidate_arr])
    m = order_temp.shape[0]
    order = np.empty(m, dtype=np.int64)
    for k in range(m):
        order[k] = order_temp[m - 1 - k]  # Reverse order.
    sorted_candidates = candidate_arr[order]
    
    # Try pairing student i with each candidate.
    for j in sorted_candidates:
        pair_score = comp[i, j]
        next_mask = new_mask & ~(1 << j)
        current_pairs.append((i, j))
        allocations[0] += 1
        search(next_mask, current_score + pair_score, comp, n, best_score, best_pairs, current_pairs, memo, allocations)
        current_pairs.pop()

@njit(cache=True)
def branch_and_bound_numba(comp, n):
    """
    Entry point for the Numba-accelerated branch and bound search.
    
    Returns a tuple:
      (best total compatibility score, best pairing as a typed list, allocation count)
    """
    best_score = np.array([-1e9], dtype=np.float64)  # 1-element array for best score.
    best_pairs = typed.List.empty_list(pair_type)
    current_pairs = typed.List.empty_list(pair_type)
    memo = typed.Dict.empty(key_type=types.int64, value_type=types.float64)
    allocations = np.zeros(1, dtype=np.int64)  # 1-element counter.
    full_mask = np.int64((1 << n) - 1)  # Cast to np.int64.
    search(full_mask, 0.0, comp, n, best_score, best_pairs, current_pairs, memo, allocations)
    return best_score[0], best_pairs, allocations[0]

# ============================================================
# Python wrapper function
# ============================================================
def solve_roommate_matching_improved(csv_path, method='branch_and_bound'):
    """
    Reads compatibility data from CSV and computes the optimal pairing using
    a branch and bound algorithm with a bitmask-based representation and Numba acceleration.
    
    Returns:
      - best_pairing_named: List of tuples (student names) for the optimal pairing.
      - best_score: Total compatibility score.
      - allocations: Number of recursive branch allocations (nodes expanded).
      - run_time: Total execution time in seconds.
      - peak_memory: Peak memory usage in MiB.
    """
    # Load CSV data.
    df = pd.read_csv(csv_path)
    
    # Build a sorted list of unique student names.
    student_names = sorted(list(set(df['Student 1'].tolist() + df['Student 2'].tolist())))
    n = len(student_names)
    student_to_index = {name: i for i, name in enumerate(student_names)}
    index_to_student = {i: name for i, name in enumerate(student_names)}
    
    # Build an n x n compatibility matrix as a NumPy array.
    comp = np.zeros((n, n), dtype=np.float64)
    for _, row in df.iterrows():
        i = student_to_index[row['Student 1']]
        j = student_to_index[row['Student 2']]
        score = row['Compatibility Score']
        comp[i, j] = score
        comp[j, i] = score  # Ensure symmetry.
    
    # Start timing and memory tracking.
    tracemalloc.start()
    start_time = time.time()
    
    best, best_pairs, allocs = branch_and_bound_numba(comp, n)
    
    end_time = time.time()
    current, peak = tracemalloc.get_traced_memory()
    tracemalloc.stop()
    
    run_time = end_time - start_time
    peak_memory = peak / 10**6  # Convert bytes to MiB.
    
    # Convert best_pairs (typed list of index pairs) to pairs of student names.
    best_pairing_named = [(index_to_student[p[0]], index_to_student[p[1]]) for p in best_pairs]
    
    print("=== Improved Branch and Bound (Numba) ===")
    print(f"Optimal Pairing: {best_pairing_named}")
    print(f"Total Compatibility Score: {best}")
    print(f"Number of Allocations (nodes expanded): {allocs}")
    print(f"Time Taken: {run_time:.4f} seconds")
    print(f"Peak Memory Usage: {peak_memory:.3f} MiB")
    
    return best_pairing_named, best, allocs, run_time, peak_memory

In [16]:
solve_roommate_matching_improved("data/compatibility_6.csv")

=== Improved Branch and Bound (Numba) ===
Optimal Pairing: [('Student_1', 'Student_2'), ('Student_3', 'Student_5'), ('Student_4', 'Student_6')]
Total Compatibility Score: 2.2812001621554097
Number of Allocations (nodes expanded): 13
Time Taken: 0.0001 seconds
Peak Memory Usage: 0.003 MiB


([('Student_1', 'Student_2'),
  ('Student_3', 'Student_5'),
  ('Student_4', 'Student_6')],
 2.2812001621554097,
 13,
 0.00012087821960449219,
 0.0028)

In [2]:
solve_roommate_matching_improved("data/compatibility_6.csv") # Generated after starting kernel new

=== Improved Branch and Bound (Numba) ===
Optimal Pairing: [('Student_1', 'Student_2'), ('Student_3', 'Student_5'), ('Student_4', 'Student_6')]
Total Compatibility Score: 2.2812001621554097
Number of Allocations (nodes expanded): 13
Time Taken: 0.4755 seconds
Peak Memory Usage: 19.920 MiB


([('Student_1', 'Student_2'),
  ('Student_3', 'Student_5'),
  ('Student_4', 'Student_6')],
 2.2812001621554097,
 13,
 0.4755070209503174,
 19.919839)

In [2]:
solve_roommate_matching_improved("data/compatibility_10.csv") # Generated after starting kernel new

=== Improved Branch and Bound (Numba) ===
Optimal Pairing: [('Student_1', 'Student_8'), ('Student_10', 'Student_2'), ('Student_3', 'Student_4'), ('Student_5', 'Student_6'), ('Student_7', 'Student_9')]
Total Compatibility Score: 3.708931604508898
Number of Allocations (nodes expanded): 79
Time Taken: 0.4819 seconds
Peak Memory Usage: 19.920 MiB


([('Student_1', 'Student_8'),
  ('Student_10', 'Student_2'),
  ('Student_3', 'Student_4'),
  ('Student_5', 'Student_6'),
  ('Student_7', 'Student_9')],
 3.708931604508898,
 79,
 0.48190999031066895,
 19.919655)

In [2]:
solve_roommate_matching_improved("data/compatibility_20.csv") # Generated after starting kernel new

=== Improved Branch and Bound (Numba) ===
Optimal Pairing: [('Student_1', 'Student_8'), ('Student_10', 'Student_12'), ('Student_11', 'Student_14'), ('Student_13', 'Student_17'), ('Student_15', 'Student_19'), ('Student_16', 'Student_6'), ('Student_18', 'Student_7'), ('Student_2', 'Student_5'), ('Student_20', 'Student_9'), ('Student_3', 'Student_4')]
Total Compatibility Score: 8.003157676038137
Number of Allocations (nodes expanded): 1068
Time Taken: 0.4920 seconds
Peak Memory Usage: 19.921 MiB


([('Student_1', 'Student_8'),
  ('Student_10', 'Student_12'),
  ('Student_11', 'Student_14'),
  ('Student_13', 'Student_17'),
  ('Student_15', 'Student_19'),
  ('Student_16', 'Student_6'),
  ('Student_18', 'Student_7'),
  ('Student_2', 'Student_5'),
  ('Student_20', 'Student_9'),
  ('Student_3', 'Student_4')],
 8.003157676038137,
 1068,
 0.49198484420776367,
 19.920558)

In [2]:
solve_roommate_matching_improved("data/compatibility_50.csv") # Generated after starting kernel new

=== Improved Branch and Bound (Numba) ===
Optimal Pairing: [('Student_1', 'Student_35'), ('Student_10', 'Student_46'), ('Student_11', 'Student_4'), ('Student_12', 'Student_48'), ('Student_13', 'Student_17'), ('Student_14', 'Student_3'), ('Student_15', 'Student_7'), ('Student_16', 'Student_37'), ('Student_18', 'Student_34'), ('Student_19', 'Student_5'), ('Student_2', 'Student_22'), ('Student_20', 'Student_23'), ('Student_21', 'Student_24'), ('Student_25', 'Student_39'), ('Student_26', 'Student_49'), ('Student_27', 'Student_8'), ('Student_28', 'Student_44'), ('Student_29', 'Student_6'), ('Student_30', 'Student_9'), ('Student_31', 'Student_45'), ('Student_32', 'Student_33'), ('Student_36', 'Student_43'), ('Student_38', 'Student_47'), ('Student_40', 'Student_42'), ('Student_41', 'Student_50')]
Total Compatibility Score: 22.377550358731888
Number of Allocations (nodes expanded): 7692404
Time Taken: 8.0442 seconds
Peak Memory Usage: 19.920 MiB


([('Student_1', 'Student_35'),
  ('Student_10', 'Student_46'),
  ('Student_11', 'Student_4'),
  ('Student_12', 'Student_48'),
  ('Student_13', 'Student_17'),
  ('Student_14', 'Student_3'),
  ('Student_15', 'Student_7'),
  ('Student_16', 'Student_37'),
  ('Student_18', 'Student_34'),
  ('Student_19', 'Student_5'),
  ('Student_2', 'Student_22'),
  ('Student_20', 'Student_23'),
  ('Student_21', 'Student_24'),
  ('Student_25', 'Student_39'),
  ('Student_26', 'Student_49'),
  ('Student_27', 'Student_8'),
  ('Student_28', 'Student_44'),
  ('Student_29', 'Student_6'),
  ('Student_30', 'Student_9'),
  ('Student_31', 'Student_45'),
  ('Student_32', 'Student_33'),
  ('Student_36', 'Student_43'),
  ('Student_38', 'Student_47'),
  ('Student_40', 'Student_42'),
  ('Student_41', 'Student_50')],
 22.377550358731888,
 7692404,
 8.044166803359985,
 19.919595)