# Optimal Project Allocation

In [1]:
import pandas as pd
import numpy as np
import random

## Greedy Algorithm

In [3]:
def standardize_project_name(project):
    """Normalize a project label so its leading '<prefix>.' is followed by one space.

    Args:
        project: Raw cell value. Anything that is not a ``str`` (e.g. NaN or
            None from an empty cell) is treated as "no choice".

    Returns:
        The stripped label with exactly one space after the prefix dot
        (``'1.Foo'`` -> ``'1. Foo'``, ``'10.  Foo'`` -> ``'10. Foo'``), the
        stripped label unchanged when it has no such prefix, or ``None`` for
        non-string input.
    """
    if not isinstance(project, str):
        return None

    project = project.strip()
    if len(project) > 1 and project[1] == '.':
        # One-character prefix such as '1.'; the length check avoids an
        # IndexError on single-character labels.
        split_at = 2
    else:
        # Multi-digit numeric prefix such as '10.'; other labels are left as-is.
        number, dot, _ = project.partition('.')
        split_at = len(number) + 1 if dot and number.isdigit() else 0

    if split_at:
        project = project[:split_at] + ' ' + project[split_at:].strip()
    return project

def read_excel_and_prepare_choices(filename):
    """Load the selection form and normalize the five ranked-choice columns.

    Args:
        filename: Path of the Excel workbook to read.

    Returns:
        A ``(df, choice_columns)`` tuple: the loaded frame with every choice
        column passed through ``standardize_project_name``, and the list of
        choice column names ordered from first to fifth choice.
    """
    survey = pd.read_excel(filename)

    # Column headers of the ranked choices, most preferred first.
    choice_columns = [
        'Project (First Choice)',
        'Project (Second Choice)',
        'Project (Third Choice)',
        'Project (Fourth Choice)',
        'Project (Fifth Choice)'
    ]

    # Replace each choice column with its normalized version.
    normalized = {
        column: survey[column].apply(standardize_project_name)
        for column in choice_columns
    }
    survey = survey.assign(**normalized)

    return survey, choice_columns

def greedy_allocation(df, choice_columns, project_capacity, random_state=None):
    """Assign students to projects greedily, in a random processing order.

    Students are visited in shuffled order; each one receives the first of
    their ranked choices that is a known project and still has a free slot.

    Args:
        df: One row per student, containing the columns in ``choice_columns``
            and optionally 'Student ID' and 'Name'.
        choice_columns: Choice column names, most preferred first.
        project_capacity: Mapping of project name -> maximum number of students.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            processing order (and therefore the result) can be reproduced.
            ``None`` keeps the order random.

    Returns:
        DataFrame with columns 'Student ID', 'Student Name',
        'Assigned Project' ('Not Assigned' when every choice was full or
        unknown) and 'Preference Rank' (1-based, missing when unassigned),
        one row per student in processing order.
    """
    project_allocation_counts = {project: 0 for project in project_capacity}
    has_student_id = 'Student ID' in df.columns
    has_name = 'Name' in df.columns

    # Number the rows *before* shuffling so that the fallback identifiers
    # (idx + 1) refer to the student's original row, not the shuffled position.
    shuffled = df.reset_index(drop=True).sample(frac=1, random_state=random_state)

    allocation_results = []
    for idx, row in shuffled.iterrows():
        assigned_project = 'Not Assigned'
        preference_rank = None

        for rank, col in enumerate(choice_columns, start=1):
            preferred_project = row[col]
            # Skip empty choices and projects without a declared capacity.
            if not preferred_project or preferred_project not in project_capacity:
                continue
            if project_allocation_counts[preferred_project] < project_capacity[preferred_project]:
                project_allocation_counts[preferred_project] += 1
                assigned_project = preferred_project
                preference_rank = rank
                break

        allocation_results.append({
            'Student ID': row['Student ID'] if has_student_id else idx + 1,
            'Student Name': row['Name'] if has_name else f'Student {idx + 1}',
            'Assigned Project': assigned_project,
            'Preference Rank': preference_rank
        })

    return pd.DataFrame(allocation_results)

def main():
    """Run the greedy allocation on the selection form and export the result.

    Reads the questionnaire workbook, allocates students with
    ``greedy_allocation`` and writes the assignment table to an Excel file.
    """
    input_filename = 'Dataset.questionnaire.2023-2024.MGT 555 - Project Selection Form.xlsx'
    output_filename = 'greedy_assignment_results.xlsx'

    # Every project offers the same number of places.
    seats_per_project = 5
    project_names = (
        '1. Boschung Mecatronic AG - FAST IOT',
        '2. Boschung Mecatronic AG - Pavement Sensor loT',
        '3. Datwyler- Electrically conductive rubber to be used as strain sensor',
        '4. Datwyler-Energy Harvesting Using Nanogenerator-Elastomers Montblanc  Exercise Detection',
        '5. Logitech-Autonomous Mouse',
        '6. Logitech-Eco-Design Plastic',
        '7. Schindler-Alternative Mobility',
        '8. Schindler-Living Reusing Heat fo Greenhouse and Building Heating',
        '9. ZF-Group-Sustainable Grid Management Infrastructure Solutions for eTrucks eBuses and eTrailers',
        '10. ZF-Group-Next Generation Battery Electric Fuel Cell Commercial VehicleVacheron Constantin',
    )
    project_capacity = dict.fromkeys(project_names, seats_per_project)

    survey, ranked_columns = read_excel_and_prepare_choices(input_filename)
    assignment_table = greedy_allocation(survey, ranked_columns, project_capacity)

    # Persist the assignment table without the positional index column.
    assignment_table.to_excel(output_filename, index=False)
    print(f"Assignment results have been saved to '{output_filename}'.")

# Run the greedy allocation when this cell (or an exported script) is executed directly.
if __name__ == '__main__':
    main()


Assignment results have been saved to 'greedy_assignment_results.xlsx'.


## Gale-Shapley Algorithm

In [4]:
import pandas as pd
import numpy as np
import random

def standardize_project_name(project):
    """Normalize a project label so its leading '<prefix>.' is followed by one space.

    Args:
        project: Raw cell value. Anything that is not a ``str`` (e.g. NaN or
            None from an empty cell) is treated as "no choice".

    Returns:
        The stripped label with exactly one space after the prefix dot
        (``'1.Foo'`` -> ``'1. Foo'``, ``'10.  Foo'`` -> ``'10. Foo'``), the
        stripped label unchanged when it has no such prefix, or ``None`` for
        non-string input.
    """
    if not isinstance(project, str):
        return None

    project = project.strip()
    if len(project) > 1 and project[1] == '.':
        # One-character prefix such as '1.'; the length check avoids an
        # IndexError on single-character labels.
        split_at = 2
    else:
        # Multi-digit numeric prefix such as '10.'; other labels are left as-is.
        number, dot, _ = project.partition('.')
        split_at = len(number) + 1 if dot and number.isdigit() else 0

    if split_at:
        project = project[:split_at] + ' ' + project[split_at:].strip()
    return project

def read_excel_and_prepare_preferences(filename, seed=None):
    """Build student and (randomized) project preference lists from the form.

    Args:
        filename: Path of the Excel workbook to read.
        seed: Optional seed for the random project-side rankings. When given,
            a private ``random.Random(seed)`` is used so the rankings are
            reproducible; when ``None`` the module-level ``random`` generator
            is used.

    Returns:
        A ``(student_preferences, project_preferences)`` tuple.
        ``student_preferences`` maps a student identifier (the 'Student ID'
        value, or the 1-based row number when that column is absent) to the
        list of standardized project names in preference order, empty choices
        omitted. ``project_preferences`` maps every project named by at least
        one student to a randomly ordered list of all student identifiers.
    """
    df = pd.read_excel(filename)

    choice_columns = [
        'Project (First Choice)',
        'Project (Second Choice)',
        'Project (Third Choice)',
        'Project (Fourth Choice)',
        'Project (Fifth Choice)'
    ]

    student_preferences = {}
    for idx, row in df.iterrows():
        student_id = row['Student ID'] if 'Student ID' in row else idx + 1
        ranked = (standardize_project_name(row[col]) for col in choice_columns)
        student_preferences[student_id] = [project for project in ranked if project]

    # Sort the project names: iterating a set of strings has no stable order
    # between interpreter sessions, which would otherwise make the shuffles
    # below land on different projects even with a fixed seed.
    all_projects = sorted({
        project
        for prefs in student_preferences.values()
        for project in prefs
    })

    # Projects have no real ranking of students here, so a random one is used
    # (replace this with a real criterion if one becomes available).
    rng = random if seed is None else random.Random(seed)
    student_ids = list(student_preferences)
    project_preferences = {}
    for project in all_projects:
        ranking = list(student_ids)
        rng.shuffle(ranking)
        project_preferences[project] = ranking

    return student_preferences, project_preferences

def gale_shapley(student_prefs, project_prefs, project_capacity):
    """Student-proposing deferred acceptance with per-project capacities.

    Args:
        student_prefs: Mapping student -> list of projects, most preferred first.
        project_prefs: Mapping project -> list of students, most preferred first.
            Students missing from a project's list are treated as its least
            preferred candidates.
        project_capacity: Mapping project -> number of slots; projects missing
            from the mapping get a capacity of 1.

    Returns:
        Mapping student -> assigned project. Students who were rejected by (or
        displaced from) every project on their list do not appear in it.
    """
    from collections import deque

    unranked = float('inf')

    # Rank lookup per project (first occurrence wins, like list.index) so that
    # comparisons do not rescan the preference list every time.
    project_rank = {}
    for project, ranking in project_prefs.items():
        ranks = {}
        for position, student in enumerate(ranking):
            ranks.setdefault(student, position)
        project_rank[project] = ranks

    free_students = deque(student_prefs)
    student_assignments = {}
    project_assignments = {project: [] for project in project_prefs}

    # Index of the next project each student will propose to.
    student_next_proposal = dict.fromkeys(student_prefs, 0)

    while free_students:
        student = free_students.popleft()
        prefs = student_prefs[student]
        if student_next_proposal[student] >= len(prefs):
            continue  # The student has exhausted their list and stays unassigned
        project = prefs[student_next_proposal[student]]
        student_next_proposal[student] += 1

        current_assigned = project_assignments.setdefault(project, [])
        ranks = project_rank.get(project, {})
        capacity = project_capacity.get(project, 1)

        if len(current_assigned) < capacity:
            # Free slot: accept the proposal tentatively.
            current_assigned.append(student)
            student_assignments[student] = project
            continue

        # Project is full: the proposer may only displace the *least* preferred
        # student currently held, and only if strictly preferred over them.
        worst_student = None
        if current_assigned:
            worst_student = max(current_assigned, key=lambda s: ranks.get(s, unranked))

        if current_assigned and ranks.get(student, unranked) < ranks.get(worst_student, unranked):
            current_assigned.remove(worst_student)
            current_assigned.append(student)
            student_assignments[student] = project
            # The displaced student is no longer matched; drop the stale entry
            # so they are not reported as assigned if they never find a place.
            student_assignments.pop(worst_student, None)
            free_students.append(worst_student)
        else:
            # Rejected: the student will try their next preference later.
            free_students.append(student)

    return student_assignments

def main():
    """Run the Gale-Shapley matching on the selection form and export the result.

    Reads the questionnaire workbook, matches students to projects with
    ``gale_shapley`` and writes one row per student to an Excel file. Students
    who end up without a project are listed as 'Not Assigned' instead of being
    left out of the report.
    """
    input_filename = 'Dataset.questionnaire.2023-2024.MGT 555 - Project Selection Form.xlsx'
    output_filename = 'stable_matching_results.xlsx'

    # Define the capacity of each project
    project_capacity = {
        '1. Boschung Mecatronic AG - FAST IOT': 5,
        '2. Boschung Mecatronic AG - Pavement Sensor loT': 5,
        '3. Datwyler- Electrically conductive rubber to be used as strain sensor': 5,
        '4. Datwyler-Energy Harvesting Using Nanogenerator-Elastomers Montblanc  Exercise Detection': 5,
        '5. Logitech-Autonomous Mouse': 5,
        '6. Logitech-Eco-Design Plastic': 5,
        '7. Schindler-Alternative Mobility': 5,
        '8. Schindler-Living Reusing Heat fo Greenhouse and Building Heating': 5,
        '9. ZF-Group-Sustainable Grid Management Infrastructure Solutions for eTrucks eBuses and eTrailers': 5,
        '10. ZF-Group-Next Generation Battery Electric Fuel Cell Commercial VehicleVacheron Constantin': 5
    }

    student_prefs, project_prefs = read_excel_and_prepare_preferences(input_filename)
    assignments = gale_shapley(student_prefs, project_prefs, project_capacity)

    # Build one result row per student (iterating over all students, not just
    # the matched ones, so unassigned students remain visible in the output).
    results = []
    for student, prefs in student_prefs.items():
        project = assignments.get(student)
        if project is None:
            assigned_project = 'Not Assigned'
            preference_rank = None
        else:
            assigned_project = project
            preference_rank = prefs.index(project) + 1 if project in prefs else None
        results.append({
            'Student ID': student,
            'Assigned Project': assigned_project,
            'Preference Rank': preference_rank
        })
    results_df = pd.DataFrame(results)

    # Save results
    results_df.to_excel(output_filename, index=False)
    print(f"Assignment results have been saved to '{output_filename}'.")

# Run the Gale-Shapley matching when this cell (or an exported script) is executed directly.
if __name__ == '__main__':
    main()


Assignment results have been saved to 'stable_matching_results.xlsx'.


## Score-Based Allocation

In [5]:
import pandas as pd
import numpy as np
from pulp import LpMaximize, LpProblem, LpVariable, lpSum, LpStatus

def standardize_project_name(project):
    """Normalize a project label so its leading '<prefix>.' is followed by one space.

    Args:
        project: Raw cell value. Anything that is not a ``str`` (e.g. NaN or
            None from an empty cell) is treated as "no choice".

    Returns:
        The stripped label with exactly one space after the prefix dot
        (``'1.Foo'`` -> ``'1. Foo'``, ``'10.  Foo'`` -> ``'10. Foo'``), the
        stripped label unchanged when it has no such prefix, or ``None`` for
        non-string input.
    """
    if not isinstance(project, str):
        return None

    project = project.strip()
    if len(project) > 1 and project[1] == '.':
        # One-character prefix such as '1.'; the length check avoids an
        # IndexError on single-character labels.
        split_at = 2
    else:
        # Multi-digit numeric prefix such as '10.'; other labels are left as-is.
        number, dot, _ = project.partition('.')
        split_at = len(number) + 1 if dot and number.isdigit() else 0

    if split_at:
        project = project[:split_at] + ' ' + project[split_at:].strip()
    return project

def read_excel_and_prepare_scores(filename):
    """Load the selection form and build the student x project score matrix.

    Args:
        filename: Path of the Excel workbook to read.

    Returns:
        A ``(students, projects, scores)`` tuple: the list of row index labels,
        the sorted list of distinct non-empty standardized project names found
        in the choice columns, and a DataFrame (index = students,
        columns = projects) holding the preference score of each pair. The
        first choice scores ``len(choice_columns)`` points, each following
        choice one point less, and projects a student did not list score 0.
    """
    df = pd.read_excel(filename)

    students = df.index.tolist()
    choice_columns = [
        'Project (First Choice)',
        'Project (Second Choice)',
        'Project (Third Choice)',
        'Project (Fourth Choice)',
        'Project (Fifth Choice)'
    ]

    projects = set()
    for col in choice_columns:
        df[col] = df[col].apply(standardize_project_name)
        # Ignore empty strings so a blank cell never becomes a "project".
        projects.update(name for name in df[col].dropna().unique() if name)
    # Sorted for a stable column order between runs (set order is not stable).
    projects = sorted(projects)

    top_score = len(choice_columns)
    scores = pd.DataFrame(0, index=students, columns=projects)
    for idx, row in df.iterrows():
        for rank, col in enumerate(choice_columns):
            project = row[col]
            # Only real names count: a missing value may surface as None or NaN.
            if not (isinstance(project, str) and project):
                continue
            # Keep the best score if a student lists the same project twice,
            # instead of letting the lower-ranked duplicate overwrite it.
            scores.at[idx, project] = max(scores.at[idx, project], top_score - rank)

    return students, projects, scores

def optimize_allocation(students, projects, scores, project_capacity):
    """Solve the assignment as a binary program maximizing total preference score.

    Args:
        students: Iterable of student identifiers (index labels of ``scores``).
        projects: Iterable of project names (column labels of ``scores``).
        scores: DataFrame of preference scores, indexed by student and project.
        project_capacity: Mapping project -> maximum number of students;
            projects missing from the mapping get a capacity of 1.

    Returns:
        Mapping student -> assigned project, or ``None`` for students the
        solution leaves without a project.
    """
    prob = LpProblem("Project_Allocation", LpMaximize)

    # x[(s, p)] is 1 when student s is assigned to project p.
    x = LpVariable.dicts("assign", [(s, p) for s in students for p in projects], cat='Binary')

    # Objective function: maximize total score
    prob += lpSum([scores.at[s, p] * x[(s, p)] for s in students for p in projects])

    # Each student can only be assigned to one project
    for s in students:
        prob += lpSum([x[(s, p)] for p in projects]) <= 1

    # Project capacity constraints
    for p in projects:
        prob += lpSum([x[(s, p)] for s in students]) <= project_capacity.get(p, 1)

    prob.solve()

    # Read the solution back. Solver values are floats and may be only
    # approximately integral (or None if the variable received no value), so
    # test against a 0.5 threshold rather than exact equality with 1.
    allocation = {}
    for s in students:
        allocation[s] = next(
            (p for p in projects if (x[(s, p)].varValue or 0) > 0.5),
            None,  # Not assigned
        )

    return allocation

def main():
    """Run the score-based (integer programming) allocation and export the result.

    Reads the questionnaire workbook, optimizes the assignment with
    ``optimize_allocation`` and writes one row per student to an Excel file.
    """
    input_filename = 'Dataset.questionnaire.2023-2024.MGT 555 - Project Selection Form.xlsx'
    output_filename = 'score_based_allocation_results.xlsx'

    # Define the capacity of each project
    project_capacity = {
        '1. Boschung Mecatronic AG - FAST IOT': 5,
        '2. Boschung Mecatronic AG - Pavement Sensor loT': 5,
        '3. Datwyler- Electrically conductive rubber to be used as strain sensor': 5,
        '4. Datwyler-Energy Harvesting Using Nanogenerator-Elastomers Montblanc  Exercise Detection': 5,
        '5. Logitech-Autonomous Mouse': 5,
        '6. Logitech-Eco-Design Plastic': 5,
        '7. Schindler-Alternative Mobility': 5,
        '8. Schindler-Living Reusing Heat fo Greenhouse and Building Heating': 5,
        '9. ZF-Group-Sustainable Grid Management Infrastructure Solutions for eTrucks eBuses and eTrailers': 5,
        '10. ZF-Group-Next Generation Battery Electric Fuel Cell Commercial VehicleVacheron Constantin': 5
    }

    students, projects, scores = read_excel_and_prepare_scores(input_filename)
    allocation = optimize_allocation(students, projects, scores, project_capacity)

    choice_columns = [
        'Project (First Choice)',
        'Project (Second Choice)',
        'Project (Third Choice)',
        'Project (Fourth Choice)',
        'Project (Fifth Choice)'
    ]

    # Re-read the form to recover the per-student identity columns.
    df = pd.read_excel(input_filename)
    results = []
    for idx in students:
        student_info = df.iloc[idx]
        assigned_project = allocation.get(idx)
        preference_rank = None
        if assigned_project:
            for rank, col in enumerate(choice_columns):
                # The allocation uses standardized names, so the raw cell must
                # be standardized too before comparing; otherwise a choice
                # written as e.g. '1.Foo' would never match '1. Foo'.
                if standardize_project_name(student_info[col]) == assigned_project:
                    preference_rank = rank + 1
                    break
        results.append({
            'Student ID': student_info.get('Student ID', idx + 1),
            'Student Name': student_info.get('Name', f'Student {idx + 1}'),
            'Assigned Project': assigned_project if assigned_project else 'Not Assigned',
            'Preference Rank': preference_rank
        })
    results_df = pd.DataFrame(results)

    # Save results
    results_df.to_excel(output_filename, index=False)
    print(f"Assignment results have been saved to '{output_filename}'.")

# Run the score-based allocation when this cell (or an exported script) is executed directly.
if __name__ == '__main__':
    main()


Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /opt/anaconda3/lib/python3.11/site-packages/pulp/solverdir/cbc/osx/64/cbc /var/folders/g5/t_l131xj32z51nwyqkjtlbth0000gp/T/58bd63ea828840a69f8c6968c41f6df8-pulp.mps -max -timeMode elapsed -branch -printingOptions all -solution /var/folders/g5/t_l131xj32z51nwyqkjtlbth0000gp/T/58bd63ea828840a69f8c6968c41f6df8-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 62 COLUMNS
At line 2178 RHS
At line 2236 BOUNDS
At line 2707 ENDATA
Problem MODEL has 57 rows, 470 columns and 940 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 214 - 0.00 seconds
Cgl0008I 47 inequality constraints converted to equality constraints
Cgl0005I 47 SOS with 517 members
Cgl0004I processed model has 57 rows, 517 columns (517 integer (517 of which binary)) and 987 elements
Cutoff increment increased from 1e-05 to 0.9999
Cbc0038I 