In [23]:
import random
import pandas as pd
import ast

# Parameters
num_students = 150
RANDOM_SEED = 42  # fixed seed so the randomly generated data is reproducible on re-run
random.seed(RANDOM_SEED)

# Import supervisors data and process topics as lists.
# A forward slash is used because it resolves on Windows, Linux and macOS alike,
# whereas a backslash is only a path separator on Windows.
supervisors_df = pd.read_csv('data/supervisors_list.csv')

# Update supervisors_df to ensure it has the required columns
if 'supervisor_id' not in supervisors_df.columns:
    supervisors_df['supervisor_id'] = range(1, len(supervisors_df) + 1)


def convert_to_list(x):
    """Parse one expertise-area cell into a list of topics.

    Parameters
    ----------
    x : object
        Raw cell value. Expected to be the text of a Python literal
        (e.g. "['IoT', 'Databases']"); may also be NaN/None or an
        already-parsed list-like.

    Returns
    -------
    list
        The parsed topics. An empty list is returned for missing cells,
        for text that is not a valid Python literal, and for literals that
        are neither list-like nor a string.
    """
    if isinstance(x, (list, tuple, set)):
        return list(x)
    if not isinstance(x, str):
        # NaN, None or a numeric cell: there is nothing to parse.
        return []
    try:
        parsed = ast.literal_eval(x)
    except (ValueError, SyntaxError, TypeError):
        # Not a valid Python literal; treat the cell as having no topics.
        return []
    if isinstance(parsed, (list, tuple, set)):
        return list(parsed)
    if isinstance(parsed, str):
        # A single quoted topic; wrap it so list concatenation below works.
        return [parsed]
    return []


# Combine expertise areas into topics list
if 'topics' not in supervisors_df.columns:
    # Element-wise list concatenation of the three parsed columns.
    combined_topics = (
        supervisors_df['Expertise Area 1'].apply(convert_to_list) +
        supervisors_df['Expertise Area 2'].apply(convert_to_list) +
        supervisors_df['Expertise Area 3'].apply(convert_to_list)
    )
    # dict.fromkeys removes duplicates while keeping first-seen order; a set
    # would reorder topics differently between interpreter runs and defeat
    # the fixed random seed.
    supervisors_df['topics'] = combined_topics.apply(
        lambda topics: list(dict.fromkeys(topics))
    )

if 'capacity' not in supervisors_df.columns:
    # No capacity supplied: draw a random capacity between 3 and 10 (inclusive).
    supervisors_df['capacity'] = [random.randint(3, 10) for _ in range(len(supervisors_df))]

# Build the synthetic student roster in one step: sequential ids starting at 1
# and a matching placeholder name for each id.
students_df = pd.DataFrame({
    'student_id': range(1, num_students + 1),
    'student_name': [f'Student {i}' for i in range(1, num_students + 1)],
})

# Select random topics from supervisors' topics lists
def get_random_topics(supervisor_topics, k):
    """Draw up to ``k`` distinct topics at random from ``supervisor_topics``.

    Returns an empty list when ``supervisor_topics`` is empty or None.
    When ``k`` exceeds the number of available topics, every topic is
    returned (in random order).
    """
    if supervisor_topics:
        # Cap the sample size so random.sample never asks for more than exists.
        sample_size = min(k, len(supervisor_topics))
        return random.sample(supervisor_topics, sample_size)
    return []

PROGRAMMES = ['BCS / BSE / BIT', 'BSDA', 'BCNS']
MAX_TOPICS_PER_STUDENT = 3

# Topic lists of all supervisors, taken positionally. Choosing from this list
# avoids looking a supervisor up by a random id, which raised IndexError
# whenever the ids in the data were not exactly 1..N.
_supervisor_topic_lists = supervisors_df['topics'].tolist()


def _random_topic_preferences():
    """Sample 1 to MAX_TOPICS_PER_STUDENT topics from one randomly chosen supervisor."""
    return get_random_topics(
        random.choice(_supervisor_topic_lists),
        random.randint(1, MAX_TOPICS_PER_STUDENT),
    )


# Topics each student would like to work on.
students_df['positive_topics'] = pd.Series(
    [_random_topic_preferences() for _ in range(len(students_df))],
    index=students_df.index,
)

# Topics each student wants to avoid. Anything the student already listed as
# a positive topic is dropped so the two preference lists never contradict
# each other (the negative list may therefore be empty).
students_df['negative_topics'] = pd.Series(
    [
        [topic for topic in _random_topic_preferences() if topic not in liked_topics]
        for liked_topics in students_df['positive_topics']
    ],
    index=students_df.index,
)

# Each student is enrolled in one programme chosen uniformly at random.
students_df['programme'] = pd.Series(
    [random.choice(PROGRAMMES) for _ in range(len(students_df))],
    index=students_df.index,
)

# Preview the first rows of both generated datasets, each under its own heading.
print("Students Dataset:", students_df.head(), sep="\n")
print("\nSupervisors Dataset:", supervisors_df.head(), sep="\n")

Students Dataset:
   student_id student_name                                    positive_topics  \
0           1    Student 1                      [Signal and Image Processing]   
1           2    Student 2                    [IoT, Databases, Deep Learning]   
2           3    Student 3                           [open on data analytics]   
3           4    Student 4  [Network architectures and protocols, High-spe...   
4           5    Student 5                   [Deep Learning/Machine Learning]   

                                     negative_topics        programme  
0                                 [Image Processing]             BSDA  
1                                 [Machine Learning]             BCNS  
2  [Operational optimisation for sustainability, ...             BSDA  
3                                         [Robotics]             BCNS  
4                                       [GenAI, IoT]  BCS / BSE / BIT  

Supervisors Dataset:
                                      Nam

In [24]:
from pulp import LpBinary, LpMaximize, LpProblem, LpStatus, LpVariable, lpSum

# Column names holding each supervisor's programme preferences.
FIRST_CHOICE_COL = 'Preferred Programme for Supervision (1st Choice)'
SECOND_CHOICE_COL = 'Preferred Programme for Supervision (2nd Choice)'

# Objective weights. Programme fit is weighted higher than topic fit so that
# programme preferences are prioritised.
PROGRAMME_WEIGHTS = {
    "First Choice": 10,
    "Second Choice": 5,
    "No Preference": 3,
    "No Match": 0,
}
POSITIVE_TOPIC_WEIGHT = 2
NEGATIVE_TOPIC_WEIGHT = 1
DEFAULT_CAPACITY = 5  # used only when a supervisor row has no 'capacity' value


def _programme_match_type(supervisor, programme):
    """Classify how a student's programme fits a supervisor's stated preferences.

    Returns one of "First Choice", "Second Choice", "No Preference" or
    "No Match", checked in that order.
    """
    first_choice = supervisor[FIRST_CHOICE_COL]
    second_choice = supervisor[SECOND_CHOICE_COL]
    if first_choice == programme:
        return "First Choice"
    if second_choice == programme:
        return "Second Choice"
    # Empty CSV cells are float NaN; a substring test on them would raise
    # TypeError, so only string values are inspected.
    if any(
        isinstance(choice, str) and "No Preference" in choice
        for choice in (first_choice, second_choice)
    ):
        return "No Preference"
    return "No Match"


def _topic_set(topics):
    """Normalise a supervisor's topics into a set for exact membership tests.

    List-like values are used as-is; a plain string is treated as
    comma-separated text; anything else yields an empty set.
    """
    if isinstance(topics, str):
        return {part.strip() for part in topics.split(',') if part.strip()}
    if isinstance(topics, (list, tuple, set, frozenset)):
        return set(topics)
    return set()


# Materialise the rows once instead of re-running iterrows() in every loop.
student_rows = [row for _, row in students_df.iterrows()]
# Each supervisor row is paired with its normalised topic set. Comparing
# against the real topic values (rather than a stringified list, whose pieces
# still carry brackets and quotes) is what lets topics actually match.
supervisor_rows = [
    (row, _topic_set(row['topics'])) for _, row in supervisors_df.iterrows()
]

# Create the optimization problem
problem = LpProblem("Optimal_Matching", LpMaximize)

# Create one binary decision variable per student-supervisor pair and record
# the pair's match details so the objective and the report agree.
decision_vars = {}
pair_details = {}
for student in student_rows:
    programme = student.get('programme', '')
    for supervisor, topics in supervisor_rows:
        pair = (student['student_id'], supervisor['supervisor_id'])
        match_type = _programme_match_type(supervisor, programme)
        matching = [t for t in student['positive_topics'] if t in topics]
        conflicting = [t for t in student['negative_topics'] if t in topics]
        pair_details[pair] = {
            'programme_match': match_type,
            'matching_topics': matching,
            'conflicting_topics': conflicting,
            'weight': (
                PROGRAMME_WEIGHTS[match_type]
                + POSITIVE_TOPIC_WEIGHT * len(matching)
                - NEGATIVE_TOPIC_WEIGHT * len(conflicting)
            ),
        }
        decision_vars[pair] = LpVariable(
            f"x_{student['student_id']}_{supervisor['supervisor_id']}", 0, 1, LpBinary
        )

# Objective: maximise the total weighted fit of all chosen pairs.
problem += lpSum(
    decision_vars[pair] * details['weight']
    for pair, details in pair_details.items()
)

# Constraint: Each student is assigned to exactly one supervisor
for student in student_rows:
    problem += lpSum(
        decision_vars[(student['student_id'], supervisor['supervisor_id'])]
        for supervisor, _ in supervisor_rows
    ) == 1

# Constraint: Each supervisor does not exceed their capacity
for supervisor, _ in supervisor_rows:
    capacity = supervisor.get('capacity', DEFAULT_CAPACITY)
    problem += lpSum(
        decision_vars[(student['student_id'], supervisor['supervisor_id'])]
        for student in student_rows
    ) <= capacity

# Solve the problem and refuse to report anything but an optimal solution;
# variable values from an infeasible or unsolved model are meaningless.
problem.solve()
solve_status = LpStatus[problem.status]
if solve_status != "Optimal":
    raise RuntimeError(
        f"Matching could not be solved to optimality (solver status: {solve_status}). "
        "Check that total supervisor capacity is at least the number of students."
    )

# Extract and display results with detailed matching information
assignments = []
for student in student_rows:
    for supervisor, _ in supervisor_rows:
        pair = (student['student_id'], supervisor['supervisor_id'])
        chosen = decision_vars[pair].value()
        # Solvers return floats; use a threshold instead of an exact == 1 test.
        if chosen is None or chosen < 0.5:
            continue
        details = pair_details[pair]
        assignments.append({
            'student_id': student['student_id'],
            'student_name': student['student_name'],
            'supervisor_id': supervisor['supervisor_id'],
            'supervisor_name': supervisor['Name'],
            'programme_match': details['programme_match'],
            'matching_topics': details['matching_topics'],
            'conflicting_topics': details['conflicting_topics'],
            # Unweighted topic balance: matches minus conflicts.
            'match_score': len(details['matching_topics']) - len(details['conflicting_topics'])
        })

# Convert assignments to DataFrame for better display
assignments_df = pd.DataFrame(assignments)
print("\nOptimal Assignments:")
print(assignments_df)

# Calculate and display statistics
print("\nAssignment Statistics:")
print(f"Total assignments: {len(assignments)}")
print("\nProgramme Matching Distribution:")
print(assignments_df['programme_match'].value_counts())
print(f"\nAverage matching topics: {assignments_df['matching_topics'].apply(len).mean():.2f}")
print(f"Average conflicting topics: {assignments_df['conflicting_topics'].apply(len).mean():.2f}")


Optimal Assignments:
     student_id student_name  supervisor_id                 supervisor_name  \
0             1    Student 1              3  Assoc. Prof. Dr Azam Che Idris   
1             2    Student 2             42                   Pyi Phyo Aung   
2             3    Student 3             13      Dr Samuel Mofoluwa Ajibade   
3             4    Student 4             25      Dr Ahmad Sahban Rafsanjani   
4             5    Student 5              8          Dr Faris Syahmi Samidi   
..          ...          ...            ...                             ...   
145         146  Student 146             24              Dr Aaliya Sarfaraz   
146         147  Student 147             37                       Foo Jinny   
147         148  Student 148             42                   Pyi Phyo Aung   
148         149  Student 149             33            Dr Melody Tan Shi Ai   
149         150  Student 150             18               Prof. Chua Hui Na   

    programme_match matching_