In [1]:
!pip install gurobipy

Collecting gurobipy
  Downloading gurobipy-12.0.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (16 kB)
Downloading gurobipy-12.0.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (14.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.5/14.5 MB[0m [31m35.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: gurobipy
Successfully installed gurobipy-12.0.2


In [26]:
# Load the person and task data
person_data = pd.read_excel('/content/Employee_table.xlsx', sheet_name="person")
df1 = pd.DataFrame(person_data)

task_data = pd.read_excel('/content/Employee_table.xlsx', sheet_name="tasks")
df2 = pd.DataFrame(task_data)

In [27]:
# df1 = df1.rename(columns={
#     'resource': 'name',
#     'skill': 'skills',
#     'efficiency': 'efficiency'
# })
# df2 = df2.rename(columns={
#     'Subtask/Description': 'task_name',
#     'Skill': 'required_skills',
#     'Effort (in hours)': 'duration'
# })
# print(df1.columns)
# print(df2.columns)

In [28]:
import time
import pandas as pd
import gurobipy as gp
from gurobipy import GRB
from datetime import datetime, timedelta

start_time = time.time()

# Create skill_to_employee dictionary with efficiency
skill_to_employee = {}
employee_efficiency = {}
for _, row in df1.iterrows():
    employee = row['name']
    skill = row['skills']
    efficiency = row['efficiency']
    if skill not in skill_to_employee:
        skill_to_employee[skill] = []
    skill_to_employee[skill].append(employee)

    # Store efficiency for each employee
    if employee not in employee_efficiency:
        employee_efficiency[employee] = efficiency

# Calculate target hours (adjusted for efficiency)
total_task_duration = df2['duration'].sum()
total_efficiency = sum(employee_efficiency.values())
target_hours = total_task_duration / total_efficiency  # Ideal workload per efficiency unit

# Initialize optimization model with Gurobi
model = gp.Model("Task_Assignment_Balanced")

# Decision variables
task_assignment = {}
for _, task_row in df2.iterrows():
    task_name = task_row['task_name']
    required_skill = task_row['required_skills']
    duration = task_row['duration']

    # Only consider employees with the required skill for each task
    if required_skill in skill_to_employee:
        for employee in skill_to_employee[required_skill]:
            task_assignment[(employee, task_name)] = model.addVar(
                vtype=GRB.BINARY,
                name=f"x_{employee}_{task_name}"
            )

# Deviation variables to balance workload
deviation_above = model.addVar(lb=0, name="Deviation_Above")
deviation_below = model.addVar(lb=0, name="Deviation_Below")

# Objective function: Minimize workload deviation
model.setObjective(deviation_above + deviation_below, GRB.MINIMIZE)

# Constraint 1: Each task must be assigned to exactly one employee with the matching skill
for _, task_row in df2.iterrows():
    task_name = task_row['task_name']
    required_skill = task_row['required_skills']

    # Only proceed if the skill has matching employees
    if required_skill in skill_to_employee:
        model.addConstr(
            gp.quicksum(task_assignment[(employee, task_name)]
                        for employee in skill_to_employee[required_skill]) == 1,
            name=f"Task_Assignment_{task_name}"
        )
    else:
        print(f"Task {task_name} requires skill {required_skill} but no employee has it.")

# Constraint 2: Ensure each employee's workload stays within target hours and minimize deviations
for employee in df1['name'].unique():
    # Get all tasks that can be assigned to this employee
    employee_tasks = [(employee, task_name) for task_name in df2['task_name']
                     if (employee, task_name) in task_assignment]

    if employee_tasks:
        # Total hours for each employee, adjusted for efficiency
        employee_total_hours = gp.quicksum(
            task_assignment[(employee, task_name)] * df2.loc[df2['task_name'] == task_name, 'duration'].values[0]
            for employee, task_name in employee_tasks
        )

        # Adjust target hours based on employee's efficiency
        employee_target = target_hours * employee_efficiency[employee]

        # Ensure workload is close to target with allowed deviation
        model.addConstr(
            employee_total_hours <= employee_target + deviation_above,
            name=f"Max_Hours_{employee}"
        )
        model.addConstr(
            employee_total_hours >= employee_target - deviation_below,
            name=f"Min_Hours_{employee}"
        )

print(f"Number of variables in the problem: {model.NumVars}")

# Solve the optimization problem
model.optimize()

# Extract and format the results
output_data = []
start_date = datetime.strptime("19-05-2025", "%d-%m-%Y")  # Start date for Day-1

for employee in df1['name'].unique():
    assigned_tasks = []
    required_skills = []
    total_hours = 0
    daily_distribution = {}
    current_hour = 1
    daily_limit = 8  # Maximum hours per day

    for _, task_row in df2.iterrows():
        task_name = task_row['task_name']
        duration = task_row['duration']
        required_skill = task_row['required_skills']

        # Check if task is assigned to the employee
        if (employee, task_name) in task_assignment and task_assignment[(employee, task_name)].X > 0.5:
            assigned_tasks.append(task_name)
            required_skills.append(required_skill)
            total_hours += duration

            # Assign tasks day-by-day with carry-over logic based on 8-hour workday
            hours_remaining = duration
            while hours_remaining > 0:
                day = (current_hour - 1) // daily_limit + 1
                if day not in daily_distribution:
                    daily_distribution[day] = []

                # Calculate available hours left in the current day
                hours_in_day = daily_limit - ((current_hour - 1) % daily_limit)

                if hours_remaining <= hours_in_day:
                    # Task can be completed within the current day's hours
                    daily_distribution[day].append(f"{task_name} ({hours_remaining} hours)")
                    current_hour += hours_remaining
                    hours_remaining = 0
                else:
                    # Part of the task will be completed today, remaining will go to next day
                    daily_distribution[day].append(f"{task_name} ({hours_in_day} hours)")
                    hours_remaining -= hours_in_day
                    current_hour += hours_in_day

    # Calculate days and remaining hours
    days = total_hours // daily_limit
    remaining_hours = total_hours % daily_limit

    # Prepare day-by-day utilization columns with actual dates
    max_day = days + (1 if remaining_hours > 0 else 0)
    day_utilization = {}
    for day in range(1, max_day + 1):
        current_date = start_date + timedelta(days=day - 1)
        day_utilization[f'{current_date.strftime("%d-%m-%Y")}'] = ', '.join(daily_distribution.get(day, ['available']))

    # If max days < total days available, fill the remaining days as 'available'
    for day in range(max_day + 1, 6):  # Assuming 5-day workweek
        current_date = start_date + timedelta(days=day - 1)
        day_utilization[f'{current_date.strftime("%d-%m-%Y")}'] = 'available'

    # Add to output if tasks were assigned
    if assigned_tasks:
        output_data.append({
            'Employee': employee,
            'Assigned_Tasks': ', '.join(assigned_tasks),
            'Required_Skills': ', '.join(required_skills),
            'Total_Hours': total_hours,
            'Days': f"{days} days, {remaining_hours} hours",
            **day_utilization
        })

# Calculate total project duration
total_project_hours = max(d['Total_Hours'] for d in output_data) if output_data else 0

# Calculate contribution for each employee
for d in output_data:
    d['Person_Contribution (%)'] = (d['Total_Hours'] / total_project_hours) * 100 if total_project_hours > 0 else 0

# Capture unassigned tasks for NA employees
all_assigned_tasks = [t for d in output_data for t in d['Assigned_Tasks'].split(', ')]
unassigned_tasks = [task_row['task_name'] for _, task_row in df2.iterrows()
                    if task_row['task_name'] not in all_assigned_tasks]

for task in unassigned_tasks:
    output_data.append({
        'Employee': 'NA',
        'Assigned_Tasks': task,
        'Required_Skills': df2.loc[df2['task_name'] == task, 'required_skills'].values[0],
        'Total_Hours': df2.loc[df2['task_name'] == task, 'duration'].values[0],
        'Days': 'NA',
        'Person_Contribution (%)': 'NA',
        **{f'{(start_date + timedelta(days=day - 1)).strftime("%d-%m-%Y")}': 'NA' for day in range(1, 6)}
    })

# Display output
output_df = pd.DataFrame(output_data)
l = len(output_df['Employee'].dropna().unique()) - 1 if 'NA' in output_df['Employee'].values else len(output_df['Employee'].unique())
assigned_tasks = output_df.iloc[:l]

unassigned_tasks_df = output_df[output_df['Employee'] == 'NA'][['Assigned_Tasks', 'Required_Skills', 'Total_Hours']]

# Save to CSV files
assigned_tasks.to_csv("Gurobi_assigned_tasks1.csv", index=False)
unassigned_tasks_df.to_csv("Gurobi_unassigned_tasks1.csv", index=False)

end_time = time.time()
print(f"Execution Time: {end_time - start_time} seconds")


Task task2d requires skill nodejs but no employee has it.
Task task12b requires skill go but no employee has it.
Task task13a requires skill nodejs but no employee has it.
Task task13b requires skill go but no employee has it.
Task task15a requires skill go but no employee has it.
Task task16a requires skill go but no employee has it.
Task task17b requires skill go but no employee has it.
Task task18b requires skill go but no employee has it.
Number of variables in the problem: 0
Gurobi Optimizer version 12.0.2 build v12.0.2rc0 (linux64 - "Ubuntu 22.04.4 LTS")

CPU model: Intel(R) Xeon(R) CPU @ 2.20GHz, instruction set [SSE2|AVX|AVX2]
Thread count: 1 physical cores, 2 logical processors, using up to 2 threads

Optimize a model with 91 rows, 183 columns and 565 nonzeros
Model fingerprint: 0xaeae54f5
Variable types: 2 continuous, 181 integer (181 binary)
Coefficient statistics:
  Matrix range     [1e+00, 5e+00]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS ran

In [24]:
assigned_tasks

Unnamed: 0,Employee,Assigned_Tasks,Required_Skills,Total_Hours,Days,19-05-2025,20-05-2025,Person_Contribution (%)
0,Anjhu A,Modify Camunda workflow,Camunda,4,"0 days, 4 hours",Modify Camunda workflow (4 hours),available,23.529412
1,Inapakurthi Manikanta,Create test cases for new features,Testing,5,"0 days, 5 hours",Create test cases for new features (5 hours),available,29.411765
2,Logesh V,Parameter tuning,Python,2,"0 days, 2 hours",Parameter tuning (2 hours),available,11.764706
3,Sethuramalingam Nellainayagam,Define deployment steps in Camunda,Camunda,6,"0 days, 6 hours",Define deployment steps in Camunda (6 hours),available,35.294118
4,Amol Radheshyam Pardhi,"Implement document addition logic, Write requi...","Python, Python",8,"1 days, 0 hours","Implement document addition logic (5 hours), W...",available,47.058824
5,Arpit Lad,Design data structure,Python,3,"0 days, 3 hours",Design data structure (3 hours),available,17.647059
6,Karanam Sri Ram,Implement DB synchronization logic,SQL,5,"0 days, 5 hours",Implement DB synchronization logic (5 hours),available,29.411765
7,Manisha Baid,Set up backend for integration,SQL,3,"0 days, 3 hours",Set up backend for integration (3 hours),available,17.647059
8,Youvashri J.,Set up Dialogflow integration,Python,3,"0 days, 3 hours",Set up Dialogflow integration (3 hours),available,17.647059
9,Maddi Lalu Sainath,Redeploy updated model,Python,3,"0 days, 3 hours",Redeploy updated model (3 hours),available,17.647059
