In [1]:
!pip install pulp
!pip install optuna

Collecting pulp
  Downloading pulp-3.1.1-py3-none-any.whl.metadata (1.3 kB)
Downloading pulp-3.1.1-py3-none-any.whl (16.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.4/16.4 MB[0m [31m53.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pulp
Successfully installed pulp-3.1.1
Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.3.0-py3-none-any.whl (386 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.2-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.9/231.9 kB[0m [31m14.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.wh

In [2]:
import pandas as pd

# Load the person and task data with cost
person_data = pd.read_excel('/content/employee_task_data.xlsx', sheet_name="Employee_table")
df1 = pd.DataFrame(person_data)

task_data = pd.read_excel('/content/employee_task_data.xlsx', sheet_name="Task_table")
df2 = pd.DataFrame(task_data)
df2

Unnamed: 0,task_name,required_skills,duration,complexity
0,task1a,sql,4,2
1,task1b,aws,4,2
2,task1c,python,4,2
3,task1d,html,4,2
4,task2a,sql,4,2
...,...,...,...,...
72,task19a,docker,4,4
73,task19b,python,4,4
74,task19c,javascript,4,4
75,task19d,react,4,4


In [4]:
import pandas as pd
from pulp import LpProblem, LpMinimize, LpVariable, lpSum, LpBinary, value
import optuna

# Create skill to employee mapping and efficiency data
skill_to_employee = {}
employee_efficiency = {}
employee_performance = {}

for _, row in df1.iterrows():
    employee = row['name']
    skill = row['skills']
    efficiency = row['efficiency']
    performance = row['performance_rating']

    if skill not in skill_to_employee:
        skill_to_employee[skill] = []
    skill_to_employee[skill].append(employee)

    employee_efficiency[employee] = efficiency
    employee_performance[employee] = performance

# Calculate target hours based on efficiency
total_task_duration = df2['duration'].sum()
total_efficiency = sum(employee_efficiency.values())
target_hours = total_task_duration / total_efficiency

# Cost calculation function
def calculate_cost(duration, efficiency, complexity, performance, alpha, beta, gamma):
    cost_term_1 = alpha * (duration / efficiency)
    cost_term_2 = beta * complexity
    cost_term_3 = gamma * (complexity / performance)
    return cost_term_1 + cost_term_2 + cost_term_3

# Objective function for Optuna
def objective(trial):
    # Define hyperparameters to optimize
    lambda_ = trial.suggest_float("lambda_", 0.0, 1.0)
    alpha = trial.suggest_float("alpha", 0.0, 1.0)
    beta = trial.suggest_float("beta", 0.0, 1.0)
    # Ensure alpha + beta + gamma = 1
    alpha = trial.suggest_float("alpha", 0.0, 1.0)
    beta = trial.suggest_float("beta", 0.0, 1.0 - alpha)
    gamma = 1.0 - alpha - beta

    # Initialize the optimization problem
    prob = LpProblem("Task_Assignment_with_Cost", LpMinimize)

    # Decision Variables
    task_assignment = {}
    for _, task_row in df2.iterrows():
        task_name = task_row['task_name']
        required_skill = task_row['required_skills']

        if required_skill in skill_to_employee:
            for employee in skill_to_employee[required_skill]:
                var = LpVariable(f"x_{employee}_{task_name}", 0, 1, LpBinary)
                task_assignment[(employee, task_name)] = var

    # Deviation Variables
    deviation_above = LpVariable("Deviation_Above", lowBound=0)
    deviation_below = LpVariable("Deviation_Below", lowBound=0)

    # Objective Function
    total_cost = lpSum([
        calculate_cost(
        df2.loc[df2['task_name'] == task_name, 'duration'].values[0],
        employee_efficiency[employee],
        df2.loc[df2['task_name'] == task_name, 'complexity'].values[0],
        employee_performance[employee],
        alpha, beta, gamma
    ) * var
        for (employee, task_name), var in task_assignment.items()
    ])

    prob += lambda_ * (deviation_above + deviation_below) + (1 - lambda_) * total_cost, "Minimize_Objective"

    # Constraint 1: Each task must be assigned to exactly one employee
    for _, task_row in df2.iterrows():
        task_name = task_row['task_name']
        required_skill = task_row['required_skills']
        if required_skill in skill_to_employee:
            prob += lpSum([task_assignment[(employee, task_name)] for employee in skill_to_employee[required_skill]]) == 1

    # Constraint 2: Workload balance
    for employee in df1['name'].unique():
        employee_hours = lpSum([
            task_assignment[(employee, task_name)] * df2.loc[df2['task_name'] == task_name, 'duration'].values[0]
            for task_name in df2['task_name'] if (employee, task_name) in task_assignment
        ])
        employee_target = target_hours * employee_efficiency[employee]

        prob += employee_hours <= employee_target + deviation_above
        prob += employee_hours >= employee_target - deviation_below

    # Constraint 3: Ensure alpha + beta + gamma = 1


    # Solve the optimization problem
    prob.solve()

    # Return the objective value
    trial.set_user_attr("Deviation_Above", value(deviation_above))
    trial.set_user_attr("Deviation_Below", value(deviation_below))
    trial.set_user_attr("Gamma", gamma)
    return value(prob.objective)

# Execute Optuna optimization
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=100)

# Get best parameters
best_params = study.best_params
print("Optimized Hyperparameters:", best_params)

# Print top 10 hyperparameter sets
top_10_trials = sorted(study.trials, key=lambda x: x.value)[:10]
print("Top 10 Hyperparameter Sets:")
for i, trial in enumerate(top_10_trials, 1):
    print(f"Rank {i}: Params: {trial.params}, Gamma: {trial.user_attrs.get('Gamma', 'N/A')}, Objective Value: {trial.value}, Deviation Above: {trial.user_attrs.get('Deviation_Above', 'N/A')}, Deviation Below: {trial.user_attrs.get('Deviation_Below', 'N/A')}")
print("Best Objective Function Value:", study.best_value)

# Print best deviation values
best_trial = study.best_trial
print("Best Deviation Above:", best_trial.user_attrs.get("Deviation_Above", "Not Available"))
print("Best Deviation Below:", best_trial.user_attrs.get("Deviation_Below", "Not Available"))


[I 2025-05-18 21:41:52,023] A new study created in memory with name: no-name-d006abfb-9e94-4abc-8b3b-ca88da9dfb0f
[I 2025-05-18 21:41:52,287] Trial 0 finished with value: 142.30561834883025 and parameters: {'lambda_': 0.4957655042392731, 'alpha': 0.6091741462896835, 'beta': 0.8853232331402979}. Best is trial 0 with value: 142.30561834883025.
[I 2025-05-18 21:41:52,557] Trial 1 finished with value: 116.81045238605647 and parameters: {'lambda_': 0.5307977183784198, 'alpha': 0.5296059041092158, 'beta': 0.7121331822475727}. Best is trial 1 with value: 116.81045238605647.
[I 2025-05-18 21:41:52,823] Trial 2 finished with value: 117.37059084507325 and parameters: {'lambda_': 0.5472874483750139, 'alpha': 0.9289831779044012, 'beta': 0.24880677927966854}. Best is trial 1 with value: 116.81045238605647.
[I 2025-05-18 21:41:53,187] Trial 3 finished with value: 23.048525984785787 and parameters: {'lambda_': 0.9777751362725287, 'alpha': 0.9117727544689072, 'beta': 0.6657728653850203}. Best is trial

Optimized Hyperparameters: {'lambda_': 0.9997514263502957, 'alpha': 0.6686450451631214, 'beta': 0.9104255053121345}
Top 10 Hyperparameter Sets:
Rank 1: Params: {'lambda_': 0.9997514263502957, 'alpha': 0.6686450451631214, 'beta': 0.9104255053121345}, Gamma: -0.5790705504752559, Objective Value: 17.06509946760751, Deviation Above: 7.1818182, Deviation Below: 9.8181818
Rank 2: Params: {'lambda_': 0.9995581545990634, 'alpha': 0.5456144838916726, 'beta': 0.6844133490474062}, Gamma: -0.2300278329390788, Objective Value: 17.093632631136284, Deviation Above: 7.1818182, Deviation Below: 9.8181818
Rank 3: Params: {'lambda_': 0.9993318794037425, 'alpha': 0.4887287261956208, 'beta': 0.862084089442228}, Gamma: -0.35081281563784883, Objective Value: 17.150281080196727, Deviation Above: 7.1818182, Deviation Below: 9.8181818
Rank 4: Params: {'lambda_': 0.9991772946540108, 'alpha': 0.48438852248003605, 'beta': 0.8603571780416415}, Gamma: -0.3447457005216775, Objective Value: 17.18425938105586, Deviatio

# Using Grid search

In [7]:
import pandas as pd
from pulp import LpProblem, LpMinimize, LpVariable, lpSum, LpBinary, value
from itertools import product


# Create skill to employee mapping and efficiency data
skill_to_employee = {}
employee_efficiency = {}
employee_performance = {}

for _, row in df1.iterrows():
    employee = row['name']
    skill = row['skills']
    efficiency = row['efficiency']
    performance = row['performance_rating']

    if skill not in skill_to_employee:
        skill_to_employee[skill] = []
    skill_to_employee[skill].append(employee)

    employee_efficiency[employee] = efficiency
    employee_performance[employee] = performance

# Calculate target hours based on efficiency
total_task_duration = df2['duration'].sum()
total_efficiency = sum(employee_efficiency.values())
target_hours = total_task_duration / total_efficiency

# Define cost calculation function
def calculate_cost(complexity, performance):
    return complexity * (6 - performance)

# Define hyperparameter search space
lambda_range = [0.3, 0.5, 0.7]
alpha_range = [1/3, 0.3, 0.4]
beta_range = [1/3, 0.3, 0.4]
gamma_range = [1/3, 0.4, 0.2]


best_objective = float('inf')
best_params = None
best_results = {}

# Perform Grid Search
for lambda_, alpha, beta, gamma in product(lambda_range, alpha_range, beta_range, gamma_range):
    # Initialize the optimization problem
    prob = LpProblem("Task_Assignment_with_Cost", LpMinimize)

    # Decision Variables
    task_assignment = {}
    for _, task_row in df2.iterrows():
        task_name = task_row['task_name']
        required_skill = task_row['required_skills']

        if required_skill in skill_to_employee:
            for employee in skill_to_employee[required_skill]:
                var = LpVariable(f"x_{employee}_{task_name}", 0, 1, LpBinary)
                task_assignment[(employee, task_name)] = var

    # Deviation Variables
    deviation_above = LpVariable("Deviation_Above", lowBound=0)
    deviation_below = LpVariable("Deviation_Below", lowBound=0)

    # Objective Function
    total_cost = lpSum([
        calculate_cost(df2.loc[df2['task_name'] == task_name, 'complexity'].values[0],
                       employee_performance[employee]) * var
        for (employee, task_name), var in task_assignment.items()
    ])

    prob += lambda_ * (alpha * deviation_above + beta * deviation_below) + (1 - lambda_) * gamma * total_cost, "Minimize_Objective"

    # Constraint 1: Each task must be assigned to exactly one employee
    for _, task_row in df2.iterrows():
        task_name = task_row['task_name']
        required_skill = task_row['required_skills']
        if required_skill in skill_to_employee:
            prob += lpSum([task_assignment[(employee, task_name)] for employee in skill_to_employee[required_skill]]) == 1, f"Task_Assignment_{task_name}"

    # Constraint 2: Workload balance
    for employee in df1['name'].unique():
        employee_hours = lpSum([
            task_assignment[(employee, task_name)] * df2.loc[df2['task_name'] == task_name, 'duration'].values[0]
            for task_name in df2['task_name'] if (employee, task_name) in task_assignment
        ])
        employee_target = target_hours * employee_efficiency[employee]

        prob += employee_hours <= employee_target + deviation_above, f"Max_Hours_{employee}"
        prob += employee_hours >= employee_target - deviation_below, f"Min_Hours_{employee}"

    # Solve the optimization problem
    prob.solve()

    # Track the best solution
    if value(prob.objective) < best_objective:
        best_objective = value(prob.objective)
        best_params = (lambda_, alpha, beta, gamma)

        # Prepare the result dictionary in the original employee order
        result_dict = {emp: {
            "Assigned_Tasks": [],
            "Required_Skills": [],
            "Total_Duration": 0,
            "Efficiency": employee_efficiency[emp],
            "Performance_Rating": employee_performance[emp]
        } for emp in df1['name'].unique()}

        # Aggregate results
        for (employee, task_name), var in task_assignment.items():
            if var.varValue == 1:
                task_row = df2[df2['task_name'] == task_name].iloc[0]
                result_dict[employee]["Assigned_Tasks"].append(task_name)
                result_dict[employee]["Required_Skills"].append(task_row['required_skills'])
                result_dict[employee]["Total_Duration"] += task_row['duration']

        # Format the final results
        best_results = [
            {
                "Employee": emp,
                "Assigned_Tasks": ", ".join(data["Assigned_Tasks"]),
                "Required_Skills": ", ".join(set(data["Required_Skills"])),
                "Efficiency": data["Efficiency"],
                "Performance_Rating": data["Performance_Rating"],
                "Total_Duration": data["Total_Duration"]
            }
            for emp, data in result_dict.items()
        ]

# Save the best aggregated results to CSV
results_df = pd.DataFrame(best_results)
results_df.to_csv('optimized_assignment_with_cost.csv', index=False)

# Print the best hyperparameters and objective value
print(f"Optimized Hyperparameters: lambda={best_params[0]}, alpha={best_params[1]}, beta={best_params[2]}, gamma={best_params[3]}")
print(f"Best Objective Function Value: {best_objective}")
print("Aggregated results saved to 'optimized_assignment_results.csv'")

Optimized Hyperparameters: lambda=0.7, alpha=0.3, beta=0.3, gamma=0.2
Best Objective Function Value: 20.849999999999994
Aggregated results saved to 'optimized_assignment_results.csv'


In [6]:
# Print Objective Value and Deviations
print(f"Total Objective Value: {value(prob.objective)}")
print(f"Deviation Above Target Hours: {deviation_above.varValue}")
print(f"Deviation Below Target Hours: {deviation_below.varValue}")

Total Objective Value: 22.039999999999996
Deviation Above Target Hours: 7.1818182
Deviation Below Target Hours: 9.8181818
