# 0. The Obligatory Part

In [1]:
# Import library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from competency_assessment import CompetencyAssessment
from ortools.sat.python import cp_model

# 1. Define the Data Structure

In [2]:
# Run this if the data in Local/Repository
# Both paths are relative, so they resolve against the notebook's working directory.
new_employee_path = './mini_data/mini_data - employee.csv'
new_task_path = './mini_data/mini_data - task.csv'

## 1.1. Pre-Processing: Employee Data

In [3]:
# Read the employee skill matrix, indexed by employee_id.
# 'No' and 'Role' are bookkeeping columns, not skills; errors='ignore' tolerates
# files that do not contain them. Chaining (instead of inplace=True) keeps the
# cell idempotent when it is re-run.
employee_skills_df = (
    pd.read_csv(new_employee_path, index_col='employee_id')
    .drop(columns=['No', 'Role'], errors='ignore')
)

employees = employee_skills_df.index.tolist()
# employee_id is the index and the non-skill columns are already dropped, so every
# remaining column is a skill. Slicing with [1:] silently discarded the first
# skill ('math' in the displayed data).
skills_name = employee_skills_df.columns.tolist()

employee_skills_df

Unnamed: 0_level_0,math,python,sql,cloud,database,optimization
employee_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Talent 1,5,5,3,2,2,5
Talent 2,2,5,4,3,3,1
Talent 3,4,4,5,3,4,3
Talent 4,3,4,5,5,4,2
Talent 5,2,3,2,4,5,1


## 1.2. Pre-Processing: Task Data

In [4]:
# Read the task table, indexed by task_id
task_df = pd.read_csv(new_task_path, index_col='task_id')

tasks = task_df.index.tolist()
# Unique project ids in sorted order. A bare list(set(...)) of strings has no
# stable order across kernel restarts (string hashing is randomised per process),
# which makes anything iterating over company_names non-reproducible.
company_names = sorted(set(task_df['project_id']))
# Mapping task_id -> story points
story_points = task_df['story_points'].to_dict()

task_df

Unnamed: 0_level_0,project_id,story_points,math,python,sql,cloud,database,optimization
task_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
T1,P2,1,0,5,3,4,4,0
T2,P1,2,3,5,4,5,3,1
T3,P2,3,5,4,2,0,0,5
T4,P3,5,2,4,0,3,0,4
T5,P3,1,0,3,5,1,5,0
T6,P1,1,0,1,4,5,5,0
T7,P3,2,4,4,1,1,2,3
T8,P1,3,1,0,5,5,3,0
T9,P2,1,3,4,3,0,0,2
T10,P1,3,5,5,0,0,0,5


## 1.3. Group the task data by company/project

In [5]:
# Map every company/project id to the list of task ids that belong to it,
# with the keys inserted in sorted order
company_tasks = {
    company: task_df.loc[task_df['project_id'] == company].index.tolist()
    for company in sorted(company_names)
}

# One row per company, one column per task position (shorter rows are padded)
company_tasks_df = pd.DataFrame.from_dict(company_tasks, orient='index')
company_tasks_df

Unnamed: 0,0,1,2,3
P1,T2,T6,T8,T10
P2,T1,T3,T9,
P3,T4,T5,T7,


## 1.4. Pre-Processing: Competency Assessment

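First, create the two input DataFrames for the competency assessment: the Required Competence Data (RCD), taken from the task DataFrame, and the Acquired Competence Data (ACD), taken from the employee DataFrame.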

### 1.4.1 Required Competence Data

In [6]:
# Required competence data: the task table without its two non-skill columns,
# leaving one skill-level column per skill for every task
rcd_df = task_df.drop(columns=['project_id', 'story_points'])
rcd_df

Unnamed: 0_level_0,math,python,sql,cloud,database,optimization
task_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
T1,0,5,3,4,4,0
T2,3,5,4,5,3,1
T3,5,4,2,0,0,5
T4,2,4,0,3,0,4
T5,0,3,5,1,5,0
T6,0,1,4,5,5,0
T7,4,4,1,1,2,3
T8,1,0,5,5,3,0
T9,3,4,3,0,0,2
T10,5,5,0,0,0,5


### 1.4.2 Acquired Competence Data

In [7]:
# Acquired competence data: an independent copy of the employee skill matrix,
# so later changes to acd_df do not touch employee_skills_df
acd_df = employee_skills_df.copy()
acd_df

Unnamed: 0_level_0,math,python,sql,cloud,database,optimization
employee_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Talent 1,5,5,3,2,2,5
Talent 2,2,5,4,3,3,1
Talent 3,4,4,5,3,4,3
Talent 4,3,4,5,5,4,2
Talent 5,2,3,2,4,5,1


### 1.4.3 Fit the Data

In [8]:
# Build the assessment from the required (RCD) and acquired (ACD) competence frames.
# fit() returns two values: `qs` is displayed in section 1.4.4, `info` is not
# inspected in this notebook.
# NOTE(review): the exact meaning of both return values is defined in
# competency_assessment.CompetencyAssessment — confirm there.
ca = CompetencyAssessment(rcd_df, acd_df)
qs, info = ca.fit()

### 1.4.4 Qualification Space

In [9]:
# Display the first value returned by ca.fit(): a nested dict keyed by employee,
# then by task (see the output below)
qs

{'Talent 1': {'T1': [0.0, -1.0, -0.16666666666666666, 'Under-Qualified'],
  'T2': [0.4761904761904761,
   -1.0476190476190477,
   -0.09523809523809527,
   'Under-Qualified'],
  'T3': [0.375, 0, 0.0625, 'Qualified'],
  'T4': [1.076923076923077,
   -0.23076923076923073,
   0.14102564102564105,
   'Qualified'],
  'T5': [0.5, -1.785714285714286, -0.21428571428571433, 'Under-Qualified'],
  'T6': [0.26666666666666666,
   -2.2666666666666666,
   -0.3333333333333333,
   'Under-Qualified'],
  'T7': [1.133333333333333, 0, 0.18888888888888886, 'Qualified'],
  'T8': [0.2857142857142857,
   -2.0000000000000004,
   -0.2857142857142858,
   'Under-Qualified'],
  'T9': [1.3333333333333333, 0, 0.2222222222222222, 'Qualified'],
  'T10': [0.0, 0, 0.0, 'Qualified']},
 'Talent 2': {'T1': [0.1875, -0.5, -0.052083333333333336, 'Under-Qualified'],
  'T2': [0.0, -0.6190476190476191, -0.10317460317460318, 'Under-Qualified'],
  'T3': [0.5, -2.1875, -0.28125, 'Under-Qualified'],
  'T4': [0.3076923076923077,
   -0.

### 1.4.5 Sorted MSG Score for All Tasks

In [10]:
# Per-employee mapping task -> score derived from qs; the output below lists each
# employee's tasks from highest to lowest score
score = ca.rank_MSG(qs)
score

{'Talent 1': {'T9': 0.2222222222222222,
  'T7': 0.18888888888888886,
  'T4': 0.14102564102564105,
  'T3': 0.0625,
  'T10': 0.0,
  'T2': -0.09523809523809527,
  'T1': -0.16666666666666666,
  'T5': -0.21428571428571433,
  'T8': -0.2857142857142858,
  'T6': -0.3333333333333333},
 'Talent 2': {'T9': 0.027777777777777773,
  'T7': -0.033333333333333326,
  'T1': -0.052083333333333336,
  'T5': -0.08333333333333337,
  'T4': -0.10256410256410257,
  'T2': -0.10317460317460318,
  'T8': -0.1666666666666667,
  'T6': -0.17777777777777773,
  'T3': -0.28125,
  'T10': -0.38888888888888884},
 'Talent 3': {'T9': 0.1527777777777778,
  'T7': 0.11111111111111112,
  'T4': 0.0,
  'T5': -9.25185853854297e-18,
  'T2': -0.023809523809523836,
  'T1': -0.03125,
  'T8': -0.04761904761904764,
  'T6': -0.08888888888888886,
  'T3': -0.09375,
  'T10': -0.22222222222222218},
 'Talent 4': {'T9': 0.08333333333333333,
  'T8': 0.05952380952380953,
  'T7': 0.05555555555555556,
  'T1': 0.052083333333333336,
  'T2': 0.023809523

### 1.4.6 Filtered Top-n% Highest MSG Score

In [11]:
# Keep only the best-scoring tasks per employee.
# NOTE(review): the second argument (50) is presumably the top-n percentage from
# the section title — the output keeps 5 of the 10 tasks per employee; confirm
# against CompetencyAssessment.all_top_n_score.
all_top_n_scores = ca.all_top_n_score(score, 50)
all_top_n_scores

{'Talent 1': {'T9': 0.2222222222222222,
  'T7': 0.18888888888888886,
  'T4': 0.14102564102564105,
  'T3': 0.0625,
  'T10': 0.0},
 'Talent 2': {'T9': 0.027777777777777773,
  'T7': -0.033333333333333326,
  'T1': -0.052083333333333336,
  'T5': -0.08333333333333337,
  'T4': -0.10256410256410257},
 'Talent 3': {'T9': 0.1527777777777778,
  'T7': 0.11111111111111112,
  'T4': 0.0,
  'T5': -9.25185853854297e-18,
  'T2': -0.023809523809523836},
 'Talent 4': {'T9': 0.08333333333333333,
  'T8': 0.05952380952380953,
  'T7': 0.05555555555555556,
  'T1': 0.052083333333333336,
  'T2': 0.023809523809523798},
 'Talent 5': {'T7': -0.08888888888888889,
  'T1': -0.09375,
  'T6': -0.12222222222222219,
  'T5': -0.14285714285714288,
  'T8': -0.1547619047619048}}

# 2. Construct the Model

In [12]:
# Empty CP-SAT model; the variables, constraints and objective are added in the cells below
model = cp_model.CpModel()

# 3. Build the Decision Variable

We have 3 sets:

$$
\text{sets} = \begin{cases}
I &: \text{set of tasks} \\
J &: \text{set of employees} \\
K &: \text{set of projects}
\end{cases}
$$

Next, we define parameters, scalars, and data structures. Let:

$$
\begin{align*}
i & = \text{task } i \\
j & = \text{employee } j \\
k & = \text{project } k \\
s_i & = \text{story points of task } i \\
e_{ij} & = \text{similarity skills of employee } j \text{ for task } i \\
\end{align*}
$$

**Decision Variables:**

$$
\begin{align*}
x_{ijk} & = \text{Binary variable indicating whether task } i \text{ of project } k \text{ is assigned to employee } j \\
y_{jk} & = \text{Binary variable indicating whether employee } j \text{ is assigned to any task from project } k \\
\end{align*}
$$

In [13]:
# Capacity of one employee in story points: used as the right-hand side of the
# workload constraint and as the upper bound of the max_workload variable
max_employee_workload = 5

In [14]:
# Create decision variables for x and y
# x[(i, j, k)] = 1 when task i of company k is assigned to employee j.
# Both x and y are binary, and y is later used as an enforcement literal
# (OnlyEnforceIf / .Not()), so they are declared as Boolean variables rather
# than as integer variables with a 0..1 domain.
x = {}
for k, task in company_tasks.items():
    for i in task:
        for j in employees:
            x[(i, j, k)] = model.NewBoolVar(f'x_{i}_{j}_{k}')

# decision variable y represents the cardinality between each employee and company:
# y[(j, k)] = 1 when employee j works on at least one task of company k
y = {}
for j in employees:
    for k in company_tasks.keys():
        y[(j, k)] = model.NewBoolVar(f'y_{j}_{k}')

# decision variable max_workload: an integer in [0, max_employee_workload] that
# constraint 4 forces to be at least every employee's total story points
max_workload = model.NewIntVar(0, max_employee_workload, 'max_workload')

In [15]:
# Dump both variable dictionaries for inspection: keys are (task, employee, company)
# for x and (employee, company) for y. The output grows with
# tasks x employees, so it is only readable for small instances.
print(x)
print(y)

{('T2', 'Talent 1', 'P1'): x_T2_Talent 1_P1(0..1), ('T2', 'Talent 2', 'P1'): x_T2_Talent 2_P1(0..1), ('T2', 'Talent 3', 'P1'): x_T2_Talent 3_P1(0..1), ('T2', 'Talent 4', 'P1'): x_T2_Talent 4_P1(0..1), ('T2', 'Talent 5', 'P1'): x_T2_Talent 5_P1(0..1), ('T6', 'Talent 1', 'P1'): x_T6_Talent 1_P1(0..1), ('T6', 'Talent 2', 'P1'): x_T6_Talent 2_P1(0..1), ('T6', 'Talent 3', 'P1'): x_T6_Talent 3_P1(0..1), ('T6', 'Talent 4', 'P1'): x_T6_Talent 4_P1(0..1), ('T6', 'Talent 5', 'P1'): x_T6_Talent 5_P1(0..1), ('T8', 'Talent 1', 'P1'): x_T8_Talent 1_P1(0..1), ('T8', 'Talent 2', 'P1'): x_T8_Talent 2_P1(0..1), ('T8', 'Talent 3', 'P1'): x_T8_Talent 3_P1(0..1), ('T8', 'Talent 4', 'P1'): x_T8_Talent 4_P1(0..1), ('T8', 'Talent 5', 'P1'): x_T8_Talent 5_P1(0..1), ('T10', 'Talent 1', 'P1'): x_T10_Talent 1_P1(0..1), ('T10', 'Talent 2', 'P1'): x_T10_Talent 2_P1(0..1), ('T10', 'Talent 3', 'P1'): x_T10_Talent 3_P1(0..1), ('T10', 'Talent 4', 'P1'): x_T10_Talent 4_P1(0..1), ('T10', 'Talent 5', 'P1'): x_T10_Talent 5

# 4. Subject to the Constraint

## 4.1. Constraint 1: Each Task is Assigned to One Employee

$$
\sum _{j\in J}\:x_{ijk}\:=\:1 \quad \forall i \in I_k, \: k \in K
$$

In [16]:
# constraint 1: every task of every company gets exactly one employee
for company, company_task_ids in company_tasks.items():
    for task_id in company_task_ids:
        candidate_assignments = [x[(task_id, employee, company)] for employee in employees]
        model.Add(sum(candidate_assignments) == 1)

## 4.2. Constraint 2: Each employee works for one company at a time

Pre-Processing for Constraint 2:
$$
\sum _{i\in I_k}x_{ijk} > 0 \: \rightarrow \: y_{jk}=1 \quad \forall j\in J, \: k\in K\:
$$

In [17]:
# pre-processing constraint 2: link y to x
for j in employees:
    for k, company_task_ids in company_tasks.items():
        # Number of tasks of company k that employee j is assigned to
        assigned_count = sum([x[(i, j, k)] for i in company_task_ids])
        works_for_company = y[(j, k)]
        # y[(j, k)] true  -> employee j holds at least one task of company k
        model.Add(assigned_count > 0).OnlyEnforceIf(works_for_company)
        # y[(j, k)] false -> employee j holds no task of company k
        model.Add(assigned_count <= 0).OnlyEnforceIf(works_for_company.Not())

$$
\sum _{k\in K}y_{jk}\le 1 \quad \forall j\in J
$$

In [18]:
# create constraint 2: each employee can work for at most one company
for j in employees:
    # y[(j, k)] flags the companies employee j works for; at most one may be set
    companies_served = [y[(j, k)] for k in company_tasks]
    model.Add(sum(companies_served) <= 1)

## 4.3. Constraint 3: Employee workload doesn't exceed the capacity

$$
\sum _{i \in I_k} s_i \cdot x_{ijk} \le max\_employee\_workload \quad \forall j\in J, \: k\in K
$$

In [19]:
# constraint 3: the story points an employee takes on within one company must not
# exceed the employee capacity.
# The inner loop variable is deliberately NOT called `tasks`: a for-statement at
# cell level rebinds the notebook-global `tasks` (the full task-id list built in
# section 1.2) to the last company's task list.
for j in employees:
  for k, company_task_ids in company_tasks.items():
    model.Add(sum(story_points[i] * x[(i, j, k)] for i in company_task_ids) <= max_employee_workload)

## 4.4 Constraint 4: Maximum workload is greater than or equal to the workload of each employee For Objective 3

$$
max\_workload \ge \sum_{i \in I} \sum_{k \in K} s_i\cdot x_{ijk}, \quad \forall j\in J\:\:
$$

In [20]:
# constraint 4: max_workload bounds every employee's total story points from above
for employee in employees:
    # Story points of all tasks (across every company) assigned to this employee
    employee_workload = sum(
        story_points[task_id] * x[task_id, employee, company]
        for company, company_task_ids in company_tasks.items()
        for task_id in company_task_ids
    )
    model.Add(max_workload >= employee_workload)

# 5. Single Objective Approach: 1) Minimize The Idle Employee
## 5.1. Set The Objective Model

$$
\mu _{Z_1} = min.\:I_j=\sum _{j\in \:J}\:\left(1\:-\:\sum _{k\in \:K}\:y_{jk}\right) \quad \tag{1}
$$

In [21]:
# objective 1: an employee's idle term is 1 minus the number of companies they
# work for (constraint 2 caps that number at 1)
idle = [1 - sum(y[j, k] for k in company_tasks) for j in employees]

# total idle employees
mu_Z_1 = sum(idle)

# single objective 1
model.Minimize(mu_Z_1)

## 5.2. Solve The Model of Objective $(1)$

In [22]:
# Solve the model with a fresh CP-SAT solver; `status` is compared against
# cp_model.OPTIMAL / cp_model.FEASIBLE in section 5.2.1
solver = cp_model.CpSolver()
status = solver.Solve(model)

: 

#### Generic Function

In [None]:
def get_employee_tasks(j, company_tasks, solver, score, story_points, max_employee_workload):
  """Collect and print the tasks the solver assigned to employee ``j``.

  Reads the notebook-global decision variables ``x`` and looks up their solved
  values through ``solver.Value``. Every assigned task is printed as it is found.

  Args:
    j: employee id, used as the middle component of the ``x`` keys.
    company_tasks: mapping company id -> iterable of task ids.
    solver: solved CP-SAT solver exposing ``Value(var)``.
    score: nested mapping ``score[employee][task]`` -> numeric score.
    story_points: mapping task id -> story points.
    max_employee_workload: capacity used to compute the unused story points.

  Returns:
    Tuple ``(companies, tasks, story_point_sum, wasted_story_points, scores)``;
    the three lists are aligned per assigned task. ``wasted_story_points`` is 0
    for an employee without any assigned task.
  """
  assigned_companies, assigned_tasks, assigned_scores = [], [], []
  story_point_sum = 0

  for company, company_task_ids in company_tasks.items():
    for task_id in company_task_ids:
      # Skip every task that was not given to this employee
      if solver.Value(x[task_id, j, company]) != 1:
        continue

      task_score = score[j][task_id]
      print(f'Task {task_id} assigned to Employee {j}')
      print(f'Company\t\t\t: {company}')
      print(f'Story Points\t\t: {story_points[task_id]}')
      print(f"Metrics score\t: {task_score:.10f}\n")

      assigned_companies.append(company)
      assigned_tasks.append(task_id)
      assigned_scores.append(task_score)
      story_point_sum += story_points[task_id]

  # Unused capacity only counts for employees that actually work
  if story_point_sum > 0:
    wasted_story_points = max_employee_workload - story_point_sum
  else:
    wasted_story_points = 0

  return assigned_companies, assigned_tasks, story_point_sum, wasted_story_points, assigned_scores

### 5.2.1 Print The Solver Results

In [None]:
# A solution exists when the solver proved optimality or at least found a feasible assignment
solution_found = status in (cp_model.OPTIMAL, cp_model.FEASIBLE)

if not solution_found:
  print('No Solution Found!')
  x_hat_1 = {}
else:
  print('Solution Found!')
  print(f'Obj. Value -- Total Idle Employees: {solver.ObjectiveValue()}\n')

  # One entry per employee (idle employees get empty lists)
  x_hat_1 = {
    j: get_employee_tasks(j, company_tasks, solver, score, story_points, max_employee_workload)
    for j in employees
  }

## 5.3. Show the Solver's Result

In [None]:
# Raise pandas' row/column display limits so the result table is not truncated
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 500)

# One row per employee; the columns follow the tuple order returned by get_employee_tasks
result_columns = ['company', 'assigned_task', 'sum_sp', 'wasted_sp', 'assessment_score']
result_1 = pd.DataFrame.from_dict(x_hat_1, orient='index', columns=result_columns)
result_1 = result_1.rename_axis('employee')

result_1

### 5.3.1 Statistics of The Objective

In [None]:
# Summary statistics for objective 1.
# x_hat_1 maps every employee (idle ones included) to
# (company, assigned_task, sum_sp, wasted_sp, assessment_score).
total_employee = len(employees)
total_sp = sum(story_points.values())
# An employee is active only when at least one task was assigned to them.
# Counting the keys of x_hat_1 would always report every employee as active.
total_active_employee = sum(1 for value in x_hat_1.values() if value[1])
total_active_sp = sum(value[2] for value in x_hat_1.values())
total_idle_employee = total_employee - total_active_employee
# Story points of tasks that ended up with no employee
total_wasted_sp = total_sp - total_active_sp

print(f'Total Employee\t\t\t: {total_employee}')
print(f'Total Active Employee\t\t: {total_active_employee}\t{(total_active_employee/total_employee)*100:.2f}%')
print(f'Total Idle Employee\t\t: {total_idle_employee}\t{(total_idle_employee/total_employee)*100:.2f}%\n')
print(f'Total Story Points\t\t: {total_sp}')
print(f'Total Active Story Points\t: {total_active_sp}\t{(total_active_sp/total_sp)*100:.2f}%')
print(f'Total Wasted Story Points\t: {total_wasted_sp}\t{(total_wasted_sp/total_sp)*100:.2f}%\n')

### 5.3.2. Distribution With Respect to the Assessment Score

In [None]:
# Boxplot for objective 1: flatten the per-employee score lists into one Series
# with one entry per assigned task, then plot its distribution
assessment_score_1 = result_1['assessment_score'].explode().reset_index(drop=True)
score_axes = assessment_score_1.plot.box()
score_axes.set_title('Assessment Score Boxplot of Objective 1')
plt.show()