# 0. The Obligatory Part

In [None]:
# Import library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from competency_assessment import CompetencyAssessment
from ortools.sat.python import cp_model

# 1. Define the Data Structure

In [None]:
# Locations of the employee and task CSV files, given relative to the current
# working directory. Use this cell when the data lives in the local repository.
new_employee_path = './mini_data/mini_data - employee.csv'
new_task_path = './mini_data/mini_data - task.csv'

## 1.1. Pre-Processing: Employee Data

In [None]:
# Load the employee table keyed by employee_id and discard the 'No' and
# 'Role' columns when they exist (errors='ignore' tolerates their absence).
employee_skills_df = (
    pd.read_csv(new_employee_path, index_col='employee_id')
    .drop(columns=['No', 'Role'], errors='ignore')
)

# Employee ids, in file order.
employees = employee_skills_df.index.tolist()
# NOTE(review): the first remaining column is skipped here - confirm that it
# is not itself a skill column.
skills_name = employee_skills_df.columns.tolist()[1:]

# Show the table as the cell output.
employee_skills_df

## 1.2. Pre-Processing: Task Data

In [None]:
# Load the task table keyed by task_id.
task_df = pd.read_csv(new_task_path, index_col='task_id')

tasks = task_df.index.tolist()  # every task id, in file order
company_names = list(set(task_df['project_id']))  # distinct project ids (set => no guaranteed order)
story_points = task_df['story_points'].to_dict()  # task_id -> story points

# Show the table as the cell output.
task_df

## 1.3. Group the task data by company/project

In [None]:
# Map each project id to the list of its task ids; iterating the sorted
# project ids yields a dictionary ordered by project id.
company_tasks = {
    company: task_df.loc[task_df['project_id'] == company].index.tolist()
    for company in sorted(company_names)
}

# Tabular view: one row per project.
company_tasks_df = pd.DataFrame.from_dict(company_tasks, orient='index')
company_tasks_df

In [None]:
# Number of (employee, task) pairs; objective 2 divides by max_score * n.
n = len(employees) * len(tasks)
print(n)

## 1.4. Pre-Processing: Competency Assessment

First, build the two input dataframes: the Required Competence Data (RCD) is derived from the task dataframe, and the Acquired Competence Data (ACD) is derived from the employee dataframe.

### 1.4.1 Required Competence Data

In [None]:
# Required competence data: the task table without its project_id and
# story_points columns.
rcd_df = task_df.drop(columns=['project_id', 'story_points'])
rcd_df

### 1.4.2 Acquired Competence Data

In [None]:
# Acquired competence data: an independent copy of the employee table, so
# later changes to acd_df do not touch employee_skills_df.
acd_df = employee_skills_df.copy()
acd_df

### 1.4.3 Fit the Data

In [None]:
# Fit the competency assessment on the required (task) and acquired
# (employee) competence tables. `qs` is shown in the next section as the
# qualification space.
# NOTE(review): `info` is not read anywhere in this part of the notebook -
# confirm whether it is still needed.
ca = CompetencyAssessment(rcd_df, acd_df)
qs, info = ca.fit()

### 1.4.4 Qualification Space

In [None]:
# Display the qualification space returned by ca.fit().
qs

### 1.4.5 Sorted MSG Score for All Tasks

In [None]:
# Rank the MSG scores derived from the qualification space. Later cells index
# the result as score[employee][task].
score = ca.rank_MSG(qs)
score

In [None]:
# Largest MSG score over every employee/task pair (never below the initial 0).
# Objective 2 uses it as a normalisation constant.
#
# The loop variables are deliberately not named `tasks` or `score`: at module
# level those names would overwrite the task-id list and the score mapping
# that later cells still read (e.g. score[j][i]).
max_score = 0
for task_scores in score.values():
    for msg_score in task_scores.values():
        if msg_score > max_score:
            max_score = msg_score

print(max_score)

### 1.4.6 Filtered Top-n% Highest MSG Score

In [None]:
# Filter the ranked scores with n=50 (per the section title, a top-n% cut-off).
# NOTE(review): the exact semantics of all_top_n_score live in
# competency_assessment - confirm.
all_top_n_scores = ca.all_top_n_score(score, 50)
all_top_n_scores

In [None]:
# Same kind of filter with n=30.
# NOTE(review): how top_n_score differs from all_top_n_score is defined in
# competency_assessment - confirm.
top_score = ca.top_n_score(score, 30)
top_score

# 2. Construct the Model

In [None]:
# Empty CP-SAT model; variables, constraints and objectives are added below.
model = cp_model.CpModel()

# 3. Build the Decision Variable

We have 3 sets:

$$
\text{sets} = \begin{cases}
I &: \text{set of tasks} \\
J &: \text{set of employees} \\
K &: \text{set of projects}
\end{cases}
$$

Next, we define parameters, scalars, and data structures. Let:

$$
\begin{align*}
i & = \text{task } i \\
j & = \text{employee } j \\
k & = \text{project } k \\
s_i & = \text{story points of task } i \\
e_{ij} & = \text{similarity skills of employee } j \text{ for task } i \\
\end{align*}
$$

**Decision Variables:**

$$
\begin{align*}
x_{ijk} & = \text{Binary variable indicating whether task } i \text{ of project } k \text{ is assigned to employee } j \\
y_{jk} & = \text{Binary variable indicating whether employee } j \text{ is assigned to any task from company } k \\
\end{align*}
$$

In [None]:
# Story-point capacity of one employee: the upper bound of the max_workload
# variable and the right-hand side of constraint 3.
max_employee_workload = 5

In [None]:
# Decision variables.
#
# x[(i, j, k)] is 1 when task i of project k is assigned to employee j.
# Boolean variables are created (rather than 0..1 integer variables) because
# y is later used as an enforcement literal in OnlyEnforceIf()/Not().
x = {
    (i, j, k): model.NewBoolVar(f'x_{i}_{j}_{k}')
    for k, project_tasks in company_tasks.items()
    for i in project_tasks
    for j in employees
}

# y[(j, k)] is 1 when employee j works on at least one task of project k
# (the linking constraints are added in section 4.2).
y = {
    (j, k): model.NewBoolVar(f'y_{j}_{k}')
    for j in employees
    for k in company_tasks
}

# max_workload: integer in [0, max_employee_workload]; constraint 4 makes it
# an upper bound on every employee's total story points.
max_workload = model.NewIntVar(0, max_employee_workload, 'max_workload')

In [None]:
# Dump both variable dictionaries for inspection.
print(x)
print(y)

# 4. Subject to the Constraint

## 4.1. Constraint 1: Each Task is Assigned to One Employee

$$
\sum _{j\in J}\:x_{ijk}\:=\:1 \quad \forall i \in k, \: k \in K
$$

In [None]:
# Constraint 1: every task is assigned to exactly one employee.
for k, project_tasks in company_tasks.items():
    for i in project_tasks:
        assignees = [x[(i, j, k)] for j in employees]
        model.Add(sum(assignees) == 1)

## 4.2. Constraint 2: Each employee works for one company at a time

Pre-Processing for Constraint 2:
$$
\sum _{i\in I_k}x_{ijk} > 0 \: \rightarrow \: y_{jk}=1 \quad \forall j\in J, \: k\in K\:
$$

In [None]:
# Pre-processing for constraint 2: tie y to x so that y[(j, k)] is true
# exactly when employee j holds at least one task of project k.
for j in employees:
    for k, project_tasks in company_tasks.items():
        # Number of project-k tasks given to employee j.
        tasks_taken = sum([x[(i, j, k)] for i in project_tasks])

        # y[(j, k)] true  -> at least one such task is assigned.
        model.Add(tasks_taken > 0).OnlyEnforceIf(y[(j, k)])
        # y[(j, k)] false -> none is assigned.
        model.Add(tasks_taken <= 0).OnlyEnforceIf(y[(j, k)].Not())

$$
\sum _{k\in K}y_{jk}\le 1 \quad \forall j\in J
$$

In [None]:
# Constraint 2: each employee works for at most one company/project.
for j in employees:
    companies_served = [y[(j, k)] for k in company_tasks]
    model.Add(sum(companies_served) <= 1)

## 4.3. Constraint 3: Employee workload doesn't exceed the capacity

$$
\sum _{i \in I_k} s_i \cdot x_{ijk} \le max\_employee\_workload \quad \forall j\in J, \: k\in K
$$

In [None]:
# Constraint 3: within each project, the story points assigned to one
# employee may not exceed the fixed capacity max_employee_workload.
# The loop variable is named project_tasks so that the module-level `tasks`
# list of task ids is not overwritten by this cell.
for j in employees:
    for k, project_tasks in company_tasks.items():
        model.Add(sum(story_points[i] * x[(i, j, k)] for i in project_tasks) <= max_employee_workload)

## 4.4 Constraint 4: Maximum workload is greater than or equal to the workload of each employee For Objective 3

$$
max\_workload \ge \sum_{i \in I} \sum_{k \in K} s_i\cdot x_{ijk}, \quad \forall j\in J\:\:
$$

In [None]:
# Constraint 4: max_workload bounds from above the total story points of
# every employee, summed over all projects.
for j in employees:
    employee_load = sum(
        story_points[i] * x[(i, j, k)]
        for k, project_tasks in company_tasks.items()
        for i in project_tasks
    )
    model.Add(max_workload >= employee_load)

# 5. Single Objective Approach: 1) Minimize The Idle Employee
## 5.1. Set The Objective Model

$$
\mu _{Z_1} = min.\:I_j=\sum _{j\in \:J}\:\left(1\:-\:\sum _{k\in \:K}\:y_{jk}\right) \quad \tag{1}
$$

In [None]:
# Objective 1: number of idle employees. Each employee contributes
# 1 minus the number of projects they serve.
idle = [1 - sum(y[(j, k)] for k in company_tasks) for j in employees]
mu_Z_1 = sum(idle)

# Single objective 1: minimise the idle count.
model.Minimize(mu_Z_1)

## 5.2. Solve The Model of Objective $(1)$

In [None]:
# Solve the model with objective 1; `status` is checked below against
# cp_model.OPTIMAL / cp_model.FEASIBLE.
solver = cp_model.CpSolver()
status = solver.Solve(model)

#### Generic Function

In [None]:
def get_employee_tasks(j, company_tasks, solver, score, story_points, max_employee_workload, assignment_vars=None):
    """Collect and print the tasks the solver assigned to employee ``j``.

    Args:
        j: Employee id.
        company_tasks: Mapping of company/project id -> iterable of task ids.
        solver: Object exposing ``Value(var)`` for the solved model.
        score: Nested mapping indexed as ``score[employee][task]``.
        story_points: Mapping of task id -> story points.
        max_employee_workload: Capacity used to compute the unused story points.
        assignment_vars: Mapping ``(task, employee, company) -> variable``.
            When omitted, the module-level ``x`` dictionary is used, so
            existing callers keep working unchanged.

    Returns:
        A tuple ``(comp, task, sp, wasted_sp, sim)``: the company of each
        assigned task, the assigned task ids, their total story points, the
        unused capacity (0 when nothing is assigned) and the score of each
        assigned task.
    """
    if assignment_vars is None:
        # Backward-compatible fallback to the notebook-level variables.
        assignment_vars = x

    task = []
    sim = []
    comp = []
    sp = 0

    for k, tasks in company_tasks.items():
        for i in tasks:
            if solver.Value(assignment_vars[i, j, k]) == 1:
                print(f'Task {i} assigned to Employee {j}')
                print(f'Company\t\t\t: {k}')
                print(f'Story Points\t\t: {story_points[i]}')
                print(f"Metrics score\t: {score[j][i]:.10f}\n")

                task.append(i)
                sim.append(score[j][i])
                comp.append(k)
                sp += story_points[i]

    # Unused capacity is only reported for employees that received work.
    wasted_sp = max_employee_workload - sp if sp > 0 else 0
    return comp, task, sp, wasted_sp, sim

### 5.2.1 Print The Solver Results

In [None]:
# Report objective 1 and, when a solution exists, gather the assignment of
# every employee; otherwise leave the result empty.
if status in (cp_model.OPTIMAL, cp_model.FEASIBLE):
    print('Solution Found!')
    print(f'Obj. Value 1 i.e. Total Idle Employees: {solver.ObjectiveValue()}\n')

    x_hat_1 = {}
    for j in employees:
        x_hat_1[j] = get_employee_tasks(j, company_tasks, solver, score, story_points, max_employee_workload)
else:
    print('No Solution Found!')
    x_hat_1 = {}

## 5.3. Show the Solver's Result

In [None]:
# Allow pandas to display up to 500 rows and 500 columns.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

# One row per employee; the columns follow the tuple returned by
# get_employee_tasks.
result_columns = ['company', 'assigned_task', 'sum_sp', 'wasted_sp', 'assessment_score']
result_1 = pd.DataFrame.from_dict(x_hat_1, orient='index', columns=result_columns)
result_1.index.name = 'employee'

result_1

### 5.3.1 Statistics of The Objective

In [None]:
# Summary statistics for the objective 1 solution.
total_employee = len(employees)
total_sp = sum(story_points.values())
# x_hat_1 holds an entry for EVERY employee, so counting its keys would always
# report zero idle employees. An employee is active only when the list of
# assigned tasks (tuple position 1) is non-empty.
total_active_employee = sum(1 for value in x_hat_1.values() if value[1])
total_active_sp = sum(value[2] for value in x_hat_1.values())
total_idle_employee = total_employee - total_active_employee
# Story points of tasks that ended up with no employee.
# NOTE(review): this differs from the per-employee 'wasted_sp' column (unused
# capacity) - confirm which meaning is intended here.
total_wasted_sp = total_sp - total_active_sp

print(f'Total Employee\t\t\t: {total_employee}')
print(f'Total Active Employee\t\t: {total_active_employee}\t{(total_active_employee/total_employee)*100:.2f}%')
print(f'Total Idle Employee\t\t: {total_idle_employee}\t{(total_idle_employee/total_employee)*100:.2f}%\n')
print(f'Total Story Points\t\t: {total_sp}')
print(f'Total Active Story Points\t: {total_active_sp}\t{(total_active_sp/total_sp)*100:.2f}%')
print(f'Total Wasted Story Points\t: {total_wasted_sp}\t{(total_wasted_sp/total_sp)*100:.2f}%\n')

### 5.3.2. Distribution With Respect to the Assessment Score

In [None]:
# Box plot of the assessment scores obtained under objective 1.
# explode() turns each employee's list of scores into one row per score, and
# the index is reset so the rows are numbered consecutively.
assessment_score_1 = result_1['assessment_score'].explode().reset_index(drop=True)
assessment_score_1.plot(kind='box')
plt.title('Assessment Score Boxplot of Objective 1')
plt.show()

# 6. Single Objective Approach: 2) Maximize The Assessment Score
## 6.1. Set The Objective Model

$$
\mu _{Z_2} = max.\:A_{ij} = \frac{\:1}{\:\left(E\cdot n\right)} \sum _{i\in \:I} \sum _{j\in \:J} \: e_{ij} \cdot x_{ijk} \quad \forall k \in K \quad \tag{2}
$$

In [None]:
# Objective 2: total assessment score of the chosen assignments, scaled by
# 1 / (max_score * n).
# The loop variable is named project_tasks so that the module-level `tasks`
# list of task ids is not overwritten by this cell.
normaliser = 1 / (max_score * n)  # loop-invariant scaling factor
assessment_score = []

for k, project_tasks in company_tasks.items():
    assessment_score.append(normaliser * sum(score[j][i] * x[(i, j, k)] for j in employees for i in project_tasks))

mu_Z_2 = sum(assessment_score)

# Single objective 2.
# NOTE(review): `model` already received Minimize(mu_Z_1) for objective 1;
# confirm that the installed OR-Tools version replaces that objective here and
# accepts the floating-point coefficients used above.
model.Maximize(mu_Z_2)

## 6.2. Solve The Model of Objective $(2)$

In [None]:
# Solve the same model again with a fresh solver, now under objective 2.
solver = cp_model.CpSolver()
status = solver.Solve(model)

### 6.2.1 Print The Solver Results

In [None]:
# Report objective 2 and, when a solution exists, gather the assignment of
# every employee; otherwise leave the result empty.
if status in (cp_model.OPTIMAL, cp_model.FEASIBLE):
    print('Solution Found!')
    print(f'Obj. Value 2 i.e. Total Score: {solver.ObjectiveValue()}\n')

    x_hat_2 = {}
    for j in employees:
        x_hat_2[j] = get_employee_tasks(j, company_tasks, solver, score, story_points, max_employee_workload)
else:
    print('No Solution Found!')
    x_hat_2 = {}

## 6.3. Show the Solver's Result

In [None]:
# Allow pandas to display up to 500 rows and 500 columns.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

# One row per employee; the columns follow the tuple returned by
# get_employee_tasks.
result_columns = ['company', 'assigned_task', 'sum_sp', 'wasted_sp', 'assessment_score']
result_2 = pd.DataFrame.from_dict(x_hat_2, orient='index', columns=result_columns)
result_2.index.name = 'employee'

result_2

### 6.3.1 Statistics of The Objective

In [None]:
# Summary statistics for the objective 2 solution.
total_employee = len(employees)
total_sp = sum(story_points.values())
# x_hat_2 holds an entry for EVERY employee, so counting its keys would always
# report zero idle employees. An employee is active only when the list of
# assigned tasks (tuple position 1) is non-empty.
total_active_employee = sum(1 for value in x_hat_2.values() if value[1])
total_active_sp = sum(value[2] for value in x_hat_2.values())
total_idle_employee = total_employee - total_active_employee
# Story points of tasks that ended up with no employee.
# NOTE(review): this differs from the per-employee 'wasted_sp' column (unused
# capacity) - confirm which meaning is intended here.
total_wasted_sp = total_sp - total_active_sp

print(f'Total Employee\t\t\t: {total_employee}')
print(f'Total Active Employee\t\t: {total_active_employee}\t{(total_active_employee/total_employee)*100:.2f}%')
print(f'Total Idle Employee\t\t: {total_idle_employee}\t{(total_idle_employee/total_employee)*100:.2f}%\n')
print(f'Total Story Points\t\t: {total_sp}')
print(f'Total Active Story Points\t: {total_active_sp}\t{(total_active_sp/total_sp)*100:.2f}%')
print(f'Total Wasted Story Points\t: {total_wasted_sp}\t{(total_wasted_sp/total_sp)*100:.2f}%\n')

### 6.3.2. Distribution With Respect to the Assessment Score

In [None]:
# Box plot of the assessment scores obtained under objective 2.
# explode() turns each employee's list of scores into one row per score, and
# the index is reset so the rows are numbered consecutively.
assessment_score_2 = result_2['assessment_score'].explode().reset_index(drop=True)
assessment_score_2.plot(kind='box')
plt.title('Assessment Score Boxplot of Objective 2')
plt.show()