# Employee Scheduling Program
Given the following parameters:
* Task list : with TaskID, Skill required and Priority values
* People List : with Person ID and their Names
* Skill Matrix : with Person ID and Skill ID for that person

We have to schedule the tasks such that:
* The High Priority ones are done as soon as possible
* The work is done in the least amount of time
* Each task takes a full day to perform.
* Each task requires a single person to perform it.

In [64]:
import pandas as pd
import collections
import csv

class people:
    """One schedulable person: an ID, the skills they hold and a name."""

    def __init__(self,personID,skillID,name):
        """
        Store the three values that describe a person on the instance.
        @params:
        personID : the ID associated with the person
        skillID : the collection of skill IDs that the person has
        name : the name of the person
        """
        self.personID, self.skillID, self.name = personID, skillID, name

class task:
    """One unit of work: an ID, the single skill it needs and a priority flag."""

    def __init__(self,taskID,skillID,priority):
        """
        Store the three values that describe a task on the instance.
        @params:
        taskID : the ID associated with the task that needs to be done
        skillID : the skill ID that is required to complete the task
        priority : priority of the task (True: urgent, False: less urgent)
        """
        self.taskID, self.skillID, self.priority = taskID, skillID, priority


def add_val_tasks(data_tasks):
    """
    Build the list of task objects from the tasks table, urgent ones first.

    @params:
    data_tasks : DataFrame with the columns 'Id', 'SkillRequired' and 'IsPriority'

    @returns:
    list of task objects sorted by descending priority; tasks that share a
    priority keep the order they have in data_tasks
    """
    # Walk the three columns in parallel instead of calling .loc[i] with a
    # positional counter: .loc is label based, so the old lookup only worked
    # when the index happened to be exactly 0..n-1, and it rebuilt a row
    # Series three times per task.
    rows = zip(
        data_tasks['Id'],
        data_tasks['SkillRequired'],
        data_tasks['IsPriority'],
    )
    tasks = [task(task_id, skill_id, priority) for task_id, skill_id, priority in rows]
    # list.sort is stable (also with reverse=True), so the input order is
    # preserved inside each priority level.
    tasks.sort(key=lambda t: t.priority, reverse=True)
    return tasks

def add_val_ppl(data_ppl):
    """
    Build the list of people objects from the people table.

    @params:
    data_ppl : DataFrame with the columns 'Id', 'Name' and 'Skills'
               (the 'Skills' value of a row is passed on as that person's skillID)

    @returns:
    list of people objects, in the same order as the rows of data_ppl
    """
    # Iterate the columns in parallel rather than using .loc[i] with a
    # positional counter: .loc is label based and would fail on any index
    # other than 0..n-1. This also avoids shadowing the built-in id().
    rows = zip(data_ppl['Id'], data_ppl['Name'], data_ppl['Skills'])
    return [
        people(personID=person_id, skillID=skill_ids, name=person_name)
        for person_id, person_name, skill_ids in rows
    ]

def addSkills(data,length):
    """
    Collect, for every person, the list of skill IDs that person has.

    @params:
    data : DataFrame with the columns 'PersonId' and 'SkillId' (one row per person/skill pair)
    length : number of people; person IDs are expected to be 1..length

    @returns:
    dictionary personID -> list of skill IDs (as ints, in row order);
    every ID in 1..length is present, with an empty list if no row mentions it

    @raises:
    KeyError : if a row names a PersonId outside 1..length
    """
    arr = {person_id: [] for person_id in range(1, length + 1)}
    # Read the two columns in parallel instead of .loc[i] with a positional
    # counter: .loc is label based, so the old loop broke on any index that
    # was not exactly 0..n-1.
    for person_id, skill_id in zip(data['PersonId'], data['SkillId']):
        arr[int(person_id)].append(int(skill_id))
    return arr

def makeDict(data,skill_num):
    """
    Invert the people list into a lookup of skill ID -> people having that skill.

    @params:
    data : list of objects exposing personID and skillID (an iterable of skill IDs)
    skill_num : sequence whose first element is used as the highest skill ID
                to pre-create; an empty sequence pre-creates nothing

    @returns:
    dictionary skillID -> list of personIDs (in the order of data); every ID in
    1..skill_num[0] is present even when nobody has that skill
    """
    highest = skill_num[0] if len(skill_num) else 0
    d = {skill_id: [] for skill_id in range(1, highest + 1)}
    for person in data:
        for skill_id in person.skillID:
            # skill_num[0] is not guaranteed to be the largest skill ID that
            # occurs, so create missing entries on demand instead of raising
            # KeyError.
            d.setdefault(skill_id, []).append(person.personID)
    return d



def takeInput(ppl,skills,tasks):
    """
    Read the three csv inputs and turn them into the scheduler's objects.

    @params:
    ppl : path of the people csv file
    skills : path of the skill-matrix csv file
    tasks : path of the tasks csv file

    @returns (as a tuple, in this order):
    the list of people built by add_val_ppl (IDs, skills and names)
    the list of tasks built by add_val_tasks (IDs, skill required and priority)
    skill_num : the result of .max() over the per-person 'Skills' column
    """
    people_frame = pd.read_csv(ppl)
    skill_frame = pd.read_csv(skills)
    task_frame = pd.read_csv(tasks)

    # Attach each person's skill list as a new column; the dictionary from
    # addSkills is keyed 1..number of people, so its values are assigned row by row.
    skills_by_person = addSkills(skill_frame, len(people_frame))
    people_frame['Skills'] = skills_by_person.values()

    # NOTE(review): the column holds lists, so .max() compares whole lists and
    # yields the greatest list, not the greatest single skill ID. Callers read
    # element [0] of it -- confirm this is the intended "max skillID".
    skill_num = people_frame['Skills'].max()

    return add_val_ppl(people_frame), add_val_tasks(task_frame), skill_num
    


def assign(ppl_data,skills_data,tasks_data):
    """
    Greedily schedule every task onto a person and a day.

    Tasks are visited in the order returned by takeInput (priority tasks
    first). On each day a person receives at most one task; a task goes to the
    first still-free person listed for its skill, otherwise it waits for the
    next day.

    @params:
    ppl_data : path of the people csv file
    skills_data : path of the skill-matrix csv file
    tasks_data : path of the tasks csv file

    @returns:
    OrderedDict sorted by taskID that maps taskID -> [personID, day]

    @raises:
    ValueError : if none of the remaining tasks can ever be given to anybody
                 (a required skill that no person has, or no people at all);
                 this situation used to loop forever
    """
    ppl, tasks, skill_num = takeInput(ppl_data, skills_data, tasks_data)
    people_by_skill = makeDict(ppl, skill_num)  # skill ID -> person IDs having it

    main_assign = {}
    pending = list(tasks)  # still unscheduled, kept in priority order
    day = 1
    while pending:
        busy = set()    # person IDs that already have a task today
        postponed = []  # tasks that have to wait for a later day
        for job in pending:
            chosen = None
            # Once everybody is busy nothing else can be placed today.
            if len(busy) < len(ppl):
                # .get: a skill missing from the lookup means nobody has it.
                for person_id in people_by_skill.get(job.skillID, ()):
                    if person_id not in busy:
                        chosen = person_id
                        break
            if chosen is None:
                postponed.append(job)
            else:
                busy.add(chosen)
                main_assign[job.taskID] = [chosen, day]

        # Everybody is free at the start of a day, so a day without a single
        # assignment means the remaining tasks can never be scheduled.
        if len(postponed) == len(pending):
            raise ValueError(
                'No person has the skill required for task(s): {}'.format(
                    [job.taskID for job in postponed]
                )
            )
        pending = postponed
        day += 1

    return collections.OrderedDict(sorted(main_assign.items()))

def write_csv(data,filename):
    """
    Dump the assignments to a csv file, preceded by a header row.
    @params:
    data : mapping taskID -> sequence whose first two entries are the
           personID and the day; rows are written in the mapping's iteration order
    filename : path of the csv file to write (opened in 'w' mode)
    """
    header = ['TaskID', 'PersonID', 'Day']

    with open(filename, 'w', newline='') as out:
        sink = csv.writer(out)
        sink.writerow(header)
        sink.writerows(
            [task_id, data[task_id][0], data[task_id][1]] for task_id in data
        )


In [13]:
import time
# Time one end-to-end run (read inputs, schedule, write the csv) on the small task list.
# perf_counter is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() is wall-clock time and can jump if the system
# clock is adjusted.
start=time.perf_counter()
ppl='data/in/people.csv'
skills='data/in/skillMatrix.csv'
tasks='data/in/tasks-small.csv'
filename='data/out/assignments-small.csv'
write_csv(assign(ppl,skills,tasks),filename)
end=time.perf_counter()
print('Time taken for 14 tasks: ',end-start)

Time taken for 14 tasks:  0.02830362319946289


In [14]:
# Time one end-to-end run (read inputs, schedule, write the csv) on the large task list.
# perf_counter is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() is wall-clock time and can jump if the system
# clock is adjusted.
start=time.perf_counter()
ppl='data/in/people.csv'
skills='data/in/skillMatrix.csv'
tasks='data/in/tasks-large.csv'
filename='data/out/assignments-large.csv'
write_csv(assign(ppl,skills,tasks),filename)
end=time.perf_counter()
print('Time taken for 300 tasks: ',end-start)

Time taken for 300 tasks:  0.15445661544799805


# Data Analysis
We will analyse the data in the large assignments file.

## 1. Data Input

In [37]:
# Load the generated schedule (indexed by its first column, TaskID) and the
# task list it was built from.
data=pd.read_csv('data/out/assignments-large.csv',index_col=0)
tasks=pd.read_csv('data/in/tasks-large.csv')
# Index the tasks by their own Id so the join below lines up on the task ID
# for any number of tasks, instead of assuming exactly 300 tasks that are
# numbered 1..300 in file order. drop=False keeps the 'Id' column available.
tasks=tasks.set_index('Id',drop=False)
# Bring the priority flag of every task next to its assignment.
data=data.join(tasks['IsPriority'])
data.head()

Unnamed: 0_level_0,PersonID,Day,IsPriority
TaskID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1,1,True
2,1,7,False
3,2,7,False
4,3,7,False
5,4,7,False


## 2. High Priority Jobs Analysis
We will analyse the number of high-priority jobs that were assigned and the number of days within which they were done.

In [54]:
# Keep only the rows flagged as high priority; the flag column is then
# redundant, so it is dropped from the resulting frame.
is_high=data['IsPriority']==True
high=data[is_high].drop('IsPriority',axis=1)
high

Unnamed: 0_level_0,PersonID,Day
TaskID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1,1
11,2,1
23,4,1
24,3,1
36,5,1
38,6,1
39,1,2
44,6,2
71,2,2
75,1,3


In [63]:
# Count the high-priority rows that carry a PersonID (count() skips missing values).
assigned_high=high['PersonID'].count()
print('Number of High Priority jobs assigned:',assigned_high,'Jobs')

Number of High Priority jobs assigned: 36 Jobs


In [57]:
# Number of high-priority jobs done on each day: group the high-priority
# assignments by Day and count the rows in every group.
high.groupby('Day').count()
# Per the output recorded for this cell: on each of days 1 to 6,
# 6 high-priority jobs were done, so it takes 6 days to complete
# the 36 high-priority jobs.

Unnamed: 0_level_0,PersonID
Day,Unnamed: 1_level_1
1,6
2,6
3,6
4,6
5,6
6,6


## 3. The number of days it takes to complete the whole list of jobs

In [62]:
# The schedule ends on the last day any task is assigned, i.e. the largest
# value in the Day column.
# Stored as total_days rather than max so that the built-in max() is not
# shadowed for the rest of the notebook.
total_days=data['Day'].max()
print('The number of days it takes to complete the whole list of jobs: ',total_days,'Days')

The number of days it takes to complete the whole list of jobs:  50 Days


## 4. Time taken to generate
From the timings recorded above, generating the schedule takes:
* 300 tasks: 0.15445661544799805 seconds
* 14 tasks: 0.02830362319946289 seconds