# Scheduling problem in CPMpy, with visualization

Based on Alexander Schiendorfer's example, but with a simplified visualization
https://github.com/Alexander-Schiendorfer/cp-examples/tree/main/scheduling

Given some jobs, each consisting of tasks that must be performed on specific machines,
the program finds a schedule that satisfies all precedence constraints while minimizing the makespan (the total time needed to finish all jobs).

This example requires you to install _pandas_ and _plotly_.

In [1]:
! pip install pandas plotly --quiet

In [2]:
import numpy as np
import pandas as pd
import plotly.express as px
from cpmpy import *

In [3]:
# the CPMpy model and variables
def model_scheduling(jobs, lastT, dur, taskToMach):
    """Build the CPMpy job-shop scheduling model.

    Every task gets an integer start and end variable in ``[0, lastT]``.
    The model ties each end to its start plus duration, forbids overlap
    between tasks of different jobs that share a machine, keeps the tasks
    of each job in column order, and minimizes the makespan.

    Args:
        jobs: Job labels. Not used while building the model; accepted for
            interface compatibility.
        lastT: Upper bound on every start and end time.
        dur: Per job (row), the duration of each task (column).
        taskToMach: Per job (row), the machine on which each task (column)
            has to be performed.

    Returns:
        A tuple ``(model, (start, end))`` where ``start`` and ``end`` are the
        ``(nJobs, nTasks)`` arrays of decision variables.
    """
    dur = np.array(dur)
    taskToMach = np.array(taskToMach)
    nJobs, nTasks = taskToMach.shape  # jobs are rows, tasks are columns

    # Decision variables
    start = intvar(0, lastT, shape=(nJobs, nTasks), name="start")  # Start time of each task
    end = intvar(0, lastT, shape=(nJobs, nTasks), name="end")  # End time of each task

    m = Model()

    # The end of every task is the sum of its start and duration
    m += (end == start + dur)

    # No overlap on the same machine.
    # The disjunction is symmetric in its two tasks, so each unordered pair of
    # jobs (j < j2) is visited once instead of posting every constraint twice.
    for j in range(nJobs):
        for j2 in range(j + 1, nJobs):
            conflicts = [(end[j, t] <= start[j2, t2]) | (end[j2, t2] <= start[j, t])
                         for t in range(nTasks) for t2 in range(nTasks)
                         if taskToMach[j, t] == taskToMach[j2, t2]]
            if conflicts:
                m += conflicts

    # Precedences: do tasks in order for all jobs
    for t in range(nTasks - 1):
        m += [start[:, t + 1] >= end[:, t]]

    # Minimize the makespan,
    # the makespan is defined as the total needed time to finish all jobs
    m.minimize(max(end))

    # Optional constraints
    # The 2nd task of the job in row 1 (job B in the example data) has to
    # start no later than the 2nd task of every other job.
    # Only posted when row 1 and column 1 exist; indexing them on a smaller
    # instance would raise an IndexError.
    if nJobs > 1 and nTasks > 1:
        for i in range(nJobs):
            if i != 1:
                m += (start[i, 1] >= start[1, 1])

    return m, (start, end)

In [4]:
# the visualisation: make dataframe, use excellent barchart from plotly express
def visualize_scheduling(jobs, taskToMach, start, end):
    """Show a schedule as a horizontal bar chart with plotly express.

    One bar is drawn per task: it sits on the row of its machine, begins at
    the task's start time, is as long as ``end - start``, is colored by job
    and is labelled with the task name.

    Args:
        jobs: Job labels, one per row of ``taskToMach``.
        taskToMach: Per job (row), the machine of each task (column).
        start: Array of task start times; flattened in row order.
        end: Array of task end times; flattened in row order.

    Returns:
        Whatever ``fig.show()`` returns.
    """
    machines = np.array(taskToMach)
    _, tasks_per_job = machines.shape  # jobs are rows, tasks are columns

    # One label per task, in the same row-major order as the flattened arrays
    task_labels = np.array([f"Task {job}{idx}"
                            for job in jobs
                            for idx in range(tasks_per_job)])
    job_of_task = np.repeat(jobs, tasks_per_job)

    columns = {
        'Start': start.flatten(),
        'End': end.flatten(),
        'Machine': machines.flatten().astype(str),
        'Job': job_of_task,
        'Name': task_labels,
    }
    frame = pd.DataFrame(columns)
    # Bar length on the x-axis
    frame['Time'] = frame['End'] - frame['Start']

    chart = px.bar(frame, orientation='h',
                   base="Start", x="Time", y="Machine",
                   color="Job", text="Name")
    return chart.show()

In [5]:
# Example instance for job shop scheduling
jobs = ['A', 'B', 'C', 'D']  # labels of the jobs, one per row below
lastT = 20  # time limit: upper bound on every start and end time

# Both tables have one row per job and one column per task
# (3 tasks per job in this instance).

# Time needed for each task
dur = [
    [5, 2, 3],
    [4, 5, 1],
    [3, 4, 2],
    [1, 1, 1],
]

# Machine on which each task has to be performed
taskToMach = [
    [1, 2, 3],
    [2, 1, 3],
    [2, 3, 1],
    [3, 2, 1],
]

In [None]:
# Build the model, solve it, then report and plot the schedule
model, (start, end) = model_scheduling(jobs, lastT, dur, taskToMach)

# Stop here when the solver reports that it found no solution
sat = model.solve()
if not sat:
    raise Exception("No solution found.")

# Read the solution values once and reuse them below
start_times, end_times = start.value(), end.value()

# Text output
print("Makespan:", max(end_times))
print("Start times:", start_times)
print("End times:", end_times)

# Visualization
visualize_scheduling(jobs, taskToMach, start_times, end_times)

Makespan: 15
Start times: [[ 0  8 12]
 [ 0  5 11]
 [ 4  7 11]
 [ 0  7 10]]
End times: [[ 5 10 15]
 [ 4 10 12]
 [ 7 11 13]
 [ 1  8 11]]
