# JobShop algorithm for single-stage scheduling

## Importing packages

In [1]:
import collections
import time
from ortools.sat.python import cp_model
import pandas as pd
import io
import sys
# Alternative tab-separated inputs, currently disabled:
# input = pd.read_csv("input_2_machines.csv", sep="\t")
# actual = pd.read_csv("jobshop_actual_S1.csv", sep="\t")
# Tab-separated table consumed by flexible_jobshop() below, which reads the
# job names from its "Run"/"Sample" column and casts every column after the
# first to int to use as durations.
predicted = pd.read_csv("jobshop_predicted_S1.csv", sep="\t")

## JobShop algorithm


In [8]:
class SolutionPrinter(cp_model.CpSolverSolutionCallback):
    """Print intermediate solutions and stop the search after a fixed number.

    Args:
        solution_limit: Number of reported solutions after which the search is
            stopped. Defaults to 10; pass ``None`` to never stop the search
            from this callback.
    """

    def __init__(self, solution_limit=10):
        cp_model.CpSolverSolutionCallback.__init__(self)
        self.__solution_count = 0
        self.__solution_limit = solution_limit
        self.__start_time = time.time()

    def on_solution_callback(self):
        """Called at each new solution: print it, then stop once the limit is hit."""
        elapsed_time = time.time() - self.__start_time
        # The printed index is 0-based: the counter is incremented after printing.
        print(
            "Solution %i, time = %f s, objective = %i"
            % (self.__solution_count, elapsed_time, self.ObjectiveValue())
        )

        self.__solution_count += 1
        # ">=" rather than "==" so a solution reported after the limit was
        # reached still asks the solver to stop.
        if (
            self.__solution_limit is not None
            and self.__solution_count >= self.__solution_limit
        ):
            self.StopSearch()

def flexible_jobshop(data=None):
    """Solve a small flexible jobshop problem and print the resulting schedule.

    Each row of the input table is one job made of a single task. The job name
    is read from the "Sample" column (a "Run" column is renamed to "Sample"),
    and every column after the first is treated as one machine, numbered from
    0 in column order, holding the job's duration on that machine. Exactly one
    machine is selected per job, jobs on the same machine may not overlap, and
    the makespan (latest job end) is minimised.

    Args:
        data: Optional pandas DataFrame in the layout described above. When
            omitted, the module-level ``predicted`` table is used.

    Returns:
        None. The horizon, the intermediate solutions (via SolutionPrinter),
        the final schedule and the solver statistics are printed to stdout.
    """
    if data is None:
        data = predicted
    data = data.rename(columns={"Run": "Sample"})
    run = data["Sample"].values.tolist()
    # Durations are cast to int because the model is built from integer variables.
    durations = data.iloc[:, 1:].astype(int).values.tolist()
    if not durations or not durations[0]:
        print("Nothing to schedule: the input has no jobs or no machine columns.")
        return

    # jobs[job_id][task_id] is a list of (duration, machine_id) alternatives.
    # Every job has a single task with one alternative per machine column.
    jobs = [
        [[(duration, machine_id) for machine_id, duration in enumerate(row)]]
        for row in durations
    ]
    all_jobs = range(len(jobs))

    # Model the flexible jobshop problem.
    model = cp_model.CpModel()

    # Upper bound on any start/end: every task run back to back on its slowest machine.
    horizon = sum(
        max(0, max(alternative[0] for alternative in task))
        for job in jobs
        for task in job
    )
    print("Horizon = %i" % horizon)

    # Global storage of variables.
    intervals_per_resources = collections.defaultdict(list)
    starts = {}  # indexed by (job_id, task_id).
    presences = {}  # indexed by (job_id, task_id, alt_id).
    job_ends = []

    # Scan the jobs and create the relevant variables and intervals.
    for job_id, job in enumerate(jobs):
        previous_end = None
        for task_id, task in enumerate(job):
            alt_durations = [alternative[0] for alternative in task]
            min_duration = min(alt_durations)
            max_duration = max(alt_durations)
            num_alternatives = len(task)

            # Create main interval for the task.
            suffix_name = "_j%i_t%i" % (job_id, task_id)
            start = model.NewIntVar(0, horizon, "start" + suffix_name)
            duration = model.NewIntVar(
                min_duration, max_duration, "duration" + suffix_name
            )
            end = model.NewIntVar(0, horizon, "end" + suffix_name)
            interval = model.NewIntervalVar(
                start, duration, end, "interval" + suffix_name
            )

            # Store the start for the solution.
            starts[(job_id, task_id)] = start

            # Add precedence with previous task in the same job.
            if previous_end is not None:
                model.Add(start >= previous_end)
            previous_end = end

            # Create alternative intervals.
            if num_alternatives > 1:
                l_presences = []
                for alt_id, (l_duration, machine_id) in enumerate(task):
                    alt_suffix = "_j%i_t%i_a%i" % (job_id, task_id, alt_id)
                    l_presence = model.NewBoolVar("presence" + alt_suffix)
                    l_start = model.NewIntVar(0, horizon, "start" + alt_suffix)
                    l_end = model.NewIntVar(0, horizon, "end" + alt_suffix)
                    l_interval = model.NewOptionalIntervalVar(
                        l_start, l_duration, l_end, l_presence, "interval" + alt_suffix
                    )
                    l_presences.append(l_presence)

                    # Link the primary/global variables with the local ones.
                    model.Add(start == l_start).OnlyEnforceIf(l_presence)
                    model.Add(duration == l_duration).OnlyEnforceIf(l_presence)
                    model.Add(end == l_end).OnlyEnforceIf(l_presence)

                    # Add the local interval to the right machine.
                    intervals_per_resources[machine_id].append(l_interval)

                    # Store the presences for the solution.
                    presences[(job_id, task_id, alt_id)] = l_presence

                # Select exactly one presence variable.
                model.AddExactlyOne(l_presences)
            else:
                # Single machine: the main interval is always present on it.
                intervals_per_resources[task[0][1]].append(interval)
                presences[(job_id, task_id, 0)] = model.NewConstant(1)

        job_ends.append(previous_end)

    # Create machines constraints for every machine that received intervals,
    # however many duration columns the input has.
    for intervals in intervals_per_resources.values():
        if len(intervals) > 1:
            model.AddNoOverlap(intervals)

    # Makespan objective
    makespan = model.NewIntVar(0, horizon, "makespan")
    model.AddMaxEquality(makespan, job_ends)
    model.Minimize(makespan)

    # Solve model
    solver = cp_model.CpSolver()
    solution_printer = SolutionPrinter()
    status = solver.solve(model, solution_printer)

    # Print final solution.
    if status in (cp_model.OPTIMAL, cp_model.FEASIBLE):
        # NOTE: the label says "Optimal" even when the status is only FEASIBLE
        # (e.g. the callback stopped the search early); the text is kept as is
        # because this printed output is parsed as text later in the file.
        print(f"Optimal objective value: {solver.objective_value}")
        for job_id in all_jobs:
            for task_id, task in enumerate(jobs[job_id]):
                start_value = solver.Value(starts[(job_id, task_id)])
                machine = -1
                duration = -1
                selected = -1
                for alt_id, (alt_duration, alt_machine) in enumerate(task):
                    if solver.Value(presences[(job_id, task_id, alt_id)]):
                        duration = alt_duration
                        machine = alt_machine
                        selected = alt_id
                print(
                    "%s_%i starts at %i (alt %i, machine %i, duration %i)"
                    % (run[job_id], job_id, start_value, selected, machine, duration)
                )
        print("solve status: %s" % solver.StatusName(status))
        print("Optimal objective value: %i" % solver.ObjectiveValue())
        print("Statistics")
        print("  - conflicts : %i" % solver.NumConflicts())
        print("  - branches  : %i" % solver.NumBranches())
        print("  - wall time : %f s" % solver.WallTime())
    else:
        print("No feasible schedule found (status %s)" % solver.StatusName(status))

# Redirect stdout so that everything flexible_jobshop() prints is captured as
# text; `output` must be defined for the print below and for the later cells.
old_stdout = sys.stdout
new_stdout = io.StringIO()
sys.stdout = new_stdout
try:
    # Run the function
    flexible_jobshop()
finally:
    # Reset stdout even if the solver raises, so later prints are visible again.
    sys.stdout = old_stdout

# Get the output
output = new_stdout.getvalue()
print(output)  # You can now use the 'output' variable as needed

Horizon = 56350
Solution 0, time = 0.034205 s, objective = 27620
Solution 1, time = 0.039161 s, objective = 8673
Solution 2, time = 0.045783 s, objective = 8572
Solution 3, time = 0.048728 s, objective = 8369
Solution 4, time = 0.059068 s, objective = 8311
Solution 5, time = 0.099974 s, objective = 8269
Solution 6, time = 0.101584 s, objective = 8197
Solution 7, time = 0.118637 s, objective = 8186
Solution 8, time = 0.121381 s, objective = 8148
Solution 9, time = 0.157690 s, objective = 8118
Optimal objective value: 8118.0
ERR068392_0 starts at 5893 (alt 4, machine 4, duration 741)
ERR020289_1 starts at 4956 (alt 2, machine 2, duration 2279)
SRR799760_2 starts at 6085 (alt 0, machine 0, duration 584)
ERR020264_3 starts at 0 (alt 0, machine 0, duration 2924)
ERR062940_4 starts at 5227 (alt 3, machine 3, duration 638)
ERR018544_5 starts at 7235 (alt 2, machine 2, duration 752)
ERR019492_6 starts at 4521 (alt 3, machine 3, duration 706)
ERR018416_7 starts at 2924 (alt 0, machine 0, durati

In [None]:
# Notebook echo of `output`, the variable meant to hold the text printed by flexible_jobshop().
output


## Actual durations for the predicted schedule

In [None]:
# Parse the schedule lines out of the captured solver output. Each line has the form
#   "<job>_<index> starts at <start> (alt <alt>, machine <machine>, duration <duration>)"
# Matching the whole line (instead of cutting the text at the first "ERR" and
# splitting on whitespace) keeps jobs whose names do not start with "ERR",
# keeps underscores inside job names, and reads the machine id from the
# "machine" field rather than from the "alt" field.
import re
import warnings

schedule_line = re.compile(
    r"^(?P<job>.+)_\d+ starts at (?P<start>-?\d+) "
    r"\(alt -?\d+, machine (?P<machine>-?\d+), duration (?P<duration>-?\d+)\)$",
    re.MULTILINE,
)
matches = list(schedule_line.finditer(output))
if not matches:
    raise ValueError("no schedule lines found in the captured solver output")
print("\n".join(m.group(0) for m in matches))

# extract job name, machine id and the duration (kept as strings) plus the start time
records = [
    (m.group("job"), m.group("machine"), m.group("duration"), int(m.group("start")))
    for m in matches
]
# convert the output to a dataframe
output = pd.DataFrame(
    [record[:3] for record in records], columns=["Job", "Machine", "Duration"]
)
print(output)
# Order rows by numeric machine id, then by scheduled start time, so that the
# jobs of each machine appear in the order the schedule runs them.
order = sorted(
    range(len(records)), key=lambda i: (int(records[i][1]), records[i][3])
)
# .copy() gives an independent frame that later cells can modify freely.
output_sorted = output.iloc[order].copy()
# for Job and Machine columns in output dataframe, extract the number from the actual dataframe
# actual = pd.read_csv("jobshop_actual_S1.csv", sep="\t")
# actual.set_index("Unnamed: 0", inplace=True)
# warnings.filterwarnings("ignore")
# output["Machine"].replace("0", "1T4GPU (M0)",inplace=True)
# output["Machine"].replace("1", "2T4GPU (M1)",inplace=True)
# output["Machine"].replace("2", "1RTX GPU (M2)",inplace=True)
# output["Machine"].replace("3", "2RTX GPU (M3)",inplace=True)
# output["Machine"].replace("4", "3RTX GPU (M4)",inplace=True)
# add a column in output 
# output["Actual Duration"] = 0
# for i in range(len(output)):
#     output["Actual Duration"][i] = actual.loc[output["Job"][i], output["Machine"][i]]
# output.columns = ["Job", "Predicted Machine", "Predicted Duration", "Actual Duration"]
# output
# df=output
# # Ensure Duration and Actual Duration are numeric
# df['Predicted Duration'] = pd.to_numeric(df['Predicted Duration'])
# df['Actual Duration'] = pd.to_numeric(df['Actual Duration'])

# # Group by Machine and sum Duration and Actual Duration
# grouped = df.groupby('Predicted Machine').agg({
#     'Predicted Duration': 'sum',
#     'Actual Duration': 'sum'
# }).reset_index()

# # Rename columns for clarity
# grouped.columns = ['Machine', 'Predicted (Sum)', 'Actual Duration (Sum)']

# # Display the result
# print(grouped)

## Plan Execution 

In [None]:
# Build the per-machine execution plan from the machine-sorted schedule.
# Work on a copy so that output_sorted is left untouched and re-running this
# cell does not shift the machine numbers a second time.
t = output_sorted[["Job", "Machine"]].copy()
# increment by 1 in all values of Machine column (0-based ids -> 1-based numbers)
t["Machine"] = t["Machine"].astype(int) + 1
# group Job by Machine: dictionary with the machine number as key and the
# list of its jobs (in the row order of t) as value
job_allocation = t.groupby("Machine")["Job"].apply(list).to_dict()
job_allocation

In [None]:
# Notebook echo of `output`; after the parsing cell above it is the Job/Machine/Duration DataFrame.
output

In [None]:
# Input ei - Execution plan for machine mi (job_allocation maps machine number -> list of jobs)
# Output: Gives execution schedule for VMs and Executes the schedule on machines
# Only the plan files script_m<machine>.txt are written here; the remote copy
# and execution steps are left as commented placeholders.
start = time.time()
# pushing job on machine
vms = ['vm1', 'vm2', 'vm3', 'vm4', 'vm5']
machines = [1, 2, 3, 4, 5]
for machine in machines:
    # subprocess.run(["ssh", vms[i+1], "touch", "script.txt"])
    # append this script.txt file with job_allocation[machines[i]]
    # A machine that received no jobs gets an empty BEGIN/END plan instead of
    # raising KeyError.
    ei = job_allocation.get(machine, [])
    with open("script_m{}.txt".format(machine), "w") as f:
        f.write("BEGIN\n")
        f.writelines("EXEC " + j + "\n" for j in ei)
        f.write("END\n")
    # ssh vm1 script.txt >> ei
print("Copied scheduling plan on all machines")
print("Starting job execution on all machines")
for machine in machines:
    # ssh vm1 python3 parabricks.sh
    print(f"Jobs on Machine {machine}: {job_allocation.get(machine, [])}")
end = time.time()
print("Time taken for JobShop schedlule workload is: ", round(end-start,3))