In [1]:
import numpy as np
import pandas as pd
from ortools.linear_solver import pywraplp

In [2]:
# Load the cost matrix from disk; the path is named so it is easy to spot and change.
dataset_path = "dataset_2000x1000.csv"
data = pd.read_csv(dataset_path)

In [3]:
# Convert the DataFrame to a NumPy array so later cells can index it
# positionally as costs[i][j] (row i, column j).
costs = data.to_numpy()

In [17]:
# print(costs[4][8])

In [20]:
# Rows of the cost matrix are tasks and columns are workers.
# Reading both sizes from `.shape` avoids indexing `costs[0]`, which raises
# IndexError when the dataset has no rows.
num_tasks, num_workers = costs.shape

print(f"{num_workers} workers [columns]\n{num_tasks} tasks [rows]")

1000 workers [columns]
2000 tasks [rows]


In [23]:
# Create the MIP solver with the SCIP backend.
# CreateSolver returns None when the requested backend is not available in
# this OR-Tools build, so fail here with a clear message instead of hitting
# an AttributeError on the first `solver.` call further down.
# Re-running this cell makes a brand-new, empty solver: the variable,
# constraint and objective cells below must be re-run afterwards.
solver = pywraplp.Solver.CreateSolver('SCIP')
if solver is None:
    raise RuntimeError("SCIP solver backend is not available in this OR-Tools installation")

In [7]:
# Decision variables: x[i, j] is a 0/1 integer variable for the pair
# (task i, worker j). Variables are created row by row, one per cost entry.
x = {
    (i, j): solver.IntVar(0, 1, "")
    for i in range(num_tasks)
    for j in range(num_workers)
}

In [8]:
# Every task (row) is assigned to exactly one worker.
for task in range(num_tasks):
    candidates = [x[task, worker] for worker in range(num_workers)]
    solver.Add(solver.Sum(candidates) == 1)

# Every worker (column) receives at least one task. There is no upper bound,
# so a single worker may end up with several tasks.
for worker in range(num_workers):
    assigned = [x[task, worker] for task in range(num_tasks)]
    solver.Add(solver.Sum(assigned) >= 1)

In [9]:
# Objective: total cost of all chosen (task, worker) pairs, to be minimized.
# Each term is the cost entry multiplied by its 0/1 decision variable.
objective_terms = [
    costs[i][j] * x[i, j]
    for i in range(num_tasks)
    for j in range(num_workers)
]

solver.Minimize(solver.Sum(objective_terms))

In [10]:
# Run the solver and keep the returned status code; it is compared against
# pywraplp.Solver.OPTIMAL / FEASIBLE when reporting the result.
# NOTE(review): no time limit is configured and the model has
# num_tasks * num_workers integer variables, so this call may run for a long
# time on large datasets — confirm whether a limit should be set.
status = solver.Solve()

In [11]:
# Report the objective value when the solver found a solution (either proven
# optimal or merely feasible); otherwise report that nothing was found.
solved_statuses = (pywraplp.Solver.OPTIMAL, pywraplp.Solver.FEASIBLE)

if status not in solved_statuses:
    print("NO SOLUTION FOUND")
else:
    total_cost = solver.Objective().Value()
    print(f'Total Cost = {total_cost}\n')

Total Cost = 100035.0



Extract the solution

In [14]:
# Collect one (task, worker, cost) tuple per selected assignment.
# solution_value() returns a float, and a MIP solver may report a variable
# that is "on" as e.g. 0.9999999 rather than exactly 1. Comparing with `== 1`
# would silently drop such assignments, so threshold at 0.5 instead.
solution = [
    (i, j, costs[i][j])
    for i in range(num_tasks)
    for j in range(num_workers)
    if x[i, j].solution_value() > 0.5
]

# Output path used by the CSV export cell.
filename = "AP_solution_2000x1000.csv"

In [16]:
import csv

# Write the assignments to `filename` as CSV: one header row followed by one
# row per (task, worker, cost) tuple in `solution`.
# newline='' lets the csv module control line endings itself.
header = ['Task', 'Worker', 'Cost']
with open(filename, 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(header)
    csvwriter.writerows(solution)