In [1]:
from sklearn.metrics.pairwise import euclidean_distances
import numpy as np
import random

In [2]:
# Sample 3x3 cost matrix. A separate working copy is kept because hungarian()
# reduces the array it is given in place.
d_origin = np.array(
    [
        [40, 32, 65],
        [1, 33, 33],
        [12, 30, 32],
    ]
)
d = np.array(d_origin, copy=True)

In [3]:
# Show the shape and contents of the working matrix.
print(d.shape)
print(d)

# Marker values. None of the functions shown in this file reference them.
STAR, PRIME = 1, 2
COVERED, NOT_CONVERED = 1, 0

(3, 3)
[[40 32 65]
 [ 1 33 33]
 [12 30 32]]


In [4]:
def choice_exist_in_all_covered_col(choice, covered_cols):
    """Report whether every covered column contains at least one choice.

    Args:
        choice: 2-D boolean array; a truthy entry marks a chosen cell.
        covered_cols: Per-column flags; a truthy flag means the column is
            covered.

    Returns:
        ``False`` if some covered column has no truthy entry in ``choice``,
        ``True`` otherwise (including when no column is covered).
    """
    return all(
        np.any(choice[:, col_idx])
        for col_idx, is_covered in enumerate(covered_cols)
        if is_covered
    )

def mark_cols_have_zeros_in_marked_rows(zeros_location, covered_rows, covered_cols):
    """Mark every column that has a zero in a marked row.

    ``covered_cols`` is updated in place; nothing is returned.

    Args:
        zeros_location: 2-D boolean array (or nested sequence); a truthy
            entry means the matrix has a zero at that position.
        covered_rows: Per-row flags; only rows whose flag is truthy are
            inspected.
        covered_cols: Per-column flags; entries are set to ``True`` for
            columns that contain a zero in an inspected row.
    """
    # Iterate the real rows and columns instead of using the row count for
    # both axes, so rectangular inputs are handled correctly.
    for row_idx, zero_row in enumerate(zeros_location):
        if not covered_rows[row_idx]:
            continue
        for col_idx, is_zero in enumerate(zero_row):
            if is_zero and not covered_cols[col_idx]:
                covered_cols[col_idx] = True

def find_col_idx_with_no_choice(choice, covered_cols):
    """Return the first covered column that holds no choice.

    Args:
        choice: 2-D boolean array; a truthy entry marks a chosen cell.
        covered_cols: Per-column flags, indexable by column position.

    Returns:
        The index of the first column whose flag is truthy and whose entries
        in ``choice`` are all falsy, or ``-1`` if there is no such column.
    """
    empty_covered = (
        idx
        for idx, column in enumerate(choice.T)
        if covered_cols[idx] and not np.any(column)
    )
    return next(empty_covered, -1)

# Find a row that has a zero in column col_idx_with_no_choice and has no choice of its own yet.
def find_row_idx_with_no_choice(zeros_location, choice, col_idx_with_no_choice):
    """Return the first choice-free row that has a zero in the given column.

    Args:
        zeros_location: 2-D boolean array; truthy where the matrix is zero.
        choice: 2-D boolean array; a truthy entry marks a chosen cell.
        col_idx_with_no_choice: Column index to look at.

    Returns:
        The index of the first row that has no truthy entry in ``choice`` and
        a zero at ``col_idx_with_no_choice``, or ``-1`` if none exists (also
        when the column index is outside the row's range).
    """
    target_col = col_idx_with_no_choice
    for row_idx, picks in enumerate(choice):
        if np.any(picks):
            # This row already holds a choice, so it cannot be used.
            continue
        if target_col in range(len(picks)) and zeros_location[row_idx][target_col]:
            return row_idx
    return -1

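# Pick an existing choice (row, col) to remove so that the row can take the given column instead:
# prefer a row whose current column has a zero in some row that has no choice yet;
# otherwise return a random row with a zero in the given column, together with its chosen column.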
def find_choice_row_col(zeros_location, choice, col_idx_with_no_choice):
    """Select an existing choice to give up in favour of the given column.

    Rows with a zero in ``col_idx_with_no_choice`` are examined in order. For
    each one, the first column it has chosen is looked up; if that column has
    a zero in some row without a choice, the row and that column are returned.
    If no row qualifies, the candidate rows are shuffled with
    ``random.shuffle`` and the first one is returned with its first chosen
    column.

    Every examined row is expected to already hold a choice; indexing the
    first chosen column raises ``IndexError`` otherwise.

    Args:
        zeros_location: 2-D boolean array; truthy where the matrix is zero.
        choice: 2-D boolean array; a truthy entry marks a chosen cell.
        col_idx_with_no_choice: Column that currently needs a choice.

    Returns:
        A ``(row_idx, col_idx)`` pair identifying the choice to remove.
    """
    candidate_rows = np.where(zeros_location[:, col_idx_with_no_choice])[0]
    for candidate in candidate_rows:
        current_col = np.where(choice[candidate])[0][0]
        if find_row_idx_with_no_choice(zeros_location, choice, current_col) != -1:
            return candidate, current_col

    # No candidate frees a directly usable column: fall back to a random one.
    random.shuffle(candidate_rows)
    fallback_row = candidate_rows[0]
    fallback_col = np.where(choice[fallback_row])[0][0]
    return fallback_row, fallback_col

def cover_zeros(d):
    """Choose zeros of ``d`` and mark rows/columns for the covering step.

    Starting from an empty boolean ``choice`` array, the function repeatedly
    marks rows and columns and then adds (or swaps) choices on zero entries
    until one of its exit conditions is met. ``d`` is only read here.

    Marking rules, as implemented below:
      * a row is marked when it holds no choice, or when it holds a choice in
        a marked column;
      * a column is marked when it has a zero in a marked row.

    Args:
        d: 2-D array; zeros are located with ``d == 0``.

    Returns:
        A tuple ``(covered_rows, covered_cols, choice)`` where the first two
        are lists of booleans holding the row/column marks and ``choice`` is
        a boolean array with the shape of ``d``. When every row holds a
        choice, both mark lists are all ``False``.

    NOTE(review): the inner marking loop uses ``len(choice)`` for both axes,
    so it presumably expects a square matrix - confirm against callers.
    """
    zeros_location = (d == 0)
    choice = np.zeros(np.shape(d), dtype=bool)
    # Step 3 cover all zeros using minimum number of lines
    # NOTE(review): these two assignments are overwritten at the top of the
    # loop below before being read.
    covered_rows = [False] * np.shape(d)[0]
    covered_cols = [False] * np.shape(d)[1]

    while True:
        # Marks are rebuilt from scratch for the current set of choices.
        covered_rows = [False] * np.shape(d)[0]
        covered_cols = [False] * np.shape(d)[1]

        # mark all rows in which no choice has been made
        for r, row in enumerate(choice):
            if not np.any(row):
                covered_rows[r] = True

        # if no marked row left, every row holds a choice: done
        if not np.sum(covered_rows):
            # exit 1: nothing is marked
            return covered_rows, covered_cols, choice

        # mark all cols not already marked which have zeros in marked rows
        before = np.sum(covered_cols)
        mark_cols_have_zeros_in_marked_rows(zeros_location, covered_rows, covered_cols)

        # if no new marked cols
        if before == np.sum(covered_cols):
            # exit 2: the marked rows contain no zeros
            return covered_rows, covered_cols, choice

        # while there is some choice in every marked col
        while choice_exist_in_all_covered_col(choice, covered_cols):
            # mark all rows not already marked that have choices in marked col
            before = np.sum(covered_rows)
            for r in range(len(choice)):
                for c in range(len(choice)):
                    if covered_cols[c] and choice[r][c] and not covered_rows[r]:
                        covered_rows[r] = True

            if before == np.sum(covered_rows):
                # exit 3: marking added no row
                return covered_rows, covered_cols, choice

            before = np.sum(covered_cols)
            # mark all cols not already marked which have zeros in marked rows
            mark_cols_have_zeros_in_marked_rows(zeros_location, covered_rows, covered_cols)

            if before == np.sum(covered_cols):
                # exit 4: marking added no column
                return covered_rows, covered_cols, choice

        # find a marked col that doesn't have a choice
        col_idx_with_no_choice = find_col_idx_with_no_choice(choice, covered_cols)
        # NOTE(review): termination of this loop depends on the swaps chosen
        # by find_choice_row_col (which may pick randomly) - confirm.
        while col_idx_with_no_choice != -1:
            # find a row with a zero in this column that holds no choice yet
            choice_row_idx = find_row_idx_with_no_choice(zeros_location, choice, col_idx_with_no_choice)

            # -1 means no column was freed, which ends the loop
            choice_col_idx = -1
            if choice_row_idx == -1:
                # no free row: take a row that already has a choice and
                # remove its current choice
                choice_row_idx, choice_col_idx = find_choice_row_col(zeros_location, choice, col_idx_with_no_choice)

                choice[choice_row_idx, choice_col_idx] = False

            choice[choice_row_idx, col_idx_with_no_choice] = True

            # continue with the column that was just freed, if any
            col_idx_with_no_choice = choice_col_idx

In [5]:
def hungarian(d):
    """Find a minimum-cost assignment with the Hungarian method.

    The matrix is reduced row-wise and column-wise, then ``cover_zeros`` is
    called repeatedly; whenever the zeros can be covered with fewer lines
    than the matrix has rows, the matrix is adjusted and the covering step is
    repeated.

    The line count takes one line per marked column and one per unmarked
    row, so an entry is uncovered when its row is marked and its column is
    not.

    Args:
        d: 2-D cost array. It is modified in place; pass a copy if the
            original values are still needed.

    Returns:
        The boolean ``choice`` array returned by ``cover_zeros`` for the
        final matrix; truthy entries are the selected cells.
    """
    # Step 1: row reduction
    for i, row in enumerate(d):
        d[i] -= np.min(row)
    # Step 2: column reduction
    for i, col in enumerate(d.T):
        d[:, i] -= np.min(col)

    while True:
        # Step 3: cover all zeros using a minimum number of lines
        covered_rows, covered_cols, choice = cover_zeros(d)
        lines = np.sum(covered_cols) + len(covered_rows) - np.sum(covered_rows)
        # Step 4: test for optimality
        if lines >= np.shape(d)[0]:
            return choice

        # Step 5: take the smallest entry not covered by any line, subtract
        # it from every uncovered row and add it to every covered column,
        # then go back to step 3.
        marked_rows = np.asarray(covered_rows, dtype=bool)
        marked_cols = np.asarray(covered_cols, dtype=bool)
        # lines < n implies more marked rows than marked columns, so this
        # sub-matrix has at least one row and one column.
        uncovered = d[marked_rows][:, ~marked_cols]
        min_entry_not_covered = uncovered.min()
        d[marked_rows] -= min_entry_not_covered
        d[:, marked_cols] += min_entry_not_covered

In [6]:
# Solve on the working copy, then price the selected cells with the
# untouched cost matrix.
mask = hungarian(d)
total_cost = (d_origin * mask).sum()
print(mask)
print(total_cost)

[[False  True False]
 [ True False False]
 [False False  True]]
65
