In [566]:
import numpy as np
import random
import matplotlib.pyplot as plt
from ipywidgets import interactive
from IPython.display import display
import ipywidgets as widgets
# Alphabet of the puzzle: every cell of `data` is one of these two symbols.
symbols = ['B','.']
data_length = 10 # set it to >= 4

# Generate random data: each cell is drawn independently from `symbols`.
# NOTE(review): the RNG is not seeded, so a re-run produces a different array
# than the outputs saved in this notebook.
data = np.array([random.choice(symbols) for _ in range(data_length)])
# Number of 'B' cells in the generated data.
B_count = np.sum(data == 'B')
print(data)

['.' '.' '.' '.' 'B' '.' '.' '.' 'B' '.']


In [567]:
def get_longest_streak(data):
    '''
    Find the best alternating streak ("B . B . B ...") in data.

    A streak starts at a 'B' and is extended two cells at a time for as long
    as the next cell is '.' and the cell after it is 'B'. The score of a
    streak is the number of B's it contains.

    Input: data - sequence (list or np.array) of 'B' and '.' symbols
    Output: dict {'score', 'start_pos', 'end_pos'} with inclusive indices.
            - A top-scoring streak made of a single B is reported with
              start_pos=0 and end_pos=-1 (an empty span).
            - If data contains no B at all (or is empty) the result is
              {'score': 0, 'start_pos': 0, 'end_pos': -1}.
            - With one or two top-scoring streaks the first one is returned;
              with more, the streak with the smallest gap to its right-hand
              neighbour wins (the last streak is never a candidate).
    '''
    n = len(data)
    streaks = []
    i = 0
    while i < n:
        if data[i] == 'B':
            start_pos = i
            score = 1
            # Bounds are checked explicitly, so running off the end of the
            # data simply ends the streak instead of raising IndexError.
            while i + 2 < n and data[i + 1] == '.' and data[i + 2] == 'B':
                score += 1
                i += 2
            streaks.append({'score': score, 'start_pos': start_pos, 'end_pos': i})
        # continue scanning right after the streak (or after a '.')
        i += 1

    if not streaks:
        # no B anywhere: return an empty streak instead of failing on [0]
        return {'score': 0, 'start_pos': 0, 'end_pos': -1}

    # get all streaks with max_score
    max_score = max(streak['score'] for streak in streaks)
    max_score_streaks = [streak for streak in streaks if streak['score'] == max_score]

    for streak in max_score_streaks:
        # prevent zero length streaks: collapse them to the empty span
        if streak['start_pos'] == streak['end_pos']:
            streak['start_pos'] = 0
            streak['end_pos'] = -1

    if len(max_score_streaks) <= 2:
        return max_score_streaks[0]

    # gap between each top streak and the next one to its right
    distances = [
        {'distance': following['start_pos'] - current['end_pos'], 'streak': current}
        for current, following in zip(max_score_streaks, max_score_streaks[1:])
    ]

    # select streak with less distance between neighbour streak
    return min(distances, key=lambda x: x['distance'])['streak']
        
# Quick check: run the streak finder on the random data generated above.
longest_streak = get_longest_streak(data)
print(longest_streak) 

{'score': 1, 'start_pos': 0, 'end_pos': -1}


In [568]:
def swaps_to_streak(side_data, index_importance=True):
    '''
    Plan the swaps that turn one side of the data into an alternating streak.

    Input: side_data: np.array of side data, example: ['B', '.', '.', '.', '.', '.', 'B', 'B', '.']
           index_importance: Bool. When True (default) the B's are targeted at positions
               1, 3, 5, ... of side_data. When False, two start positions are tried (the index
               of the first B and the index after it), first on side_data and then on its
               reversed copy, and the plan with the fewest swaps is used.
    Output: tuple (swap_count, swaps, dots_added)
           swap_count: number of swaps in the chosen plan; for the example above it is 3.
           swaps: np.array of [target_index, B_index] pairs, indices relative to side_data
               (including any appended imaginary dots); an empty array when the side has no B.
           dots_added: number of imaginary '.' cells appended because the side had more B's than dots.
    Side effect: prints the swap count of the chosen plan.
    '''
    def correct_B_indices(B_indices,start_pos=1):
        # Reorder the B indices so that B's sharing the parity of start_pos
        # ("correct" B's) come first and the others ("incorrect" B's) come last.
        # Swaps consume this array from the end, so incorrect B's are used first.
        even = start_pos % 2 == 0
        # indices with the same parity as start_pos; used only once the incorrect ones run out
        correct_Bs = B_indices[B_indices % 2 != even]
 
        # indices with the opposite parity; used first
        incorrect_Bs = B_indices[B_indices % 2 == even]
        B_indices = np.concatenate((correct_Bs, incorrect_Bs))
        return B_indices

    B_indices = np.where(side_data == 'B')[0]

    b_count = len(B_indices)
    if not b_count:
        # no B's on this side
        return (0, np.array([]), 0)

    dot_count = len(side_data) - b_count
    dots_added = 0

    swaps = [] # [target_index, B_index] pairs of the plan currently being built
    if dot_count < b_count:
        # Not enough dots to separate the B's: append imaginary dots until the
        # counts match (original note: they are already on the other side).
        dots_added = b_count - dot_count
        imaginary_dots = np.array(['.'] * (b_count - dot_count))
        side_data = np.concatenate((side_data, imaginary_dots))

    solutions = []
    start_positions = [1]
    if not index_importance:
        start_positions = [B_indices[0], B_indices[0]+1] # find which is better, even or odd start position

    # One pass when indices matter; otherwise a second pass on the reversed side.
    for _ in range(2 if not index_importance else 1):
        for start_pos in start_positions:
            B_correct = correct_B_indices(B_indices, start_pos=start_pos) # B's still available, incorrect ones last

            # visit every second cell starting at start_pos: these are the target cells for B's
            for i in range(start_pos, len(side_data), 2):
                if not len(B_correct):
                    # no more B's to swap
                    break
            
                if side_data[i] == 'B':
                    if i not in B_correct:
                        # skip because this B was used in swap
                        continue
                    # B in correct position, prevent it from swapping in future
                    B_correct = B_correct[B_correct != i]

                elif side_data[i] == '.':
                    # dot is in incorrect position, so swap it with the last available B
                    swaps.append([i, B_correct[-1]])
                    B_correct = np.delete(B_correct, -1)
            # keep the plan only if every B was placed
            if not len(B_correct):
                solutions.append(swaps)
            swaps = []
        # NOTE(review): B_indices and start_positions are not recomputed for the
        # reversed array, and swaps recorded during the second pass are not mapped
        # back to the original orientation - confirm this is intended.
        side_data = np.flip(side_data)

    # NOTE(review): min() raises ValueError if `solutions` is empty, i.e. when no
    # start position managed to place every B - verify this cannot happen for callers.
    best_solution = min(solutions, key=lambda x: len(x))
    print('Swaps to make a streak on this side:', len(best_solution))
    return (len(best_solution), np.array(best_solution), dots_added)



# Manual check of swaps_to_streak on both sides of the longest streak.
# Alternative hand-written input kept for experiments:
# # side = np.array(['B', '.', 'B', 'B', '.', '.', 'B', '.', '.', 'B'])
print(data)
# The left side is reversed so that index 0 is the cell next to the streak.
left_side = np.flip((data[:longest_streak['start_pos']]))
right_side = data[longest_streak['end_pos']+1:]
# right_side = np.array(['.', '.', 'B', 'B', '.', 'B', '.', 'B', '.', 'B'])
print(swaps_to_streak(left_side)) # process left side
print(swaps_to_streak(right_side)) # process right side (default index_importance=True)



['.' '.' '.' '.' 'B' '.' '.' '.' 'B' '.']
(0, array([], dtype=float64), 0)
swaps: [[1, 8]]
swaps: [[1, 8], [3, 4]]
Swaps to make a streak on this side: 2
(2, array([[1, 8],
       [3, 4]], dtype=int64), 0)


In [569]:
def place_buckets(data, longest_streak):
    '''
    Plan the swaps that arrange the B's of data into an alternating pattern.

    Input: data - np.array which has only two unique values B and . Example [. . B . B . . B]
           longest_streak - dict {'score', 'start_pos', 'end_pos'} describing the streak
               that is kept in place; both sides are rearranged around it.
    Output: tuple (swap_count, swaps, data)
           swap_count - total number of swaps for both sides, or -1 if it is impossible
               (more B's than dots, or fewer than two B's).
           swaps - np.array of index pairs to swap; indices refer to the returned data.
               ([] when impossible.)
           data - the input data, extended with imaginary '.' cells on the left and/or
               right when a side needed extra room. ([] when impossible.)
    '''
    B_count = np.sum(data == 'B')
    D_count = np.sum(data == '.')
    if D_count - B_count < 0 or B_count < 2:
        # it is impossible to sort, not enough space
        return -1, [], []

    start_pos = longest_streak['start_pos']
    end_pos = longest_streak['end_pos']

    # flip the left side so that, like the right side, index 0 is the cell next to the streak
    left_side = np.flip(data[:start_pos], axis=0)
    bs_left = np.sum(left_side == 'B')

    right_side = data[end_pos + 1:]
    bs_right = np.sum(right_side == 'B')

    index_importance = True
    if (not bs_left or not bs_right) and longest_streak['score'] < 2:
        # if one side not contains B, then indexes in other side are not important
        print('One side does not contain B, so indexes in other side are not important')
        index_importance = False

    left_side_swap_count, left_side_swaps, dots_added_left = swaps_to_streak(left_side, index_importance)
    right_side_swap_count, right_side_swaps, dots_added_right = swaps_to_streak(right_side, index_importance)

    left_side_swaps = np.array(left_side_swaps)
    right_side_swaps = np.array(right_side_swaps)

    # extend the data with the imaginary dots so the swaps can be visualized
    if dots_added_right > 0:
        print('Dots added to the right side:', dots_added_right)
        data = np.concatenate((data, np.array(['.'] * dots_added_right)))
    if dots_added_left > 0:
        print('Dots added to the left side:', dots_added_left)
        data = np.concatenate((np.array(['.'] * dots_added_left), data))

    # Map side-relative indices to indices of the (possibly padded) data.
    # Left side: flipped index j is original index start_pos - 1 - j; imaginary dots
    # (j >= start_pos) fall into the prepended padding. Prepending shifts every
    # original index by dots_added_left; appending on the right shifts nothing.
    left_side_swaps = dots_added_left + start_pos - 1 - left_side_swaps
    # Right side: index k is original index end_pos + 1 + k, shifted by the left padding.
    right_side_swaps = right_side_swaps + end_pos + 1 + dots_added_left

    if left_side_swaps.ndim != right_side_swaps.ndim:
        # one side has no swaps (empty 1-D array): keep only the side that has some
        if left_side_swaps.ndim > right_side_swaps.ndim:
            swaps = left_side_swaps
        else:
            swaps = right_side_swaps
    else:
        swaps = np.concatenate([left_side_swaps, right_side_swaps])

    swap_count = left_side_swap_count + right_side_swap_count

    return (swap_count, swaps, data)


In [570]:
def visualize(data, swaps, iter):
    '''
    Draw the data as a row of coloured letters after applying the first `iter` swaps.

    Input: data - sequence of 'B' / '.' symbols
           swaps - list of tuples with indices of elements to swap like (1,2)
           iter - how many swaps (from the start of `swaps`) to apply; 0 shows the initial data
    Output: None (prints progress and shows a matplotlib figure)
    '''
    cell_count = len(data)

    plt.figure()
    # only the x axis carries information: one tick per cell
    plt.yticks([])
    plt.xticks(list(range(cell_count)))
    plt.xlim(-1, cell_count)
    plt.ylim(-1, 1)

    # each cell becomes a small record: B is drawn green, everything else as a blue D
    cells = np.array(data, dtype=object)
    for pos, value in enumerate(cells):
        if value == 'B':
            cells[pos] = {'color': 'green', 'symbol': 'B'}
        else:
            cells[pos] = {'color': 'blue', 'symbol': 'D'}

    if iter == 0:
        print("Initial data")
    else:
        print("Iter:", iter)
        last_step = iter - 1
        for step in range(iter):
            first, second = swaps[step][0], swaps[step][1]
            if step == last_step:
                # highlight the most recent swap in red
                print("Swapping:", swaps[step])
                cells[first]['color'] = 'red'
                cells[second]['color'] = 'red'
            cells[first], cells[second] = cells[second], cells[first]

    # plot symbols
    for pos, cell in enumerate(cells):
        plt.text(pos, 0, cell['symbol'], horizontalalignment='center',
                 verticalalignment='center', color=cell['color'])

    plt.show()


In [571]:
# Find the streak to keep, plan the swaps around it and report the result.
longest_streak = get_longest_streak(data)
print("The longest streak of B's:", longest_streak)
print('Current data:\n', data)
print("="*10, " RESULT ", "="*10)
swap_count, swaps, visual_data = place_buckets(data, longest_streak)
print('Required swaps:', swap_count)

if swap_count != -1:
    # one slider step per swap; step 0 shows the untouched data
    iter_visual = interactive(
        visualize,
        data=widgets.fixed(visual_data),
        swaps=widgets.fixed(swaps),
        iter=widgets.IntSlider(value=0, min=0, max=swap_count, step=1),
    )
    display(iter_visual)
else:
    print('It is impossible to sort this data')



The longest streak of B's: {'score': 1, 'start_pos': 0, 'end_pos': -1}
Current data:
 ['.' '.' '.' '.' 'B' '.' '.' '.' 'B' '.']
One side does not contain B, so indexes in other side are not important
swaps: [[6, 8]]
swaps: [[5, 8]]
swaps: [[5, 8], [7, 4]]
swaps: [[4, 8]]
swaps: [[4, 8], [6, 4]]
swaps: [[7, 8]]
swaps: [[7, 8], [9, 4]]
Swaps to make a streak on this side: 1
Required swaps: 1


interactive(children=(IntSlider(value=0, description='iter', max=1), Output()), _dom_classes=('widget-interact…