In [29]:
import numpy as np
import random


symbols = ['B','.']
data_length = 20 # set it to >= 4
# Fixed seed so Restart & Run All reproduces the same data (and therefore the
# same streak / swap numbers). Set SEED = None to draw a fresh sample each run.
SEED = 42

# generate random data (reproducible for a given SEED)
random.seed(SEED)
data = np.array([random.choice(symbols) for _ in range(data_length)])
B_count = np.sum(data == 'B')
print(data)

['B' 'B' '.' 'B' '.' 'B' '.' 'B' '.' '.' '.' '.' '.' '.' 'B' '.' 'B' 'B'
 '.' 'B']


In [30]:
def get_longest_streak(data):
    '''
    Find the best alternating streak "B . B . B ..." in data.

    Input: sequence (np.array or list) of 'B' and '.' symbols.
    Output: dict {'score', 'start_pos', 'end_pos'} where score is the number
            of B's in the streak and start_pos / end_pos are the indices of
            its first and last B.

    Special cases:
      * A streak made of a single B (start_pos == end_pos) is reported as the
        zero-length span start_pos=0, end_pos=-1, so that data[:start_pos] is
        empty and data[end_pos+1:] is the whole array.
      * If data contains no B at all (including empty data) the same
        zero-length span is returned with score 0 instead of failing.
      * With more than two streaks sharing the best score, the one with the
        smallest gap to the next best-scoring streak is returned; otherwise
        the first one.
    '''
    size = len(data)
    streaks = []
    max_score = 0
    i = 0
    while i < size:
        if data[i] == 'B':
            start_pos = i
            score = 1
            # extend the streak while the next two symbols are ". B"
            while i + 2 < size and data[i + 1] == '.' and data[i + 2] == 'B':
                score += 1
                i += 2
            max_score = max(max_score, score)
            streaks.append({'score': score, 'start_pos': start_pos, 'end_pos': i})
        i += 1

    if not streaks:
        # no B anywhere: report an empty streak rather than indexing an empty list
        return {'score': 0, 'start_pos': 0, 'end_pos': -1}

    # get all streaks with max_score
    max_score_streaks = [streak for streak in streaks if streak['score'] == max_score]

    for streak in max_score_streaks:
        # prevent zero length streaks
        if streak['start_pos'] == streak['end_pos']:
            streak['start_pos'] = 0
            streak['end_pos'] = -1

    if len(max_score_streaks) <= 2:
        return max_score_streaks[0]

    # select streak with less distance between neighbour streak
    # (min keeps the first pair on ties, like a left-to-right scan)
    neighbours = zip(max_score_streaks, max_score_streaks[1:])
    closest_pair = min(neighbours, key=lambda pair: pair[1]['start_pos'] - pair[0]['end_pos'])
    return closest_pair[0]
        
# Run the streak search on the generated data; the printed dict holds the
# streak's score plus the indices of its first and last B.
longest_streak = get_longest_streak(data)
print(longest_streak) 

{'score': 4, 'start_pos': 1, 'end_pos': 7}


In [31]:
def swaps_to_streak(side_data, index_importance=True):
    '''
    Count the swaps needed to arrange the B's of one side into an alternating
    streak (a B on every second index).

    Input: side_data: np.array of 'B' / '.' symbols,
               example: ['B', '.', '.', '.', '.', '.', 'B', 'B', '.']
           index_importance: Bool.
               True  - B's must land on indices 1, 3, 5, ... of side_data.
               False - the streak may start at the first B or right after it,
                       and the side is also tried reversed; the cheapest
                       layout that fits wins.
    Output: number of swaps needed to continue the streak, for the example
            above it will be a 3, to make a streak like
            ['.', 'B', '.', 'B', '.', 'B', ...]. Returns 0 when the side
            holds no B.
    Raises: ValueError if none of the tried layouts can hold every B.
    Side effects: prints the processed side(s) and the resulting swap count.
    '''
    def correct_B_indices(B_indices, start_pos=1):
        # B's whose index parity matches start_pos are already "correct".
        # They are put first so that swaps, which consume from the end,
        # use up the misplaced B's before touching the correct ones.
        on_parity = B_indices % 2 == start_pos % 2
        return np.concatenate((B_indices[on_parity], B_indices[~on_parity]))

    b_count = len(np.where(side_data == 'B')[0])
    if not b_count:
        # no B's on this side
        return 0

    dot_count = len(side_data) - b_count
    if dot_count < b_count:
        # add N imaginary dots, they are already on other side
        imaginary_dots = np.array(['.'] * (b_count - dot_count))
        side_data = np.concatenate((side_data, imaginary_dots))

    solutions = []
    for _ in range(1 if index_importance else 2):
        print('Processing side:', side_data)
        # Indices must be recomputed per orientation: after np.flip the
        # positions found on the unflipped array no longer point at B's.
        B_indices = np.where(side_data == 'B')[0]
        if index_importance:
            start_positions = [1]
        else:
            # find which is better, starting on the first B or right after it
            first_b = int(B_indices[0])
            start_positions = [first_b, first_b + 1]

        for start_pos in start_positions:
            swaps = [] # indices of swaps
            B_correct = correct_B_indices(B_indices, start_pos=start_pos)
            for i in range(start_pos, len(side_data), 2):
                if not len(B_correct):
                    # no more B's to swap
                    break

                if side_data[i] == 'B':
                    if i not in B_correct:
                        # skip because this B was used in swap
                        continue
                    # B in correct position, prevent it from swapping in future
                    B_correct = B_correct[B_correct != i]

                elif side_data[i] == '.':
                    # dot is in incorrect position, so swap it
                    swaps.append([i, B_correct[-1]])
                    B_correct = np.delete(B_correct, -1)
            if not len(B_correct):
                # every B found a slot, so this layout is feasible
                solutions.append(swaps)
        side_data = np.flip(side_data)

    if not solutions:
        raise ValueError('no tried start position leaves room for every B on this side')

    best_solution = min(solutions, key=len)
    print('Swaps to make a streak on this side:', len(best_solution))
    return len(best_solution)



# Alternative hand-written input kept for experimenting (commented out):
# # side = np.array(['B', '.', 'B', 'B', '.', '.', 'B', '.', '.', 'B'])
print(data)
# whole array, with index_importance=False
print(swaps_to_streak(data, False))
# part before the streak, reversed so that index 0 is the element next to the streak
left_side = np.flip((data[:longest_streak['start_pos']]))
# part after the streak's last B
right_side = data[longest_streak['end_pos']+1:]
# right_side = np.array(['.', '.', 'B', 'B', '.', 'B', '.', 'B', '.', 'B'])
print(swaps_to_streak(left_side)) # process left side
print(swaps_to_streak(right_side)) # process right side (index_importance defaults to True here)



['B' 'B' '.' 'B' '.' 'B' '.' 'B' '.' '.' '.' '.' '.' '.' 'B' '.' 'B' 'B'
 '.' 'B']
Processing side: ['B' 'B' '.' 'B' '.' 'B' '.' 'B' '.' '.' '.' '.' '.' '.' 'B' '.' 'B' 'B'
 '.' 'B']
Processing side: ['B' '.' 'B' 'B' '.' 'B' '.' '.' '.' '.' '.' '.' 'B' '.' 'B' '.' 'B' '.'
 'B' 'B']
Swaps to make a streak on this side: 4
4
Processing side: ['B' '.']
Swaps to make a streak on this side: 1
1
Processing side: ['.' '.' '.' '.' '.' '.' 'B' '.' 'B' 'B' '.' 'B']
Swaps to make a streak on this side: 4
4


In [32]:
def place_buckets(data, longest_streak):
    '''
    Total swaps needed to spread the B's of data around an existing streak.

    Input: data - np.array holding only 'B' and '.', e.g. [. . B . B . . B]
           longest_streak - dict with 'score', 'start_pos' and 'end_pos',
                            as produced by get_longest_streak
    Output: swaps for the part before the streak plus swaps for the part
            after it, each computed by swaps_to_streak.
            Returns -1 when there are fewer dots than B's or fewer than
            two B's.
    '''
    bucket_total = np.sum(data == 'B')
    dot_total = np.sum(data == '.')
    if dot_total < bucket_total or bucket_total < 2:
        # it is impossible to sort, not enough space
        return -1

    # part before the streak, reversed so it is laid out like the part after it
    before_streak = np.flip(data[:longest_streak['start_pos']], axis=0)
    after_streak = data[longest_streak['end_pos'] + 1:]

    buckets_before = np.sum(before_streak == 'B')
    buckets_after = np.sum(after_streak == 'B')

    one_side_is_empty = not (buckets_before and buckets_after)
    if one_side_is_empty and longest_streak['score'] < 2:
        # if one side not contains B, then indexes in other side are not important
        print('One side does not contain B, so indexes in other side are not important')
        index_importance = False
    else:
        index_importance = True

    swaps_before = swaps_to_streak(before_streak, index_importance)
    swaps_after = swaps_to_streak(after_streak, index_importance)
    return 0 + swaps_before + swaps_after


In [33]:
# data = np.array(['.','.','B','B','.','B','.','.','.','B'])
# Recompute the streak for the current data, then report the total swap count.
longest_streak = get_longest_streak(data)
print("The longest streak of B's:", longest_streak)
print('Current data:\n', data)
print("=" * 10, " RESULT ", "=" * 10)
swap_count = place_buckets(data, longest_streak)
if swap_count != -1:
    print('Required swaps:', swap_count)
else:
    # place_buckets signals an unsolvable layout with -1
    print('It is impossible to sort this data')




The longest streak of B's: {'score': 4, 'start_pos': 1, 'end_pos': 7}
Current data:
 ['B' 'B' '.' 'B' '.' 'B' '.' 'B' '.' '.' '.' '.' '.' '.' 'B' '.' 'B' 'B'
 '.' 'B']
Processing side: ['B' '.']
Swaps to make a streak on this side: 1
Processing side: ['.' '.' '.' '.' '.' '.' 'B' '.' 'B' 'B' '.' 'B']
Swaps to make a streak on this side: 4
Required swaps: 5
