# Bitwise Majority Alignment

# Main BMA Algorithm

In [9]:
import numpy as np
import random
import csv
import time
import math

In [10]:
def bmaByCode(receivedStringArray):
    """Estimate a bit string by majority vote using skip-ahead read pointers.

    Every row of ``receivedStringArray`` gets a read pointer that starts at
    column 0.  For each output position the bits under all pointers are
    tallied and the more frequent value is emitted (a tie yields 0).  Each
    pointer then jumps forward to the next column of its row that holds the
    emitted bit.  If no later column matches, the pointer is parked on the
    last column; a pointer that is already on the last column stays there.

    NOTE(review): this differs from ``bma``, which moves a pointer by exactly
    one column and only when its bit agreed with the vote.  Because of the
    jump, identical rows are not necessarily reproduced (three rows of
    [1, 0, 1, 1, 0] give [1, 1, 1, 0, 0]) -- confirm whether this is intended.

    Parameters
    ----------
    receivedStringArray : numpy.ndarray
        2-D integer array with one received string per row; the values are
        used directly as indices into a two-slot tally, so they are expected
        to be 0 or 1.

    Returns
    -------
    numpy.ndarray
        1-D integer array with one estimated bit per column of the input.
    """
    noOfStrings, length = receivedStringArray.shape

    pointers = np.zeros(noOfStrings, dtype=int)
    transmittedString = np.zeros(length, dtype=int)

    for position in range(length):
        # Tally the bit currently under each row's pointer.
        votes = [0, 0]
        for row in range(noOfStrings):
            votes[receivedStringArray[row, pointers[row]]] += 1
        majorityBit = np.argmax(votes)  # first maximum wins, so ties give 0
        transmittedString[position] = majorityBit

        for row in range(noOfStrings):
            firstCandidate = pointers[row] + 1
            if firstCandidate >= length:
                # Nothing left to scan: the pointer stays where it is.
                continue
            scan = firstCandidate
            while scan < length and receivedStringArray[row, scan] != majorityBit:
                scan += 1
            # scan == length means no later match was found; park on the last column.
            pointers[row] = min(scan, length - 1)

    return transmittedString

In [11]:
def bma(receivedStringArray):
    """Reconstruct a bit string from several reads by Bitwise Majority Alignment.

    Every row has a read pointer that starts at column 0.  For each output
    position the bits under all pointers are tallied and the more frequent
    value is emitted (a tie yields 0).  Pointers whose bit agreed with the
    vote move one column to the right; the others stay put, which treats the
    disagreeing row as having lost the emitted bit.

    Every column is treated as data, so any padding in a row takes part in
    the vote like a real bit.

    Parameters
    ----------
    receivedStringArray : array-like
        2-D collection (numpy array or nested sequences) of 0/1 values with
        one received string per row.

    Returns
    -------
    numpy.ndarray
        1-D integer array with one reconstructed bit per column of the input.

    Raises
    ------
    ValueError
        If the input is not 2-D or contains values other than 0 and 1.
    """
    received = np.asarray(receivedStringArray)
    if received.ndim != 2:
        raise ValueError("receivedStringArray must be 2-D (one row per received string)")
    if not ((received == 0) | (received == 1)).all():
        raise ValueError("receivedStringArray must contain only 0s and 1s")

    noOfStrings, length = received.shape
    rowIndex = np.arange(noOfStrings)
    pointers = np.zeros(noOfStrings, dtype=int)
    transmittedString = np.zeros(length, dtype=int)

    for position in range(length):
        # A pointer advances at most once per position, so it never exceeds
        # `position` here and the lookup stays in bounds.
        current = received[rowIndex, pointers]
        ones = np.count_nonzero(current)
        majorityBit = 1 if ones > noOfStrings - ones else 0  # tie -> 0
        transmittedString[position] = majorityBit
        # Only rows that agreed with the vote consume their current bit.
        pointers += current == majorityBit

    return transmittedString

In [12]:
# Smoke test: three nearly identical rows; the majority vote decides each bit.
print(
    bma(
        np.array(
            [
                [1, 1, 1, 1],
                [1, 1, 1, 0],
                [1, 1, 1, 1],
            ]
        )
    )
)

[1 1 1 1]


# Dataset Generation

In [13]:
def createDataset(length, noOfTransmissions, prob, seed=None):
    """Draw a random bit string and simulate sending it through a deletion channel.

    A uniformly random binary string is generated, then each transmission
    independently drops every bit with probability ``prob`` percent.  The
    surviving bits are packed to the left of the row and the remainder of
    the row is filled with zeros so that all rows have ``length`` columns.

    Parameters
    ----------
    length : int
        Number of bits in the original string.
    noOfTransmissions : int
        Number of received copies (rows) to generate.
    prob : float
        Per-bit deletion probability as a PERCENTAGE in [0, 100]
        (10 means 10 %, not 0.10).
    seed : optional
        When given, all draws come from a private ``random.Random(seed)`` so
        the dataset is reproducible and the global ``random`` state is left
        untouched.  When ``None`` (default) the module-level ``random``
        functions are used.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        The original string, shape ``(length,)``, and the received matrix,
        shape ``(noOfTransmissions, length)``; both have integer dtype.

    Raises
    ------
    ValueError
        If ``prob`` is not within [0, 100].
    """
    if not 0 <= prob <= 100:
        # A value outside this range would hand random.choices a negative weight.
        raise ValueError("prob must be a percentage between 0 and 100, got {!r}".format(prob))

    rng = random if seed is None else random.Random(seed)

    transmittedString = np.array([rng.randint(0, 1) for _ in range(length)], dtype=int)
    matrix = np.zeros((noOfTransmissions, length), dtype=int)
    deleteKeepWeights = (prob, 100 - prob)

    for row in range(noOfTransmissions):
        # A draw of 0 deletes the bit, a draw of 1 keeps it.
        survivors = [
            bit
            for bit in transmittedString
            if rng.choices([0, 1], weights=deleteKeepWeights, k=1) != [0]
        ]
        if survivors:
            # Left-pack the surviving bits; the tail keeps its zero padding.
            matrix[row, :len(survivors)] = survivors

    return transmittedString, matrix

In [14]:
# Demo: one 5-bit string sent 10 times with a 10 % per-bit deletion probability,
# then reconstructed with bma and printed next to the original.
correctString, generatedDataset = createDataset(length=5, noOfTransmissions=10, prob=10)
calculatedString = bma(np.array(generatedDataset))

print(
    'Original String: {}\nGenerated Matrix:\n {}\nCalculated String: {}'.format(
        correctString,
        generatedDataset,
        calculatedString,
    )
)

Original String: [1 0 1 1 0]
Generated Matrix:
 [[1 0 1 1 0]
 [1 0 1 1 0]
 [1 0 1 0 0]
 [1 0 1 1 0]
 [1 0 1 0 0]
 [1 0 1 1 0]
 [1 0 1 1 0]
 [1 0 1 0 0]
 [1 1 1 0 0]
 [1 0 1 1 0]]
Calculated String: [1 0 1 1 0]


# Validation of the Algorithm

In [15]:
# Disabled benchmark sweep (the whole cell is commented out; re-enabling it
# overwrites 'BMA Results.csv').  For every combination of string length,
# number of transmissions and deletion probability (in percent) it runs 1000
# createDataset + bma trials and writes one CSV row holding the percentage of
# trials whose reconstruction differs from the original string and the
# wall-clock time taken by those 1000 trials.
# NOTE(review): presumably disabled because of its runtime -- confirm.

# prob_pool = [0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0, 30.0]

# header = ['Length', 'No. of Transmits', 'Deletion Probability', 'Percentage Error', 'Time Taken']

# file = 'BMA Results.csv'

# with open(file, 'w',newline='') as csvfile:
#     csvwriter = csv.writer(csvfile)
#     csvwriter.writerow(header)
#     for length in range(10,100, 5):
#         for transmit in range(1, length+1,math.isqrt(length)):
#             for prob in prob_pool:
#                 correct = 0
#                 start = time.time()
#                 for i in range(1000):
#                     correctString, generatedDataset = createDataset(length,transmit,prob)
#                     calculatedString = bma(np.array(generatedDataset))

#                     if(str(correctString) == str(calculatedString)):
#                         correct += 1
#                 per = (1000 - correct)*0.1
#                 end = time.time()
#                 tt = end - start

#                 temp = [length, transmit, prob, per,tt]
#                 csvwriter.writerow(temp)

We now use more realistic parameter values for the original DNA reconstruction problem. The ranges are as follows:

- Deletion probabilities: 0.1% to 10%, increasing roughly threefold at each step (0.1%, 0.3%, 1%, 3%, 10%)
- Length of the original string: 10 to 95 in steps of 5
- Number of transmissions: 3 to 48 in steps of 3

In [16]:
# Disabled benchmark sweep over the narrower parameter grid (the whole cell is
# commented out; re-enabling it overwrites 'BMA Results(Realistic).csv').
# Lengths come from range(10, 100, 5), transmission counts from
# range(3, 50, 3) and deletion probabilities (in percent) from prob_pool.
# Each combination runs 1000 createDataset + bma trials and writes one CSV row
# with the percentage of trials whose reconstruction differs from the original
# string and the wall-clock time taken by those 1000 trials.
# NOTE(review): presumably disabled because of its runtime -- confirm.

# prob_pool = [0.1, 0.3, 1.0, 3.0, 10.0]

# header = ['Length', 'No. of Transmits', 'Deletion Probability', 'Percentage Error', 'Time Taken']

# file = 'BMA Results(Realistic).csv'

# with open(file, 'w',newline='') as csvfile:
#     csvwriter = csv.writer(csvfile)
#     csvwriter.writerow(header)
#     for length in range(10,100, 5):
#         for transmit in range(3, 50, 3):
#             for prob in prob_pool:
#                 correct = 0
#                 start = time.time()
#                 for i in range(1000):
#                     correctString, generatedDataset = createDataset(length,transmit,prob)
#                     calculatedString = bma(np.array(generatedDataset))
     
#                     if(str(correctString) == str(calculatedString)):
#                         correct += 1
#                 per = (1000 - correct)*0.1
#                 end = time.time()
#                 tt = end - start

#                 temp = [length, transmit, prob, per,tt]
#                 csvwriter.writerow(temp)