In [2]:
import numpy as np

In [108]:
np.sqrt(10)*np.random.randn(10)

array([-2.15576509, -2.05213713,  0.72147374, -0.47810265, -0.34656381,
       -0.0570329 ,  7.11202744,  0.05845679,  0.46165574,  1.72149097])

In [235]:
"""
Noisy channel
"""    
def channel(chanInput):
    chanInput = np.clip(chanInput,-1,1)
    erasedIndex = np.random.randint(3)
    print("ERASED INDEX IS ", erasedIndex)
    chanInput[erasedIndex:len(chanInput):3] = 0
    return chanInput + np.sqrt(10)*np.random.randn(len(chanInput))

"""
Noiseless channel
"""
def easyChannel(chanInput):
    chanInput = np.clip(chanInput,-1,1)
    erasedIndex = np.random.randint(3)
    chanInput[erasedIndex:len(chanInput):3] = 0
    return chanInput
    

"""
Return the array of bits given a number in [0,255]
"""
def getbits(num):
    bits = []
    for i in range(7,-1,-1):
        bits.append((num >> i)&1)
    return bits

"""
Given a string, transforms into into its binary form and replaces each bit by its corresponding codeword
"""
def encode(input,codebook):
    arr = np.array(bytearray(input, 'utf-8')).astype('int')
    output = np.empty
    for i in range(len(arr)):
        num = arr[i]
        bits = getbits(num)
        for j in range(8):
            codeword = np.array(codebook[bits[j]])
            output = np.hstack((output,codeword))
        
    #remove empty in the beginning
    return output[1:]


"""
GIven a codeword, returns whether it was H=0  or H=1
"""
def decode_from_codeword(arr):
    
    
    for j in range(len(arr)):
        if arr[j] == 1:
            return 0
        elif arr[j] == -1:
            return 1
    
    raise ValueError("array isn't a valid codeword")
    

"""
Given an array of 8 bits, compute the corresponding byte
"""
def get_byte_from_arr(arr):
    if(len(arr) != 8):
        raise ValueError("wrong length,should be 8")
        
    byte = 0
    
    for i in range(8):
        byte += arr[i]*(2**(8-(i+1)))
    
    return byte
                     
"""
predict the erased index 
algo: argmin_{j}{sum over |Yi^2 where i is part of the group (Z/3Z) + j }
"""

def predict_erased(input):
    input = input*input
    min = np.inf
    min_index= 0
    
    for i in range(3):
        sum = np.sum(input[i:len(input):3])
        if sum < min:
            min = sum
            min_index = i

    return min_index

def min_distance_prediction(arr,codebook):
    min = -np.inf
    min_index= 0
    
    for i in range(len(codebook.keys())):
        dist = np.dot(arr,codebook[i]) ##compute distance
        
        if dist > min:
            min = dist
            min_index = i
            
    return min_index

"""
This function takes a noisy real-valued 1D np.array of size code_length*k, k positive integer
It assumes that a third of the coordinates were erased and that there's Gaussian Noise of mean 0 and variance 10.

It first predicts the erased index and puts every such erased coordinate at 0.

It then does minimum-distance decoding for sub_array of size code_length and replaces it by the found codeword
""" 
def prediction(input, codebook):
    
    erasedIndex = predict_erased(input)
    print("PREDICTED ERASED INDEX IS ", erasedIndex)
    input[erasedIndex:len(input):3] = 0
    
    code_length = len(codebook[0])
    
    for i in range(0,input.size,code_length):
        sub_arr = input[i:i+code_length]
        
        predicted_index = min_distance_prediction(sub_arr,codebook)
        input[i:i+code_length] = codebook[predicted_index]
            
    return input
    
"""
This function takes a 1D np.array of size (8*6)k and returns the string.
It assumes that input is {-1,1}^((8*6)k) with a third of the values being zero
"""
def decode_after_prediction(input,codebook):
    output = []
    code_length = len(codebook[0])
    outer_step = 8*code_length
    inner_step = code_length
    for i in range(0,input.size,outer_step):
        bits = []
        arr = input[i:i+outer_step]
        for j in range(0,arr.size,inner_step):
            bits.append(decode_from_codeword(arr[j:j+inner_step]))
            
        output.append(get_byte_from_arr(bits))
            
    return str(bytearray(output),'utf-8')

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

In [272]:
input='asda@¦#°#@°@§#@#§hfasugvbvavàé¨éahsdffffffffffffpoWWR21048725§'
code_length = 100
c_0 = [1 for x in range(code_length)]
c_1 = [-1 for x in range(code_length)]
codebook = {0:c_0,1:c_1}
encoded = encode(input,codebook)
encoded_loss = channel(encoded)
pred = prediction(encoded_loss,codebook)
decoded = decode_after_prediction(pred,codebook)
print(decoded)
print(decoded==input)

ERASED INDEX IS  2
PREDICTED ERASED INDEX IS  2
asda@¦#²#@°@§#@#§hFasug6bvavàé¨éihsdfbffffffbfffpoWWRr1048725§
False


In [30]:
encoded = encode(input)

In [31]:
encoded

array([1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, -1, -1, -1, -1, -1, -1], dtype=object)

In [50]:
encoded_loss = channel(encoded)

ERASED INDEX IS  2


In [51]:
encoded_loss

array([1.0878181579679411, 3.7425516243894585, 1.4435508965388257,
       -1.7470074562080806, -4.235063152136654, 4.9281124867210035,
       2.907786866291487, -3.9577478942203332, 0.6820585413558796,
       -2.3820783571949997, -2.0241208423675836, -2.899222032575209,
       2.3149263654100047, -6.069114561814974, 2.051842447614904,
       -0.28051362226501575, -1.6985784542822937, -2.7754423682397062,
       0.5427252282231689, -2.3115568097410546, -0.6784140504578867,
       1.0042438661885635, 1.3233113125154958, -1.4390815110881896,
       -1.8065456741660761, 1.5180241804618055, 6.35530508148671,
       1.2788779929667315, 1.3304195292142262, -1.2808052105477472,
       -2.5584389377749353, 2.0829011260797525, -0.8126112925273041,
       1.3689044841341695, -3.6921991001611527, 0.28923184782704575,
       -3.0362761759780366, 3.969089109432583, -1.061513422070684,
       -3.5012293925075673, 2.0480871393613267, -1.8286588473922307,
       -0.5903458028600257, 3.292130282510856, 

In [52]:
erasedIndex = predict_erased(encoded_loss)

In [53]:
erasedIndex

2

In [121]:
arr =np.array([-1 for x in range(4)])

In [123]:
arr*arr

array([1, 1, 1, 1])