# Ciphertext Decryption Using MCMC

In [19]:
import random
import numpy as np
import re
import string

In [201]:
def encrypt_or_decrypt(text: str, mapping: dict) -> str:
    """Substitute every character of ``text`` through ``mapping``.

    The same routine serves for both encryption and decryption; the
    direction depends only on which way ``mapping`` points.

    Args:
        text: String whose characters are all keys of ``mapping``.
        mapping: Dict from a character to the string that replaces it.

    Returns:
        The replacements concatenated in input order.

    Raises:
        KeyError: If a character of ``text`` is not a key of ``mapping``.
    """
    pieces = []
    for symbol in text:
        pieces.append(mapping[symbol])
    return "".join(pieces)

In [202]:
def prepare_empirical_freq_normalized(path_to_file, pseudo_count=2.0):
    """Estimate a row-normalized character bigram transition matrix from a text file.

    Each line is lower-cased and stripped of everything except ``a``-``z``
    and the space character. Consecutive characters within a cleaned line
    are counted as bigrams; no bigram spans a line break, because the
    newline is removed together with the other non-letters.

    Args:
        path_to_file: Path of a UTF-8 text file used as the reference corpus.
        pseudo_count: Positive smoothing value every cell starts from, so
            that no transition ends up with probability zero. Observed
            counts are added on top of it. Defaults to 2.0.

    Returns:
        A ``(27, 27)`` float array. Index 0 is the space and 1..26 are
        ``a``..``z``. Entry ``[i, j]`` is the smoothed relative frequency
        with which symbol ``j`` follows symbol ``i``; every row sums to 1.

    Raises:
        ValueError: If ``pseudo_count`` is not positive.
        OSError: If the file cannot be opened.
    """
    if pseudo_count <= 0:
        raise ValueError("pseudo_count must be positive")

    # Create letters encoder
    letters = [" "] + list(string.ascii_lowercase)
    unigram_to_index = {letter: index for index, letter in enumerate(letters)}
    n = len(letters)

    # Every cell starts at the smoothing prior. Observed counts must be ADDED
    # to it rather than assigned over it: otherwise a bigram seen once would
    # weigh less than a bigram that was never seen at all.
    transition_matrix = np.ones((n, n)) * float(pseudo_count)

    # Compile once instead of once per line
    pattern = re.compile('[^a-z ]')

    with open(path_to_file, encoding='utf-8') as f:
        for line in f:
            clean_line = pattern.sub('', line.lower())

            # Pair each character with its successor; lines shorter than
            # two characters simply yield no pairs.
            for first, second in zip(clean_line, clean_line[1:]):
                transition_matrix[unigram_to_index[first], unigram_to_index[second]] += 1

    # Normalize matrix rows
    row_sums = transition_matrix.sum(axis=1)
    empirical_frequences = transition_matrix / row_sums[:, np.newaxis]

    return empirical_frequences

In [203]:
def score_cipher(cipher, char_index_map, encrypted, transition_mtx, return_sample=False):
    """Score a candidate key by the bigram log-likelihood of the text it decodes.

    Args:
        cipher: Dict whose values are the symbols appearing in ``encrypted``
            and whose keys are the symbols they decode to. It is inverted
            here to perform the decoding.
        char_index_map: Dict from decoded symbol to its row/column index in
            ``transition_mtx``.
        encrypted: The ciphertext string.
        transition_mtx: 2-D array of bigram transition probabilities,
            indexed ``[previous, next]``.
        return_sample: When True, skip scoring and return the first 100
            decoded characters instead.

    Returns:
        The sum of ``log(transition_mtx[prev, next])`` over all adjacent
        pairs of the decoded text (0 for texts shorter than two characters),
        or the 100-character decoded prefix when ``return_sample`` is True.

    Raises:
        KeyError: If a ciphertext symbol is not a value of ``cipher``, or
            (when scoring) a decoded symbol is missing from
            ``char_index_map``.
    """
    # Invert the key so it maps ciphertext symbol -> decoded symbol
    inverse = {code: plain for plain, code in cipher.items()}
    decrypted = "".join([inverse[symbol] for symbol in encrypted])

    # The preview does not need the score, so do not pay for computing it
    if return_sample:
        return decrypted[:100]

    # Look up all (previous, next) cells in one vectorized step instead of
    # calling np.log once per character in a Python loop.
    indices = np.fromiter((char_index_map[ch] for ch in decrypted), dtype=np.intp)
    probabilities = np.asarray(transition_mtx)[indices[:-1], indices[1:]]
    return np.log(probabilities).sum()

In [204]:
def process_decryption(encrypted, usual_alphabet, code_space, trans_mtx, char_index_mapping, iters=2500):
    """Decode a substitution cipher with a Metropolis random walk over keys.

    Starting from the key that pairs ``code_space`` with ``usual_alphabet``
    position by position, each step proposes swapping the images of two
    distinct symbols. A proposal is accepted when it scores higher, or
    otherwise with probability ``exp(new_score - current_score)``. The
    best-scoring key seen is remembered and used for the final decoding.

    Args:
        encrypted: The ciphertext string to decode.
        usual_alphabet: Iterable of the symbols occurring in ``encrypted``.
        code_space: Iterable of symbols of the same length, giving the
            initial guess (typically a shuffled alphabet).
        trans_mtx: Bigram transition probability matrix passed through to
            ``score_cipher``.
        char_index_mapping: Dict from symbol to index in ``trans_mtx``.
        iters: Number of proposals to evaluate.

    Returns:
        ``encrypted`` decoded with the best key found.

    Side effects:
        Prints a 100-character preview of the best decoding every 500
        iterations. Draws from both ``numpy.random`` and ``random``.
    """
    # Initial key: code_space[i] -> usual_alphabet[i]
    current_cifer = dict(zip(list(code_space), list(usual_alphabet)))
    current_score = score_cipher(current_cifer, char_index_mapping, encrypted, trans_mtx)
    best_cifer, best_score = current_cifer, current_score

    # Draw swap candidates from the keys that actually exist in the mapping
    symbols = list(current_cifer)

    # A transposition needs two distinct symbols; with fewer there is
    # nothing to propose.
    n_steps = iters if len(symbols) >= 2 else 0

    for i in range(n_steps):

        # Propose a transposition of two DISTINCT symbols. Sampling without
        # replacement avoids no-op proposals that would still cost a full
        # scoring pass.
        r1, r2 = np.random.choice(symbols, 2, replace=False)
        new_cifer = current_cifer.copy()
        new_cifer[r1], new_cifer[r2] = current_cifer[r2], current_cifer[r1]
        new_score = score_cipher(new_cifer, char_index_mapping, encrypted, trans_mtx)

        # Metropolis acceptance rule. Every proposal is a fresh dict that is
        # never mutated after this point, so it can be shared without copying.
        if new_score > current_score or random.uniform(0, 1) < np.exp(new_score - current_score):
            current_cifer, current_score = new_cifer, new_score

        if new_score > best_score:
            best_cifer, best_score = new_cifer, new_score

        # Print out progress
        if i % 500 == 0:
            best_attempt_smpl = score_cipher(best_cifer, char_index_mapping, encrypted, trans_mtx, return_sample=True)
            print(i, ':\t', best_attempt_smpl)

    # Invert the best key so it maps ciphertext symbol -> decoded symbol
    mapping = {code: plain for plain, code in best_cifer.items()}

    # Decode the text that was passed in; the original read an undefined
    # global here instead of the `encrypted` parameter.
    best_attempt = encrypt_or_decrypt(encrypted, mapping)

    return best_attempt

In [205]:
text = """
Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, 'and what is the use of a book,' thought Alice 'without pictures or conversation?'
So she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid), whether the pleasure of making a daisy-chain would be worth the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink eyes ran close by her.
There was nothing so very remarkable in that; nor did Alice think it so very much out of the way to hear the Rabbit say to itself, 'Oh dear! Oh dear! I shall be late!' (when she thought it over afterwards, it occurred to her that she ought to have wondered at this, but at the time it all seemed quite natural); but when the Rabbit actually took a watch out of its waistcoat-pocket, and looked at it, and then hurried on, Alice started to her feet, for it flashed across her mind that she had never before see a rabbit with either a waistcoat-pocket, or a watch to take out of it, and burning with curiosity, she ran across the field after it, and fortunately was just in time to see it pop down a large rabbit-hole under the hedge.
In another moment down went Alice after it, never once considering how in the world she was to get out again.
The rabbit-hole went straight on like a tunnel for some way, and then dipped suddenly down, so suddenly that Alice had not a moment to think about stopping herself before she found herself falling down a very deep well.
"""
# Keep only lowercase letters and spaces, the alphabet the cipher works on.
# The pattern is compiled from `regex_pattern`; the original referenced an
# undefined name here and failed on a fresh kernel.
regex_pattern = '[^a-z ]'
pattern = re.compile(regex_pattern)
text_cleaned = pattern.sub('', text.lower())

# Generate random code space: a random permutation of the alphabet is the true key
en_alphabet = " "+string.ascii_lowercase
tmp = list(en_alphabet)
random.shuffle(tmp)
random_cifer = dict(zip(list(en_alphabet), tmp))

# Encrypt the cleaned text with the true key
encrypted = encrypt_or_decrypt(text_cleaned, random_cifer)

# Compute english bigram frequencies from a reference text
empirical_frequences = prepare_empirical_freq_normalized('war_and_peace.txt')

In [206]:
# Starting guess for the sampler: the alphabet in random order
init = list(en_alphabet)
random.shuffle(init)

# Symbol -> matrix index lookup (space first, then a..z)
c_i_map = {symbol: position for position, symbol in enumerate(' ' + string.ascii_lowercase)}
decrypted_text = process_decryption(
    encrypted,
    en_alphabet,
    init,
    empirical_frequences,
    c_i_map,
    iters=5000,
)

print("attempted decryption:", decrypted_text, sep="\n")

0 :	 yolpvbcydbxv lssls bfkb vfbrvejbflevnbkgbdlffls bxjbqvebdldfvebksbfqvbxysabysnbkgbqyrls bskfqls bfkb
500 :	 aducr haf brwunnunw ti wrt grey tuers im futtunw by ore fuftre in tor bank ans im oagunw nitounw ti 
1000 :	 asice paf beginning to get wery tired om fitting by her fifter on the bank and om hawing nothing to 
1500 :	 alice was peginning to get bery tired of sitting py her sister on the pank and of habing nothing to 
2000 :	 alice was peginning to get bery tired of sitting py her sister on the pank and of habing nothing to 
2500 :	 alice was peginning to get very tired of sitting py her sister on the pank and of having nothing to 
3000 :	 alice was peginning to get very tired of sitting py her sister on the pank and of having nothing to 
3500 :	 alice was beginning to get very tired of sitting by her sister on the bank and of having nothing to 
4000 :	 alice was beginning to get very tired of sitting by her sister on the bank and of having nothing to 
4500 :	 alice was begin