In [1]:
import pandas as pd
import string
import random
import pickle
import numpy as np

In [2]:
# Load the two pickled lookup tables from ngram_data/.
# Later cells index both with the letter pattern returned by paternify():
# word_dict[pattern] is iterated for candidate words, and
# word_prob_dict[pattern] is normalised into sampling weights for them.
# NOTE(review): pickle.load can execute arbitrary code - only load files you trust.
with open("ngram_data/word_dict.pickle","rb") as f:
    word_dict = pickle.load(f)
with open("ngram_data/word_prob_dict.pickle","rb") as f:
    word_prob_dict = pickle.load(f)

In [3]:
# Build a dict from the CSV's "word" column to its "frequency" column.
# old_solve() reads this module-level name directly when calling word_score().
word_freq=pd.read_csv("ngram_data/word_freq.csv").set_index("word")["frequency"].to_dict()

In [4]:
# Build n-gram -> frequency dicts from the "2-gram"/"3-gram" and "frequency" columns.
# NOTE(review): presumably meant as the lookup tables for bigram_score() and
# trigram_score(); no call in the visible cells passes them - confirm usage.
bigram=pd.read_csv("ngram_data/2gram.csv").set_index("2-gram")["frequency"].to_dict()
trigram=pd.read_csv("ngram_data/3gram.csv").set_index("3-gram")["frequency"].to_dict()

In [121]:
def encrypt(plaintext,key):
    """Apply the substitution ``key[i] -> i-th lowercase ASCII letter``.

    Every character of ``plaintext`` that occurs in ``key`` is replaced by the
    lowercase letter at the same position; any other character (spaces, for
    example) is left as it is.
    """
    table = str.maketrans(key, string.ascii_lowercase)
    return plaintext.translate(table)

def decrypt(ciphertext,key):
    """Apply the substitution ``i-th lowercase ASCII letter -> key[i]``.

    This is the inverse mapping of ``encrypt`` for the same ``key``.
    Characters that are not lowercase ASCII letters are left as they are.
    """
    table = str.maketrans(string.ascii_lowercase, key)
    return ciphertext.translate(table)

def paternify(instr):
    """Return the letter pattern of ``instr``.

    The first distinct character becomes "a", the second distinct character
    "b", and so on (code points counting up from 97); a repeated character
    reuses the letter it was given the first time, e.g. "hello" -> "abccd".
    """
    labels = {}
    pieces = []
    for ch in instr:
        if ch not in labels:
            # 97 is ord("a"); one new label per distinct character seen so far.
            labels[ch] = chr(97 + len(labels))
        pieces.append(labels[ch])
    return "".join(pieces)

def bigram_score(instring,bigram):
    """Sum the log-frequencies of the known 2-character windows of ``instring``.

    Args:
        instring: text to score; every pair of adjacent characters is looked up.
        bigram: mapping from a 2-character string to a positive frequency.

    Returns:
        The sum of ``np.log(bigram[pair])`` over every adjacent pair present in
        ``bigram``, or ``None`` when no pair of ``instring`` is present.
    """
    prob = None
    for first, second in zip(instring, instring[1:]):
        pair = first + second
        if pair in bigram:
            log_freq = np.log(bigram[pair])
            # Identity test: ``== None`` on a numpy scalar is an element-wise
            # comparison and is not a reliable "unset" check.
            prob = log_freq if prob is None else prob + log_freq

    return prob

def trigram_score(instring,trigram):
    """Sum the log-frequencies of the known 3-character windows of ``instring``.

    Args:
        instring: text to score; every run of three adjacent characters is
            looked up.
        trigram: mapping from a 3-character string to a positive frequency.

    Returns:
        The sum of ``np.log(trigram[triple])`` over every window present in
        ``trigram``, or ``None`` when no window of ``instring`` is present.
    """
    prob = None
    for first, second, third in zip(instring, instring[1:], instring[2:]):
        triple = first + second + third
        if triple in trigram:
            log_freq = np.log(trigram[triple])
            # Identity test: ``== None`` on a numpy scalar is an element-wise
            # comparison and is not a reliable "unset" check.
            prob = log_freq if prob is None else prob + log_freq

    return prob

def word_score(instring,word_freq):
    """Score ``instring`` as a sum of per-word log-frequencies.

    The text is split on single spaces. A word found in ``word_freq``
    contributes ``np.log(word_freq[word])``; any other token (including the
    empty strings produced by consecutive spaces) contributes the fixed
    out-of-vocabulary penalty ``np.log(10**(-10))``.
    """
    oov_penalty = np.log(10**(-10))
    total = 0
    for token in instring.split(" "):
        total += np.log(word_freq[token]) if token in word_freq else oov_penalty

    return total

def transify(str1,str2):
    """Build a ``str.translate`` table that turns ``str1`` into ``str2``.

    Position by position, the code point of each ``str1`` character is mapped
    to the code point of the ``str2`` character (the first pairing seen for a
    given source character wins). Characters that occur only in ``str2`` are
    then mapped back onto characters that occur only in ``str1``; which
    leftover pairs up with which follows set iteration order.
    """
    only_in_first = list(set(str1)-set(str2))
    only_in_second = list(set(str2)-set(str1))

    table = {}
    for src, dst in zip(str1, str2):
        table.setdefault(ord(src), ord(dst))

    # Reverse direction for the leftovers: a target-only char maps to a source-only char.
    table.update(
        (ord(extra), ord(spare)) for spare, extra in zip(only_in_first, only_in_second)
    )

    return table

def isconsistent(assignment,pattern, word):
    """Check whether mapping ``pattern`` onto ``word`` agrees with ``assignment``.

    ``assignment`` maps source code points to target code points. For each
    aligned character pair the check fails when the source character is
    already assigned to a different target, or when the source character is
    unassigned but the target is already used as a value in ``assignment``.
    """
    for src, dst in zip(pattern, word):
        src_code, dst_code = ord(src), ord(dst)
        if src_code not in assignment:
            # A fresh source letter may not claim a target that is already taken.
            if dst_code in assignment.values():
                return False
            continue
        if assignment[src_code] != dst_code:
            return False

    return True

def get_assignment(word1,word2):
    """Return the code-point mapping that turns ``word1`` into ``word2``.

    Characters are paired position by position, and the first pairing seen
    for a given ``word1`` character is the one kept.

    Args:
        word1: source word (its characters become the keys).
        word2: target word (its characters become the values).

    Returns:
        dict mapping ``ord(source_char)`` to ``ord(target_char)``.
    """
    trans = {}
    for src, dst in zip(word1, word2):
        # Keys are code points, so the "already mapped" test has to use
        # ord(src); testing the character itself never matches an int key and
        # let later pairings silently overwrite earlier ones.
        trans.setdefault(ord(src), ord(dst))
    return trans


def assignment_trans(assignment):
    """Extend a partial code-point mapping and return it.

    Code points that appear only as values are paired with code points that
    appear only as keys, and each such pair is added as ``value_only ->
    key_only``. The dict passed in is modified in place and is also the
    return value.
    """
    targets = set(assignment.values())
    sources = set(assignment.keys())

    sources_never_produced = list(sources - targets)
    targets_never_consumed = list(targets - sources)

    # zip stops at the shorter list, so only matched leftovers are added.
    assignment.update(zip(targets_never_consumed, sources_never_produced))

    return assignment

In [144]:
# Demo input: encrypt a known sentence so solver output can be checked by eye.
# This particular key only swaps pairs of letters (or leaves a letter fixed),
# so encrypt() and decrypt() give the same result for it.
actual_key = "badcfehgjilkonmrqputsxwvzy"
plaintext = "from bucharest to zambia to zaire ozone zones make zebras run zany zigzags"
# plaintext = "person octapus their small to a who is so big that is"
cipher = encrypt(plaintext,actual_key)

In [145]:
%%time
def solver(cipher, word_dict):
    """Enumerate dictionary-consistent decryptions of ``cipher`` depth-first.

    The cipher text is split on single spaces. Words are processed left to
    right; for each word every candidate in ``word_dict`` with the same letter
    pattern (see ``paternify``) that agrees with the letters fixed so far
    (see ``isconsistent``) spawns a new search branch.

    Args:
        cipher: space-separated cipher text.
        word_dict: mapping from a letter pattern to an iterable of candidate
            words having that pattern.

    Returns:
        ``(solutions, partial_solutions)``. ``solutions`` holds the decoded
        text of every branch that assigned all words (each one is also
        printed as it is found). ``partial_solutions`` holds the decoded text
        of every branch that ran out of consistent candidates.
    """
    word_seq = cipher.split(" ")
    solutions = []
    partial_solutions = []
    # LIFO stack of (index of the next word to assign, assignment so far).
    stack = [(0, {})]
    while stack:
        index, assignment = stack.pop()
        if index >= len(word_seq):
            decoded = cipher.translate(assignment_trans(assignment))
            solutions.append(decoded)
            print(decoded)
            continue

        cipher_word = word_seq[index]
        # A pattern missing from the dictionary has no candidates: treat it
        # as a dead end instead of raising KeyError.
        candidates = word_dict.get(paternify(cipher_word), ())
        extended = False
        for sub_word in candidates:
            if isconsistent(assignment, cipher_word, sub_word):
                # Each branch gets its own dict so siblings cannot interfere.
                branch = {**assignment, **get_assignment(cipher_word, sub_word)}
                stack.append((index + 1, branch))
                extended = True
        if not extended:
            partial_solutions.append(cipher.translate(assignment_trans(assignment)))

    return solutions, partial_solutions


# Capture the result so the cell does not echo the (potentially long) lists.
solutions, partial_solutions = solver(cipher, word_dict)

prom bucharest to zambia to zaire ozone zones male zebras run zany zigzags
prom bucharest to zambia to zaire ozone zones make zebras run zany zigzags
prom bucharest to zambia to zaire ozone zones made zebras run zany zigzags
from bucharest to zambia to zaire ozone zones male zebras run zany zigzags
from bucharest to zambia to zaire ozone zones make zebras run zany zigzags
from bucharest to zambia to zaire ozone zones made zebras run zany zigzags
drom bucharest to zambia to zaire ozone zones male zebras run zany zigzags
drom bucharest to zambia to zaire ozone zones make zebras run zany zigzags
CPU times: user 45.3 s, sys: 4.76 ms, total: 45.3 s
Wall time: 45.3 s


In [10]:
def old_solve(cipher,word_dict,word_prob_dict,n_iter=10000):
    """Stochastic hill-climbing search for a substitution key.

    Starting from the identity key, each iteration picks one word of the
    current decryption (longer words are proportionally more likely), samples
    a dictionary word with the same letter pattern, derives the key change
    that would turn the picked word into the sampled one, and keeps the new
    key only if the ``word_score`` of the resulting text improves. Every
    improvement is printed.

    Scoring reads the module-level ``word_freq`` table. Sampling uses the
    global ``np.random`` state, so seed it beforehand for repeatable runs.

    Args:
        cipher: space-separated cipher text.
        word_dict: mapping from letter pattern to a sequence of candidate words.
        word_prob_dict: mapping from letter pattern to the sampling weights of
            those candidates (normalised here before use).
        n_iter: number of proposals to try.

    Returns:
        ``(key, plaintext, best_score)`` for the best decryption found.
    """
    key = string.ascii_lowercase
    plaintext = decrypt(cipher, key)
    best_score = word_score(plaintext, word_freq)

    # A character-for-character substitution never changes word lengths, so
    # the length-proportional sampling weights are the same on every pass.
    word_lengths = np.array([len(word) for word in cipher.split(" ")])
    pick_prob = word_lengths / sum(word_lengths)

    for _ in range(n_iter):
        word_seq = plaintext.split(" ")
        rnd_word = np.random.choice(word_seq, p=pick_prob)

        rnd_word_pattern = paternify(rnd_word)
        weights = word_prob_dict[rnd_word_pattern]
        sub_word = np.random.choice(word_dict[rnd_word_pattern], p=weights / np.sum(weights))

        trans = transify(rnd_word, sub_word)
        mod_key = key.translate(trans)
        mod_plaintext = decrypt(cipher, mod_key)
        mod_score = word_score(mod_plaintext, word_freq)
        if mod_score > best_score:
            key, plaintext, best_score = mod_key, mod_plaintext, mod_score
            print(best_score, plaintext)

    return key, plaintext, best_score


# The function in this cell is named old_solve; calling the undefined name
# `solve` fails with NameError on a fresh kernel.
recovered_key, recovered_plaintext, recovered_score = old_solve(cipher,word_dict,word_prob_dict)

-268.5777555542455 femj asdgterub bm otjaht bm other momnr omnru jtlr oraetu esn otnz ohyotyu
-264.1994917677408 fimj ksdeniagb bm tnjkhn bm tnhia mtmoa tmoag jnla taking iso tnoz thytnyg
-257.13089216442813 fcmj syndicate em bijshi em bihca mbmoa bmoat jila bascit cyo bioz bhgbigt
-251.7735704596357 fvmj discovery ym bojdho ym bohve mbmae bmaer jole bedvor via boaz bhgbogr
-243.52533624890765 fdmj vascoderi im bojvho im bohde mbmye bmyer jole bevdor day boyz bhgbogr
-237.03765230988915 fdmb vascodhri im lobveo im loedh mlmyh lmyhr both lhvdor day loyz leglogr
-236.60473063674957 vdmf oascedhli im before im berdh mbmyh bmyhl feth bhodel day beyz brgbegl
-234.67667809602315 vdfc oasredhli if become if bemdh fbfyh bfyhl ceth bhodel day beyz bmgbegl
-232.58688365866166 vwfc oayrewhli if become if bemwh fbfsh bfshl ceth bhowel was besz bmgbegl
-231.31638010566104 vafc onyreahli if become if bemah fbfdh bfdhl ceth bhoael and bedz bmgbegl
-228.89461298921572 vatc onyreahli it become it bemah