# Chi squared attack on Caesar encryption

In [255]:
import numpy as np
import pandas as pd
import time

In [256]:
def caesar_encryption(alphabet, shift, text, direction):
    # remove spaces from text and uppercase all text
    text = text.replace(" ", "").upper()
    
    # loop over all letters from the text
    returned_text = []
    for letter in list(text):
        for counter, value in enumerate(list(alphabet)):
            
            # encryption : E = (x + shift) mod len(aplhabet)
            if letter==value and direction=='encrypt':
                returned_text.append(alphabet[(counter + shift)%len(list(alphabet))])
                break
                
            # decryption : E = (x - shift) mod len(aplhabet)
            elif letter==value and direction=='decrypt':
                returned_text.append(alphabet[(counter - shift)%len(list(alphabet))])
                break
                
    return "".join(returned_text)

In [257]:
alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
text = 'THIS IS A TEST TO PROVE THE EFFICIENCY OF THE CHI SQUARED ATTACK'

encrypted_txt = caesar_encryption(alpha, 6, text, 'encrypt')
print(encrypted_txt)
decrypted_txt = caesar_encryption(alpha, 6, encrypted_txt, 'decrypt')
print(decrypted_txt)

ZNOYOYGZKYZZUVXUBKZNKKLLOIOKTIEULZNKINOYWAGXKJGZZGIQ
THISISATESTTOPROVETHEEFFICIENCYOFTHECHISQUAREDATTACK


In [258]:
def chi_squared_attack(alphabet, encrypted_txt, max_n):
    # load n_grams and initialize variables
    n_grams = pd.read_csv('n_grams_eng.csv')
    n_grams['n'] = n_grams['n_gram'].apply(lambda x: len(str(x)))
    
    final_score = 0
    decrypted_txt = ''
    hist = pd.DataFrame(columns=['shift', 'chi_2_score','decrypt'])
    
    # generate the n_gram list to loop over
    search_gram = np.array(n_grams['n_gram'].loc[n_grams['n'] <= max_n])
    
    # loop over each n_gram and test decryption
    for i in range(len(alphabet)):
        
        # start chrono
        start = time.time()
        
        # test the i_th shift for decryption
        test_decrypt = caesar_encryption(alpha, i, encrypted_txt, 'decrypt')
        
        # calculate the chi squared score i for each letter
        score_chi2_i = []
        for n_gram in search_gram:
            ci = test_decrypt.count(n_gram) # Actual count of the letter
            ei = float(n_grams['freq'].loc[n_grams['n_gram']==n_gram])*len(encrypted_txt)
            score_chi2_i.append((ci - ei)**2/ei )
        
        # end chrono
        end = time.time()
        
        # sum all chi squared score i to calculate total chi squared score 
        score_chi2 = sum(score_chi2_i)
        
        # save history
        hist.loc[i] = [i,score_chi2,test_decrypt]
        
        # keep only best score (inverse score to initialize to 0 and keep highest)
        if 1/score_chi2 > final_score:
            final_score = 1/score_chi2
            decrypted_txt = test_decrypt
    
        # display progress
        print(
            str(i)+'/'+str(len(search_gram))+':',
            str(round(end - start,2))+'s', # display elapsed time
            'score:',round(score_chi2,3),
            test_decrypt[:15]+'...') # display only 15 characters
    
    # display the best solutions
    print(hist.sort_values('chi_2_score').head())
     
    return decrypted_txt, final_score

In [259]:
chi_squared_attack(alpha, encrypted_txt,1)

0/26: 0.61s score: 1276.314 ZNOYOYGZKYZZUVX...
1/26: 0.59s score: 697.239 YMNXNXFYJXYYTUW...
2/26: 0.58s score: 773.432 XLMWMWEXIWXXSTV...
3/26: 0.62s score: 175.473 WKLVLVDWHVWWRSU...
4/26: 0.58s score: 517.616 VJKUKUCVGUVVQRT...
5/26: 0.61s score: 347.762 UIJTJTBUFTUUPQS...
6/26: 0.59s score: 37.892 THISISATESTTOPR...
7/26: 0.62s score: 409.562 SGHRHRZSDRSSNOQ...
8/26: 0.56s score: 370.559 RFGQGQYRCQRRMNP...
9/26: 0.59s score: 1677.187 QEFPFPXQBPQQLMO...
10/26: 0.62s score: 149.881 PDEOEOWPAOPPKLN...
11/26: 0.58s score: 1117.344 OCDNDNVOZNOOJKM...
12/26: 0.59s score: 274.238 NBCMCMUNYMNNIJL...
13/26: 0.57s score: 597.729 MABLBLTMXLMMHIK...
14/26: 0.57s score: 516.152 LZAKAKSLWKLLGHJ...
15/26: 0.57s score: 801.72 KYZJZJRKVJKKFGI...
16/26: 0.58s score: 1061.284 JXYIYIQJUIJJEFH...
17/26: 0.57s score: 314.169 IWXHXHPITHIIDEG...
18/26: 0.57s score: 380.753 HVWGWGOHSGHHCDF...
19/26: 0.57s score: 156.938 GUVFVFNGRFGGBCE...
20/26: 0.57s score: 981.33 FTUEUEMFQEFFABD...
21/26: 0.58s score: 36

('THISISATESTTOPROVETHEEFFICIENCYOFTHECHISQUAREDATTACK', 0.026390737749238165)