In [1]:
import base64
import json

In [2]:
def Hamming(a: bytes, b: bytes)->int:
    assert type(a) is bytes and type(b) is bytes
    assert len(a) == len(b)
    diff = 0
    for char_a, char_b in zip(a, b):
        xor = char_a ^ char_b
        while xor > 0:
            diff += xor & 0b1
            xor >>= 1
    return diff

In [3]:
Hamming(b'this is a test', b'wokka wokka!!!')

37

In [4]:
def Dot(x, y):
    assert len(x) == len(y)
    return sum( x_*y_ for x_, y_ in zip(x, y) )

def Norm(x):
    return sum( x_**2 for x_ in x ) ** 0.5

def Mean(x):
    return sum(x) / len(x)

def EvaluateStringChars(x: bytes, alpha = 6):
    assert type(x) is bytes
    x = x.lower()
  
    frequencies = 0.082, 0.015, 0.028, 0.043, 0.130, 0.022, 0.020, 0.061, 0.070, 0.002, 0.008, 0.040, 0.024, \
                  0.067, 0.075, 0.019, 0.001, 0.060, 0.063, 0.091, 0.028, 0.010, 0.024, 0.002, 0.020, 0.001
    alphabet = b'abcdefghijklmnopqrstuvwxyz'
    
    v1 = [0] * 256
    for n, char in enumerate(alphabet):
        v1[char] = frequencies[n]
    assert 0.99 < sum(v1) < 1.01
    
    v2 = [0] * 256
    for char in range(256):
        v2[char] = x.count(char) / len(x)
    assert 0.99 < sum(v2) < 1.01

    return Dot(v1, v2) * ( sum( char in { *b' \n', *range(97, 123) } for char in x ) / len(x) ) ** alpha

In [5]:
EvaluateStringChars(b'But soft what light through yonder window breaks')

0.049520833333333326

In [6]:
EvaluateStringChars(b'sdlfajskdlfjalsdjflakk l kjsjdfksd flaksdjf lasjdf')

0.03296

In [7]:
EvaluateStringChars(b'^^^%%%')

0.0

In [8]:
keysize_scores = dict()

for keysize in range(2, 40):
    chunks = [ cyphertext[idx*keysize:(1+idx)*keysize] for idx in range(len(cyphertext)//keysize)  ]
    keysize_scores[keysize] = Mean([ Hamming(chunk1, chunk2)/len(chunk1) for chunk1, chunk2 in zip(chunks[:-1], chunks[1:]) ]) 
    
best_keysizes = sorted(keysize_scores, key = lambda x:keysize_scores[x])
best_keysizes[:5]

NameError: name 'cyphertext' is not defined

In [9]:
def BruteForceXorDecrypt(s: bytes, alpha = 4):
    assert type(s) is bytes
    best_plaintext = ''
    best_score = -1
    best_key = -1
    
    for key in range(256):
        plaintext = bytes([ key^char for char in s ])
        score = EvaluateStringChars(plaintext, alpha = alpha)
        if score > best_score:
            best_score = score
            best_plaintext = plaintext
            best_key = key
                    
        
    return best_key, best_score, best_plaintext
    

In [10]:
cyphertext = bytes.fromhex('1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736')
BruteForceXorDecrypt(cyphertext, alpha = 2)


(88, 0.038346631386118456, b"Cooking MC's like a pound of bacon")

In [11]:
EvaluateStringChars(bytes([ 88^char for char in cyphertext ]))

0.03403042007403601

In [13]:
def MultiKeyXorDecrypt(s: bytes, keysize: int):
    assert type(s) is bytes and type(keysize) is int  and 0 < keysize < len(s)
    nchunks = len(s) // keysize
    
    keys = [-1] * keysize
    scores = [-1] * keysize
    
    for n in range(keysize):
        row = bytes([ char for idx, char in enumerate(s) if idx%keysize == n ])
        keys[n], *junk = BruteForceXorDecrypt(row)
    
    plaintext = bytes([ char ^ keys[n%keysize] for n, char in enumerate(s) ]) 
    score = EvaluateStringChars(plaintext)
    
    return keys, score, plaintext

In [16]:
with open('untitled.txt', mode = 'rb') as file:
    cyphertext = base64.decodebytes(file.read())

In [20]:
keysize_scores = dict()

for keysize in range(2, 40):
    chunks = [ cyphertext[idx*keysize:(1+idx)*keysize] for idx in range(len(cyphertext)//keysize)  ]
    keysize_scores[keysize] = Mean([ Hamming(chunk1, chunk2)/len(chunk1) for chunk1, chunk2 in zip(chunks[:-1], chunks[1:]) ]) 
    
best_keysizes = sorted(keysize_scores, key = lambda x:keysize_scores[x])
best_keysizes[:5]

[29, 38, 13, 9, 16]

In [21]:
best_score = 0
best_keys = []
best_plaintext = b''

for keysize in best_keysizes[:5]:
    keys, score, plaintext = MultiKeyXorDecrypt(cyphertext, keysize)
    print(keysize, '%.3f' % score, plaintext[:100])

29 0.028 b"I'm back and I'm ringin' the bell \nA rockin' on the mike while the fly girls yell \nIn ecstasy in the"
38 0.002 b's\x1am$e`gz:` SY+\nq ug<zdbo!So\x7f XEpu\'\rN\x1bzs\x1bA: \n7cu4:sN\x07"ime:`Uiw`&Rgx*z\tOn)BnJz)\x01cmgo\x01Cr nXUDct~(ts;1Y7'
13 0.002 b't\'p"eakm&z!Yt\x00\x00p&r`\'ahbh+uhs*Ynph;\rI;xs*V<!\x0b<or.=sh+$bkc=|U~wa&\x7fgx*{#Ot.Yilr"7xafo\nHn!i^ycbsr.ro<=B6'
9 0.001 b's,v$d}lp*} Us\x06\x17q&y` zhdu tdx+_iqs\'\x0bE:st*\\! \n7hn.=s\x7f+"bjb:{S~pg,tgx&{>Ou"Enlt(+\x7fkln\nOn<rYycbt~(rs<;U1'
16 0.002 b"s+k(b`kv!P Ri\x06\nv'uz+znbn Xox6Yowo \x10E=rt*]\x10 \x0b!io.:uy $hme=QRbmg&sgx;q%Ns.ECks46~mgi\x10Cn<i^xEbtd.rh<=D:"
