In [48]:
import base64
import json

In [2]:
def Hamming(a: bytes, b: bytes)->int:
    """Return the bit-level Hamming distance between two byte strings.

    Both arguments must be exactly of type ``bytes`` and have the same
    length; otherwise an ``AssertionError`` is raised.  The result is the
    number of bit positions in which ``a`` and ``b`` differ.
    """
    assert type(a) is bytes and type(b) is bytes
    assert len(a) == len(b)
    # XOR leaves a 1 exactly where the two bytes disagree; count those 1s.
    return sum(bin(byte_a ^ byte_b).count("1") for byte_a, byte_b in zip(a, b))

In [3]:
# Sanity check of Hamming on two equal-length byte strings; the recorded result is 37.
Hamming(b'this is a test', b'wokka wokka!!!')

37

In [4]:
# Read the base64-encoded ciphertext and decode it to raw bytes.
# The `with` block guarantees the file handle is closed once it has been read.
with open('untitled.txt', mode='rb') as cyphertext_file:
    cyphertext = base64.decodebytes(cyphertext_file.read())
cyphertext

b'\x1dB\x1fM\x0b\x0f\x02\x1fO\x13N<\x1aie\x1fI\x1c\x0eN\x13\x01\x0b\x07N\x1b\x01\x16E6\x00\x1e\x01Id T\x1d\x1dC3SNeR\x06\x00GT\x1c\rEM\x07\x04\x0cS\x12<\x0c\x1e\x08I\x1a\t\x11O\x14L!\x1aG+\x00\x05\x1dGY\x11\x04\t\x00d&\x07S\x007\x16\x06\x0c\x1a\x17A\x1d\x01RT0_\x00 \x13\n\x05GO\x12H\x08ENe>\x16\t8E\x06\x05\x08\x1aF\x07O\x1fYx~jb6\x0c\x1d\x0fA\rH\x06U\x1a\x1b\x00\x1dBt\x04\x1e\x01I\x1a\t\x11\x02Rz\x7fI\x00H:\x00\x1a\x13I\x1aOEH\x0f\x1d\rS\x04:\x01R\x19\x01\x0bA\x13\x06\x00L1_Sb\x15\x06\x07\t\x07T\x0b\x17A\x14\x16Iy35\x0b\x1b\x01\x05\x0fF\x07O\x1dNxNH\'R\x04\x07\x0cEXH\x08A\x00O T\x08t\x0b\x1d\x19I\x02\x00\x0e\x16\\\x00R0ie\x1fI\x02\x02T\x00\x01\x0b\x07N\x02\x10S\x01&\x10\x15M\x02\x07\x02\x1fO\x1bNx0i6R\n\x01\tT\x06\x07\tSN\x02\x10S\x08;\x10\x06\x05I\x0f\x0f\x10O;\x00:_G+\x1cId3OT\x02\x10S\x1aO\x05\x16\x11t\x0c\x06M\x0f\x02\x0e\x03CRL=N\x00/\x0bI\r\x08N\x17\r\x15T\x1dO\x0e\x1cE^(\x0bM\x19\x01\x12\x07\nUSxNOb\x06\x01\x0bGS\x1d\x0c\x00\x00\x17\n\x05\x1f\x0c:B^M.\x01A"\x0e\x1cI4VAb5\x06OG*~

In [66]:
# Score a sentence of ordinary English prose; the recorded result is about 0.0039.
EvaluateString(b'Escape me never beloved, while I am I and you are you, so long as the world contains us both')

0.0038727210950960786

In [67]:
# Score keyboard-mash text made of letters and spaces; the recorded result is about 0.0008.
EvaluateString(b'sdlfajskdlfjalsdjflaksjdflkajsdl;kfaj sldkfj  alksdjflka jsdlfkajs ldfjalskdf jalksd flaksdjf lasjdf')

0.0008486532931747437

In [68]:
# Score a string containing only punctuation symbols; the recorded result is 0.0.
EvaluateString(b'^^^%%%')

0.0

In [69]:
# For each candidate key size, cut the ciphertext into consecutive
# keysize-byte chunks (a trailing partial chunk is dropped) and average the
# per-byte Hamming distance between every pair of neighbouring chunks.
keysize_scores = {}

for keysize in range(2, 40):
    usable_length = (len(cyphertext) // keysize) * keysize
    chunks = [cyphertext[start:start + keysize] for start in range(0, usable_length, keysize)]
    keysize_scores[keysize] = Mean([
        Hamming(left, right) / len(left)
        for left, right in zip(chunks, chunks[1:])
    ])

# Candidate key sizes ordered from lowest to highest average distance.
best_keysizes = sorted(keysize_scores, key=keysize_scores.get)
best_keysizes[:5]

[29, 38, 13, 9, 16]

In [70]:
def BruteForceXorDecrypt(s: bytes) -> tuple:
    """Try every single-byte XOR key against ``s`` and keep the best-scoring one.

    Each key in ``range(256)`` is XORed with every byte of ``s`` and the
    result is scored with ``EvaluateString``.  A candidate replaces the
    current best only when its score is strictly greater, so ties go to the
    lowest key.

    Parameters:
        s: the ciphertext; must be exactly of type ``bytes``
           (``AssertionError`` otherwise).

    Returns:
        ``(best_key, best_score, best_plaintext)``.  If no candidate scores
        above -1, the initial values ``(-1, -1, b'')`` are returned.
    """
    assert type(s) is bytes
    # Start from an empty *bytes* value so the third element of the result has
    # the same type whether or not any candidate is accepted.
    best_plaintext = b''
    best_score = -1
    best_key = -1

    for key in range(256):
        plaintext = bytes([ key^char for char in s ])
        score = EvaluateString(plaintext)
        if score > best_score:
            best_score = score
            best_plaintext = plaintext
            best_key = key

    return best_key, best_score, best_plaintext
    

In [71]:
# Check the single-byte brute force on a hex-encoded ciphertext; the recorded result is
# key 88 with plaintext b"Cooking MC's like a pound of bacon".
BruteForceXorDecrypt(bytes.fromhex('1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736'))

(88, 0.002786125603803808, b"Cooking MC's like a pound of bacon")

In [72]:
def MultiKeyXorDecrypt(s: bytes, keysize: int):
    """Recover a repeating XOR key of length ``keysize`` and decrypt ``s`` with it.

    Byte ``n`` of the key is applied to ``s[n]``, ``s[n + keysize]``, ... so
    each of those columns is solved independently with
    ``BruteForceXorDecrypt``.  The recovered key bytes are then XORed back
    over the whole input.

    Parameters:
        s: the ciphertext; must be exactly of type ``bytes``.
        keysize: the assumed key length; must be an ``int`` with
            ``0 < keysize < len(s)``.

    Returns:
        ``(keys, score, plaintext)`` where ``keys`` is the list of recovered
        key bytes, ``plaintext`` is ``s`` decrypted with them and ``score`` is
        ``EvaluateString(plaintext)``.

    Raises:
        AssertionError: if the argument checks above fail.
    """
    assert type(s) is bytes and type(keysize) is int and 0 < keysize < len(s)

    # s[n::keysize] is the column of bytes encrypted with key byte n.
    keys = [BruteForceXorDecrypt(s[n::keysize])[0] for n in range(keysize)]

    plaintext = bytes(char ^ keys[n % keysize] for n, char in enumerate(s))
    score = EvaluateString(plaintext)

    return keys, score, plaintext



In [73]:
# Trial run with a fixed key size of 3.  Despite the plural names, MultiKeyXorDecrypt
# returns a single score and a single plaintext, so `scores` and `plaintexts` each hold one value.
keys, scores, plaintexts = MultiKeyXorDecrypt(cyphertext, 3)

In [74]:
best_score = 0
best_keys = []
best_plaintext = b''

# Decrypt with every candidate key size, print a preview of each attempt and
# remember the highest-scoring one in the best_* variables.
for keysize in range(1, 40):
    keys, score, plaintext = MultiKeyXorDecrypt(cyphertext, keysize)
    print(plaintext[:100])
    if score > best_score:
        best_score = score
        best_keys = keys
        best_plaintext = plaintext

b'T\x0bV\x04BFKV\x06Z\x07uS ,V\x00UG\x07ZHBN\x07RH_\x0c\x7fIWH\x00-i\x1dTT\nz\x1a\x07,\x1bOI\x0e\x1dUD\x0c\x04NME\x1a[uEWA\x00S@X\x06]\x05hS\x0ebILT\x0e\x10XM@I-oN\x1aI~_OES^\x08TH\x1b\x1dy\x16'
b'O\x16M\x19Y[PK\x1dG\x1chH=7K\x1bH\\\x1aAUYS\x1cOSB\x17bRJS\x1d6t\x06IO\x17a\x07\x1c1\x00RR\x13\x06H_\x11\x1fSVX\x01FnXL\\\x1bN[E\x1d@\x1euH\x13yTWI\x15\rCP[T6rU\x07RcDR^NE\x15OU\x00\x00b\x0b'
b"T\x0cK\x04E[KQ\x1bZ\x00hS'1V\x07HG\x00GHES\x07UU_\x0bbIPU\x00*t\x1dSI\n}\x07\x07+\x06ON\x13\x1dRY\x0c\x03SMB\x07[rXWF\x1dSGE\x06Z\x18hT\x13bNQT\t\rXJ]I*rN\x1dT~XRETC\x08SU\x1b\x1ad\x16"
b'T\x07V\x03BJKQ\x06V\x07rS,,Q\x00YG\x00ZDBI\x07^HX\x0csIPH\x0c-n\x1dXT\rz\x16\x07+\x1bCI\t\x1dYD\x0b\x04BMB\x1aWuBWM\x00T@T\x06Z\x05dS\tbELS\x0e\x1cXJ@E-hN\x16Iy_CET^\x04TO\x1b\x11y\x11'
b'I\x0bQ\x02E[KQ\x00]\x1auT&+K\x00RA\x00GHEH\x00OHX\nxTWO\x06*t\x1dSR\rg\x1a\x00*\x1cRI\t\x1bRY\x0c\x03HJX\x1a\\sBJA\x07UGE\x06Z\x03oN\x0eeOKI\x0e\x17^J]I*iI\x07IyYHXSY\x0eSU\x1b\x1a\x7f\x11'
b'R\x07M\x03B[MZ\x1d]\x07hU,7Q\x00HA\x0bAOB

In [37]:
# Display the current value of `plaintext`.
# NOTE(review): this cell's execution count (37) is lower than that of the cells above it,
# so the recorded output may come from an earlier run - re-run in order to confirm.
plaintext

b'T\x0cA\x04C@KK\x06]\x1bhI;!T\x1dH\x08\x01PU_O\rOHB\x0cbIW\x07\x070 \x1dRI\n}\r\x07-\x1dOT\x0e\x1aIY\x16\x1fCOX\x07\x14sOJ\\\x01Y]X\x1b]\x18hSAeT\x05T\x08\rXJWI,iN\x07IyCR_HS\nIUT\x1bs\x0bThP^L\x13\x06F\x01AC\x001>_Fl\x0cH[AR\tN\x1bV\x17-*90rGI[GB\x0bR\x01RXTT\x16=PWHOT]\x11K\x1d.6\x07^\x01rOSG\x00T\x1a\x11\x1b]YF\x07P<N\x11MUC\x02GOT\x05e\x16\x1ad[R\x07@H\x00BY\x1f]^\x060g|ENUV]\x02L\x1bIH7\r\x1cs\x1aGSE\x11\x11\x1cA\x08\x06\x01tTA;_TW\x17KHA_\x08I\x1ce=6M\rIV\x00\x06NHS\x1aJS\x07HrYA\x04K\x01LKOR\x01,y\'h\x1bBN@\x00OI\\\x07\x1dPT\x18\\o\x16IF\x1d[GS\x1brTs\x0b\x0eb\x1a\x0703\x06\x1bVY\x1dD\x06MYX EH\x18[Q\\G\x08\x06\x18;\x01C{_\x01N\\\x07CDA\x1dTI@HE\x17g_\x04W_[OE\x1c\x071\x00\x1a6USO\x0c\x07I\nOCC^M\\Xs\x16\x17\x19gHGlZ\x1c\x00{\x02\x08,kO\x07\x08c*rF\x1aUHS\x17\tcP\x01QO\x1aATMRN\x7fS\x00c^OWRY\x1dn\x11\x1eI]L\x1bHx\x111bR^\x12OM\x19\rcIOeIS\x01PTZUZ\x11\x07GOYP9B\x11\x19THZS\x077>d\nNnIO\x1cP\x1fAN\n\x01\n\x1b\\O\x06mI\x06.nH\x08\x15DR\x00~\x13\x12+\x1bTFZ\x1cX\x1ceNXTTUSp]\x06\