In [31]:
import string
import requests

In [32]:
# Lower-case hexadecimal alphabet; a character's index is its nibble value.
hex_digits = string.digits + "abcdef"
# Base64 alphabet: A-Z, a-z, 0-9, "+", "/"; a character's index is its 6-bit value.
values = "".join(
    (string.ascii_uppercase, string.ascii_lowercase, string.digits, "+/")
)


def split_chunks(l, n):
    """Yield consecutive lists of ``n`` items taken from the iterable ``l``.

    The final list is shorter than ``n`` when the input does not divide
    evenly; nothing is yielded for an empty input.
    """
    bucket = []
    for element in l:
        bucket.append(element)
        if len(bucket) != n:
            continue
        # Hand out the filled bucket and start a fresh list for the next one.
        filled, bucket = bucket, []
        yield filled
    if bucket:
        yield bucket


def hex_decode(text):
    """Decode a string of lower-case hex digit pairs into ``bytes``.

    Each pair of characters contributes one byte: the first character is the
    high nibble and the second the low nibble. Characters that are not in
    ``hex_digits`` raise ``KeyError``.
    """
    nibble_of = {digit: value for value, digit in enumerate(hex_digits)}

    return bytes(
        (nibble_of[high] << 4) | nibble_of[low]
        for high, low in split_chunks(text, 2)
    )


def groups(data, n):
    """Yield the strings in ``data`` joined together ``n`` at a time.

    ``data`` is any iterable of strings. Every yielded string is the
    concatenation of ``n`` consecutive items, except possibly the last one,
    which holds whatever is left over.
    """
    pending = []
    for piece in data:
        pending.append(piece)
        if len(pending) != n:
            continue
        yield "".join(pending)
        pending.clear()
    if pending:
        yield "".join(pending)


def b64_groups(data):
    """Yield the Base64 alphabet character for each 6-bit group of ``data``.

    The bits of ``data`` (most significant bit of each byte first) are cut
    into 6-bit groups and each group is looked up in ``values``.

    When the bit count is not a multiple of 6, the final group is short. It
    is padded with zero bits on the right before the lookup; parsing the
    short group directly would treat its bits as the low-order bits and
    select the wrong character.
    """
    digits = binary_digits(data)
    for num in groups(digits, 6):
        # Left-align a short trailing group: "01" must become "010000".
        num = int(num.ljust(6, "0"), 2)
        yield values[num]


def base64_encode(data):
    """Return the text encoding of ``data`` built from ``b64_groups``.

    The characters produced by ``b64_groups`` are collected four at a time;
    a final group with fewer than four characters is filled up with ``=``.
    """
    quads = [quad.ljust(4, "=") for quad in groups(b64_groups(data), 4)]
    return "".join(quads)


def b64_to_binary(text):
    """Yield the bits of Base64 ``text`` as "0"/"1" characters.

    Every character found in ``values`` contributes its 6-bit index, most
    significant bit first. Characters that are not in the alphabet (padding,
    line breaks, ...) are skipped.
    """
    for symbol in text:
        index = values.find(symbol)
        if index == -1:
            continue
        # The index is at most 63, so this is always exactly six digits.
        yield from format(index, "06b")


def binary_digits(data):
    """Yield the bits of each byte in ``data`` as "0"/"1" characters.

    Eight characters are produced per item, most significant bit first.
    """
    for octet in data:
        yield from (
            "1" if (octet >> shift) & 1 else "0"
            for shift in range(7, -1, -1)
        )


def base64_decode(text):
    """Decode Base64 ``text`` into ``bytes``.

    The bit stream from ``b64_to_binary`` is cut into 8-bit groups and each
    complete group becomes one byte. A trailing group with fewer than eight
    bits is dropped.
    """
    octets = groups(b64_to_binary(text), 8)
    # Only the last group can be short, so filtering equals stopping there.
    return bytes(int(octet, 2) for octet in octets if len(octet) == 8)


def single_xor(ciphertext, key):
    """XOR every byte of ``ciphertext`` with the single integer ``key``."""
    return bytes(byte ^ key for byte in ciphertext)


def repeating_xor(data, key):
    """XOR ``data`` with ``key``, repeating the key for the length of ``data``.

    Byte ``i`` of the input is combined with ``key[i % len(key)]``.
    """
    return bytes(
        byte ^ key[position % len(key)]
        for position, byte in enumerate(data)
    )

In [33]:
def edit_dist(buf1, buf2):
    """Return the Hamming distance (number of differing bits) of two buffers.

    Bytes are compared pairwise; XOR leaves a 1 in every bit position where
    the two bytes differ, so counting the set bits of the XOR gives the
    per-byte distance without building a string of bits.

    Only the low 8 bits of each item are considered. If the buffers differ
    in length, the comparison stops at the end of the shorter one and the
    surplus bytes are ignored.
    """
    return sum(
        bin((byte1 ^ byte2) & 0xFF).count("1")
        for byte1, byte2 in zip(buf1, buf2)
    )


edit_dist(b"this is a test", b"wokka wokka!!!") == 37

True

In [34]:
URL = "https://cryptopals.com/static/challenge-data/6.txt"
# Bound the wait so a stalled connection cannot hang the notebook.
response = requests.get(URL, timeout=30)
# Fail loudly on an HTTP error. base64_decode skips characters outside its
# alphabet, so an error page would otherwise decode silently into garbage.
response.raise_for_status()
text = response.text
data = base64_decode(text)

len(data)

2876

In [35]:
def keysize_edit_distance(ciphertext, keysize):
    """Score ``keysize`` by the average bit distance between adjacent chunks.

    ``ciphertext`` is cut into consecutive ``keysize``-byte chunks. For every
    pair of neighbouring chunks the edit distance is divided by ``keysize``,
    and the mean of those values is returned.

    Args:
        ciphertext: the bytes to analyse.
        keysize: candidate key length in bytes.

    Returns:
        The mean normalised distance as a float.

    Raises:
        ZeroDivisionError: if ``ciphertext`` yields fewer than two chunks.
    """
    prev = None
    diff = 0
    n = 0
    # Work on the argument, not on a notebook-level global, so the function
    # gives the right answer for whatever buffer the caller passes in.
    for i in range(0, len(ciphertext), keysize):
        chunk = ciphertext[i : i + keysize]
        if prev is not None:
            # NOTE: a short final chunk is compared over its own length only
            # (edit_dist stops at the shorter buffer) but is still divided by
            # keysize, so that last pair is slightly under-weighted.
            diff += edit_dist(chunk, prev) / keysize
            n += 1
        prev = chunk
    diff /= n
    return diff


# Try every candidate key length from 2 to 39 and keep the one whose
# neighbouring chunks are closest to each other on average.
candidate_sizes = range(2, 40)
keysize = min(candidate_sizes, key=lambda size: keysize_edit_distance(data, size))

keysize

29

In [36]:
def english_score(data):
    """Score how much ``data`` (bytes) looks like English text.

    The input is lower-cased first. If any byte is not in
    ``string.printable`` the score is 0. Otherwise each byte found in
    "etaoin shrdlu" adds its position in the reversed string ("u" adds 0,
    "e" adds 12); every other byte adds nothing.
    """
    lowered = data.lower()

    # One unprintable byte disqualifies the whole candidate.
    if any(chr(code) not in string.printable for code in lowered):
        return 0

    ranking = b"etaoin shrdlu"[::-1]
    weights = {code: rank for rank, code in enumerate(ranking)}
    return sum(weights.get(code, 0) for code in lowered)

In [37]:
key = []

# Cut the ciphertext into keysize-byte rows; the last row may be shorter.
blocks = [data[i : i + keysize] for i in range(0, len(data), keysize)]

for key_i in range(keysize):
    # Column key_i holds the bytes at offset key_i of every row, i.e. all
    # bytes that repeating_xor combines with key[key_i]. Rows too short to
    # have that offset are skipped. Building the column in one pass avoids
    # re-copying a growing bytes object on every append.
    chunk = bytes(bl[key_i] for bl in blocks if key_i < len(bl))

    # Try all 256 possible byte values (0..255 inclusive) and keep the one
    # whose decryption of this column scores best as English.
    k = max(range(256), key=lambda x: english_score(single_xor(chunk, x)))
    key.append(k)

In [38]:
# Show the recovered key bytes as ASCII text.
key_bytes = bytes(key)
key_bytes.decode("ascii")

'Terminator X: Bring the noise'

In [39]:
# Decrypt the whole ciphertext with the recovered key and preview the start.
decrypted = repeating_xor(data, bytes(key))
plaintext = decrypted.decode("ascii")

print(plaintext[:150])

I'm back and I'm ringin' the bell 
A rockin' on the mike while the fly girls yell 
In ecstasy in the back of me 
Well that's my DJ Deshay cuttin' all 
