In [1]:
#Ref code:

import base64


def get_english_score(input_bytes):
    """Scores a byte string by how English-like its characters are.

    Every byte is lower-cased and looked up in a table of relative
    letter frequencies; the matching frequencies are summed. Bytes
    that are not in the table (digits, punctuation, control bytes,
    ...) add nothing to the score.

    Returns 0 for an empty input.
    """

    # From https://en.wikipedia.org/wiki/Letter_frequency
    # with the exception of ' ', which is an estimate.
    # 'i' is .06966 in that table; it used to be a copy of the 'h' value.
    character_frequencies = {
        'a': .08167, 'b': .01492, 'c': .02782, 'd': .04253,
        'e': .12702, 'f': .02228, 'g': .02015, 'h': .06094,
        'i': .06966, 'j': .00153, 'k': .00772, 'l': .04025,
        'm': .02406, 'n': .06749, 'o': .07507, 'p': .01929,
        'q': .00095, 'r': .05987, 's': .06327, 't': .09056,
        'u': .02758, 'v': .00978, 'w': .02360, 'x': .00150,
        'y': .01974, 'z': .00074, ' ': .13000
    }
    lowered = input_bytes.lower()
    return sum(character_frequencies.get(chr(byte), 0) for byte in lowered)


def single_char_xor(input_bytes, char_value):
    """XORs every byte of input_bytes with the single value char_value
    and returns the result as a new byte string.
    """
    return bytes([byte ^ char_value for byte in input_bytes])


def bruteforce_single_char_xor(ciphertext):
    """Tries every single-byte key (0 through 255) against ciphertext and
    scores each decryption by English character frequency.

    Returns a dict with the 'message', 'score' and 'key' of the
    best-scoring attempt; on a tie the lowest key value wins.
    """
    candidates = []
    for key_value in range(256):
        decrypted = single_char_xor(ciphertext, key_value)
        candidates.append({
            'message': decrypted,
            'score': get_english_score(decrypted),
            'key': key_value,
        })
    # max() keeps the first of several equal scores, i.e. the lowest key.
    return max(candidates, key=lambda candidate: candidate['score'])


def break_repeating_key_xor(ciphertext):
    """Attempts to break repeating-key XOR encryption.

    1. For every candidate key size from 2 to 40, the ciphertext is cut
       into full-size chunks and the Hamming distance of each disjoint
       pair of neighbouring chunks (0-1, 2-3, ...) is normalized by the
       key size and averaged.
    2. The key size with the smallest average distance is selected.
    3. The ciphertext is transposed into one block per key position and
       each block is brute forced as a single-byte XOR.

    Returns a tuple (plaintext, key).
    Raises ValueError when the ciphertext is too short to form two
    chunks for any candidate key size.
    """
    average_distances = []

    # Take the keysize from suggested range
    for keysize in range(2, 41):

        # Break the ciphertext into chunks the length of the keysize
        chunks = [ciphertext[i:i+keysize] for i in range(0, len(ciphertext), keysize)]

        # A trailing partial chunk would be compared over fewer bytes
        # but still normalized by the full keysize, so leave it out.
        if chunks and len(chunks[-1]) != keysize:
            chunks.pop()

        # Normalized Hamming distance of each disjoint pair of chunks
        distances = [
            calculate_hamming_distance(chunks[i], chunks[i + 1]) / keysize
            for i in range(0, len(chunks) - 1, 2)
        ]

        # Not enough data to judge this keysize
        if not distances:
            continue

        average_distances.append({
            'key': keysize,
            'avg distance': sum(distances) / len(distances)
            })

    if not average_distances:
        raise ValueError('ciphertext is too short to estimate a key size')

    possible_key_length = min(average_distances, key=lambda x: x['avg distance'])['key']

    # Byte i of the key is recovered from the block made up of every
    # nth byte starting at offset i, where n is the keysize.
    key = bytes([
        bruteforce_single_char_xor(ciphertext[i::possible_key_length])['key']
        for i in range(possible_key_length)
    ])
    return (repeating_key_xor(ciphertext, key), key)


def repeating_key_xor(message_bytes, key):
    """XORs message_bytes with key and returns the result. The key is
    reused from its start whenever the message is longer than the key.
    """
    key_length = len(key)
    xored = bytearray()
    cursor = 0
    for byte in message_bytes:
        xored.append(byte ^ key[cursor])
        # Wrap back to the first key byte after the last one was used.
        cursor = (cursor + 1) % key_length
    return bytes(xored)


def calculate_hamming_distance(input_bytes_1, input_bytes_2):
    """Returns the Hamming distance (count of differing bits) between
    two byte-strings, compared pairwise up to the shorter length.
    """
    # XOR leaves a 1 exactly where the two bytes disagree, so the
    # number of '1' digits in its binary form is the per-byte distance.
    return sum(
        bin(left ^ right).count('1')
        for left, right in zip(input_bytes_1, input_bytes_2)
    )


def main():
    """Reads the base64 text in '6.txt', breaks the repeating-key XOR
    and prints the recovered key and message.
    """
    with open('6.txt') as challenge_file:
        encoded = challenge_file.read()
    ciphertext = base64.b64decode(encoded)
    message, recovered_key = break_repeating_key_xor(ciphertext)
    print("Key: {}\nMessage: {}".format(recovered_key, message))


if __name__ == '__main__':
    main()

Key: b'Terminator X: Bring the noise'
Message: b"I'm back and I'm ringin' the bell \nA rockin' on the mike while the fly girls yell \nIn ecstasy in the back of me \nWell that's my DJ Deshay cuttin' all them Z's \nHittin' hard and the girlies goin' crazy \nVanilla's on the mike, man I'm not lazy. \n\nI'm lettin' my drug kick in \nIt controls my mouth and I begin \nTo just let it flow, let my concepts go \nMy posse's to the side yellin', Go Vanilla Go! \n\nSmooth 'cause that's the way I will be \nAnd if you don't give a damn, then \nWhy you starin' at me \nSo get off 'cause I control the stage \nThere's no dissin' allowed \nI'm in my own phase \nThe girlies sa y they love me and that is ok \nAnd I can dance better than any kid n' play \n\nStage 2 -- Yea the one ya' wanna listen to \nIt's off my head so let the beat play through \nSo I can funk it up and make it sound good \n1-2-3 Yo -- Knock on some wood \nFor good luck, I like my rhymes atrocious \nSupercalafragilisticexpialidocious \nI

In [180]:
#My code: 
#set[6]
import base64
import pdb
from set14win import check_occ,dictionary_to_sorted_string
#b64texts=[base64.b64decode(''.join(x.splitlines())) for x in open('6.txt').readlines()]
# Read and base64-decode the challenge file; the with-block closes the file handle.
with open('6.txt') as challenge_file:
    b64texts=base64.b64decode(challenge_file.read())
'''
https://cryptopals.com/static/challenge-data/6.txt 
It's been base64'd after being encrypted with repeating-key XOR.
Decrypt it.
'''
'''
1.Let KEYSIZE be the guessed length of the key; try values from 2 to (say) 40.
Write a function to compute the edit distance/Hamming distance between two strings. The Hamming distance is just the number of differing bits. 
The distance between "this is a test" and "wokka wokka!!!" is 37.
'''
#Do 1:
def find_distance(text1:bytes,text2:bytes):
    '''
    Returns the Hamming distance (number of differing bits) between two
    byte strings of the same length.
    Raises ValueError when the lengths differ, because zip() would
    otherwise silently ignore the extra bytes of the longer input.
    '''
    if len(text1)!=len(text2):
        raise ValueError('find_distance needs two inputs of the same length')
    # x^y has a 1 bit wherever the two bytes differ.
    return sum(bin(x^y).count('1') for x,y in zip(text1,text2))
#DONE V
'''
2. For each KEYSIZE, take the first KEYSIZE worth of bytes, and the second KEYSIZE worth of bytes,
 and find the edit distance between them. Normalize this result by dividing by KEYSIZE. 
'''
def break_to_chunks(b64decoded:bytes):
    '''
    Takes the decoded b64 text and breaks it into chunks for every
    chunk size from 2 to 40 bytes.
    Returns {bytesize: list of chunks}; only full-size chunks are kept,
    so a trailing remainder shorter than bytesize is dropped and an
    input shorter than bytesize gives an empty list.
    '''
    data={}
    for bytesize in range(2,41):
        # Stop at the last multiple of bytesize so every chunk is full.
        full_length=len(b64decoded)-len(b64decoded)%bytesize
        data[bytesize]=[b64decoded[i:i+bytesize] for i in range(0,full_length,bytesize)]
    return data
# DONE V

'''
For each KEYSIZE, take the first KEYSIZE worth of bytes, and the second KEYSIZE worth of bytes, 
and find the edit distance between them.
 Normalize this result by dividing by KEYSIZE.
'''
'''
The KEYSIZE with the smallest normalized edit distance is probably the key. 
You could proceed perhaps with the smallest 2-3 KEYSIZE values. 
Or take 4 KEYSIZE blocks instead of 2 and average the distances.
'''
def sort_dict(x:dict):
    '''Returns a new dict with the items of x ordered by value, smallest first.'''
    by_value=sorted(x.items(), key=lambda pair: pair[1])
    return dict(by_value)

def calc_avg_distace(chunks):
    '''
    Returns {keysize: average normalized distance} for a list of
    equal-length chunks, where keysize is the length of the first chunk.
    The distance of every pair of neighbouring chunks is divided by
    keysize and the results are averaged.
    Raises ValueError when there are fewer than two chunks.
    '''
    if len(chunks)<2:
        raise ValueError('calc_avg_distace needs at least two chunks')
    keysize=len(chunks[0])
    distances=[find_distance(first,second)/keysize for first,second in zip(chunks,chunks[1:])]
    return {keysize:sum(distances)/len(distances)}
    
def transpose(chunks):
    '''
    Takes an iterable of chunk lists and returns {x: bytes}, where entry x
    is made of the first byte of chunk number x from every chunk list.
    Positions are collected until some chunk list has no chunk x (or that
    chunk is empty), up to position 400.
    '''
    groups=list(chunks)
    max_position=400
    data={}
    for x in range(0,max_position+1):
        try:
            data[x]=bytes([group[x][0] for group in groups])
        except IndexError:
            # A chunk list ran out of chunks: nothing further to collect.
            # Any other error is a real problem and is allowed to surface.
            break
    return data
    

def most_common(lst):
    '''Returns the element that appears most often in lst.'''
    distinct=set(lst)
    return max(distinct, key=lambda element: lst.count(element))
   
def single_byte_XOR_final(bytess):
    '''
    Returns {key: bytess XORed with key} for every single-byte key 0-255.
    '''
    return {key:bytes([byte^key for byte in bytess]) for key in range(256)}

def check_scores(b:bytes):
    '''
    Scores a byte string by English letter frequency: every byte is
    lower-cased and the frequency of the matching character is added.
    Bytes that are not in the table add nothing; empty input scores 0.
    '''
    # 'i' is .06966 in the Wikipedia letter-frequency table; it used to
    # be a copy of the 'h' value here.
    occurance_english={
        'a': .08167, 'b': .01492, 'c': .02782, 'd': .04253,
        'e': .12702, 'f': .02228, 'g': .02015, 'h': .06094,
        'i': .06966, 'j': .00153, 'k': .00772, 'l': .04025,
        'm': .02406, 'n': .06749, 'o': .07507, 'p': .01929,
        'q': .00095, 'r': .05987, 's': .06327, 't': .09056,
        'u': .02758, 'v': .00978, 'w': .02360, 'x': .00150,
        'y': .01974, 'z': .00074, ' ': .13000
        }
    return sum(occurance_english.get(chr(byte),0) for byte in b.lower())

def repeating_key_XOR(text:bytes,cipher):
    '''
    XORs text with the key given in cipher, repeating the key as often
    as needed, and returns the result as bytes.
    An empty key gives b'' (there is nothing to XOR with).
    '''
    if not cipher:
        return b''
    key_length=len(cipher)
    # Index the key modulo its length instead of building a copy of the
    # key repeated len(text) times.
    return bytes([byte^cipher[i%key_length] for i,byte in enumerate(text)])

def main():
    '''Placeholder: the driver code lives in the __main__ guard below.'''
    pass
if __name__ == '__main__':
    # Split the ciphertext once for every candidate chunk size.
    chunks=break_to_chunks(b64texts)
    # Collect {keysize: distance} and order it by distance, smallest first.
    avreges={}
    for bytesize,sized_chunks in chunks.items():
        avreges.update(calc_avg_distace(sized_chunks))
    avreges=sort_dict(avreges)
    # Regroup the first bytes of the chunks across all chunk sizes.
    transposed=transpose(list(chunks.values()))

#My next task:
# Find the key of each transposed byte array.


            

        

In [137]:
# XOR the transposed block at position `pos` with every single-byte key
# and keep the 256 results as a list, indexed by key value.
pos=1
candidates_by_key=single_byte_XOR_final(transposed[pos])
transposedXOR={pos:list(candidates_by_key.values())}

In [100]:
# Score every candidate once; the list index of the best score is the
# key byte, because the candidates are stored in key order 0..255.
scores=[check_scores(x) for x in transposedXOR[pos]]
bytes([scores.index(max(scores))])

b'R'

In [95]:
# NOTE(review): the cell that builds transposedXOR stores a single entry under
# `pos` (set to 1 there), so key 0 exists only if that cell was run with pos=0 - confirm.
transposedXOR[0]

[b'\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d\x1d',
 b'\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c\x1c',
 b'\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f\x1f',
 b'\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e\x1e',
 b'\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19\x19',
 b'\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18\x18',
 b'\x1b\x1b\x1b\x1b\x1b\x1b\

In [94]:
# English-likeness scores of all candidates at position 0, best first.
sorted(map(check_scores,transposedXOR[0]),reverse=True)

[499.7407350000003,
 499.7407350000003,
 356.29444889999996,
 356.29444889999996,
 321.3181062000001,
 321.3181062000001,
 295.3514148,
 295.3514148,
 265.52906639999986,
 265.52906639999986,
 248.92612289999994,
 248.92612289999994,
 239.75909490000004,
 239.75909490000004,
 239.75909490000004,
 239.75909490000004,
 235.54934520000015,
 235.54934520000015,
 167.32776840000002,
 167.32776840000002,
 158.35746029999993,
 158.35746029999993,
 109.45352730000002,
 109.45352730000002,
 108.50928270000004,
 108.50928270000004,
 94.66038269999997,
 94.66038269999997,
 92.85058379999994,
 92.85058379999994,
 87.65724629999991,
 87.65724629999991,
 79.27708620000001,
 79.27708620000001,
 77.66400329999996,
 77.66400329999996,
 75.8935476,
 75.8935476,
 58.70045220000002,
 58.70045220000002,
 38.47791089999999,
 38.47791089999999,
 30.373157099999965,
 30.373157099999965,
 6.019548599999995,
 6.019548599999995,
 5.901519000000001,
 5.901519000000001,
 3.7376274000000023,
 3.7376274000000023,
 2

In [185]:
# Gather the first byte of every 30-byte chunk of the ciphertext.
pos0=bytes([chunk[0] for chunk in chunks[30]])
pos0score=check_scores(pos0)
pos0

b"\x1d\x00\x1e\x0c\x08\x1aA\x07\x16\x1b\x00=NA*\x1d\x1c\x0f\x02\x00\rb\x0bAd\n\x02S\n<\x0c\x17M\x01=\x056\x02\x1bY6\x1aE'R\x19N\x06OT\x07\r\x00\x00\x07\x1aS\x165\n\x1b\x0c\x00\x01\x08T\x06\x07\x001\x1a\x00b+I\x06GN\x11\x1dIN\t\x06IS\n-\x00\x17\x0c\x10\x08\x16\x11"

In [139]:
# Score each of the 256 single-byte XOR candidates once and show the
# position of the best one, which is the key byte that produced it.
pos0_scores=[check_scores(x) for x in single_byte_XOR_final(pos0).values()]
pos0_scores.index(max(pos0_scores))

84

In [140]:
# 84 is the best-scoring key byte shown by the previous cell; display it as a character.
chr(84)

'T'

In [189]:
# Assume a 29-byte key. Entry i of `transposed` holds byte i of every
# 29-byte chunk; the best-scoring single-byte key of each entry becomes
# byte i of the recovered key.
keysize=29
transposed=[bytes([chunk[i] for chunk in chunks[keysize]]) for i in range(keysize)]
key=b''
for t in transposed:
    scores=[check_scores(candidate) for candidate in single_byte_XOR_final(t).values()]
    key+=bytes([scores.index(max(scores))])
    

In [190]:
# Decrypt the whole ciphertext with the recovered repeating key.
repeating_key_XOR(b64texts,key)

b"I'm back and I'm ringin' the bell \nA rockin' on the mike while the fly girls yell \nIn ecstasy in the back of me \nWell that's my DJ Deshay cuttin' all them Z's \nHittin' hard and the girlies goin' crazy \nVanilla's on the mike, man I'm not lazy. \n\nI'm lettin' my drug kick in \nIt controls my mouth and I begin \nTo just let it flow, let my concepts go \nMy posse's to the side yellin', Go Vanilla Go! \n\nSmooth 'cause that's the way I will be \nAnd if you don't give a damn, then \nWhy you starin' at me \nSo get off 'cause I control the stage \nThere's no dissin' allowed \nI'm in my own phase \nThe girlies sa y they love me and that is ok \nAnd I can dance better than any kid n' play \n\nStage 2 -- Yea the one ya' wanna listen to \nIt's off my head so let the beat play through \nSo I can funk it up and make it sound good \n1-2-3 Yo -- Knock on some wood \nFor good luck, I like my rhymes atrocious \nSupercalafragilisticexpialidocious \nI'm an effect and that you can bet \nI can take 

b'\x1dB\x1fM\x0b\x0f\x02\x1fO\x13N<\x1aie\x1fI\x1c\x0eN\x13\x01\x0b\x07N\x1b\x01\x16E6\x00\x1e\x01Id T\x1d\x1dC3SNeR\x06\x00GT\x1c\rEM\x07\x04\x0cS\x12<\x0c\x1e\x08I\x1a\t\x11O\x14L!\x1aG+\x00\x05\x1dGY\x11\x04\t\x00d&\x07S\x007\x16\x06\x0c\x1a\x17A\x1d\x01RT0_\x00 \x13\n\x05GO\x12H\x08ENe>\x16\t8E\x06\x05\x08\x1aF\x07O\x1fYx~jb6\x0c\x1d\x0fA\rH\x06U\x1a\x1b\x00\x1dBt\x04\x1e\x01I\x1a\t\x11\x02Rz\x7fI\x00H:\x00\x1a\x13I\x1aOEH\x0f\x1d\rS\x04:\x01R\x19\x01\x0bA\x13\x06\x00L1_Sb\x15\x06\x07\t\x07T\x0b\x17A\x14\x16Iy35\x0b\x1b\x01\x05\x0fF\x07O\x1dNxNH\'R\x04\x07\x0cEXH\x08A\x00O T\x08t\x0b\x1d\x19I\x02\x00\x0e\x16\\\x00R0ie\x1fI\x02\x02T\x00\x01\x0b\x07N\x02\x10S\x01&\x10\x15M\x02\x07\x02\x1fO\x1bNx0i6R\n\x01\tT\x06\x07\tSN\x02\x10S\x08;\x10\x06\x05I\x0f\x0f\x10O;\x00:_G+\x1cId3OT\x02\x10S\x1aO\x05\x16\x11t\x0c\x06M\x0f\x02\x0e\x03CRL=N\x00/\x0bI\r\x08N\x17\r\x15T\x1dO\x0e\x1cE^(\x0bM\x19\x01\x12\x07\nUSxNOb\x06\x01\x0bGS\x1d\x0c\x00\x00\x17\n\x05\x1f\x0c:B^M.\x01A"\x0e\x1cI4VAb5\x06OG*~

In [188]:
# NOTE(review): the value shown below is 30 bytes long although the key-recovery
# cell sets keysize=29 - it looks like output from an earlier run; re-run to confirm.
key

b'inient nenIrno n n nieotio iri'

In [201]:
# Non-ASCII text has to be encoded explicitly: a b'...' literal may only contain ASCII characters.
bytes('גבריאל','UTF-8')

b'\xd7\x92\xd7\x91\xd7\xa8\xd7\x99\xd7\x90\xd7\x9c'

SyntaxError: bytes can only contain ASCII literal characters. (<ipython-input-193-44d18c882ab2>, line 1)