# Pregunta 1

In [22]:
# Standard library
from math import ceil

# Auxiliary functions

# Convert from plain text to corresponding positions in the alphabet
def text_to_numbers(text, characters):
    """Map each symbol of ``text`` to its position in ``characters``.

    Args:
        text: Iterable of symbols (typically a string).
        characters: Sequence defining the alphabet. A symbol's number is the
            index of its first occurrence in this sequence.

    Returns:
        A list with one integer index per symbol of ``text``.

    Raises:
        ValueError: If a symbol of ``text`` does not occur in ``characters``.
    """
    position_of = characters.index
    return list(map(position_of, text))

# Convert from alphabet positions to the corresponding plain text
def numbers_to_text(numbers, characters):
    """Turn a sequence of alphabet positions back into a string.

    Args:
        numbers: Iterable of integer indices into ``characters``.
        characters: Sequence defining the alphabet; each element must be a
            string so the results can be joined.

    Returns:
        The concatenation of ``characters[n]`` for every ``n`` in ``numbers``.

    Raises:
        IndexError: If an index is out of range for ``characters``.
    """
    symbols = (characters[position] for position in numbers)
    return ''.join(symbols)

# Encrypt plain text using the Repeated Pad (RP) cipher
def encrypt(text, key, alphabet):
    """Encrypt ``text`` by adding a cyclically repeated key to it.

    Each plaintext symbol is converted to its position in ``alphabet``, the
    position of the matching key symbol (the key repeats every ``len(key)``
    symbols) is added, and the sum is reduced modulo the alphabet size.

    Args:
        text: Plaintext whose symbols all occur in ``alphabet``.
        key: Key whose symbols all occur in ``alphabet``.
        alphabet: Sequence of symbols defining the positions used.

    Returns:
        The ciphertext as a string of the same length as ``text``.
    """
    plain_positions = text_to_numbers(text, alphabet)
    pad = text_to_numbers(key, alphabet)
    shifted = [
        (value + pad[offset % len(key)]) % len(alphabet)
        for offset, value in enumerate(plain_positions)
    ]
    return numbers_to_text(shifted, alphabet)

# Decrypt ciphertext using the Repeated Pad (RP) cipher
def decrypt(cipher, key, alphabet):
    """Decrypt ``cipher`` by subtracting a cyclically repeated key from it.

    Each ciphertext symbol is converted to its position in ``alphabet``, the
    position of the matching key symbol (the key repeats every ``len(key)``
    symbols) is subtracted, and the result is reduced modulo the alphabet
    size.

    Args:
        cipher: Ciphertext whose symbols all occur in ``alphabet``.
        key: Key whose symbols all occur in ``alphabet``.
        alphabet: Sequence of symbols defining the positions used.

    Returns:
        The recovered text as a string of the same length as ``cipher``.
    """
    cipher_positions = text_to_numbers(cipher, alphabet)
    pad = text_to_numbers(key, alphabet)
    unshifted = [
        (value - pad[offset % len(key)]) % len(alphabet)
        for offset, value in enumerate(cipher_positions)
    ]
    return numbers_to_text(unshifted, alphabet)

# Calculate absolute distance between a string and the distribution of letters over an alphabet
def abs_distance(string, frequencies):
    """Return the absolute (L1) distance between a sample and a distribution.

    For every character listed in ``frequencies`` the observed relative
    frequency in ``string`` is compared with the expected one, and the
    absolute differences are summed.

    Args:
        string: The sample to score.
        frequencies: Dict mapping each character to its expected relative
            frequency.

    Returns:
        The sum of ``abs(expected - observed)`` over all characters in
        ``frequencies``. For an empty ``string`` every observed frequency is
        taken to be 0, so the result is the sum of the absolute expected
        frequencies.
    """
    if not string:
        # An empty sample has no observations. Scoring it with all observed
        # frequencies at 0 avoids a ZeroDivisionError and keeps the score
        # finite, so callers comparing candidates still get a usable value.
        return sum(abs(expected) for expected in frequencies.values())

    length = len(string)
    return sum(
        abs(expected - string.count(char) / length)
        for char, expected in frequencies.items()
    )

In [30]:
# Estimate the character in a specific key position using frequency analysis
def get_probable_char(text, frequencies, distance):
    """Guess the key character that encrypted one key-position slice.

    Every character of the alphabet is tried as the key character: ``text``
    is shifted back by that character's position (modulo the alphabet size)
    and the result is scored with ``distance``. The candidate with the
    smallest score wins; on ties the earliest candidate in alphabet order is
    kept.

    Args:
        text: Ciphertext characters that were all encrypted with the same
            key position.
        frequencies: Dict mapping each alphabet character to its expected
            relative frequency; the key order defines the alphabet.
        distance: Callable ``distance(candidate_text, frequencies)``
            returning a number, where smaller means a better fit.

    Returns:
        The best-scoring key character, or ``''`` when ``frequencies`` is
        empty or no candidate scores below infinity.

    Raises:
        ValueError: If ``text`` contains a character missing from
            ``frequencies``.
    """
    alphabet = list(frequencies)
    best_char = ''
    if not alphabet:
        # No candidates to try; same result as a search over zero candidates.
        return best_char

    # The ciphertext positions do not depend on the candidate, so convert
    # once instead of once per candidate.
    cipher_numbers = text_to_numbers(text, alphabet)
    size = len(alphabet)
    smallest_distance = float('inf')
    # Dict keys are unique, so the enumerate index equals alphabet.index(char).
    for shift, char in enumerate(alphabet):
        shifted = [(n - shift) % size for n in cipher_numbers]
        current_distance = distance(numbers_to_text(shifted, alphabet), frequencies)
        if current_distance < smallest_distance:
            smallest_distance = current_distance
            best_char = char
    return best_char

# Estimate an encryption key using frequency analysis
def get_probable_key(distributions, frequencies, distance):
    """Assemble a key guess from per-position ciphertext slices.

    Args:
        distributions: Dict whose values are the ciphertext characters
            collected for each key position; the values are visited in the
            dict's iteration order, which determines the key order.
        frequencies: Dict mapping each alphabet character to its expected
            relative frequency.
        distance: Callable scoring how far a string is from ``frequencies``.

    Returns:
        The string formed by concatenating the most probable character for
        every entry of ``distributions``.
    """
    return ''.join(
        get_probable_char(column, frequencies, distance)
        for column in distributions.values()
    )

# Break Repeated Pad using frequency analysis
def break_rp(ciphertext, frequencies, distance, key_size=27):
    """
    Guess the key of a Repeated Pad ciphertext using frequency analysis.

    The ciphertext is split into ``key_size`` interleaved slices, so that
    slice ``i`` holds every character encrypted with key position ``i``.
    Each slice is then solved independently as a single-character shift.

    Arguments:
        ciphertext: An arbitrary string representing the encrypted version of a plaintext.
        frequencies: A dictionary representing a character frequency over the alphabet.
        distance: A function indicating how distant is a string from following a character frequency.
        key_size: Length of the key to recover. Defaults to 27, the value
        that used to be hard-coded here.
    Returns:
        key: A guess of the key used to encrypt the ciphertext, assuming that the plaintext message was written in a language in which
        letters distribute according to frequencies.
    Raises:
        ValueError: If key_size is smaller than 1.
    """
    if key_size < 1:
        raise ValueError('key_size must be a positive integer')

    # Stride slicing collects positions pos, pos + key_size, pos + 2 * key_size, ...
    # When the ciphertext is shorter than key_size, the trailing positions get
    # an empty slice, which is passed on to ``distance`` unchanged.
    key_pos_distributions = {
        pos: ''.join(ciphertext[pos::key_size]) for pos in range(key_size)
    }

    return get_probable_key(key_pos_distributions, frequencies, distance)

In [31]:
# TODO: more examples and better testing with files
# Test Repeated Pad breaking
if __name__ == '__main__':
    # End-to-end check: encrypt a known plaintext with a known key, recover
    # the key from the ciphertext alone, then decrypt with the recovered key.

    # Letter frequency for the english alphabet.
    # The key order of this dict (a-z) also defines the alphabet used for
    # encryption and decryption below, via list(english_freq).
    english_freq = {'a': 0.0817,
                    'b': 0.0129,
                    'c': 0.0276,
                    'd': 0.0425,
                    'e': 0.1288,
                    'f': 0.0223,
                    'g': 0.0202,
                    'h': 0.0609,
                    'i': 0.0697,
                    'j': 0.0015,
                    'k': 0.0077,
                    'l': 0.0403,
                    'm': 0.0241,
                    'n': 0.0675,
                    'o': 0.0751,
                    'p': 0.0193,
                    'q': 0.001,
                    'r': 0.0599,
                    's': 0.0633,
                    't': 0.0906,
                    'u': 0.0278,
                    'v': 0.0098,
                    'w': 0.0236,
                    'x': 0.0015,
                    'y': 0.0197,
                    'z': 0.0007}

    # Plain text of 1372 characters
    plain_text = 'fifteenhoursintoeldenringidefeatedgodrickthefirstoffiveeldenlordsinthetimebetweenemergingintothelandsbetweenandstrikinghimdownihaddiscovereddecrepitruinsventuredintotwistingcavesstumbleduponenemyencampmentsandbattledtoothandnailagainstchallengingbossesfromsoftwaresgameshavealwaysmadeyoufeelsmallinmanywaystheytellyouthatyouareworthlessaplagueriddenratoraccursedundeadunfiteventobecinderstheyaskyoutonavigateunflinchingbrutalworldsandpityouagainstenemiesthatsystematicallydismantleyouregoeldenringmaintainsthenailbitingcombatandairofmysterythathasdistinguishedfromsoftwaressoulsbornegamesbutitiselevatedtonewheightsbythestudiosinterpretationofwhatanopenworldgamecanbehavingbroughtdowngodrickthebreadthoftheworldandthewayinwhichfromsoftwarehasapplieditssignaturestyletoanopenworldwasonfulldisplayreinforcinghowinsignificantireallywasanddrivinghomethemagnitudeofthetaskthatstillawaitedmeinagenrethathasbecomewroughtwithbloatedandoverdesignedgameseldenringisdefiantlycontrarianinalmosteverywayitscommitmenttodesignbysubtractionandtoplacingtheresponsibilityofchartingapaththroughitsworldentirelyontheplayermakesitstandheadandshouldersaboveotheropenworldtitleseldenringtakestheshardsofwhatcamebeforeandforgesthemintosomethingthatwillgodowninhistoryasoneofthealltimegreatsatriumphindesignandcreativityandanopenworldgamethatdistinguishesitselfforwhatitdoesnotdoasmuchaswhatitdoes'

    # Random key of 27 characters; this matches the key size of 27 that
    # break_rp uses for the call below.
    rp_key = 'aworiaqalidhgsgpmlokpfhotbn'

    # Encrypted text
    encrypted_text = encrypt(plain_text, rp_key, list(english_freq))

    # Break RP test: the printed guess can be compared by eye with rp_key.
    possible_key = break_rp(encrypted_text, english_freq, abs_distance)
    print(possible_key)

    # Compare the resulting text with the original: decrypting with the
    # guessed key reproduces plain_text only if the guess is correct.
    decrypted_text = decrypt(encrypted_text, possible_key, list(english_freq))
    print(decrypted_text)

aworiaqalidhgsgpmlokpfhotbn
fifteenhoursintoeldenringidefeatedgodrickthefirstoffiveeldenlordsinthetimebetweenemergingintothelandsbetweenandstrikinghimdownihaddiscovereddecrepitruinsventuredintotwistingcavesstumbleduponenemyencampmentsandbattledtoothandnailagainstchallengingbossesfromsoftwaresgameshavealwaysmadeyoufeelsmallinmanywaystheytellyouthatyouareworthlessaplagueriddenratoraccursedundeadunfiteventobecinderstheyaskyoutonavigateunflinchingbrutalworldsandpityouagainstenemiesthatsystematicallydismantleyouregoeldenringmaintainsthenailbitingcombatandairofmysterythathasdistinguishedfromsoftwaressoulsbornegamesbutitiselevatedtonewheightsbythestudiosinterpretationofwhatanopenworldgamecanbehavingbroughtdowngodrickthebreadthoftheworldandthewayinwhichfromsoftwarehasapplieditssignaturestyletoanopenworldwasonfulldisplayreinforcinghowinsignificantireallywasanddrivinghomethemagnitudeofthetaskthatstillawaitedmeinagenrethathasbecomewroughtwithbloatedandoverdesignedgameseldenringisdefiantlycontrari