# Pregunta 1

In [72]:
# Standard library
import os
from math import ceil
from random import SystemRandom

# Auxiliary functions

# Convert from plain text to corresponding positions in the alphabet
def text_to_numbers(text, characters):
    """
    Translate every symbol of a text into its position inside an alphabet.

    Arguments:
        text: An iterable of symbols (typically a string).
        characters: The alphabet, given as a sequence that provides .index()
        (e.g. a list of characters or a string).
    Returns:
        A list holding, for each symbol of text and in the same order, the
        index at which that symbol first occurs in characters.
    Raises:
        ValueError: propagated from characters.index() when a symbol is not
        part of the alphabet.
    """
    positions = []
    for symbol in text:
        positions.append(characters.index(symbol))
    return positions

# Convert from alphabet positions to the corresponding plain text
def numbers_to_text(numbers, characters):
    """
    Translate a sequence of alphabet positions back into a string.

    Arguments:
        numbers: An iterable of integer positions into characters.
        characters: The alphabet, given as an indexable sequence of strings.
    Returns:
        The string obtained by concatenating characters[n] for every n in
        numbers, in order.
    """
    symbols = []
    for position in numbers:
        symbols.append(characters[position])
    return ''.join(symbols)

# Encrypt plain text using RP
def encrypt(text, key, alphabet):
    """
    Encrypt a plain text with the Repeated Pad cipher.

    The key is repeated cyclically along the text; each plain-text position is
    shifted forward by the matching key position, modulo the alphabet size.

    Arguments:
        text: The plain text; every symbol must belong to alphabet.
        key: The key; every symbol must belong to alphabet.
        alphabet: The ordered sequence of symbols the cipher works over.
    Returns:
        The ciphertext, as a string with the same length as text.
    """
    plain_positions = text_to_numbers(text, alphabet)
    key_positions = text_to_numbers(key, alphabet)
    modulus = len(alphabet)
    key_length = len(key)
    shifted = [
        (position + key_positions[offset % key_length]) % modulus
        for offset, position in enumerate(plain_positions)
    ]
    return numbers_to_text(shifted, alphabet)

# Decrypt ciphertext using RP
def decrypt(cipher, key, alphabet):
    """
    Decrypt a Repeated Pad ciphertext.

    The key is repeated cyclically along the ciphertext; each ciphertext
    position is shifted backwards by the matching key position, modulo the
    alphabet size.

    Arguments:
        cipher: The ciphertext; every symbol must belong to alphabet.
        key: The key; every symbol must belong to alphabet.
        alphabet: The ordered sequence of symbols the cipher works over.
    Returns:
        The recovered text, as a string with the same length as cipher.
    """
    cipher_positions = text_to_numbers(cipher, alphabet)
    key_positions = text_to_numbers(key, alphabet)
    modulus = len(alphabet)
    key_length = len(key)
    recovered = [
        (position - key_positions[offset % key_length]) % modulus
        for offset, position in enumerate(cipher_positions)
    ]
    return numbers_to_text(recovered, alphabet)

# Calculate absolute distance between a string and the distribution of letters over an alphabet
def abs_distance(string, frequencies):
    """
    Measure how far the character distribution of a string is from a reference one.

    Arguments:
        string: The text whose observed character frequencies are measured.
        frequencies: A dictionary mapping each character to its expected
        relative frequency.
    Returns:
        The sum, over every character c in frequencies, of
        |frequencies[c] - (occurrences of c in string) / len(string)|.
        For an empty string there is no observed distribution, so
        float('inf') is returned instead of raising ZeroDivisionError; a caller
        that minimises the distance will therefore never prefer an empty
        candidate.
    """
    if not string:
        # Guard against the division by len(string) below.
        return float('inf')
    length = len(string)
    return sum(
        abs(expected - string.count(char) / length)
        for char, expected in frequencies.items()
    )

In [73]:
# Hacking

# Estimate the character in a specific key position using frequency analysis
def get_probable_char(text, frequencies, distance):
    """
    Guess the key character that was used to encrypt a group of ciphertext symbols.

    Every character of the alphabet is tried as the key: the text is shifted
    back by that character's position and the candidate whose result is
    closest to the reference distribution wins. Ties keep the earliest
    character in alphabet order, because only a strictly smaller distance
    replaces the current best.

    Arguments:
        text: The ciphertext symbols that share the same key position.
        frequencies: A dictionary representing a character frequency over the
        alphabet; its key order defines the alphabet.
        distance: A function indicating how distant is a string from following
        a character frequency.
    Returns:
        The best-scoring key character, or '' when the alphabet is empty or no
        candidate scores strictly below infinity.
    """
    alphabet = list(frequencies)
    if not alphabet:
        # Nothing to try; this is also what the loop below would return.
        return ''
    size = len(alphabet)
    # The ciphertext positions do not depend on the candidate, so convert once
    # instead of once per alphabet character.
    cipher_positions = text_to_numbers(text, alphabet)
    best_char = ''
    smallest_distance = float('inf')
    # enumerate() yields each character's position directly; dictionary keys
    # are unique, so this matches alphabet.index(char) without a rescan.
    for shift, char in enumerate(alphabet):
        current_numbers = [(n - shift) % size for n in cipher_positions]
        current_text = numbers_to_text(current_numbers, alphabet)
        current_distance = distance(current_text, frequencies)
        if current_distance < smallest_distance:
            smallest_distance = current_distance
            best_char = char
    return best_char

# Estimate an encryption key using frequency analysis
def get_probable_key(distributions, frequencies, distance):
    """
    Assemble a key guess from the per-position ciphertext groups.

    Arguments:
        distributions: A dictionary whose values are the ciphertext symbols
        encrypted with each key position; values are visited in the
        dictionary's iteration order.
        frequencies: A dictionary representing a character frequency over the
        alphabet.
        distance: A function indicating how distant is a string from following
        a character frequency.
    Returns:
        The concatenation of the character guessed by get_probable_char for
        every group.
    """
    guessed_chars = [
        get_probable_char(column, frequencies, distance)
        for column in distributions.values()
    ]
    return ''.join(guessed_chars)

# Break Repeated Pad using frequency analysis
def break_rp(ciphertext, frequencies, distance):
    """
    Break Repeated Pad using frequency analysis.

    Arguments:
        ciphertext: An arbitrary string representing the encrypted version of a plaintext.
        frequencies: A dictionary representing a character frequency over the alphabet.
        distance: A function indicating how distant is a string from following a character frequency.
    Returns:
        key: A guess of the key used to encrypt the ciphertext, assuming that the plaintext message was written in a language in which
        letters distribute according to frequencies.
        Only key sizes from 1 up to len(ciphertext) // 50 are tried, so a
        ciphertext shorter than 50 symbols yields ''. When several key sizes
        reach the same distance the smallest one is kept.
    """
    # The alphabet is the same for every candidate key size; build it once.
    alphabet = list(frequencies)
    max_key_size = len(ciphertext) // 50

    # Get an estimated key for every possible key_size allowed, then select the one
    # that minimizes distance to the frequency distribution
    best_key = ''
    best_distance = float('inf')
    for key_size in range(1, max_key_size + 1):
        # Symbols at positions pos, pos + key_size, pos + 2 * key_size, ... were
        # all encrypted with key position pos; a strided slice collects them.
        key_pos_distributions = {
            pos: ciphertext[pos::key_size] for pos in range(key_size)
        }
        current_key = get_probable_key(key_pos_distributions, frequencies, distance)
        current_plain_text = decrypt(ciphertext, current_key, alphabet)
        current_distance = distance(current_plain_text, frequencies)
        if current_distance < best_distance:
            best_distance = current_distance
            best_key = current_key
    return best_key

In [74]:
# Testing

# Generate tests for a plain text with every possible key size
def generate_tests(text, alphabet, name):
    """
    Write one test file per allowed key size for a plain text.

    For every size from 1 up to len(text) // 50 a random key of that size is
    drawn from alphabet, text is encrypted with it, and the line
    '<cipher> <key>' is written to tests/<name>_<size>.txt.

    Arguments:
        text: The plain text to encrypt; every symbol must belong to alphabet.
        alphabet: The ordered sequence of symbols used for keys and encryption.
        name: The prefix used for the generated file names.
    Returns:
        None. The files are created (or overwritten) inside the 'tests'
        directory, which is created if it does not exist yet.
    """
    # Opening a file for writing does not create missing parent directories.
    os.makedirs('tests', exist_ok=True)
    # One OS-backed generator is enough for every key character.
    rng = SystemRandom()
    for size in range(1, (len(text) // 50) + 1):
        key = ''.join(rng.choice(alphabet) for _ in range(size))
        cipher = encrypt(text, key, alphabet)
        with open(f'tests/{name}_{size}.txt', 'w') as file:
            file.write(f'{cipher} {key}\n')

# Run a specific test
def run_test(path):
    """
    Check whether break_rp recovers the key stored in a test file.

    The first line of the file is expected to hold '<cipher> <key>', separated
    by a single space.

    NOTE: this function reads the module-level name english_freq, which in this
    file is only assigned inside the `if __name__ == '__main__'` block.

    Arguments:
        path: The path of the test file to read.
    Returns:
        True if the key guessed from the ciphertext equals the stored key,
        False otherwise.
    """
    with open(path, 'r') as handle:
        fields = handle.readline().strip('\n').split(' ')
        ciphertext, expected_key = fields[0], fields[1]
        guessed_key = break_rp(ciphertext, english_freq, abs_distance)
    return guessed_key == expected_key

# Run all tests inside a directory
def run_all_tests(dir_path):
    """
    Run every test file found under a directory and report the score.

    The directory is traversed recursively with os.walk; each file is passed to
    run_test and its boolean outcome is printed, followed by a final summary
    line with the number of passed tests over the total.

    Arguments:
        dir_path: The root directory holding the test files.
    Returns:
        None. Results are reported through print.
    """
    test_paths = (
        os.path.join(folder, file_name)
        for folder, _, file_names in os.walk(dir_path)
        for file_name in file_names
    )
    passed = 0
    total = 0
    for test_path in test_paths:
        outcome = run_test(test_path)
        print(outcome)
        total += 1
        # A boolean counts as 1 when True and 0 when False.
        passed += outcome
    print(f'Testing done: {passed}/{total} tests answered correctly')

In [75]:
# Test Repeated Pad breaking
# Script entry point: defines the reference data used by the tests.
# english_freq is read as a global by run_test, so the test helpers only work
# once this block has executed.
if __name__ == '__main__':
    # Letter frequency for the english alphabet.
    # Maps each lowercase letter a-z to its relative frequency; the values add
    # up to 1.0. The key order (a..z) is also the alphabet order used by the
    # cipher, since the alphabet is obtained with list(english_freq).
    english_freq = {'a': 0.0817,
                    'b': 0.0129,
                    'c': 0.0276,
                    'd': 0.0425,
                    'e': 0.1288,
                    'f': 0.0223,
                    'g': 0.0202,
                    'h': 0.0609,
                    'i': 0.0697,
                    'j': 0.0015,
                    'k': 0.0077,
                    'l': 0.0403,
                    'm': 0.0241,
                    'n': 0.0675,
                    'o': 0.0751,
                    'p': 0.0193,
                    'q': 0.001,
                    'r': 0.0599,
                    's': 0.0633,
                    't': 0.0906,
                    'u': 0.0278,
                    'v': 0.0098,
                    'w': 0.0236,
                    'x': 0.0015,
                    'y': 0.0197,
                    'z': 0.0007}

    # Plain text examples
    # Lowercase letters only (no spaces or punctuation), so every symbol
    # belongs to the alphabet defined by english_freq.
    review_text = 'fifteenhoursintoeldenringidefeatedgodrickthefirstoffiveeldenlordsinthetimebetweenemergingintothelandsbetweenandstrikinghimdownihaddiscovereddecrepitruinsventuredintotwistingcavesstumbleduponenemyencampmentsandbattledtoothandnailagainstchallengingbossesfromsoftwaresgameshavealwaysmadeyoufeelsmallinmanywaystheytellyouthatyouareworthlessaplagueriddenratoraccursedundeadunfiteventobecinderstheyaskyoutonavigateunflinchingbrutalworldsandpityouagainstenemiesthatsystematicallydismantleyouregoeldenringmaintainsthenailbitingcombatandairofmysterythathasdistinguishedfromsoftwaressoulsbornegamesbutitiselevatedtonewheightsbythestudiosinterpretationofwhatanopenworldgamecanbehavingbroughtdowngodrickthebreadthoftheworldandthewayinwhichfromsoftwarehasapplieditssignaturestyletoanopenworldwasonfulldisplayreinforcinghowinsignificantireallywasanddrivinghomethemagnitudeofthetaskthatstillawaitedmeinagenrethathasbecomewroughtwithbloatedandoverdesignedgameseldenringisdefiantlycontrarianinalmosteverywayitscommitmenttodesignbysubtractionandtoplacingtheresponsibilityofchartingapaththroughitsworldentirelyontheplayermakesitstandheadandshouldersaboveotheropenworldtitleseldenringtakestheshardsofwhatcamebeforeandforgesthemintosomethingthatwillgodowninhistoryasoneofthealltimegreatsatriumphindesignandcreativityandanopenworldgamethatdistinguishesitselfforwhatitdoesnotdoasmuchaswhatitdoes'

    # Generate test files (writes tests/<name>_<size>.txt, one per key size).
    # NOTE(review): the original commented call passed `text`, which is not
    # defined anywhere in this file; `review_text` is presumably what was
    # meant - confirm before uncommenting.
    # generate_tests(review_text, list(english_freq), 'name')

    # Run tests (prints one True/False per test file plus a summary line)
    # run_all_tests('tests')

True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
Testing done: 27/27 tests answered correctly
