In [1]:
import math
import os
from typing import List, Dict, Optional, Callable

In [2]:
# Table mapping each uppercase letter A-Z to a relative frequency
# (presumably typical English letter frequencies -- confirm the source).
# NOTE: 'B' is listed before 'A'. The cipher helpers sort the keys before
# assigning indexes, so the literal's order does not affect the alphabet, but
# it is the order in which break_rp iterates over candidate key characters.
freq = {
    'B': 0.015, 'A': 0.082, 'C': 0.027, 'D': 0.047, 'E': 0.13, 'F': 0.022, 'G': 0.02,
    'H': 0.062, 'I': 0.069, 'J': 0.0016, 'K': 0.0081, 'L': 0.04, 'M': 0.027, 'N': 0.067,
    'O': 0.078, 'P': 0.019, 'Q': 0.0011, 'R': 0.059, 'S': 0.062, 'T': 0.096,
    'U': 0.027, 'V': 0.0097, 'W': 0.024, 'X': 0.0015, 'Y': 0.02, 'Z': 0.00078
}

In [3]:
def decipher_with_one_key(ciphertext: str, key: str, alphabet: dict, inverse_alphabet: dict):
    """
    Undo a repeating-key shift cipher.

    Arguments :
        ciphertext : The encrypted string; every character must be a key
                     of alphabet.
        key : The key, repeated cyclically along the ciphertext; every
              character must be a key of alphabet.
        alphabet : Mapping from character to its index.
        inverse_alphabet : Mapping from index back to its character.
    Returns :
        original_text : The ciphertext with each character shifted back by
                        the index of the key character at the same position,
                        modulo the alphabet size.
    """
    modulus = len(alphabet)
    key_size = len(key)
    return "".join(
        inverse_alphabet[
            (alphabet[symbol] - alphabet[key[position % key_size]]) % modulus
        ]
        for position, symbol in enumerate(ciphertext)
    )
  

In [11]:
def break_rp(
    ciphertext: str,
    frequencies: Dict[str, float],
    distance: Callable[[str, Dict[str, float]], float],
) -> str:
    """
    Guess the repeating key used to encrypt a ciphertext.

    Every key length from 1 up to len(ciphertext) // 50 is tried. For a
    given length the ciphertext is split into one column per key position
    (the characters encrypted with the same key character), and the key
    character chosen for a column is the one whose decryption of that column
    minimizes `distance`. The candidate key whose decryption of the whole
    ciphertext minimizes `distance` is returned; on ties the shortest
    candidate wins.

    Arguments :
        ciphertext : An arbitrary string representing the
                     encrypted version of a plaintext. Characters missing
                     from frequencies are skipped when scoring columns, but
                     decoding the full text with a candidate key still
                     requires every character to be in frequencies.
        frequencies : A dictionary representing a character
                      frequency over the alphabet.
        distance : A function indicating how distant a string is
                   from following a character frequency.
    Returns :
        key : A guess of the key used to encrypt the ciphertext, assuming
              that the plaintext message was written in a language in which
              letters distribute according to frequencies. The empty string
              when the ciphertext has fewer than 50 characters (no key
              length is tried).
    """
    # Index the alphabet in sorted order so shifts are well defined.
    symbols = sorted(frequencies)
    alphabet = {char: idx for idx, char in enumerate(symbols)}
    inverse_alphabet = dict(enumerate(symbols))

    # Longer keys leave too few characters per column for frequency analysis.
    max_key_length = len(ciphertext) // 50

    best_key = ""
    best_key_distance = math.inf
    for key_length in range(1, max_key_length + 1):
        key = ""
        for key_idx in range(key_length):
            # Every key_length-th character starting at key_idx, including
            # the last one of the column, restricted to known characters.
            column = "".join(
                char
                for char in ciphertext[key_idx::key_length]
                if char in frequencies
            )

            # Pick the key character whose decryption of this column looks
            # most like the expected frequencies.
            best_char = ""
            best_char_distance = math.inf
            for candidate in frequencies:
                decoded_column = decipher_with_one_key(
                    column,
                    candidate,
                    alphabet,
                    inverse_alphabet,
                )
                candidate_distance = distance(decoded_column, frequencies)
                if candidate_distance < best_char_distance:
                    best_char_distance = candidate_distance
                    best_char = candidate
            key += best_char

        # Score the full decryption obtained with this candidate key.
        decoded_text = decipher_with_one_key(
            ciphertext, key, alphabet, inverse_alphabet
        )
        key_distance = distance(decoded_text, frequencies)
        # Strict comparison keeps the shortest key among equally good ones.
        if key_distance < best_key_distance:
            best_key_distance = key_distance
            best_key = key
    return best_key
    


In [5]:
def abs_distance(string: str, frequencies: Dict[str, float]) -> float:
    """
    Sum of absolute differences between expected and observed frequencies.

    Arguments :
        string : An arbitrary string
        frequencies : A dictionary representing a character frequency
    Returns :
        distance : How distant the string is from the character frequency:
                   the sum, over every character in frequencies, of the
                   absolute difference between its expected frequency and
                   its relative frequency in the string. Characters of the
                   string that are not in frequencies only contribute to the
                   string length. For an empty string every observed
                   frequency is taken as 0.
    """
    length = len(string)
    if length == 0:
        # Nothing observed: avoid dividing by zero and compare against 0.
        return sum(abs(expected) for expected in frequencies.values())

    return sum(
        abs(expected - string.count(char) / length)
        for char, expected in frequencies.items()
    )


In [6]:
# Sample data for the commented-out demo cell that calls cipher(text, key):
# `text` is one paragraph repeated 12 times, `key` is the repeating key.
text = "It's not only writers who can benefit from this free online tool. If you're a programmer who's working on a project where blocks of text are needed, this tool can be a great way to get that. It's a good way to test your programming and that the tool being created is working well. Above are a few examples of how the random paragraph generator can be beneficial. The best way to see if this random paragraph picker will be useful for your intended purposes is to give it a try. Generate a number of paragraphs to see if they are beneficial to your current project. If you do find this paragraph tool useful, please do us a favor and let us know how you're using it. It's greatly beneficial for us to know the different ways this tool is being used so we can improve it with updates. This is especially true since there are times when the generators we create get used in completely unanticipated ways from when we initially created them. If you have the time, please send us a quick note on what you'd like to see changed or added to make it better in the future. "*12
key = "ASDSADASDASDHOLA"

In [12]:
def cipher(text: str, key: str):
    """
    Encrypt a text with a repeating-key shift cipher over the keys of `freq`.

    The text is upper-cased and every character that is not a key of `freq`
    is dropped. Each remaining character is shifted forward by the index of
    the (upper-cased) key character at the same position, the key being
    repeated cyclically and indexes taken in sorted order of `freq`'s keys.

    Arguments :
        text : The plaintext to encrypt.
        key : The key; its upper-cased characters must be keys of `freq`.
    Returns :
        ciphertext : The encrypted text, containing only keys of `freq`.
    """
    letters = sorted(freq.keys())
    position = {letter: i for i, letter in enumerate(letters)}
    modulus = len(freq)

    # Only characters of the alphabet are encrypted and consume key positions.
    plain = [symbol for symbol in text.upper() if symbol in position]

    return "".join(
        letters[(position[symbol] + position[key[i % len(key)].upper()]) % modulus]
        for i, symbol in enumerate(plain)
    )


In [8]:
# ciphertext = cipher(text, key)
# break_rp(ciphertext, freq, abs_distance)

In [1]:
# for _, _, filenames in os.walk("./tests"):
#     for filename in filenames:
#         if filename.endswith(".txt"):
#             with open(os.path.join("./tests", filename), "r") as f:
#                 ciphertext, key = f.read().strip().split(" ")
#                 break_key = break_rp(ciphertext.upper(), freq, abs_distance)
#                 if key != break_key.lower():
#                     print(filename, key, break_key, ciphertext)
#                     print(len(key), len(break_key), len(ciphertext)/50)
#                     break
                
