In [2]:
cypher_text = """ LYMBXJXKBBKBJPJJBBKBJPZPHYXNJKGOLOGPIEYHKBI
                  XJSOEPVJBPMBXJEPONKBIEJGJOEATPYOXUOBAJPTJLK
                  JDXGYLOEPKLKAKODKBPJDDKIJBAJXOPOGAKJBAJAYVS
                  MPJEGAKJBAJOSSDKJXVOPTJVOPKAGOBXEYFYPKAGLME
                  PTJEVYEJXNJVOKBPOKBGODOEIJBJPHYENYLKBXMGPEZ
                  SOEPBJEGPTEYMITPTJOHOEXHKBBKBINJHYENSEYIEOV
                  VJOBXPTEYMITYMEEJGJOEATAYDDOFYEOPKYBGPTJXJS
                  OEPVJBPSEYUKXJGJXMAOPKYBPTEYMITYBJFOATJDYEG
                  SEYIEOVVJOBXPHYVOGPJEGSEYIEOVVJGEJGJOEATOPP
                  TJXJSOEPVJBPYLXOPOGAKJBAJOBXNBYHDJXIJJBIKBJ
                  JEKBIGSOBGPTJXKGAKSDKBJGOBXKBPJELOAJGYLOEPK
                  LKAKODKBPJDDKIJBAJXOPOGAKJBAJAYVSMPJEGAKJBA
                  JOBXOSSDKJXVOPTJVOPKAGHJXJUJDYSBJHPYYDGOBXV
                  JPTYXYDYIKJGPYOXUOBAJPTJGJLKJDXGOPPTJGOVJPK
                  VJHJAYDDOFYEOPJHKPTOHKXJEOBIJYLKBGPKPMPJGFY
                  PTHKPTKBOBXYMPGKXJYLVOOGPEKATPMBKUJEGKPZOBX
                  HYENYBXKUJEGJOSSDKAOPKYBGKBADMXKBIKBPTJLKJD
                  XGYLTJODPTOBXVJXKAKBJDYIKGPKAGFKYDYIZOEPSTZ
                  GKAGAZFJEGJAMEKPZBJMEYGAKJBAJOBXJXMAOPKYB  """

In [3]:
def substitute_characters(text, substitutions):
    translation_table = str.maketrans(substitutions)

    substituted_text = text.translate(translation_table)

    return substituted_text

In [4]:
import random
import string
import pandas as pd

def create_random_substitutions_dict(text, previous_dict):
    english_alphabet = set(string.ascii_uppercase)

    remaining_letters = english_alphabet - set(previous_dict.keys())

    text_letters = [char for char in text.upper() if char.isalpha()]

    shuffled_letters = list(remaining_letters)
    random.shuffle(shuffled_letters)

    new_substitutions = dict(zip([char for char in text_letters if char not in previous_dict], shuffled_letters))

    updated_dict = {**previous_dict, **new_substitutions}

    return updated_dict

In [5]:
pip install english-words





[notice] A new release of pip is available: 23.2.1 -> 23.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [45]:
import csv
with open('word-freq-top5000.csv', 'r') as csv_file:
    reader = csv.DictReader(csv_file)
    word_set = set(row['Word'] for row in reader)
    
print(len(word_set))


4354


In [13]:
def contains_english_word(text):
    lowercased_text = text.lower()

    if lowercased_text in set(word_set):
        return True

    for word in word_set:
        if word in lowercased_text and len(word) > 4:
            return True, word

    return False, None

def is_english_word(word):
    return word.lower() in set(word_set)

def count_subwords(text, word_len):
    found_words = []
    text_len = len(text)

    i = 0
    while i <= text_len - word_len:
        substring = text[i:i+word_len]

        if is_english_word(substring):
            found_words.append(substring)
            text = text[:i] + text[i+word_len:]  # Remove the found word from the text
            text_len = len(text)  # Update the text length
        else:
            i += 1  

    return found_words, text


input_text = "deibdeboidhbhowareyoukejljl"
word_length = 3
result, text = count_subwords(input_text, word_length)

print(result, text)

['how', 'are', 'you'] deibdeboidhbkejljl


In [39]:
fitness_dict = {3:1, 4:10, 5:50, 6:100, 7:100, 8:100, 9:100, 10:100, 11:100}
def fitness(individual, text):
    deciphered_text = substitute_characters(text, individual)
    text = deciphered_text
    fitness = 0
    total_words = []

    for i in range(10, 2, -1):
        found_words, remaining_text = count_subwords(text, i)
        fitness += len(found_words) * fitness_dict[i]
        text = remaining_text
        total_words.extend(found_words)
        
    return fitness, total_words

# test_text = "OLSSVOVD"
# test_dict = {'O':'H', 'L':'E', 'S':'L','V':'O','D':'W'}
# print(fitness(test_dict, test_text))

In [46]:
def crossover(generation, size, text):
    parents = generation[:parents_size]
    new_gen = []
    for i in range(size):
        p = random.random() 
        if p < 0.7:
            first_individual = random.choice(parents)
            second_individual = random.choice(parents)

        else:
            first_individual = create_random_substitutions_dict(text, {})
            second_individual = create_random_substitutions_dict(text, {})
        new_individual = {}
        for key in first_individual.keys():
            new_individual[key] = random.choice([first_individual[key], second_individual[key]])
        new_gen.append(new_individual)
    return new_gen


def apply_mutation(generation):
    for individual in generation:
        p = random.random()  
        if p < mutation_rate:
            random_key = random.choice(list(individual.keys()))
            individual[random_key] = random.choice(string.ascii_uppercase)
    return generation


size = 100
parents_size = 20
mutation_rate = 0.5
small_cypher_text = "GKAGAZFJEGJAMEKPZBJMEYGAKJBAJOBXJXMAOPKYB"

converged = False
best_dict = {}
curr_gen = [create_random_substitutions_dict(small_cypher_text, {}) for i in range(size)]
max_fitness = 0
curr_fitness = 0
i = 0
tot_gen = 0

while not converged:
    sorted_gen = sorted(curr_gen, key=lambda x: fitness(x, small_cypher_text), reverse=True)
    curr_gen = apply_mutation(crossover(sorted_gen, size, small_cypher_text))
    curr_fitness, _ = fitness(curr_gen[0], small_cypher_text)
    i += 1
    tot_gen += 1
    for individual in sorted_gen[:3]:
        fit, total_words = fitness(individual, small_cypher_text)
        decyphered_text = substitute_characters(small_cypher_text, individual)
        print(f"Fitness: {fit}, Deciphered text: {decyphered_text}, Words found:{total_words}")

    print(f"Generations: {tot_gen}")
    if(curr_fitness > max_fitness):
        max_fitness = curr_fitness
        best_dict = sorted_gen[0]
        print(f"Best dict: {best_dict}")
        i = 0
    if(i == 30):
        curr_gen = [create_random_substitutions_dict(small_cypher_text, {}) for i in range(size)]


Fitness: 11, Deciphered text: FCMFMWHATFAMVTCLWNAVTYFMCANMAONXAXVMOLCYN, Words found:['WHAT', 'CAN']
Fitness: 11, Deciphered text: WHAWAYIRGWRAUGHSYCRUGQWAHRCAROCXRXUAOSHQC, Words found:['AWAY', 'CAR']
Fitness: 10, Deciphered text: VRFVFZGSYVSFNYROZLSNYAVFRSLFSOLXSXNFOORAL, Words found:['ORAL']
Generations: 1
Fitness: 12, Deciphered text: FPLFLWHATFALCTPNWNACTKFLPANLAONXAXCLONPKN, Words found:['WHAT', 'ACT', 'PAN']
Fitness: 11, Deciphered text: OMRORBGHOOHRXOMDBLHXONORMHLRHOLXHXXRODMNL, Words found:['NORM', 'ROD']
Fitness: 11, Deciphered text: LQDLDIWEALEDNAQMINENAFLDQENDEONXEXNDOMQFN, Words found:['MINE', 'END']
Generations: 2
Best dict: {'G': 'F', 'K': 'P', 'A': 'L', 'Z': 'W', 'F': 'H', 'J': 'A', 'E': 'T', 'M': 'C', 'P': 'N', 'B': 'N', 'Y': 'K'}
Fitness: 12, Deciphered text: ZCLZLWHATZALCTCNWNACTGZLCANLAONXAXCLONCGN, Words found:['WHAT', 'ACT', 'CAN']
Fitness: 12, Deciphered text: LBTLTIIETLETVTBPINEVTYLTBENTEONXEXVTOPBYN, Words found:['PINE', 'LET', 'TOP']
Fitness: 11, Deciphered te