In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter

In [None]:
ciphertext = "JGRMQOYGHMVBJWRWQFPWHGFFDQGFPFZRKBEEBJIZQQOCIBZKLFAFGQVFZFWWEOGWOPFGFHWOLPHLRLOLFDMFGQWBLWBWQOLKFWBYLBLYLFSFLJGRMQBOLWJVFPFWQVHQWFFPQOQVFPQOCFPOGFWFJIGFQVHLHLROQVFGWJVFPFOLFHGQVQVFILEOGQILHQFQGIQVVOSFAFGBWQVHQWIJVWJVFPFWHGFIWIHZZRQGBABHZOCGFHX"
english_letter_freq = {
    'Letter': list('ABCDEFGHIJKLMNOPQRSTUVWXYZ'),
    'Frequency': [8.167, 1.492, 2.782, 4.254, 12.702, 2.228, 2.015, 6.094, 6.966, 0.153, 0.772, 4.025, 2.406, 6.749, 7.507, 1.929, 0.095, 5.987, 6.327, 9.056, 2.758, 0.978, 2.360, 0.150, 1.974, 0.074]
}
english_freq = pd.DataFrame(english_letter_freq)
english_freq = english_freq.sort_values(by='Frequency', ascending=False).reset_index(drop=True)

common_words = ['THE', 'AND', 'IS', 'IT', 'AS', 'TO', 'IN', 'HE', 'OF', 'YOU', 'THAT', 'WAS', 'FOR', 'ON', 'ARE', 'WITH', 'HIS', 'THEY', 'I', 'AT', 'THAN']

In [46]:
def tabulate_freq(ciphertext: str) -> pd.DataFrame:
    char_counts = Counter(ciphertext)
    total_chars = len(ciphertext)
    freq = {char: (count / total_chars)*100 for char, count in char_counts.items()}
    freq_df = pd.DataFrame(list(freq.items()), columns=['Letter', 'Frequency'])
    freq_df = freq_df.sort_values(by='Frequency', ascending=False).reset_index(drop=True)
    return freq_df

def get_closest_chars(cipher_frequency_table: pd.DataFrame, english_freq: float) -> pd.Series:
    closest_row = cipher_frequency_table.loc[cipher_frequency_table['Frequency'].sub(english_freq).abs().idxmin()]
    return closest_row


def get_most_common_ngrams(ciphertext: str, n=2, top_n=10) -> pd.DataFrame:
    ngrams = [ciphertext[i:i+n] for i in range(len(ciphertext) - n + 1) ]
    ngram_counts = Counter(ngrams)
    ngram_df = pd.DataFrame(ngram_counts.items(), columns=['Ngram', 'Count'])
    ngram_df = ngram_df.sort_values(by='Count', ascending=False).reset_index(drop=True)
    return ngram_df.head(top_n)

def get_most_common_repeated_digrams(ciphertext: str, n=2):

    ngrams = [ciphertext[i:i+n] for i in range(len(ciphertext) - n + 1) if len(set(ciphertext[i:i+n])) == 1 ]
    ngram_counts = Counter(ngrams)
    ngram_df = pd.DataFrame(ngram_counts.items(), columns=['Ngram', 'Count'])
    ngram_df = ngram_df.sort_values(by='Count', ascending=False).reset_index(drop=True)
    return ngram_df


def find_common_words(partial_message: str, common_words: list) -> list:
    found_words = []
    for word in common_words:
        if word in partial_message:
            found_words.append(word)
    return found_words

def get_decryption_key(english_freq: pd.DataFrame, cipher_freq_table: pd.DataFrame):
    key = {}

    key['F'] = 'E'
    key['W'] = 'S'
    
    common_digrams = get_most_common_ngrams(ciphertext, n=2)
    common_trigrams = get_most_common_ngrams(ciphertext, n=3)
    common_repeated_digrams = get_most_common_repeated_digrams(ciphertext)
    print(common_digrams, common_trigrams, common_repeated_digrams)
    the_trigram = common_trigrams[common_trigrams['Ngram'].str.endswith('F')].iloc[0]['Ngram'] 
    key[the_trigram[0]] = 'T'
    key[the_trigram[1]] = 'H'
    
    return key

def update_key_with_common_words(partial_message: str, key: dict, common_words: list) -> dict:
    words_in_message = partial_message.split('*')
    ciphertext_words = ciphertext.split('*')
    for idx, word_fragment in enumerate(words_in_message):
        if len(word_fragment) < 2:
            continue
        for common_word in common_words:
            if len(word_fragment) != len(common_word):
                continue
            possible_mapping = {}
            match = True
            for p_char, c_char in zip(common_word, word_fragment):
                if c_char != '*':
                    if c_char != p_char:
                        match = False
                        break
                else:
                    # Verificar si el carácter ya está asignado a otra letra
                    cipher_char = ciphertext[ciphertext.find(word_fragment) + word_fragment.index(c_char)]
                    if cipher_char in key and key[cipher_char] != p_char:
                        match = False
                        break
                    if p_char in key.values() and cipher_char not in key:
                        match = False
                        break
                    possible_mapping[cipher_char] = p_char
            if match:
                # Actualizar la clave con las nuevas asignaciones
                key.update(possible_mapping)
                break  # Salir después de encontrar una coincidencia
    return key

def iterative_decryption(ciphertext: str, key: dict, common_words: list, max_iterations=1000) -> str:
    for _ in range(max_iterations):
        partial_message = get_decrypted(ciphertext, key)
        print("Mensaje parcialmente descifrado:")
        print(partial_message)
        prev_key_size = len(key)
        key = update_key_with_common_words(partial_message, key, common_words)
        if len(key) == prev_key_size:
            # No se encontraron nuevas asignaciones, detener el proceso
            break
    return get_decrypted(ciphertext, key)


def get_decrypted(ciphertext: str, key: dict) -> str:
    message = ''
    for char in ciphertext:
        if char in key:
            message += key[char]
        else:
            message += '*'  # Puedes usar '*' o dejar el carácter cifrado
    return message


In [47]:
cipher_frequency_table =  tabulate_freq(ciphertext)
# print(cipher_frequency_table)
# print(english_freq)
key = get_decryption_key(english_freq, cipher_frequency_table)
# Paso 1: Tabular frecuencias
cipher_freq_table = tabulate_freq(ciphertext)

# Paso 2: Obtener clave inicial basada en frecuencias
key = get_decryption_key(english_freq, cipher_freq_table)

# Paso 3: Realizar descifrado iterativo
final_message = iterative_decryption(ciphertext, key, common_words)

# Paso 4: Mostrar el mensaje final
print("Mensaje final descifrado:")
print(final_message)

# plt.figure(figsize=(14, 7))

# # Plot English letter frequency
# plt.bar(english_freq['Letter'], english_freq['Frequency'], alpha=0.7, label='English Letter Frequency')

# # Plot cipher text letter frequency
# plt.bar(cipher_frequency_table['Letter'], cipher_frequency_table['Frequency'], alpha=0.7, label='Cipher Text Frequency')

# plt.xlabel('Letters')
# plt.ylabel('Frequency (%)')
# plt.title('Letter Frequency Comparison')
# plt.legend()
# plt.show()


  Ngram  Count
0    QV      9
1    FP      8
2    VF      7
3    GF      7
4    FW      5
5    FG      5
6    QO      5
7    PF      5
8    OL      5
9    WQ      4   Ngram  Count
0   QVF      4
1   FPF      4
2   VFP      4
3   QVH      3
4   JVF      3
5   WJV      3
6   FAF      2
7   FGQ      2
8   QOC      2
9   AFG      2   Ngram  Count
0    FF      2
1    EE      1
2    QQ      1
3    WW      1
4    VV      1
5    ZZ      1
  Ngram  Count
0    QV      9
1    FP      8
2    VF      7
3    GF      7
4    FW      5
5    FG      5
6    QO      5
7    PF      5
8    OL      5
9    WQ      4   Ngram  Count
0   QVF      4
1   FPF      4
2   VFP      4
3   QVH      3
4   JVF      3
5   WJV      3
6   FAF      2
7   FGQ      2
8   QOC      2
9   AFG      2   Ngram  Count
0    FF      2
1    EE      1
2    QQ      1
3    WW      1
4    VV      1
5    ZZ      1
Mensaje parcialmente descifrado:
****T*****H**S*STE*S**EE*T*E*E**********TT*******E*E*THE*ESS***S**E*E*S*********E**E*TS**S*ST***E