In [38]:
import json
from collections import Counter

from CryptoSystems.CryptoWrapper import CryptoWrapper

# Load the cipher alphabet: the first line of the alphabet file without its
# line terminator (the newline that ends the line is not an alphabet symbol).
with open('./Data/alphabet', 'r') as f:
    alphabet = f.readline().rstrip('\n')
# end with

# Read the whole training corpus as a single string.
with open('./Data/fit_text', 'r') as f:
    text = f.read()
# end with

# Normalise the corpus: upper-case it, turn line breaks into spaces and drop
# every character that is not part of the alphabet.
text = text.upper()
# str.replace returns a new string, so the result has to be assigned back;
# calling it as a bare statement leaves `text` unchanged.
text = text.replace('\n', ' ')
text = ''.join(i for i in text if i in alphabet)

# NOTE: this overwrites the corpus file in place with its normalised form.
with open('./Data/fit_text', 'w') as f:
    f.write(text)
# end with

len(text)

2120796

In [39]:
# Count how often every alphabet symbol occurs in the corpus. Symbols that
# never occur keep a count of zero so they still appear in the table.
freq = dict.fromkeys(alphabet, 0)
for symbol in text:
    freq[symbol] += 1
# end for

# Convert absolute counts into relative frequencies.
corpus_size = len(text)
freq = {symbol: count / corpus_size for symbol, count in freq.items()}

# Order the table from the most to the least frequent symbol; the sort is
# stable, so symbols with equal frequency keep their alphabet order.
ranked_items = sorted(freq.items(), key = lambda pair : pair[1], reverse = True)
freq = dict(ranked_items)

# Persist the ranked table for the frequency-analysis attack.
with open('./Data/tolkin_freq.json', 'w') as out_file:
    json.dump(freq, out_file, indent = 4)
# end with

In [40]:
def frequency_analysis(ciphertext : str) -> dict:
    """Return the relative frequency of every character in ``ciphertext``.

    Args:
        ciphertext: Text whose characters are counted.

    Returns:
        A dict mapping each distinct character to ``count / len(ciphertext)``.
        Keys are in order of first occurrence. An empty input gives ``{}``.
    """
    length = len(ciphertext)
    occurrences = Counter(ciphertext)
    return {symbol: count / length for symbol, count in occurrences.items()}
# end def

def decrypt_simple_substitution(ciphertext : str, frequences : dict) -> tuple[str, dict]:
    """Guess the plaintext of a substitution cipher by frequency ranking.

    The i-th most frequent ciphertext character is mapped to the i-th key of
    ``frequences``.

    Args:
        ciphertext: The encrypted text.
        frequences: Reference frequency table. Only the order of its keys is
            used; it is assumed to be ordered from most to least frequent
            symbol already (the values are not inspected here).

    Returns:
        ``(plaintext, key_mapping)`` where ``plaintext`` is the decoded guess
        and ``key_mapping`` maps each ciphertext character to the character
        it was replaced with.
    """
    # most_common() ranks by count with a stable sort, so characters with the
    # same count keep their order of first appearance in the ciphertext.
    ranked_cipher_chars = [char for char, _count in Counter(ciphertext).most_common()]

    # Materialise the reference ranking once instead of on every iteration.
    reference_chars = list(frequences)

    key_mapping = dict(zip(ranked_cipher_chars, reference_chars))

    # If the ciphertext has more distinct characters than the reference table,
    # leave the surplus (least frequent) characters unchanged instead of
    # failing with an IndexError.
    for char in ranked_cipher_chars[len(reference_chars):]:
        key_mapping[char] = char
    # end for

    plaintext = ''.join(key_mapping[char] for char in ciphertext)

    return plaintext, key_mapping
# end def

In [41]:
def calculate_metrics(plaintext: str, 
                      original_text: str) -> float:
    """Return the fraction of positions where ``plaintext`` matches ``original_text``.

    Args:
        plaintext: The recovered (guessed) text.
        original_text: The reference text to compare against.

    Returns:
        ``matching positions / len(plaintext)`` as a float in ``[0, 1]``.
        Positions of ``plaintext`` beyond the end of ``original_text`` count
        as mismatches. An empty ``plaintext`` gives ``0.0``.
    """
    if not plaintext:
        # Nothing to score; avoid dividing by zero.
        return 0.0
    # end if

    # zip stops at the shorter string, so a shorter original_text can no
    # longer trigger an IndexError.
    correct_chars = sum(1 for guessed, actual in zip(plaintext, original_text) if guessed == actual)
    metric = correct_chars / len(plaintext)

    return metric
# end def

In [42]:
def test_system(input_path : str, 
                freq_file : str, 
                save : bool = False
                ) -> tuple[float, str, dict, int]:
    """Encrypt a text, attack it with frequency analysis and score the result.

    Args:
        input_path: Path of the plaintext file handed to ``CryptoWrapper`` and
            later re-read as the reference text.
        freq_file: Path of a JSON file holding the reference frequency table.
        save: When true, write the recovered text to ``./Data/freq_dectipt``
            and the keys of the recovered mapping to ``./Data/key_freq``.

    Returns:
        ``(metric, plaintext, key_mapping, original_length)``: the score from
        ``calculate_metrics``, the recovered text, the ciphertext-to-plaintext
        character mapping, and the length of the reference text that was read.
    """
    crypter = CryptoWrapper(method = 'Replace', 
                        do_encrypt = 'enc',
                        key_path = './Data/key_Replace',
                        input_path = input_path
                        )

    encrypted_text = crypter.encrypt()

    with open(freq_file, 'r') as json_file:
        freq = json.load(json_file)
    # end with

    plaintext, key_mapping = decrypt_simple_substitution(ciphertext = encrypted_text,
                                                         frequences = freq)
    
    if save:
        with open('./Data/freq_dectipt', 'w') as f:
            f.write(plaintext)
        # end with
        
        # Only the ciphertext characters (the mapping's keys, in frequency
        # order) are written, not the characters they map to.
        with open('./Data/key_freq', 'w') as f:
            f.write(''.join([key for key in key_mapping]))
        # end with
    # end if

    # NOTE(review): only the first line of the input is used as the reference;
    # presumably the input files are single-line - confirm against how
    # CryptoWrapper reads input_path.
    with open(input_path, 'r') as f:
        original_text = f.readline()
    # end with

    metric = calculate_metrics(plaintext = plaintext,
                                original_text = original_text)

    return metric, plaintext, key_mapping, len(original_text)
# end def

In [43]:
# Plaintext files to attack.
texts_to_test = (
    './Data/in',
    './Data/in_test_1',
    './Data/in_test_2',
    './Data/fit_text'
)

# Reference frequency tables to try against every text.
freq_jsons = ('./Data/custom_freq.json', './Data/tolkin_freq.json')

# Score every (text, frequency table) combination. The result is unpacked
# into named variables rather than bound to `_`, because assigning to `_`
# overrides IPython's "last output" variable for the rest of the session.
for text_path in texts_to_test:
    for freq_path in freq_jsons:
        accuracy, _plaintext, _key_mapping, text_len = test_system(text_path, freq_path)
        print(f'accyracy={accuracy} for text={text_path}, for json={freq_path}, len={text_len}')
    # end for
    print()
# end for

accyracy=0.4900900900900901 for text=./Data/in, for json=./Data/custom_freq.json, len=1110
accyracy=0.47927927927927927 for text=./Data/in, for json=./Data/tolkin_freq.json, len=1110

accyracy=0.48778384593273927 for text=./Data/in_test_1, for json=./Data/custom_freq.json, len=3479
accyracy=0.5903995400977292 for text=./Data/in_test_1, for json=./Data/tolkin_freq.json, len=3479

accyracy=0.6259688739700946 for text=./Data/in_test_2, for json=./Data/custom_freq.json, len=32770
accyracy=0.49496490692706746 for text=./Data/in_test_2, for json=./Data/tolkin_freq.json, len=32770

accyracy=0.6586060139683402 for text=./Data/fit_text, for json=./Data/custom_freq.json, len=2120796
accyracy=1.0 for text=./Data/fit_text, for json=./Data/tolkin_freq.json, len=2120796



In [44]:
# Re-run the attack on the last test text with the first frequency table and
# save=True, so the recovered text and mapping keys are written to disk.
# The result is bound to a named variable (not `_`) so that IPython's
# last-output variable is left alone and the large result is not echoed.
saved_run = test_system(texts_to_test[-1], freq_jsons[0], save = True)