In [None]:
!pip install nltk

In [None]:
cipher = "PRCSOFQX FP QDR AFOPQ CZSPR LA JFPALOQSKR. QDFP FP ZK LIU BROJZK MOLTROE."

In [None]:
import string
from collections import Counter
import re

ENGLISH_FREQS = [
    0.08167, 0.01492, 0.02782, 0.04253, 0.12702, 0.02228, 0.02015,  # A-G
    0.06094, 0.06966, 0.00153, 0.00772, 0.04025, 0.02406, 0.06749,  # H-N
    0.07507, 0.01929, 0.00095, 0.05987, 0.06327, 0.09056, 0.02758,  # O-U
    0.00978, 0.02360, 0.00150, 0.01974, 0.00074                     # V-Z
]

# Frequent two-letter English words, used as plaintext candidates for short cipher words.
TWO_LEN_WORD = [
    "as", "at", "be", "he", "if", "in",
    "is", "it", "of", "to"
]

# Frequent three-letter English words.
THREE_LEN_WORD = [
    "and", "are", "but", "for", "get",
    "not", "one", "the", "too", "was"
]

# Combined candidate list. COMMON_WORDS is an alias so that code using the
# plural spelling resolves to the same list instead of raising NameError.
COMMON_WORD = TWO_LEN_WORD + THREE_LEN_WORD
COMMON_WORDS = COMMON_WORD

In [None]:
# Distinct 2- and 3-character tokens of the ciphertext (split on single spaces; punctuation is not stripped).
{word for word in set(cipher.split(" ")) if 2 <= len(word) <= 3}

In [None]:
import itertools

all_words = TWO_LEN_WORD + THREE_LEN_WORD

# Only the 2- and 3-letter cipher words can stand for one of the common short
# words, so they are the only positions that need a candidate assignment.
short_cipher_words = sorted(
    {word for word in cipher.replace(".", "").split() if 2 <= len(word) <= 3}
)

# A cipher word can only decode to a plaintext word of the same length.
candidates_per_word = [
    [plain for plain in all_words if len(plain) == len(cipher_word)]
    for cipher_word in short_cipher_words
]

# The number of assignments grows exponentially with the number of short words,
# so the product is consumed lazily and only the first few are kept and shown.
MAX_COMBINATIONS = 20
combinations = itertools.islice(itertools.product(*candidates_per_word), MAX_COMBINATIONS)

# Each entry maps cipher word -> proposed plaintext word.
all_substitutions = [dict(zip(short_cipher_words, combo)) for combo in combinations]

for i, substitution in enumerate(all_substitutions):
    print(f"Combination {i + 1}: {substitution}")


In [None]:
def decrypt_vigenere(ciphertext):
    """Attempt an automatic Vigenere decryption of ``ciphertext``.

    The key length and key are estimated from the letters of the upper-cased
    text only; the full original text is then passed to ``decrypt``.
    """
    letters_only = ''.join(ch for ch in ciphertext.upper() if ch.isalpha())
    estimated_length = guess_key_length(letters_only)
    estimated_key = find_key(letters_only, estimated_length)
    return decrypt(ciphertext, estimated_key)

def guess_key_length(text, max_length=20):
    """Pick the step in ``1..max_length`` whose sampled text has the highest index of coincidence.

    For each step only the characters ``text[::step]`` (every ``step``-th
    character starting at index 0) are examined. Ties are resolved in favour
    of the smallest step, because ``max`` returns the first maximal element.

    Args:
        text: String of characters to analyse.
        max_length: Largest step (candidate key length) to try.

    Returns:
        The winning step as an int.
    """
    def index_of_coincidence(step):
        subtext = text[::step]
        n = len(subtext)
        # With fewer than two characters the denominator n * (n - 1) is zero;
        # such a sample carries no evidence, so score it as 0.
        if n < 2:
            return 0.0
        freqs = Counter(subtext)
        return sum(count * (count - 1) for count in freqs.values()) / (n * (n - 1))

    return max(range(1, max_length + 1), key=index_of_coincidence)

def find_key(text, key_length):
    """Recover a key of ``key_length`` letters by per-column frequency fitting.

    Column ``i`` is ``text[i::key_length]``. For each column every shift 0..25
    is scored with a chi-squared statistic against ``ENGLISH_FREQS`` and the
    first shift with the lowest score is chosen; the shifts are returned as
    upper-case letters (0 -> 'A').
    """
    def chi_square(counts, size, shift):
        # Compare the column's letter proportions, un-shifted by `shift`, with English.
        return sum(
            (counts.get(chr((i + shift) % 26 + 65), 0) / size - ENGLISH_FREQS[i]) ** 2 / ENGLISH_FREQS[i]
            for i in range(26)
        )

    def find_shift(column):
        counts = Counter(column)
        size = len(column)
        scores = [chi_square(counts, size, shift) for shift in range(26)]
        return min(range(26), key=scores.__getitem__)

    key_letters = []
    for offset in range(key_length):
        key_letters.append(chr(find_shift(text[offset::key_length]) + 65))
    return ''.join(key_letters)

def decrypt(ciphertext, key):
    """Vigenere-decrypt ``ciphertext`` with ``key`` and return upper-case text.

    Letters are shifted back by the corresponding key letter; every other
    character is copied through unchanged.

    Args:
        ciphertext: Text to decrypt (upper-cased before processing).
        key: Key made of upper-case letters 'A'..'Z'.

    Returns:
        The decrypted, upper-cased string.
    """
    plain = []
    key_pos = 0
    for c in ciphertext.upper():
        if c.isalpha():
            shift = ord(key[key_pos % len(key)])
            plain.append(chr((ord(c) - shift + 26) % 26 + 65))
            # The key is consumed by letters only, matching how the key is
            # estimated from the letters-only text in decrypt_vigenere.
            key_pos += 1
        else:
            plain.append(c)
    return ''.join(plain)

def improve_decryption(decrypted, max_iterations=1000):
    """Iteratively overwrite word-sized spans with common words while the score improves.

    Each iteration substitutes, for every entry of ``COMMON_WORD``, the first
    span of the same length that sits between two word boundaries, then scores
    the result with ``score_text`` (lower is better). The loop stops at the
    first iteration that does not lower the score.

    Args:
        decrypted: Starting text.
        max_iterations: Upper bound on the number of iterations.

    Returns:
        The best-scoring text seen.
    """
    best_score = float('inf')
    best_text = decrypted

    for _ in range(max_iterations):
        improved = best_text
        for word in COMMON_WORD:
            # NOTE(review): '.' matches any character, so this replaces the first
            # span of the right length regardless of its content — confirm intent.
            pattern = r'\b' + '.' * len(word) + r'\b'
            improved = re.sub(pattern, word, improved, count=1)

        score = score_text(improved)
        if score < best_score:
            best_score = score
            best_text = improved
        else:
            break  # Stop if no improvement

    return best_text

def score_text(text):
    """Return the fraction of whitespace-separated words not in ``COMMON_WORD``.

    Words are compared case-insensitively after stripping surrounding
    punctuation, because the word list is lower-case while ``decrypt`` emits
    upper-case text. Lower scores are better.

    Returns:
        A float in [0, 1]; 1.0 (worst) for text that contains no words.
    """
    words = text.split()
    if not words:
        return 1.0
    known = set(COMMON_WORD)
    unknown_words = sum(
        1 for word in words if word.strip(string.punctuation).lower() not in known
    )
    return unknown_words / len(words)

if __name__ == "__main__":

    # The ciphertext is stored in the notebook-level variable `cipher`.
    decrypted = decrypt_vigenere(cipher)
    print(f"Initial decryption: {decrypted}")
    improved = improve_decryption(decrypted)
    print(f"Improved decryption: {improved}")

In [None]:
import nltk
from nltk.corpus import words

nltk.download('words')

# Ciphertext under analysis; the last two words are separate tokens.
cipher = "PRCSOFQX FP QDR AFOPQ CZSPR LA JFPALOQSKR. QDFP FP ZK LIU BROJZK MOLTROE."

# Drop the periods and split on whitespace to get the individual cipher words.
cipher_words = cipher.replace('.', '').split()

nltk_words = set(words.words())

def word_pattern(word):
    """Return the letter-repetition shape of ``word`` as a list of ints.

    Each distinct character is numbered in order of first appearance, so
    words with the same repetition structure give the same list.
    """
    first_seen = {}
    shape = []
    for char in word:
        if char not in first_seen:
            first_seen[char] = len(first_seen)
        shape.append(first_seen[char])
    return shape

def find_matching_words(cipher_word):
    """Return the entries of ``nltk_words`` with the same length and letter shape as ``cipher_word``.

    Candidates are upper-cased before their shape is computed with
    ``word_pattern``; the result keeps the iteration order of ``nltk_words``.
    """
    target_shape = word_pattern(cipher_word)
    target_length = len(cipher_word)
    hits = []
    for candidate in nltk_words:
        if len(candidate) != target_length:
            continue
        if word_pattern(candidate.upper()) == target_shape:
            hits.append(candidate)
    return hits

# For every cipher word, print the candidates returned by find_matching_words.
# The loop variables are left at module level and stay visible to later cells.
for cipher_word in cipher_words:
    matches = find_matching_words(cipher_word)
    print(f"Cipher word '{cipher_word}' matches: {matches}")


In [None]:
import nltk
from nltk.corpus import words
from nltk.probability import FreqDist

nltk.download('words')
nltk.download('brown')

cipher = "PRCSOFQX FP QDR AFOPQ CZSPR LA JFPALOQSKR. QDFP FP ZK LIU BROJZK MOLTROE."

cipher_words = cipher.replace('.', '').split()

nltk_words = set(word.lower() for word in words.words())

from nltk.corpus import brown
word_freq = FreqDist([word.lower() for word in brown.words()])

def word_pattern(word):
    """Return the case-insensitive letter-repetition shape of ``word``.

    The word is lower-cased, then each distinct character is numbered in
    order of first appearance.
    """
    first_seen = {}
    shape = []
    for char in word.lower():
        if char not in first_seen:
            first_seen[char] = len(first_seen)
        shape.append(first_seen[char])
    return shape


def find_matching_words(cipher_word):
    """Return words from ``nltk_words`` shaped like ``cipher_word``, most frequent first.

    A word qualifies when it has the same length and the same
    ``word_pattern``; results are ordered by descending ``word_freq`` count.
    """
    target_shape = word_pattern(cipher_word)
    target_length = len(cipher_word)
    hits = []
    for candidate in nltk_words:
        if len(candidate) == target_length and word_pattern(candidate) == target_shape:
            hits.append(candidate)
    hits.sort(key=lambda candidate: word_freq[candidate], reverse=True)
    return hits


# For every cipher word, print its candidate plaintext words as ordered by find_matching_words.
for cipher_word in cipher_words:
    matches = find_matching_words(cipher_word)
    print(f"Cipher word '{cipher_word}' matches: {matches}")


In [None]:
import nltk
from nltk.corpus import words
from nltk.probability import FreqDist
import re

nltk.download('words')
nltk.download('brown')

cipher = "PRCSOFQX FP QDR AFOPQ CZSPR LA JFPALOQSKR. QDFP FP ZK LIU BROJZK MOLTROE."

cipher_words = cipher.replace('.', '').split()
nltk_words = set(word.lower() for word in words.words())

from nltk.corpus import brown
word_freq = FreqDist([word.lower() for word in brown.words()])

def create_pattern(substitution_pattern):
    """Replace every upper-case character with '*' and keep all other characters."""
    masked = []
    for char in substitution_pattern:
        masked.append('*' if char.isupper() else char)
    return ''.join(masked)

def find_matching_words(substitution_pattern):
    """Return words from ``nltk_words`` that fit a partly solved cipher word.

    Upper-case characters of ``substitution_pattern`` act as single-character
    wildcards; other characters must match literally. Results are ordered by
    descending ``word_freq`` count.
    """
    wildcarded = create_pattern(substitution_pattern)
    regex_pattern = '^' + wildcarded.replace('*', '.') + '$'

    hits = []
    for candidate in nltk_words:
        if re.fullmatch(regex_pattern, candidate):
            hits.append(candidate)

    hits.sort(key=lambda candidate: word_freq[candidate], reverse=True)
    return hits

# Demo: a fully unsolved three-letter cipher word, so every position is a wildcard.
substitution_pattern = 'LIU'  

matches = find_matching_words(substitution_pattern)
print(f"Cipher with pattern '{substitution_pattern}' matches: {matches}")


In [None]:
def solve_vigrene(text: str):
    """Search for word-by-word substitutions that turn every cipher word lower-case.

    Despite its name the search treats ``text`` as a simple substitution
    cipher: upper-case letters are unsolved, lower-case letters are solved.
    The distinct words are processed longest first; for each unsolved word
    every candidate from ``find_matching_words`` is tried and its letter
    assignments are applied to all words.

    Args:
        text: Ciphertext with upper-case cipher letters.

    Returns:
        A list of solved word lists, each parallel to the length-sorted list of
        distinct cipher words. Empty when no complete assignment is found.
    """
    chunks = sorted(set(text.replace(".", "").split(" ")), key=len)[::-1]

    def iterate_subs(chunk: list[str], index: int) -> list[list[str]]:
        # All letters lower-case: every word has been substituted.
        if "".join(chunk).islower():
            print(chunk)
            return [chunk]

        if index >= len(chunk):
            return []

        # Already solved as a side effect of earlier substitutions; move on.
        if chunk[index].islower():
            return iterate_subs(chunk, index + 1)

        solutions = []
        for match in find_matching_words(chunk[index]):
            # Skip a candidate that is already used as another solved word.
            if match in chunk:
                continue

            subbed_chunk = []
            for word in chunk:
                sub_text = word
                # Apply this word's cipher-letter -> plaintext-letter pairs everywhere.
                for i in range(len(match)):
                    sub_text = sub_text.replace(chunk[index][i], match[i])
                subbed_chunk.append(sub_text)

            # Collect the complete solutions found below this choice.
            solutions.extend(iterate_subs(subbed_chunk, index + 1))

        return solutions

    return iterate_subs(chunks, 0)
                    
    
solve_vigrene(cipher)

In [None]:
match = ['the', 'and', 'was', 'for', 'his', 'had', 'not', 'are', 'but', 'one', 'you', 'her', 'all', 'she', 'him', 'who', 'out', 'its', 'can', 'new', 'two', 'may', 'any', 'now', 'our', 'man', 'did', 'way', 'how', 'too', 'see', 'own', 'men', 'get', 'day', 'old', 'off', 'few', 'use', 'say', 'got', 'war', 'put', 'far', 'yet', 'set', 'end', 'why', 'let', 'per', 'big', 'saw', 'god', 'law', 'act', 'car', 'air', 'ago', 'boy', 'job', 'age', 'six', 'run', 'art', 'top', 'tax', 'red', 'nor', 'cut', 'low', 'pay', 'son', 'ten', 'sat', 'yes', 'bad', 'due', 'try', 'lay', 'aid', 'ran', 'led', 'met', 'hot', 'ask', 'bed', 'lot', 'eye', 'gun', 'hit', 'sun', 'bit', 'gas', 'sea', 'sir', 'oil', 'arm', 'key', 'add', 'sex', 'bar', 'sam', 'fit', 'dog', 'die', 'san', 'buy', 'box', 'dry', 'won', 'sit', 'tom', 'eat', 'fat', 'lie', 'sky', 'leg', 'bay', 'hat', 'joe', 'win', 'sin', 'wet', 'guy', 'via', 'cry', 'cup', 'ice', 'sum', 'fun', 'odd', 'raw', 'bag', 'arc', 'fed', 'joy', 'bob', 'mad', 'aim', 'roy', 'jim', 'row', 'pat', 'sad', 'net', 'bus', 'lee', 'fly', 'mud', 'van', 'gay', 'era', 'ann', 'ear', 'tea', 'cow', 'jet', 'pot', 'hal', 'cap', 'dan', 'ill', 'zen', 'jew', 'fog', 'tim', 'don', 'tie', 'gin', 'cat', 'pip', 'bid', 'tip', 'hen', 'ben', 'kay', 'shu', 'wit', 'doc', 'bet', 'ray', 'hay', 'ham', 'rid', 'dim', 'lid', 'rob', 'lap', 'eve', 'fan', 'pen', 'sue', 'bat', 'lip', 'rod', 'tap', 'lao', 'gap', 'ada', 'bod', 'lit', 'pin', 'pro', 'jar', 'ate', 'pan', 'jaw', 'fee', 'mae', 'ace', 'dug', 'oak', 'jay', 'cop', 'amy', 'dad', 'bow', 'nut', 'hey', 'apt', 'pie', 'gum', 'fix', 'wax', 'pit', 'ego', 'fur', 'rug', 'hut', 'par', 'mix', 'shy', 'nam', 'map', 'tub', 'lou', 'ton', 'dot', 'thy', 'cab', 'tar', 'icy', 'fox', 'max', 'tin', 'ken', 'egg', 'nod', 'log', 'del', 'rev', 'beg', 'ash', 'quo', 'bee', 'hub', 'rag', 'dig', 'hip', 'owe', 'non', 'mob', 'spy', 'ivy', 'zoo', 'inn', 'tan', 'bin', 'toe', 'han', 'bud', 'jed', 'mel', 'yin', 'sac', 'pad', 'flu', 'rot', 'foe', 'pet', 'pam', 'pig', 'jig', 'pop', 
'leo', 'ted', 'con', 'mao', 'ban', 'ink', 'bum', 'boa', 'dag', 'axe', 'jam', 'ant', 'sew', 'dip', 'sax', 'ion', 'bey', 'rub', 'rue', 'rip', 'gyp', 'pry', 'jug', 'lad', 'web', 'cod', 'rat', 'hum', 'rig', 'ass', 'tee', 'sub', 'coe', 'huh', 'abo', 'ole', 'las', 'yow', 'cox', 'aft', 'awe', 'woe', 'rex', 'sly', 'gal', 'mat', 'rim', 'tag', 'min', 'ski', 'wry', 'poe', 'hem', 'mar', 'gee', 'gem', 'gag', 'wee', 'dam', 'eta', 'toy', 'nap', 'bug', 'tao', 'sag', 'ron', 'ike', 'rio', 'ado', 'bel', 'coy', 'bam', 'lew', 'git', 'jan', 'pah', 'dow', 'peg', 'vic', 'rye', 'yen', 'nip', 'lab', 'sis', 'bop', 'sur', 'ape', 'tug', 'lax', 'mid', 'les', 'mop', 'lag', 'abe', 'woo', 'sow', 'hug', 'elm', 'rum', 'len', 'gus', 'hog', 'dew', 'pod', 'sol', 'sod', 'paw', 'yea', 'ory', 'pee', 'eel', 'nun', 'sip', 'och', 'maw', 'urn', 'kit', 'lex', 'kin', 'hun', 'yuh', 'fin', 'spa', 'gym', 'qua', 'nab', 'fig', 'reb', 'foy', 'biz', 'eli', 'wed', 'sic', 'lug', 'gel', 'bye', 'fir', 'pal', 'psi', 'fad', 'pup', 'das', 'phi', 'bea', 'meg', 'bon', 'rap', 'ram', 'den', 'owl', 'ida', 'wei', 'hop', 'hon', 'dey', 'ira', 'nay', 'hex', 'nan', 'wan', 'alf', 'keg', 'mah', 'fry', 'bib', 'vow', 'ani', 'jon', 'sap', 'mot', 'nat', 'wes', 'pap', 'nob', 'mor', 'aye', 'cud', 'bah', 'tai', 'gog', 'ist', 'wig', 'ana', 'soy', 'cal', 'zip', 'yon', 'hob', 'moi', 'mew', 'mon', 'din', 'sid', 'gab', 'gil', 'gig', 'tow', 'vet', 'mem', 'tab', 'wow', 'lev', 'ire', 'hap', 'eva', 'wop', 'mig', 'ito', 'oft', 'amt', 'fra', 'wac', 'bun', 'mag', 'rok', 'lop', 'cur', 'ich', 'mum', 'aku', 'kob', 'bal', 'wod', 'gob', 'sou', 'tat', 'ade', 'mug', 'nae', 'pax', 'ewe', 'soe', 'haw', 'jab', 'lob', 'pow', 'sop', 'ind', 'goa', 'err', 'cam', 'ama', 'obe', 'elk', 'vex', 'rut', 'jot', 'hue', 'jag', 'pub', 'mal', 'bas', 'naw', 'nil', 'ere', 'gut', 'roi', 'tau', 'chi', 'dun', 'ova', 'lui', 'roe', 'rib', 'ale', 'doe', 'ebb', 'boo', 'ell', 'hoy', 'orb', 'dos', 'bog', 'fay', 'mac', 'che', 'liz', 'sup', 'wyn', 'mrs', 'sie', 'yip', 'pun', 'dud', 'ugh', 
'loy', 'lak', 'fob', 'iyo', 'ges', 'aht', 'pur', 'zar', 'sil', 'yun', 'kop', 'gup', 'bur', 'mib', 'ino', 'ing', 'ret', 'gey', 'kip', 'waw', 'tol', 'hwa', 'reg', 'syd', 'kee', 'hox', 'neb', 'gor', 'uro', 'cum', 'aba', 'tew', 'kos', 'tib', 'mou', 'gim', 'tur', 'awa', 'kea', 'ore', 'mer', 'bub', 'kyl', 'eon', 'yer', 'mes', 'kyu', 'ods', 'dop', 'ers', 'emu', 'ura', 'vag', 'twa', 'yak', 'lai', 'vim', 'tae', 'ref', 'lye', 'wut', 'saj', 'hud', 'ume', 'ree', 'ket', 'oes', 'rit', 'ait', 'mod', 'hia', 'lis', 'rab', 'ail', 'kua', 'lan', 'oar', 'pic', 'ahu', 'tyt', 'eyn', 'deg', 'fey', 'col', 'yep', 'pon', 'nth', 'ped', 'mam', 'oxy', 'hic', 'wog', 'loo', 'vip', 'wot', 'fro', 'pix', 'aum', 'ras', 'pyx', 'emm', 'coo', 'fod', 'nep', 'fez', 'ism', 'foo', 'oki', 'nou', 'fut', 'cos', 'ark', 'aly', 'pac', 'kan', 'ort', 'voe', 'lam', 'ast', 'ssi', 'ger', 'yor', 'hag', 'sob', 'pep', 'hin', 'mwa', 'nib', 'owk', 'aga', 'ria', 'luc', 'fud', 'vau', 'gen', 'tst', 'lux', 'sla', 'aln', 'dib', 'cog', 'fot', 'iba', 'abb', 'dha', 'ila', 'pho', 'zel', 'ged', 'toa', 'dae', 'obi', 'tck', 'dis', 'hoi', 'pst', 'tua', 'sec', 'pim', 'teg', 'feu', 'wup', 'jun', 'fae', 'rah', 'aix', 'zat', 'lek', 'reh', 'cig', 'cue', 'gip', 'gau', 'ona', 'gnu', 'vis', 'ush', 'gan', 'sov', 'pol', 'mim', 'wob', 'jos', 'gio', 'sia', 'dub', 'wem', 'wae', 'poy', 'sab', 'blo', 'tch', 'ala', 'wen', 'fag', 'oii', 'zac', 'tou', 'hao', 'flo', 'bra', 'taa', 'coz', 'yaw', 'gif', 'yok', 'jur', 'tji', 'dar', 'aby', 'wim', 'jut', 'pyr', 'neo', 'nee', 'cro', 'goo', 'geo', 'gat', 'hah', 'rud', 'hew', 'lac', 'sri', 'poi', 'koi', 'azo', 'eld', 'wiz', 'pes', 'yus', 'kaw', 'bes', 'tho', 'oda', 'ned', 'tog', 'aam', 'poa', 'seg', 'mho', 'lim', 'gaw', 'suu', 'tal', 'osc', 'udo', 'twi', 'duo', 'baw', 'kha', 'sus', 'sal', 'oat', 'yex', 'alt', 'ita', 'div', 'baa', 'khu', 'edo', 'odz', 'lat', 'laz', 'ked', 'dee', 'dak', 'tri', 'sty', 'sma', 'uds', 'roc', 'dob', 'swa', 'ima', 'wer', 'cor', 'grr', 'tam', 'fon', 'uca', 'wun', 'wab', 'yot', 'mil', 
'ihi', 'ule', 'jog', 'zea', 'tox', 'awl', 'ibo', 'huk', 'nul', 'stu', 'lif', 'iao', 'fam', 'rik', 'mog', 'toi', 'ase', 'caw', 'pul', 'ula', 'cho', 'tay', 'zer', 'owd', 'yis', 'kou', 'vie', 'zep', 'dal', 'vat', 'alk', 'moy', 'pug', 'fop', 'gra', 'oho', 'ilk', 'ons', 'asp', 'oka', 'sim', 'dux', 'yox', 'noy', 'bab', 'fow', 'iso', 'goy', 'pua', 'fib', 'mus', 'kil', 'tup', 'irk', 'ley', 'keb', 'alp', 'jat', 'yah', 'sud', 'yoy', 'suz', 'tad', 'sny', 'lut', 'mas', 'ora', 'ull', 'ami', 'ake', 'zag', 'ust', 'lin', 'kat', 'urd', 'fou', 'wur', 'tez', 'rox', 'lwo', 'dao', 'ara', 'sog', 'ata', 'lea', 'edh', 'aha', 'utu', 'yam', 'aer', 'lod', 'tra', 'pom', 'gid', 'elf', 'moo', 'rep', 'iva', 'wha', 'tun', 'sah', 'wro', 'eer', 'dah', 'kef', 'nim', 'oam', 'jib', 'kui', 'iwa', 'ply', 'pir', 'gib', 'leu', 'rad', 'taw', 'vog', 'raj', 'nix', 'cid', 'tha', 'pik', 'soh', 'wey', 'hup', 'fei', 'cha', 'urf', 'ssu', 'deb', 'pud', 'kim', 'adz', 'jem', 'goi', 'fet', 'rix', 'arx', 'opt', 'nei', 'wah', 'aro', 'gez', 'agy', 'bae', 'aho', 'naa', 'pus', 'sok', 'yas', 'rhe', 'vod', 'gue', 'dap', 'zed', 'cag', 'rog', 'rho', 'bis', 'ulu', 'auk', 'vei', 'wye', 'cyp', 'mya', 'ean', 'jin', 'ido', 'lar', 'ava', 'uva', 'tyg', 'zig', 'fen', 'rax', 'koa', 'sye', 'elb', 'liv', 'aru', 'yao', 'dod', 'cep', 'wud', 'taj', 'uta', 'soc', 'hak', 'udi', 'moe', 'pya', 'abu', 'imu', 'ame', 'jef', 'nea', 'kai', 'dev', 'wid', 'ife', 'bap', 'imp', 'meo', 'gur', 'loa', 'sak', 'nub', 'ach', 'vol', 'ide', 'ako', 'tor', 'zoa', 'ode', 'guz', 'tui', 'gad', 'ofo', 'sho', 'zan', 'mau', 'til', 'yap', 'ump', 'lox', 'bom', 'noa', 'tum', 'ess', 'cwm', 'hau', 'tot', 'yed', 'dom', 'tid', 'yan', 'cay', 'auh', 'bor', 'imi', 'eke', 'wis', 'ope', 'kor', 'sen', 'nye', 'dhu', 'mir', 'ijo', 'dup', 'rus', 'rux', 'sot', 'cub', 'uji', 'lue', 'tig', 'gos', 'ock', 'fid', 'gon', 'orf', 'rie', 'vum', 'yeo', 'ser', 'una', 'vas', 'phu', 'dor', 'nef', 'sib', 'yee', 'daw', 'ute', 'uke', 'sar', 'zee', 'alb', 'bim', 'pau', 'rea', 'ure', 'pea', 'avo', 
'lur', 'hod', 'wap', 'kaj', 'pia', 'nid', 'arn', 'kon', 'wea', 'yid', 'apa', 'lof', 'yew', 'ose', 'hei', 'ubi', 'yez', 'ker', 'ler', 'jow', 'ayu', 'sig', 'urs', 'nag', 'wag', 'gul', 'elt', 'val', 'dum', 'kol', 'eft', 'uru', 'mru', 'tue', 'oaf', 'zax', 'hep', 'wat', 'cob', 'ber', 'sai', 'mow', 'suk', 'woy', 'nig', 'kex', 'pox', 'wir', 'gaj', 'sey', 'aes', 'nit', 'ohm', 'els', 'jud', 'ric', 'bot', 'ary', 'fip', 'yoi', 'orc', 'awn', 'aka', 'hie', 'gud', 'tic', 'ian', 'jap', 'cot', 'dit', 'ens', 'tye', 'erd', 'gar', 'kra', 'rel', 'ouf', 'mev', 'ati', 'kaf', 'oer', 'gaz', 'ave', 'fub', 'eme', 'cit', 'nog', 'vai', 'hui', 'fie', 'yad', 'vug', 'pew', 'sao', 'mab', 'yar', 'dab', 'uri', 'nar', 'umu', 'mux', 'aal', 'olm', 'gol', 'het', 'ady', 'sha', 'dye', 'nak', 'gie', 'shi', 'waf', 'lys', 'cee', 'alo', 'gam', 'kru', 'tod', 'mun', 'upo', 'dol', 'bos', 'awd', 'hsi', 'oto', 'zak', 'lei', 'yat', 'luo', 'tut', 'lum', 'hyp', 'cly', 'wad', 'tit', 'hoe', 'asa', 'fum', 'poh', 'tav', 'pob', 'bac', 'tux', 'yoe', 'tec', 'yom', 'sui', 'saa', 'erg', 'rua', 'aus', 'cad', 'kep', 'vee', 'zad', 'gyn']
print("old" in match)

In [None]:
from nltk import ngrams
from nltk.corpus import reuters

nltk.download('reuters')
bigrams = list(ngrams(reuters.words(), 2))
bigram_freq = nltk.FreqDist(bigrams)

def evaluate_decryption(decrypted_message):
    """Sum the ``bigram_freq`` counts of all adjacent word pairs in the message."""
    tokens = decrypted_message.split()
    total = 0
    for pair in ngrams(tokens, 2):
        total += bigram_freq[pair]
    return total

In [None]:
cipher = "PRCSOFQX FP QDR AFOPQ CZSPR LA JFPALOQSKR. QDFP FP ZK LIU BROJZK MOLTROE."
answer = "SECURITY IS THE FIRST CAUSE OF MISFORTUNE. THIS IS AN OLD GERMAN PROVERB."

In [None]:
answer.lower()

In [None]:
evaluate_decryption('security is the first cause of misfortune. this is an old german proverb.')

In [None]:
import nltk
from nltk.corpus import words
from nltk.probability import FreqDist
from nltk import ngrams
from nltk.corpus import reuters
from nltk.corpus import brown
import re

nltk.download('words')
nltk.download('brown')
nltk_words = set(word.lower() for word in words.words())

word_freq = FreqDist([word.lower() for word in brown.words()])

def create_pattern(substitution_pattern):
    """Mask unsolved (upper-case) characters with '*'; other characters pass through."""
    result = ''
    for char in substitution_pattern:
        if char.isupper():
            result += '*'
        else:
            result += char
    return result

def find_matching_words(substitution_pattern):
    """List dictionary words compatible with a partly solved cipher word.

    Upper-case positions match any single character, all other positions
    must match literally. The list is ordered by descending ``word_freq``.
    """
    regex_pattern = '^' + create_pattern(substitution_pattern).replace('*', '.') + '$'

    def is_compatible(candidate):
        return re.fullmatch(regex_pattern, candidate)

    compatible = list(filter(is_compatible, nltk_words))
    compatible.sort(key=lambda candidate: word_freq[candidate], reverse=True)
    return compatible


nltk.download('reuters')
bigrams = list(ngrams(reuters.words(), 2))
bigram_freq = nltk.FreqDist(bigrams)

def evaluate_decryption(decrypted_message):
    """Score a candidate plaintext by summing ``bigram_freq`` over its adjacent word pairs."""
    tokens = decrypted_message.split()
    pair_counts = [bigram_freq[pair] for pair in ngrams(tokens, 2)]
    return sum(pair_counts)


def solve_vigrene(text: str):
    """Enumerate word-by-word substitutions of ``text`` and report the best-scoring one.

    Upper-case letters are unsolved cipher letters, lower-case letters are
    solved plaintext. Distinct words are processed longest first; each
    candidate from ``find_matching_words`` is applied to all words before
    recursing. Complete candidates are ranked with ``evaluate_decryption``.

    Args:
        text: Ciphertext with upper-case cipher letters.

    Returns:
        The best candidate plaintext (also printed), or None when the search
        produces no complete candidate.
    """
    chunks = sorted(set(text.replace(".", "").split(" ")), key=len, reverse=True)

    def iterate_subs(chunk: list[str], index: int) -> list[str]:
        # Every word solved: rebuild the sentence by swapping each cipher word
        # for its solved counterpart (same position in both lists).
        if all(word.islower() for word in chunk):
            copy_text = text
            for i in range(len(chunk)):
                copy_text = copy_text.replace(chunks[i], chunk[i])
            return [copy_text]

        if index >= len(chunk):
            return []

        if chunk[index].islower():
            return iterate_subs(chunk, index + 1)

        possible_answers = []
        # Try every dictionary candidate for the current word.
        for match in find_matching_words(chunk[index]):
            subbed_chunk = []
            for word in chunk:
                sub_text = word
                for i in range(len(match)):
                    sub_text = sub_text.replace(chunk[index][i], match[i])
                subbed_chunk.append(sub_text)

            possible_answers.extend(iterate_subs(subbed_chunk, index + 1))

        return possible_answers

    possible_texts = iterate_subs(chunks, 0)
    if not possible_texts:
        # max() would raise ValueError on an empty list.
        print("No candidate decryption found.")
        return None

    best_text = max(possible_texts, key=evaluate_decryption)
    print(f"Best decrypted text: {best_text}")
    return best_text


cipher = "PRCSOFQX FP QDR AFOPQ CZSPR LA JFPALOQSKR. QDFP FP ZK LIU BROJZK MOLTROE."

solve_vigrene(cipher)


In [None]:
def write_list_to_file(strings: list[str], filename: str):
    """Write each string of ``strings`` to ``filename`` followed by a newline.

    An existing file is overwritten.
    """
    with open(filename, 'w') as out:
        out.writelines(entry + '\n' for entry in strings)


filename = "output.txt"
# NOTE(review): `dicipher` is not assigned at module level anywhere in the visible
# notebook, so this call raises NameError on a fresh kernel. Presumably it should be
# a list of candidate plaintexts (e.g. the result of a solver cell) — confirm.
write_list_to_file(dicipher, filename)


In [None]:
import nltk
from nltk.util import ngrams
from nltk.corpus import words as nltk_words
from nltk.probability import FreqDist
from collections import Counter
import string


nltk.download('words')
nltk.download('brown')


english_vocab = set(nltk_words.words())


bigram_freq = FreqDist(nltk.bigrams(nltk.corpus.brown.words()))

def evaluate_decryption(decrypted_message):
    """Score a candidate plaintext with three heuristics.

    Returns:
        A tuple ``(score, english_word_ratio, avg_word_length, bigram_score)``:
        the share of words found in ``english_vocab`` (lower-cased, surrounding
        punctuation stripped), the mean word length, the summed ``bigram_freq``
        counts of adjacent words, and the combined score
        ``ratio * 100 + bigram_score * 0.01 - abs(4.5 - avg_word_length) * 10``.
        Ratio and average are 0 for a message without words.
    """
    tokens = decrypted_message.split()
    n_tokens = len(tokens)

    recognised = 0
    for token in tokens:
        if token.lower().strip(string.punctuation) in english_vocab:
            recognised += 1
    english_word_ratio = recognised / n_tokens if tokens else 0

    bigram_score = sum(bigram_freq[pair] for pair in ngrams(tokens, 2))

    if tokens:
        avg_word_length = sum(len(token) for token in tokens) / n_tokens
    else:
        avg_word_length = 0

    score = english_word_ratio * 100 + bigram_score * 0.01 - abs(4.5 - avg_word_length) * 10

    return score, english_word_ratio, avg_word_length, bigram_score


decrypted_message = "This is a sample decoded message to test the function."
score, english_word_ratio, avg_word_length, bigram_score = evaluate_decryption(decrypted_message)
print(f"Score: {score}")
print(f"English Word Ratio: {english_word_ratio}")
print(f"Average Word Length: {avg_word_length}")
print(f"Bigram Score: {bigram_score}")


In [None]:
import nltk

# Assuming evaluate_decryption is already defined as shown earlier

def find_best_sentence(filename: str):
    """Pick the candidate line of ``filename`` with the highest bigram score.

    Every line is evaluated with ``evaluate_decryption`` and its metrics are
    printed. Only lines whose English-word ratio equals 1 are eligible.

    Returns:
        ``(best_sentence, best_bigram_score)``; ``(None, -inf)`` when no line
        is eligible.
    """
    with open(filename, 'r') as source:
        raw_lines = list(source)

    winner = None
    winner_score = float('-inf')

    for raw_line in raw_lines:
        candidate = raw_line.strip()
        score, english_word_ratio, avg_word_length, bigram_score = evaluate_decryption(candidate)
        print(f"Evaluating: {candidate}")
        print(f"Score: {score}")
        print(f"English Word Ratio: {english_word_ratio}")
        print(f"Average Word Length: {avg_word_length}")
        print(f"Bigram Score: {bigram_score}\n")

        if english_word_ratio != 1:
            continue
        if bigram_score > winner_score:
            winner_score = bigram_score
            winner = candidate

    return winner, winner_score

# Example usage: rank the candidate plaintexts stored one per line in output.txt
# (the file must already exist, e.g. written by write_list_to_file).
best_sentence, best_bigram_score = find_best_sentence('output.txt')
print(f"Best Sentence: {best_sentence}")
print(f"Best Bigram Score: {best_bigram_score}")


In [None]:
import nltk
from nltk.corpus import words
from nltk.probability import FreqDist
from nltk import ngrams
from nltk.corpus import reuters, brown
import re
from tqdm import tqdm

nltk.download('words')
nltk.download('brown')
nltk.download('reuters')

nltk_words = set(word.lower() for word in words.words())
word_freq = FreqDist([word.lower() for word in brown.words()])

def create_pattern(substitution_pattern):
    """Return ``substitution_pattern`` with each upper-case character replaced by '*'."""
    def mask(char):
        return '*' if char.isupper() else char

    return ''.join(map(mask, substitution_pattern))

def find_matching_words(substitution_pattern):
    """Return ``nltk_words`` entries that fit the partly solved word, most frequent first.

    Upper-case positions are wildcards; the ordering uses ``word_freq``.
    """
    wildcarded = create_pattern(substitution_pattern)
    regex_pattern = '^' + wildcarded.replace('*', '.') + '$'
    hits = []
    for candidate in nltk_words:
        if re.fullmatch(regex_pattern, candidate) is not None:
            hits.append(candidate)
    return sorted(hits, key=lambda candidate: word_freq[candidate], reverse=True)

bigrams = list(ngrams(reuters.words(), 2))
bigram_freq = FreqDist(bigrams)

def evaluate_decryption(decrypted_message):
    """Return the summed ``bigram_freq`` count over adjacent word pairs of the message."""
    tokens = decrypted_message.split()
    running_total = 0
    for pair in list(ngrams(tokens, 2)):
        running_total = running_total + bigram_freq[pair]
    return running_total

def solve_vigrene(text: str):
    """Enumerate complete substitutions of ``text`` built word by word.

    Upper-case letters are unsolved cipher letters, lower-case letters are
    solved plaintext. Distinct words are processed longest first. A candidate
    word is only accepted when it keeps the letter mapping one-to-one.

    Args:
        text: Ciphertext with upper-case cipher letters.

    Returns:
        A list of candidate plaintext strings (possibly empty).
    """
    chunks = sorted(set(text.replace(".", "").split(" ")), key=len, reverse=True)

    def iterate_subs(chunk: list[str], index: int) -> list[str]:
        # Every word solved: rebuild the sentence from the solved words.
        if all(word.islower() for word in chunk):
            copy_text = text
            for i in range(len(chunk)):
                copy_text = copy_text.replace(chunks[i], chunk[i])
            return [copy_text]

        if index >= len(chunk):
            return []

        if chunk[index].islower():
            return iterate_subs(chunk, index + 1)

        matches = find_matching_words(chunk[index])
        if not matches:
            return []

        # Plaintext letters already committed by earlier substitutions. Only the
        # lower-case (solved) letters count; unsolved cipher letters do not.
        used_plain = {char for word in chunk for char in word if char.islower()}

        possible_answers = []
        for match in tqdm(matches, desc=f"Processing word"):
            assignment = {}
            consistent = True
            for cipher_char, plain_char in zip(chunk[index], match):
                if not cipher_char.isupper():
                    continue  # position already solved; the regex fixed it
                if cipher_char in assignment:
                    # A repeated cipher letter must decode to the same letter.
                    if assignment[cipher_char] != plain_char:
                        consistent = False
                        break
                elif plain_char in used_plain or plain_char in assignment.values():
                    # Two different cipher letters may not share a plaintext letter.
                    consistent = False
                    break
                else:
                    assignment[cipher_char] = plain_char
            if not consistent:
                continue

            table = str.maketrans(assignment)
            subbed_chunk = [word.translate(table) for word in chunk]

            candidate_answers = iterate_subs(subbed_chunk, index + 1)
            possible_answers.extend(candidate_answers)

        return possible_answers

    possible_texts = iterate_subs(chunks, 0)
    return possible_texts

# Run the solver on the ciphertext and print every candidate plaintext it returns.
cipher = "PRCSOFQX FP QDR AFOPQ CZSPR LA JFPALOQSKR. QDFP FP ZK LIU BROJZK MOLTROE."
decrypted_texts = solve_vigrene(cipher)

for text in decrypted_texts:
    print(text)


In [None]:
cipher = "PRCSOFQX FP QDR AFOPQ CZSPR LA JFPALOQSKR. QDFP FP ZK LIU BROJZK MOLTROE."
answer = "SECURITY IS THE FIRST CAUSE OF MISFORTUNE. THIS IS AN OLD GERMAN PROVERB."

In [None]:
import nltk
from nltk.corpus import words
from nltk.probability import FreqDist
from nltk import ngrams
from nltk.corpus import reuters, brown
import re
from tqdm import tqdm

nltk.download('words')
nltk.download('brown')
nltk.download('reuters')

nltk_words = set(word.lower() for word in words.words())
word_freq = FreqDist([word.lower() for word in brown.words()])

def create_pattern(substitution_pattern):
    """Turn unsolved (upper-case) characters into '*' and leave the rest unchanged."""
    pieces = [
        char if not char.isupper() else '*'
        for char in substitution_pattern
    ]
    return ''.join(pieces)

def find_matching_words(substitution_pattern):
    """Find ``nltk_words`` entries matching the partly solved word, ordered by ``word_freq`` descending.

    Upper-case positions of ``substitution_pattern`` match any character.
    """
    regex_pattern = '^' + create_pattern(substitution_pattern).replace('*', '.') + '$'
    hits = []
    for candidate in nltk_words:
        if not re.fullmatch(regex_pattern, candidate):
            continue
        hits.append(candidate)
    hits.sort(key=lambda candidate: word_freq[candidate], reverse=True)
    return hits

# defaultdict is imported here so this cell does not rely on an import made elsewhere.
from collections import defaultdict

# Count how often each pair of adjacent tokens occurs in the Brown corpus.
bigrams = ngrams(brown.words(), 2)
bigram_freq = defaultdict(int)
for bigram in bigrams:
    bigram_freq[bigram] += 1

brown_words_set = set(word.lower() for word in brown.words())
frequency_order = 'etaoinshrdlcumfwygpbvkxjqz'[::-1]

def evaluate_decryption(decrypted_message):
    """Compute three plausibility measures for a candidate plaintext.

    Returns:
        ``(bigram_score, english_word_ratio, char_score)`` where
        ``bigram_score`` sums ``bigram_freq`` over adjacent words,
        ``english_word_ratio`` is the share of words (lower-cased, periods
        removed) found in ``brown_words_set`` (0 for a message without words),
        and ``char_score`` sums each letter's index in ``frequency_order``.
    """
    words = decrypted_message.split()
    message_bigrams = list(ngrams(words, 2))
    bigram_score = sum(bigram_freq.get(bigram, 0) for bigram in message_bigrams)
    english_word_count = sum(1 for word in words if word.lower().replace(".", "") in brown_words_set)
    # Guard against an empty or whitespace-only message.
    english_word_ratio = english_word_count / len(words) if words else 0
    char_score = sum(frequency_order.index(char.lower()) for char in decrypted_message if char.lower() in frequency_order)
    return bigram_score, english_word_ratio, char_score

def validate_mapping(cipher: list[str], dicipher: list[str]) -> bool:
    """Check that cipher and deciphered words use the same number of distinct characters.

    When the deciphered words come from a letter-for-letter substitution of
    the cipher words, equal counts mean no two cipher letters were mapped to
    the same plaintext letter.

    Args:
        cipher: Cipher words.
        dicipher: Deciphered words.

    Returns:
        True when both sides contain equally many distinct characters.
    """
    join_cipher = "".join(cipher)
    join_dicipher = "".join(dicipher)

    return len(set(join_cipher)) == len(set(join_dicipher))  # Should be 1-1
    
def solve_vigrene(text: str):
    """Solve ``text`` as a substitution cipher and return the best-scoring plaintext.

    Upper-case letters are unsolved cipher letters, lower-case letters are
    solved plaintext. Distinct words are processed longest first; complete
    candidates must pass ``validate_mapping`` and are ranked by
    ``bigram_score + char_score`` among those whose English-word ratio is 1.

    Args:
        text: Ciphertext with upper-case cipher letters.

    Returns:
        The best candidate plaintext, or None when no candidate qualifies.
    """
    chunks = sorted(set(text.replace(".", "").split(" ")), key=len, reverse=True)

    def iterate_subs(chunk: list[str], index: int) -> list[str]:
        if all(word.islower() for word in chunk):
            if not validate_mapping(chunks, chunk):
                return []
            # Rebuild the sentence by swapping each cipher word for its solved form.
            copy_text = text
            for i, word in enumerate(chunk):
                copy_text = copy_text.replace(chunks[i], word)
            return [copy_text]

        if index >= len(chunk):
            return []

        if chunk[index].islower():
            return iterate_subs(chunk, index + 1)

        matches = find_matching_words(chunk[index])
        if not matches:
            return []

        possible_answers = []

        for match in tqdm(matches, desc=f"Processing chunk: "):
            table = str.maketrans(chunk[index], match)
            # For a repeated cipher letter the table keeps only its last pairing.
            # If translating the word does not reproduce the candidate, the
            # candidate needs one cipher letter to mean two letters: reject it.
            if chunk[index].translate(table) != match:
                continue

            subbed_chunk = [word.translate(table) for word in chunk]
            possible_answers.extend(iterate_subs(subbed_chunk, index + 1))

        return possible_answers

    possible_texts = iterate_subs(chunks, 0)

    best_decryption = None
    best_score = None

    for decrypted_message in possible_texts:
        bigram_score, english_word_ratio, char_score = evaluate_decryption(decrypted_message)
        if english_word_ratio == 1: # ONLY ALLOW ENGLISH
            score = bigram_score + char_score
            if best_score is None or score > best_score:
                best_score = score
                best_decryption = decrypted_message

    return best_decryption

cipher = "PRCSOFQX FP QDR AFOPQ CZSPR LA JFPALOQSKR. QDFP FP ZK LIU BROJZK MOLTROE."
# solve_vigrene returns None when no candidate qualifies, so guard before upper-casing.
solution = solve_vigrene(cipher)
print(solution.upper() if solution is not None else "No valid decryption found.")

In [None]:
w = ['security is the first cause of misfortune. this is an old german proverb.', 'security is the first cause of misfortune. this is an owl german proverb.', 'security is the first cause of misfortune. this is an owk german proverb.', 'security is the first cause of misfortune. this is an odz german proverb.', 'security is the first cause of misfortune. this is an owd german proverb.']


In [None]:
import nltk
from nltk.util import ngrams
from nltk.corpus import brown
from collections import defaultdict, Counter

nltk.download('averaged_perceptron_tagger')
nltk.download('punkt')
nltk.download('brown')

bigrams = ngrams(brown.words(), 2)
bigram_freq = defaultdict(int)
for bigram in bigrams:
    bigram_freq[bigram] += 1

brown_words_set = set(word.lower() for word in brown.words())
frequency_order = 'etaoinshrdlcumfwygpbvkxjqz'[::-1]

def evaluate_decryption(decrypted_message):
    """Compute three plausibility measures for a candidate plaintext.

    Returns:
        ``(bigram_score, english_word_ratio, char_score)``: the summed
        ``bigram_freq`` counts of adjacent words, the share of words
        (lower-cased, periods removed) found in ``brown_words_set`` (0 for a
        message without words), and the sum of each letter's index in
        ``frequency_order``.
    """
    words = decrypted_message.split()

    message_bigrams = list(ngrams(words, 2))
    bigram_score = sum(bigram_freq.get(bigram, 0) for bigram in message_bigrams)

    english_word_count = sum(1 for word in words if word.lower().replace(".", "") in brown_words_set)
    # Guard against an empty or whitespace-only message.
    english_word_ratio = english_word_count / len(words) if words else 0

    char_score = sum(frequency_order.index(char.lower()) for char in decrypted_message if char.lower() in frequency_order)

    return bigram_score, english_word_ratio, char_score

In [None]:
# Print the tuple returned by evaluate_decryption for each candidate plaintext in `w`.
for e in w:
    print(evaluate_decryption(e))

In [None]:
Evaluating: security is the first cause of misfortune. this is an old german grocery.
Overall Score: 17668.466455918515
English Word Ratio: 1.0
Average Word Length: 4.6923076923076925
Bigram Score: 1757
Length Coherence: 0.8666666666666667
Most Common Character Frequency: 0.0958904109589041

In [None]:
Evaluating: security is the first cause of misfortune. this is an old german proverb.
Overall Score: 17668.466455918515
English Word Ratio: 1.0
Average Word Length: 4.6923076923076925
Bigram Score: 1757
Length Coherence: 0.8666666666666667
Most Common Character Frequency: 0.0958904109589041

In [1]:
import nltk
from nltk.corpus import words, brown
from nltk.probability import FreqDist
from nltk import ngrams
import re
from functools import lru_cache
from tqdm import tqdm
from IPython.display import clear_output

nltk.download('words', quiet=True)
nltk.download('brown', quiet=True)

nltk_words = set(word.lower() for word in words.words())
brown_words = set(word.lower() for word in brown.words())
word_freq = FreqDist(word.lower() for word in brown.words())

bigram_freq = FreqDist(ngrams(brown.words(), 2))
frequency_order = 'etaoinshrdlcumwfgypbvkjxzq'[::-1]

@lru_cache(maxsize=1000)
def create_pattern(substitution_pattern):
    """Replace each upper-case character with '*'; results are memoised per input string."""
    masked = []
    for char in substitution_pattern:
        if char.isupper():
            masked.append('*')
        else:
            masked.append(char)
    return ''.join(masked)

@lru_cache(maxsize=1000)
def find_matching_words(substitution_pattern):
    """Return ``nltk_words`` entries that fit the partly solved word, most frequent first.

    Upper-case positions match any character. The result list is memoised per
    pattern, so callers receive the same list object on repeated calls.
    """
    pattern = create_pattern(substitution_pattern)
    regex_pattern = re.compile(f'^{pattern.replace("*", ".")}$')
    hits = [word for word in nltk_words if regex_pattern.fullmatch(word)]
    hits.sort(key=lambda x: word_freq[x], reverse=True)
    return hits
    
@lru_cache(maxsize=1000)
def validate_mapping(cipher, decipher):
    """Return True when both arguments contain the same number of distinct characters.

    Arguments must be hashable (e.g. strings) because results are memoised.
    """
    distinct_cipher = set(cipher)
    distinct_plain = set(decipher)
    return len(distinct_cipher) == len(distinct_plain)

def evaluate_decryption(decrypted_message):
    """Compute three plausibility measures for a candidate plaintext.

    Returns:
        ``(bigram_score, english_word_ratio, char_score)``: the summed
        ``bigram_freq`` counts of adjacent words, the share of words
        (lower-cased, periods removed) found in ``brown_words`` (0 for a
        message without words), and the sum of each letter's index in
        ``frequency_order``.
    """
    words = decrypted_message.split()
    message_bigrams = list(ngrams(words, 2))
    bigram_score = sum(bigram_freq.get(bigram, 0) for bigram in message_bigrams)
    english_word_count = sum(1 for word in words if word.lower().replace(".", "") in brown_words)
    english_word_ratio = english_word_count / len(words) if words else 0

    char_score = 0
    for char in decrypted_message:
        # find() returns -1 for characters outside frequency_order (digits,
        # punctuation, non-ASCII letters); those are skipped instead of raising.
        rank = frequency_order.find(char.lower())
        if rank >= 0:
            char_score += rank
    return bigram_score, english_word_ratio, char_score

def solve_vigenere(text):
    """Solve ``text`` as a substitution cipher and return the best plaintext in upper case.

    Upper-case letters are unsolved cipher letters, lower-case letters are
    solved plaintext. Distinct words are processed longest first while a
    cipher-letter -> plaintext-letter mapping is carried down the recursion.
    Complete candidates must pass ``validate_mapping`` and are ranked by
    ``bigram_score + char_score`` among those whose English-word ratio is 1.

    Args:
        text: Ciphertext with upper-case cipher letters.

    Returns:
        The best candidate plaintext upper-cased, or None when no candidate
        qualifies.
    """
    chunks = sorted(set(text.replace(".", "").split(" ")), key=len, reverse=True)

    def iterate_subs(chunk, index, mapping):
        if all(word.islower() for word in chunk):
            if not validate_mapping("".join(chunks), "".join(chunk)):
                return []
            # Rebuild the sentence by swapping each cipher word for its solved form.
            copy_text = text
            for i, word in enumerate(chunk):
                copy_text = copy_text.replace(chunks[i], word)
            return [copy_text]

        if index >= len(chunk):
            return []

        if chunk[index].islower():
            return iterate_subs(chunk, index + 1, mapping)

        matches = find_matching_words(chunk[index])
        if not matches:
            return []

        possible_answers = []
        for match in tqdm(matches, desc=f"Processing chunk: ", leave=False):
            new_mapping = mapping.copy()
            taken = set(new_mapping.values())
            valid = True
            for c, m in zip(chunk[index], match):
                if not c.isupper():
                    continue
                if c in new_mapping:
                    # A repeated cipher letter must decode to the same letter.
                    if new_mapping[c] != m:
                        valid = False
                        break
                elif m in taken:
                    # Two cipher letters sharing a plaintext letter would be rejected
                    # by validate_mapping at the end anyway; prune the branch here.
                    valid = False
                    break
                else:
                    new_mapping[c] = m
                    taken.add(m)
            if not valid:
                continue

            subbed_chunk = [word.translate(str.maketrans(new_mapping)) for word in chunk]
            possible_answers.extend(iterate_subs(subbed_chunk, index + 1, new_mapping))

        clear_output(wait=True)

        return possible_answers

    print("Solving cipher...")
    possible_texts = iterate_subs(chunks, 0, {})
    best_decryption = None
    best_score = float('-inf')

    for decrypted_message in tqdm(possible_texts, desc="Evaluating decryptions", unit="text", leave=False):
        bigram_score, english_word_ratio, char_score = evaluate_decryption(decrypted_message)
        if english_word_ratio == 1:
            score = bigram_score + char_score
            if score > best_score:
                best_score = score
                best_decryption = decrypted_message

    # No candidate consisted purely of known words.
    if best_decryption is None:
        return None
    return best_decryption.upper()

# Run the solver on the ciphertext and show the plaintext it returns.
cipher = "PRCSOFQX FP QDR AFOPQ CZSPR LA JFPALOQSKR. QDFP FP ZK LIU BROJZK MOLTROE."
result = solve_vigenere(cipher)
print("Decrypted message:", result)

Processing chunk:   6%|▌         | 1893/30824 [00:48<11:27, 42.08it/s]

: 

In [73]:
result.upper()

'SECURITY IS THE FIRST CAUSE OF MISFORTUNE. THIS IS AN OLD GERMAN PROVERB.'