In [4]:
import functools, math, string, re, random, heapq, requests, itertools
from collections import Counter

class OneGramDist(dict):
    """Probability table built from a tab-separated ``gram<TAB>count`` file.

    The dict itself maps each gram to its integer count. Calling the
    instance with a gram returns an estimated probability: the relative
    frequency for known grams, and a small length-penalised value for
    unknown ones.
    """

    def __init__(self, filename):
        """Load counts from *filename*.

        Each non-blank line must hold exactly two tab-separated fields: the
        gram and its integer count. Blank lines are skipped, and the last
        line does not need a trailing newline.

        Raises:
            OSError: if the file cannot be opened.
            ValueError: if a line is malformed or the count is not an int.
        """
        super().__init__()
        # Sum of every count read; the denominator for all probabilities.
        self.gramCount = 0

        # The context manager guarantees the handle is closed, even when a
        # malformed line raises part-way through the file.
        with open(filename) as gramFile:
            for line in gramFile:
                # Strip only the newline (never a real character), so a
                # final line without a terminator keeps its last digit.
                line = line.rstrip('\n')
                if not line.strip():
                    continue
                (word, count) = line.split('\t')
                self[word] = int(count)
                self.gramCount += self[word]

    def __call__(self, key):
        """Return the estimated probability of *key*.

        Returns 1e-10 when the table holds no counts at all. Known grams get
        ``count / total``. Unknown grams get ``1 / (total * 10**k)`` with
        ``k = min(len(key) - 2, 10)``, so longer unknown grams are penalised
        more heavily.
        """
        # Checked first so an all-zero table can never divide by zero.
        if self.gramCount == 0:
            return 1e-10
        if key in self:
            return float(self[key]) / self.gramCount
        return 1.0 / (self.gramCount * 10 ** min(len(key) - 2, 10))

# Module-level probability tables, loaded once when this cell/module runs.
# Each file is read from the current working directory and is expected to
# contain tab-separated "gram<TAB>count" lines (see OneGramDist).
# Used below as: singleWordProb on whole words, trigramLetterProb on
# 3-letter slices, bigramLetterProb on 2-letter slices.
singleWordProb = OneGramDist('one_word.txt')
trigramLetterProb = OneGramDist('trigram.txt')
bigramLetterProb = OneGramDist('bigram.txt')

def wordSeqFitness(words):
    """Return the log10-likelihood of a sequence of words.

    The score is the sum of ``log10(singleWordProb(w))`` over *words*.
    An empty sequence scores 0 instead of raising, which is what the
    initial-value-less ``reduce`` used to do.

    Args:
        words: iterable of word strings.

    Returns:
        The summed log10 probability (0 for an empty iterable).
    """
    return sum(math.log10(singleWordProb(w)) for w in words)


def letterNGrams(msg, n):
    """Return every length-*n* slice of *msg*, in order of appearance.

    Slices overlap (a sliding window moving one position at a time). When
    *msg* is shorter than *n* the result is an empty list.
    """
    windowCount = len(msg) - n + 1
    grams = []
    for start in range(windowCount):
        grams.append(msg[start:start + n])
    return grams

def trigramStringProb(msg):
    """Score *msg* by the summed log10 probability of its letter trigrams.

    Every overlapping 3-letter slice of *msg* is looked up in
    ``trigramLetterProb``; higher (less negative) totals indicate text whose
    trigrams are more common in that table. A message with fewer than three
    characters scores 0.
    """
    logProbs = [math.log10(trigramLetterProb(gram))
                for gram in letterNGrams(msg, 3)]
    return sum(logProbs)

# The 26 lowercase ASCII letters: the plaintext alphabet every key maps onto.
alphabet = string.ascii_lowercase

def decrypt(msg, key):
    """Apply substitution *key* to *msg* and return the resulting text.

    The i-th character of *key* is replaced by the i-th letter of
    ``alphabet``; characters not present in *key* pass through unchanged.
    """
    table = str.maketrans(key, alphabet)
    return msg.translate(table)
def keySwap(key, a, b):
    """Return a copy of *key* with every *a* and *b* exchanged.

    If *a* and *b* are the same character the key is returned unchanged.
    """
    swapTable = str.maketrans(a + b, b + a)
    return key.translate(swapTable)

def localMaximum(msg, key, decryptionFitness, numSteps):
    """Hill-climb from *key* and return the best decryption found.

    Exactly *numSteps* candidate keys are evaluated. A candidate replaces
    the current key as soon as its decryption scores strictly higher, and
    the neighbor stream is then regenerated from the new position
    (first-improvement search).

    Args:
        msg: ciphertext to decrypt (passed unchanged to ``decrypt``).
        key: starting substitution key.
        decryptionFitness: callable scoring a decrypted string; higher is
            better.
        numSteps: number of candidate keys to evaluate.

    Returns:
        The decrypted text for the best key seen (not the key itself).
    """
    bestKey = key
    bestText = decrypt(msg, bestKey)
    bestScore = decryptionFitness(bestText)
    # neighboringKeys is a generator and never runs dry, so next() is safe.
    candidates = neighboringKeys(bestKey, bestText)

    for _ in range(numSteps):
        candidateKey = next(candidates)
        candidateText = decrypt(msg, candidateKey)
        candidateScore = decryptionFitness(candidateText)

        if candidateScore > bestScore:
            bestKey = candidateKey
            bestText = candidateText
            bestScore = candidateScore
            # Restart the neighbor stream around the improved key.
            candidates = neighboringKeys(bestKey, bestText)

    return bestText

def neighboringKeys(key, decryptedMsg):
    """Yield an endless stream of keys that differ from *key* by one swap.

    First come targeted swaps: the 30 least probable letter bigrams of
    *decryptedMsg* (by ``bigramLetterProb``) are visited, and for each one
    every letter (in random order) is tried as a replacement for either
    position; a swap is yielded whenever the replacement bigram is more
    probable than the current one. Once those are exhausted the generator
    yields swaps of two uniformly random letters forever.
    """
    allBigrams = letterNGrams(decryptedMsg, 2)
    # Stable ascending sort, so the least probable bigrams come first.
    allBigrams.sort(key=bigramLetterProb)
    leastLikely = allBigrams[:30]

    for pair in leastLikely:
        first, second = pair
        pairProb = bigramLetterProb(pair)
        for letter in shuffled(alphabet):
            # Doubled letter: prefer replacing it with a likelier double.
            if first == second and bigramLetterProb(letter + letter) > pairProb:
                yield keySwap(key, letter, first)
                continue
            if bigramLetterProb(letter + second) > pairProb:
                yield keySwap(key, letter, first)
            if bigramLetterProb(first + letter) > pairProb:
                yield keySwap(key, letter, second)

    # Fallback: unguided random swaps, so callers can always ask for more.
    while True:
        yield keySwap(key, random.choice(alphabet), random.choice(alphabet))

def steepestAscent(msg, key, decryptionFitness, numSteps):
    """Hill-climb from *key* and return the best decryption found.

    This used to be a line-for-line copy of ``localMaximum``; it now
    delegates to it so the search logic lives in one place. Despite the
    name, the search is first-improvement: a candidate key is accepted as
    soon as it scores strictly higher, rather than after comparing all
    neighbors.

    Args:
        msg: ciphertext to decrypt.
        key: starting substitution key.
        decryptionFitness: callable scoring a decrypted string; higher is
            better.
        numSteps: number of candidate keys to evaluate.

    Returns:
        The decrypted text for the best key seen (not the key itself).
    """
    return localMaximum(msg, key, decryptionFitness, numSteps)

def shuffled(s):
    """Return a new string holding the characters of *s* in random order.

    Uses the module-level ``random`` generator, so results are reproducible
    after ``random.seed``.
    """
    chars = [*s]
    random.shuffle(chars)
    return ''.join(chars)

def preprocessInputMessage(chars):
    """Lower-case *chars* and keep only the letters a-z.

    Spaces, digits, punctuation and any non-ASCII characters are dropped;
    the remaining letters are returned as one contiguous string.
    """
    lowered = chars.lower()
    letterRuns = re.findall('[a-z]+', lowered)
    return ''.join(letterRuns)

def formatDecryption(ciphertext, decrypted):
    """Re-insert decrypted letters into the layout of the original ciphertext.

    *decrypted* is the letters-only text produced from
    ``preprocessInputMessage(ciphertext)``. Each ciphertext character that
    preprocessing kept (an a-z letter once lower-cased) is replaced by the
    next decrypted letter. Everything else (spaces, punctuation, digits,
    non-ASCII letters) is copied through unchanged.

    The previous ``char.isalpha()`` test also matched non-ASCII letters,
    which preprocessing drops. Those consumed a decrypted letter they never
    produced, shifting the rest of the text or running off the end.

    Args:
        ciphertext: the original, unprocessed ciphertext.
        decrypted: decrypted letters, one per a-z letter of *ciphertext*.

    Returns:
        The decrypted text (lower-case, as given in *decrypted*) with the
        original spacing and punctuation restored.

    Raises:
        IndexError: if *decrypted* holds fewer letters than *ciphertext*.
    """
    formatted = []
    alpha_idx = 0

    for char in ciphertext:
        # Count the a-z letters this character yields once lower-cased;
        # this is exactly what preprocessInputMessage keeps for it.
        letter_count = sum('a' <= c <= 'z' for c in char.lower())
        if letter_count:
            for _ in range(letter_count):
                formatted.append(decrypted[alpha_idx])
                alpha_idx += 1
        else:
            # Not part of the cipher alphabet: keep the original character.
            formatted.append(char)

    return ''.join(formatted)



def create_key_mapping(ciphertext, decrypted_text):
    """Derive the cipher-letter -> plain-letter mapping from aligned texts.

    Both texts are lower-cased and walked in lockstep; every position where
    both characters are alphabetic contributes one mapping entry. If a
    cipher letter appears with different plain letters, the last pairing
    wins.

    Returns:
        dict mapping each lower-case cipher letter to its plain letter.
    """
    cipher_chars = ciphertext.lower()
    plain_chars = decrypted_text.lower()
    return {
        cipher: plain
        for cipher, plain in zip(cipher_chars, plain_chars)
        # Only letter/letter pairs are part of the key.
        if cipher.isalpha() and plain.isalpha()
    }


def compare_with_onegram(decrypted_text, onegram_words):
    """Return the fraction of words in *decrypted_text* found in *onegram_words*.

    Words are maximal runs of ASCII letters taken from the lower-cased text.
    Returns 0 when the text contains no such words.

    Args:
        decrypted_text: candidate plaintext.
        onegram_words: container supporting ``in`` (e.g. a dict or set of
            known words).
    """
    tokens = re.findall(r'\b[a-zA-Z]+\b', decrypted_text.lower())
    if not tokens:
        return 0

    hits = 0
    for token in tokens:
        if token in onegram_words:
            hits += 1
    return hits / len(tokens)


def crackSubstitution(msg, onegram_words, numSteps=8000, restarts=40,
                      target_similarity=0.99):
    """Attack a substitution cipher with random-restart hill climbing.

    For each of *restarts* random starting keys, a hill climb of *numSteps*
    evaluations is run using trigram log-probability as the fitness. Each
    resulting decryption is put back into the ciphertext's layout and scored
    by the fraction of its words found in *onegram_words*. The best-scoring
    decryption so far (ties go to the most recent) is printed and
    remembered. The search stops early once that score reaches
    *target_similarity*.

    Args:
        msg: raw ciphertext, including spaces and punctuation.
        onegram_words: container of known words (supports ``in``).
        numSteps: candidate keys evaluated per restart.
        restarts: number of random starting keys.
        target_similarity: word-match fraction (0..1) at which the search
            stops early. Defaults to 0.99, the value previously hard-coded.

    Returns:
        ``(best_decryption, best_key, best_similarity)`` where
        ``best_key`` maps cipher letters to plain letters. With
        ``restarts=0`` this is ``(None, None, 0)``.
    """
    ciphertext = msg
    best_decryption = None
    best_similarity = 0
    best_key = None
    msg = preprocessInputMessage(msg)
    # Draw every starting key up front so the random stream is consumed in
    # the same order whatever happens during the climbs.
    startingKeys = [shuffled(alphabet) for _ in range(restarts)]

    for key in startingKeys:
        decryption = steepestAscent(msg, key, trigramStringProb, numSteps)
        formatted_decryption = formatDecryption(ciphertext, decryption)
        similarity = compare_with_onegram(formatted_decryption, onegram_words)

        if similarity >= best_similarity:
            best_similarity = similarity
            best_decryption = formatted_decryption
            print(f"best similarity : {best_similarity:.2%}")
            print(best_decryption)

            best_key = create_key_mapping(ciphertext, best_decryption)
            # Good enough: stop restarting once the target is reached.
            if best_similarity >= target_similarity:
                break

    return best_decryption, best_key, best_similarity

def calculate_letter_frequency(filepath):
    """Count how often each letter occurs in the text file at *filepath*.

    The file contents are lower-cased and only alphabetic characters are
    counted.

    Returns:
        collections.Counter mapping each lower-case letter to its count.
    """
    # Read the whole file; surrounding whitespace is irrelevant to counts.
    with open(filepath, 'r') as handle:
        contents = handle.read().strip()

    # Tally alphabetic characters only.
    return Counter(ch for ch in contents.lower() if ch.isalpha())

def format_text(ciphertext, decrypted_text):
    """Lay out *decrypted_text* using the casing and punctuation of *ciphertext*.

    The two strings are read position by position:
    - a '.' in the ciphertext becomes '.' followed by a newline;
    - a letter in the ciphertext is replaced by the decrypted character at
      the same index, upper-cased if the cipher letter was upper-case and
      lower-cased otherwise;
    - any other ciphertext character is copied as is.

    Raises:
        IndexError: if a ciphertext letter sits at an index beyond the end
            of *decrypted_text*.
    """
    pieces = []

    for position, original in enumerate(ciphertext):
        if original == '.':
            # Start a new line after each period.
            pieces.append('.\n')
            continue
        if not original.isalpha():
            # Non-letters are taken from the ciphertext unchanged.
            pieces.append(original)
            continue
        plain = decrypted_text[position]
        # Mirror the cipher letter's case onto the decrypted letter.
        pieces.append(plain.upper() if original.isupper() else plain.lower())

    return ''.join(pieces)


def decrypt_with_key(ciphertext, key):
    """Decrypt *ciphertext* with a cipher-letter -> plain-letter mapping.

    Upper-case characters are looked up by their lower-case form and the
    result is upper-cased; all other characters are looked up directly.
    Characters with no entry in *key* are copied unchanged.

    Args:
        ciphertext: text to decrypt.
        key: mapping from lower-case cipher letters to plain letters.
    """
    out = []
    for symbol in ciphertext:
        if not symbol.isupper():
            # Direct lookup; fall back to the original character.
            out.append(key[symbol] if symbol in key else symbol)
            continue
        # Upper-case: look up the lower-case form, then restore the case.
        lowered = symbol.lower()
        out.append(key[lowered].upper() if lowered in key else symbol)
    return ''.join(out)


#### Driver script: crack the substitution cipher stored in a text file.
#### The ciphertext file name must be edited by hand to pick the input
#### (switch between ciphertext1.txt and ciphertext2.txt).
text="ciphertext2.txt" 
# Known-word table used to score candidate decryptions: word -> count,
# parsed from the tab-separated lines of one_word.txt.
with open('one_word.txt', 'r') as file:
    onegram_words = {line.split('\t')[0]: int(line.split('\t')[1]) for line in file}
# Read the ciphertext
print("Reading ciphertext...")
with open(text, 'r') as file:
    ciphertext = file.read().strip()
print(ciphertext)
# Compute the letter frequencies (this re-reads the same file)
letter_frequency = calculate_letter_frequency(text)
print("Letter Frequency in Ciphertext:")
for letter, count in sorted(letter_frequency.items()):
    print(f"{letter}: {count}")
# Run the decryption
print("Starting decryption process...")
# Seed the random generator so the randomized search is reproducible;
# a different fixed seed is used depending on the input file.
if text=="ciphertext1.txt":
    random.seed(24)
else:
    random.seed(9)
decrypted_text,key_mapping,best_similarity=crackSubstitution(ciphertext, onegram_words)
print("\nFinal Results:")
# Restore the ciphertext's letter case and break lines after periods.
formatted_decrypted_text = format_text(ciphertext, decrypted_text)  
    # Print in an easy-to-read form
print(formatted_decrypted_text)
print(f"Key mapping:\n")
# NOTE: this loop rebinds the module-level name `key` (cipher letter).
for key, value in key_mapping.items():
    print(f"{key} -> {value}")

Reading ciphertext...
Rsrhbitr jalwiar jia isuiac ysoamsjh jilj xauljar jia upor lxo clsrar cpwkr. Ia jpp wpxwmtoar jilj lmm sr eamm. Jisr txsqacra xpe esjiptj l nlrjac raanr jp isn xasjiac rjacsma xpc yacjsma. Alwi ljpn py jilj rjpxa, alwi nsxaclm ymlka py jisr nptxjlsx ytmm py xsuij, lmpxa ypcnr l epcmo. Jia rjctuuma sjramy jp jia iasuijr sr axptui jp ysmm l nlx'r ialcj. Pxa ntrj snlusxa Rsrhbitr ilbbh.
Letter Frequency in Ciphertext:
a: 38
b: 4
c: 14
e: 4
h: 4
i: 26
j: 34
k: 2
l: 23
m: 18
n: 10
o: 5
p: 22
q: 1
r: 29
s: 24
t: 10
u: 9
w: 6
x: 17
y: 10
Starting decryption process...
best similarity : 100.00%
sisyphus teaches the higher fidelity that negates the gods and raises rocks. he too concludes that all is well. this universe now without a master seems to him neither sterile nor fertile. each atom of that stone, each mineral flake of this mountain full of night, alone forms a world. the struggle itself to the heights is enough to fill a man's heart. one must imagine sisyphus happ