forked from asweigart/codebreaker
/
simpleSubBreaker.py
275 lines (222 loc) · 10.2 KB
/
simpleSubBreaker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
# Simple Substitution Cipher Breaker
# http://inventwithpython.com/codebreaker (BSD Licensed)
"""
In this program, a "word pattern" is a description of which letters are
repeated in a word. A word pattern is a series of numbers delimited by periods.
The first letter to appear in the word is assigned 0, the second letter 1, and
so on. So the word pattern for 'cucumber' is '0.1.0.1.2.3.4.5' because the
first letter 'c' occurs as the first and third letter in the word 'cucumber'.
So the pattern has '0' as the first and third number.
The pattern for 'abc' or 'cba' is '0.1.2'
The pattern for 'aaa' or 'bbb' is '0.0.0'
The pattern for 'hello' is '0.1.2.2.3'
The pattern for 'advise' or 'closet' is '0.1.2.3.4.5' (they have only unique
letters in the word)
In this program, a "candidate" is a possible English word that a ciphertext
work can decrypt to.
For example, 'cucumber', 'mementos', and 'cocoanut' are candidates for the
ciphertext word 'JHJHWDOV' (because all of them have the pattern
'0.1.0.1.2.3.4.5')
In this program, a "map" or "mapping" is a dictionary where the keys are the
letters in SYMBOLS (e.g. 'A', 'B', 'C', etc) and the values are lists of
letters that could possibly be the correct decryption. If the list is blank,
this means that it is unknown what this letter could decrypt to.
"""
# The import statement for wordPatterns is further down.
import os, sys, simpleSubCipher, re, copy
SYMBOLS = simpleSubCipher.SYMBOLS
def main():
message = 'SY L NLX SR PYYACAO L YLWJ EISWI UPAR LULSXRJ ISR SXRJSXWJR, IA ESMM RWCTJSXSZA SJ WMPRAMH, LXO TXMARR JIA AQSOAXWA SR PQACEIAMNSXU, IA ESMM CAYTRA JP FAMSAQA SJ. SY, PX JIA PJIAC ILXO, IA SR PYYACAO RPNAJISXU EISWI LYYPCOR L CALRPX YPC LWJSXU SX LWWPCOLXWA JP ISR SXRJSXWJR, IA ESMM LWWABJ SJ AQAX PX JIA RMSUIJARJ AQSOAXWA. JIA PCSUSX PY NHJIR SR AGBMLSXAO SX JISR ELH. -FACJCLXO CTRRAMM'
nonLettersPattern = re.compile('[^A-Z\s]')
ciphertext = nonLettersPattern.sub('', message.upper()).split()
# allCandidates is a dict with keys of a single ciphertext word, and
# values of the possible word patterns
# e.g. allCandidates == {'PYYACAO': ['alleged', 'ammeter', ...etc],
# 'EISWI': ['aerie', 'aging', 'algol', ...etc],
# 'LULSXRJ': ['abalone', 'abashed', ...etc],
# ...etc }
allCandidates = {}
for cipherWord in ciphertext:
pattern = getWordPattern(cipherWord)
if pattern not in wordPatterns.allPatterns:
continue
allCandidates[cipherWord] = copy.copy(wordPatterns.allPatterns[pattern])
# convert candidate words to uppercase
for i in range(len(allCandidates[cipherWord])):
allCandidates[cipherWord][i] = allCandidates[cipherWord][i].upper()
# determine the possible valid ciphertext translations
print('Breaking...')
theMap = breakSimpleSub(getNewBlankMapping(), allCandidates)
# display the results to the user.
print('Done.')
print()
printMapping(ciphertext, theMap)
print()
print('Original ciphertext:')
print(message)
print('')
print('Broken ciphertext:')
print(decryptWithMap(message, theMap))
print()
def getWordPattern(word):
# Returns a string of the pattern form of the given word.
# e.g. '0.1.2.3.4.1.2.3.5.6' for 'DUSTBUSTER' or '0.1.2.2.3' for 'DOGGY'
word = word.upper()
nextNum = 0
letterNums = {}
wordPattern = []
for letter in word:
if letter in letterNums:
wordPattern.append(str(letterNums[letter]))
else:
wordPattern.append(str(nextNum))
letterNums[letter] = nextNum
nextNum += 1
return '.'.join(wordPattern)
def breakSimpleSub(theMap, allCandidates):
# allCandidate's format:
# { 'cipherword1': ['candidate1a', 'candidate1b', ...],
# 'cipherword2': ['candidate2a', 'candidate2b', ...],
# ...}
for cipherWord in allCandidates.keys():
# get a new mapping for each ciphertext word
newMap = getNewBlankMapping()
# create a map that has all the letters' possible candidate
# decryptions added to it
for candidate in allCandidates[cipherWord]:
newMap = addMappings(newMap, cipherWord, candidate)
# intersect this new map with the existing map
theMap = intersectMappings(theMap, newMap)
# remove any solved letters from the other possible mappings
theMap = removeSolvedLettersFromMapping(theMap)
return theMap
def getNewBlankMapping():
# Returns a dict where the keys are single-character strings of the
# uppercase letters, and the values are blank lists.
# E.g. {'A': [], 'B': [], 'C': [], ...etc}
theMap = {}
for i in SYMBOLS:
theMap[i] = []
return theMap
def addMappings(theMap, cipherWord, candidate):
# The theMap parameter is a "mapping" data structure that this function
# modifies. (See the comments at the top of this file.)
# The cipherWord parameter is a string value of the ciphertext word.
# The candidate parameter is a possible English word that the cipherWord
# could decrypt to.
# This function modifies theMap so that the mappings of the cipherWord's
# letters to the candidate's letters are added to theMap.
for i in range(len(cipherWord)):
if candidate[i] not in theMap[cipherWord[i]]:
theMap[cipherWord[i]].append(candidate[i])
return theMap
def intersectMappings(mapA, mapB):
# To intersect two maps, create a blank map, and that add only the
# candidate decryption letters if they exist in both maps.
result = getNewBlankMapping()
for i in mapA.keys():
# An empty list means "any letter is possible". So just copy the other
# map entirely.
if mapA[i] == []:
result[i] = copy.copy(mapB[i])
elif mapB[i] == []:
result[i] = copy.copy(mapA[i])
else:
for j in mapA[i]:
if j in mapB[i]:
result[i].append(j)
return result
def removeSolvedLettersFromMapping(theMap):
# Letters in the mapping that map to only one letter are consider "solved"
# and can be removed from the other letters.
# For example, if 'A' maps to possible letters ['M', 'N'], and 'B' maps
# to ['N'], then we know that 'B' must map to 'N', so we can remove 'N'
# from the list of what 'A' could map to. So 'A' then maps to ['M'].
# Note that now that 'A' maps to only one letter, we can remove 'M'
# from the list of possible mappings for every other letter. (This is why
# there is a loop that keeps reducing the map.)
previousSolvedLetters = []
solvedLetters = None
while previousSolvedLetters != solvedLetters:
# This loop will break when solvedLetters is not changed by the
# reduction process (and is the same as previousSolvedLetters).
previousSolvedLetters = solvedLetters
solvedLetters = []
# solvedLetters will be a list of English letters that have one and
# only one possible mapping in theMap
for i in theMap:
if len(theMap[i]) == 1:
solvedLetters.append(theMap[i][0])
# If a letter is solved, than it cannot possibly be a possible
# decryption letter for a different ciphertext letter, so we should
# remove it.
for i in theMap:
for s in solvedLetters:
if len(theMap[i]) != 1 and s in theMap[i]:
theMap[i].remove(s)
# With a letter removed, it's possible that we may have reduced other
# ciphertext letters to one and only one solution, so keep looping
# until previousSolvedLetters == solvedLetters. At that point, we'll
# know we can't rmemove any more letters.
return theMap
def printMapping(ciphertext, theMap):
# Display a mapping data structure on the screen.
print('Mapping:')
print(' ' + ' '.join(list(SYMBOLS)))
print(' ' + ' '.join('=' * len(SYMBOLS)))
for i in range(len(SYMBOLS)):
print(' ', end='')
foundAnyLetters = False
for j in SYMBOLS:
# theMap[j] points to a list of single-character strings that
# are potential solutions for the ciphertext letter in j.
if len(theMap[j]) > i:
foundAnyLetters = True
print(theMap[j][i] + ' ', end='')
else:
print(' ', end='')
print()
if foundAnyLetters == False:
break
def decryptWithMap(ciphertext, theMap):
# This function will do a simple sub decryption of ciphertext with the
# information in theMap, instead of a simple sub key.
# First create a simple sub key from the theMap mapping.
key = ['x'] * len(SYMBOLS)
for letter in theMap.keys():
if len(theMap[letter]) == 1:
# There is only one possible letter mapping, so add it to the key.
keyIndex = SYMBOLS.find(theMap[letter][0].upper())
key[keyIndex] = letter.upper()
else:
ciphertext = ciphertext.replace(letter, '_')
key = ''.join(key)
# Then decrypt the original ciphertext with this key and return the
# decryption.
return simpleSubCipher.decryptMessage(key, ciphertext)
# If the wordPatterns.py file does not exist, create it based on the words
# in our dictionary text file, dictionary.txt.
# (You can download this file from http://inventwithpython.com/dictionary.txt)
if not os.path.exists('wordPatterns.py'):
import pprint # import the "pretty print" module
allPatterns = {}
fp = open('dictionary.txt')
wordList = fp.readlines()
fp.close()
for word in wordList:
word = word.strip() # get rid of newline at the end of the string
pattern = getWordPattern(word)
if pattern not in allPatterns:
allPatterns[pattern] = [word]
else:
allPatterns[pattern].append(word)
# This is code that writes code. The wordPatterns.py file contains
# one very, very large assignment statement.
fp = open('wordPatterns.py', 'w')
fp.write('allPatterns = ')
fp.write(pprint.pformat(allPatterns))
fp.close()
# Import our wordPatterns.py file.
import wordPatterns
if __name__ == '__main__':
main()