/
caesar_break_parameter_trials.py
89 lines (75 loc) · 2.83 KB
/
caesar_break_parameter_trials.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import random
import csv
from support.utilities import *
from support.language_models import *
from support.norms import *
from cipher.caesar import *
# Number of random ciphertexts scored per (model, message length) cell.
trials = 100

# One big sanitised letter stream drawn from three public-domain texts;
# trial plaintexts are random slices of this corpus.
corpus_files = ['support/shakespeare.txt',
                'support/sherlock-holmes.txt',
                'support/war-and-peace.txt']
corpus = sanitise(cat([open(name).read() for name in corpus_files]))
corpus_length = len(corpus)

euclidean_scaled_english_counts = euclidean_scale(english_counts)

# Distance/similarity metrics to trial. Distances are inverted so that
# "bigger is better" holds for every scoring function.
metrics = [{'func': func, 'invert': invert, 'name': name}
           for func, invert, name in [
               (l1, True, 'l1'),
               (l2, True, 'l2'),
               (l3, True, 'l3'),
               (cosine_similarity, False, 'cosine_similarity')]]

# Ways of scaling letter-frequency vectors before comparison, paired with
# the reference English frequencies scaled the same way.
scalings = [{'corpus_frequency': freqs, 'scaling': scale, 'name': name}
            for freqs, scale, name in [
                (normalised_english_counts, normalise, 'normalised'),
                (euclidean_scaled_english_counts, euclidean_scale,
                 'euclidean_scaled')]]

# Ciphertext lengths to trial, longest first.
message_lengths = [100, 50, 30, 20, 10, 5]
def make_frequency_compare_function(
        target_frequency, frequency_scaling, metric, invert):
    """Build a text-scoring function from a frequency metric.

    The returned function scales the letter frequencies of a text with
    `frequency_scaling` and compares them to `target_frequency` using
    `metric`. When `invert` is true the metric is a distance, so its
    value is negated to give a "bigger is better" score.
    """
    def frequency_compare(text):
        scaled_counts = frequency_scaling(frequencies(text))
        distance = metric(target_frequency, scaled_counts)
        return -1 * distance if invert else distance
    return frequency_compare
# Every scoring model under trial: one frequency-comparison model per
# (metric, scaling) pair, plus the three n-gram language models.
frequency_models = [
    {'func': make_frequency_compare_function(
         scaling['corpus_frequency'], scaling['scaling'],
         metric['func'], metric['invert']),
     'name': '{} + {}'.format(metric['name'], scaling['name'])}
    for metric in metrics
    for scaling in scalings]
ngram_models = [{'func': Pletters, 'name': 'Pletters'},
                {'func': Pbigrams, 'name': 'Pbigrams'},
                {'func': Ptrigrams, 'name': 'Ptrigrams'}]
models = frequency_models + ngram_models
def random_ciphertext(message_length):
    """Return (key, ciphertext) for a random corpus excerpt.

    Picks a random slice of `corpus` of the given length and enciphers
    it with a random non-trivial Caesar key (1-25; 0 would be a no-op).
    """
    start = random.randint(0, corpus_length - message_length)
    plaintext = corpus[start:start + message_length]
    key = random.randint(1, 25)
    return key, caesar_encipher(plaintext, key)
def eval_models():
    """Run the full trial grid: every model at every message length.

    Returns a dict mapping model name -> {message_length: successes}.
    """
    results = {}
    for model in models:
        results[model['name']] = {
            length: eval_one_model(model, length)
            for length in message_lengths}
    return results
def eval_one_model(model, message_length):
    """Count how often `model` recovers the key over `trials` attempts.

    Each attempt enciphers a fresh random excerpt of `message_length`
    letters and checks whether caesar_break, driven by this model's
    scoring function, finds the true key. Returns the success count.
    """
    print(model['name'], message_length)
    hits = 0
    for _ in range(trials):
        true_key, ciphertext = random_ciphertext(message_length)
        guessed_key, _ = caesar_break(ciphertext, model['func'])
        hits += guessed_key == true_key
    return hits
def write_results(scores):
    """Write the trial results to caesar_break_parameter_trials.csv.

    One row per model (sorted by name), one column per message length,
    plus a leading 'name' column.

    Args:
        scores: dict mapping model name -> {message_length: successes},
            as produced by eval_models(). Not modified.
    """
    # newline='' is required by the csv module so the writer controls
    # line endings itself (avoids doubled newlines on Windows).
    with open('caesar_break_parameter_trials.csv', 'w', newline='') as f:
        writer = csv.DictWriter(f, ['name'] + message_lengths,
                                quoting=csv.QUOTE_NONNUMERIC)
        writer.writeheader()
        for name in sorted(scores):
            # Build each row as a copy rather than injecting the 'name'
            # key into the caller's dict (the original mutated `scores`).
            row = dict(scores[name])
            row['name'] = name
            writer.writerow(row)
if __name__ == '__main__':
    # Guard the entry point so importing this module (e.g. for reuse of
    # random_ciphertext) does not trigger the full, slow trial run.
    scores = eval_models()
    write_results(scores)