# Imports

In [132]:
import re
import numpy as np
import math
import zlib


# Read text

In [133]:
def read_text(path):
    lines = ''

    with open(path, 'r', encoding='utf-8') as file:
        lines = file.read()

    return lines

def write_text(path, lines):
    with open(path, 'w', encoding='utf-8') as file:
        file.write(lines)


path = '.\\texts\\roksolana.txt'
text = read_text(path).lower()

formatted_text = re.sub(u'[^а-яіґєї]|э|ы|ъ', '', text)
formatted_text = re.sub(u'ґ', 'г', formatted_text)

text_len = len(formatted_text)

write_text('.\\texts\\formatted_text.txt', formatted_text)

# Gathering monograms and bigrams statistics

In [134]:
alphabet_str = 'абвгдеєжзиіїйклмнопрстуфхцчшщьюя'
alphabet_length = len(alphabet_str)

alphabet_statistics = {}
alphabet_frequencies = {}

for letter in alphabet_str:
    alphabet_statistics[letter] = 0

for letter in formatted_text:
    alphabet_statistics[letter] += 1

for letter in alphabet_statistics.keys():
    alphabet_frequencies[letter] = alphabet_statistics[letter] / text_len

monogram_to_number = {}
for i in range(alphabet_length):
	monogram_to_number[alphabet_str[i]] = i


In [135]:
all_bigrams = []

bigrams_statistics = {}
bigrams_frequencies = {}

for first in alphabet_str:
    for second in alphabet_str:
        bigram = first + second

        all_bigrams.append(bigram)
        bigrams_statistics[bigram] = 0
        bigrams_frequencies[bigram] = 0

for index in range(text_len - 1):
    bigrams_statistics[formatted_text[index] + formatted_text[index + 1]] += 1

for bigram in bigrams_frequencies.keys():
    bigrams_frequencies[bigram] = bigrams_statistics[bigram] / (text_len - 1)

all_bigrams_length = len(all_bigrams)

bigram_to_number = {}
for i in range(all_bigrams_length):
	bigram_to_number[all_bigrams[i]] = i


## Entropy and Coincidence Index

In [136]:
monogram_entropy = -1 * sum([alphabet_frequencies[letter] * np.log2(alphabet_frequencies[letter]) for letter in alphabet_str])
bigram_entropy = -0.5 * sum([bigrams_frequencies[bigram] * np.log2(bigrams_frequencies[bigram]) if bigrams_frequencies[bigram] != 0 else 0 for bigram in all_bigrams])

coincidence_index = sum([
	alphabet_statistics[letter] * (alphabet_statistics[letter] - 1)
for letter in alphabet_str]) / text_len / (text_len - 1)


## Generating texts

In [137]:
texts_count = 10000

def generate_texts(text_size, texts_count, step):
	text_array = []

	index = 0
	for n in range(texts_count):
		text_array.append(formatted_text[index:(index + text_size)])
		index += step

	return text_array

texts_10 = generate_texts(10, 10000, 10)
texts_100 = generate_texts(100, 10000, 100)
texts_1000 = generate_texts(1000, 10000, 100)
texts_10000 = generate_texts(10000, 1000, 1000)


# Distortion algorithms

## Vigenere cipher

In [138]:
def mono_vigenere_cipher(text, key):
    key_length = len(key)

    ciphered_text = ''
    for index in range(len(text)):
        key_index = index % key_length
        ciphered_text += alphabet_str[(monogram_to_number[text[index]] + key[key_index]) % alphabet_length]

    return ciphered_text

def bi_vigenere_cipher(text, key):
    key_length = len(key)

    ciphered_text = ''

    for index in range(0, len(text), 2):
        key_index = (index // 2) % key_length
        ciphered_text += all_bigrams[(bigram_to_number[text[index:index+2]] + key[key_index]) % all_bigrams_length]

    return ciphered_text


## Affine cipher

In [139]:
def mono_affine_cipher(text, key):
	a, b = key
	ciphered_text = ''

	for letter in text:
		ciphered_letter_index = (a * monogram_to_number[letter] + b) % alphabet_length

		ciphered_text += alphabet_str[ciphered_letter_index]

	return ciphered_text


def bi_affine_cipher(text, key):
	a, b = key
	ciphered_text = ''

	for bigram in re.findall(r'..', text):
		ciphered_letter_index = (a * bigram_to_number[bigram] + b) % (all_bigrams_length)

		ciphered_text += all_bigrams[ciphered_letter_index]

	return ciphered_text


## Uniform distortion

In [140]:
def mono_uniform_distortion(size):
    return ''.join([alphabet_str[elem] for elem in np.random.randint(low=0, high=alphabet_length, size=size)])

def bi_uniform_distortion(size):
    return ''.join([all_bigrams[elem] for elem in np.random.randint(low=0, high=all_bigrams_length, size=size)])


## Recurrent sequence

In [141]:
def generate_random_lgram(high = alphabet_length, size = 1):
	return np.random.randint(low = 0, high = high, size = size)

def mono_recurrent_sequence(size):
	s_0, s_1 = generate_random_lgram(alphabet_length, 2)
	recurrent_sequence = alphabet_str[s_0] + alphabet_str[s_1]
	
	for i in range(2, size):
		prev_letter_index = monogram_to_number[recurrent_sequence[i - 1]]
		prev_prev_letter_index = monogram_to_number[recurrent_sequence[i - 2]]
		next_letter_index = (prev_letter_index + prev_prev_letter_index) % alphabet_length

		recurrent_sequence += alphabet_str[next_letter_index]

	return recurrent_sequence

def bi_recurrent_sequence(size):
	s_0, s_1 = generate_random_lgram(all_bigrams_length, 2)
	recurrent_sequence = all_bigrams[s_0] + all_bigrams[s_1]
	
	for i in range(2, size // 2):
		prev_letter_index = bigram_to_number[recurrent_sequence[-2:]]
		prev_prev_letter_index = bigram_to_number[recurrent_sequence[-4:-2]]

		next_bigram_index = (prev_letter_index + prev_prev_letter_index) % (all_bigrams_length)

		recurrent_sequence += all_bigrams[next_bigram_index]

	return recurrent_sequence


# Criterias

In [142]:
def get_prohibited_grams(grams, quartile):
	entries = grams.items()
	return dict(sorted(entries, key = lambda tuple: tuple[1], reverse=True)[math.floor(quartile * len(entries)):])

prohibited_monograms = get_prohibited_grams(alphabet_statistics, 0.9)
prohibited_monograms_keys = list(prohibited_monograms.keys())

prohibited_bigrams = get_prohibited_grams(bigrams_statistics, 0.75)
prohibited_bigrams_keys = list(prohibited_bigrams.keys())


### Criteria 1.1 (criteria 1.0 in parameters)

In [143]:
def mono_criteria_1_1(text, k_prohibited = 1):
    a_ap = set(text)

    k_counter = 0

    for elem in a_ap:
        if elem in prohibited_monograms_keys:
            k_counter += 1

    if (k_counter >= k_prohibited):
        return 0
    else:
        return 1 

def bi_criteria_1_1(text, k_prohibited = 1):
    bigrams_text = [text[i:i+2] for i in range(len(text) - 1)]
    a_ap = set(bigrams_text)

    k_counter = 0

    for elem in a_ap:
        if elem in prohibited_bigrams_keys:
            k_counter += 1

    if (k_counter >= k_prohibited):
        return 0
    else:
        return 1


### Criteria 1.2

In [169]:
def mono_prohibited_frequencies_1(text, limit):
	text_length = len(text)
	prohibited_frequencies = {}

	for monogram in prohibited_monograms_keys:
		prohibited_frequencies[monogram] = 0

	for letter in text:
		if letter in prohibited_monograms_keys:
			prohibited_frequencies[letter] += 1

	for monogram in prohibited_monograms_keys:
		prohibited_frequencies[monogram] /= text_length

		if (prohibited_frequencies[monogram] >= limit):
			return 0
		
	return 1

def bi_prohibited_frequencies_1(text, limit):
	text_length = len(text)
	prohibited_frequencies = {}

	for i in range(text_length - 1):
		bigram = text[i:i + 2]

		if bigram in prohibited_bigrams_keys:
			try:
				prohibited_frequencies[bigram] += 1
			except KeyError:
				prohibited_frequencies[bigram] = 1

	for bigram in prohibited_frequencies.keys():
		prohibited_frequencies[bigram] /= (text_length - 1)

		if (prohibited_frequencies[bigram] >= limit):
			return 0
		
	return 1


### Criteria 1.3

In [145]:
def mono_prohibited_frequencies_2(text, limit):
	text_length = len(text)
	prohibited_frequencies = {}

	for monogram in prohibited_monograms_keys:
		prohibited_frequencies[monogram] = 0

	for letter in text:
		if letter in prohibited_monograms_keys:
			prohibited_frequencies[letter] += 1

	freq_sum = sum(prohibited_frequencies.values()) / text_length

	if freq_sum > limit:
		return 0
		
	return 1

def bi_prohibited_frequencies_2(text, limit):
	text_length = len(text)
	prohibited_frequencies = {}

	for i in range(text_length - 1):
		bigram = text[i:i + 2]

		if bigram in prohibited_bigrams_keys:
			try:
				prohibited_frequencies[bigram] += 1
			except KeyError:
				prohibited_frequencies[bigram] = 1

	freq_sum = sum(prohibited_frequencies.values()) / (text_length - 1)

	if freq_sum > limit:
		return 0
		
	return 1

kek122121 = 5

try:
    kek122121 += 1
except NameError:
    kek122121 = 2


### Criteria 3.0

In [146]:
def get_monogram_distribution(text):
    text_length = len(text)
    stats = {}

    for elem in text:
        try:
            stats[elem] += 1
        except KeyError:
            stats[elem] = 1

    for key in stats.keys():
        stats[key] /= text_length

    return stats

def get_bigram_distribution(text):
    text_length = len(text)
    stats = {}
            
    for i in range(len(text) - 1):
        bigram = text[i:i+2]

        try:
            stats[bigram] += 1
        except KeyError:
            stats[bigram] = 1

    for key in stats.keys():
        stats[key] /= (text_length - 1)

    return stats

def get_specific_entropy(frequencies, l):
    specific_entropy = 0

    for frequency in frequencies.keys():
        specific_entropy -= frequencies[frequency] * np.log2(frequencies[frequency]) / l            

    return specific_entropy

def mono_criteria_3_0(text, limit):
    text_specific_entropy = get_specific_entropy(get_monogram_distribution(text), 1)

    result = abs(monogram_entropy - text_specific_entropy)

    if result > limit:
        return 0
    else:
        return 1

def bi_criteria_3_0(text, limit):
    text_specific_entropy = get_specific_entropy(get_bigram_distribution(text), 2)

    result = abs(bigram_entropy - text_specific_entropy)

    if result > limit:
        return 0
    else:
        return 1


### Criteria 5.1

In [147]:
def get_most_common_grams(grams, count):
	entries = grams.items()
	sorted_entries = sorted(entries, key = lambda tuple: tuple[1], reverse=True)[:count]
	
	return list(map(lambda item: item[0], sorted_entries))


In [148]:
most_common_monograms = get_most_common_grams(alphabet_statistics, 10)
most_common_bigrams_50 = get_most_common_grams(bigrams_statistics, 50)
most_common_bigrams_100 = get_most_common_grams(bigrams_statistics, 100)
most_common_bigrams_200 = get_most_common_grams(bigrams_statistics, 200)

def mono_empty_boxes(text, limit, most_common_monograms):
	most_common_monograms_in_text = {}

	for monogram in most_common_monograms:
		most_common_monograms_in_text[monogram] = 0

	for letter in text:
		if letter in most_common_monograms:
			most_common_monograms_in_text[letter] += 1

	if len(list(filter(lambda monogram: True if monogram[1] == 0 else False, most_common_monograms_in_text.items()))) > limit:
		return 0

	return 1

def bi_empty_boxes(text, limit, most_common_bigrams):
	most_common_bigrams_in_text = {}

	for bigram in most_common_bigrams:
		most_common_bigrams_in_text[bigram] = 0

	for i in range(len(text) - 1):
		bigram = text[i:i + 2]

		if bigram in most_common_bigrams:
			most_common_bigrams_in_text[bigram] += 1

	if len(list(filter(lambda bigram: True if bigram[1] == 0 else False, most_common_bigrams_in_text.items()))) > limit:
		return 0

	return 1


### Structure criteria

In [149]:
def structure_criteria(text, limit):
	text_len = len(text)
	random_text = mono_uniform_distortion(text_len)

	random_coef = text_len / len(zlib.compress(random_text.encode('utf-8')))
	text_coef = text_len / len(zlib.compress(text.encode('utf-8')))

	if abs(random_coef - text_coef) < limit:
		return 0

	return 1


# Distortion texts

## Monograms

In [150]:
viginere_key_1 = generate_random_lgram(alphabet_length)
viginere_key_5 = generate_random_lgram(alphabet_length, 5)
viginere_key_10 = generate_random_lgram(alphabet_length, 10)

affine_key = generate_random_lgram(alphabet_length, 2)

while affine_key[0] % 2 != 1:
	affine_key = generate_random_lgram(alphabet_length, 2)

text_10_mono = {
	'viginere_1': [mono_vigenere_cipher(text, viginere_key_1) for text in texts_10],
	'viginere_5': [mono_vigenere_cipher(text, viginere_key_5) for text in texts_10],
	'viginere_10': [mono_vigenere_cipher(text, viginere_key_10) for text in texts_10],
	'affine': [mono_affine_cipher(text, affine_key) for text in texts_10],
	'random': [mono_uniform_distortion(10) for i in range(len(texts_10))],
	'reccurent': [mono_recurrent_sequence(10) for i in range(len(texts_10))]
}

text_100_mono = {
	'viginere_1': [mono_vigenere_cipher(text, viginere_key_1) for text in texts_100],
	'viginere_5': [mono_vigenere_cipher(text, viginere_key_5) for text in texts_100],
	'viginere_10': [mono_vigenere_cipher(text, viginere_key_10) for text in texts_100],
	'affine': [mono_affine_cipher(text, affine_key) for text in texts_100],
	'random': [mono_uniform_distortion(100) for i in range(len(texts_100))],
	'reccurent': [mono_recurrent_sequence(100) for i in range(len(texts_100))]
}

text_1000_mono = {
	'viginere_1': [mono_vigenere_cipher(text, viginere_key_1) for text in texts_1000],
	'viginere_5': [mono_vigenere_cipher(text, viginere_key_5) for text in texts_10000],
	'viginere_10': [mono_vigenere_cipher(text, viginere_key_10) for text in texts_1000],
	'affine': [mono_affine_cipher(text, affine_key) for text in texts_1000],
	'random': [mono_uniform_distortion(1000) for i in range(len(texts_1000))],
	'reccurent': [mono_recurrent_sequence(1000) for i in range(len(texts_1000))]
}

text_10000_mono = {
	'viginere_1': [mono_vigenere_cipher(text, viginere_key_1) for text in texts_10000],
	'viginere_5': [mono_vigenere_cipher(text, viginere_key_5) for text in texts_10000],
	'viginere_10': [mono_vigenere_cipher(text, viginere_key_10) for text in texts_10000],
	'affine': [mono_affine_cipher(text, affine_key) for text in texts_10000],
	'random': [mono_uniform_distortion(10000) for i in range(len(texts_10000))],
	'reccurent': [mono_recurrent_sequence(10000) for i in range(len(texts_10000))]
}


## Bigrams

In [151]:
viginere_key_1 = generate_random_lgram(all_bigrams_length)
viginere_key_5 = generate_random_lgram(all_bigrams_length, 5)
viginere_key_10 = generate_random_lgram(all_bigrams_length, 10)

affine_key = generate_random_lgram(all_bigrams_length, 2)

while affine_key[0] % 2 != 1:
	affine_key = generate_random_lgram(all_bigrams_length, 2)

text_10_bi = {
	'viginere_1': [bi_vigenere_cipher(text, viginere_key_1) for text in texts_10],
	'viginere_5': [bi_vigenere_cipher(text, viginere_key_5) for text in texts_10],
	'viginere_10': [bi_vigenere_cipher(text, viginere_key_10) for text in texts_10],
	'affine': [bi_affine_cipher(text, affine_key) for text in texts_10],
	'random': [bi_uniform_distortion(10) for i in range(len(texts_10))],
	'reccurent': [bi_recurrent_sequence(10) for i in range(len(texts_10))]
}

text_100_bi = {
	'viginere_1': [bi_vigenere_cipher(text, viginere_key_1) for text in texts_100],
	'viginere_5': [bi_vigenere_cipher(text, viginere_key_5) for text in texts_100],
	'viginere_10': [bi_vigenere_cipher(text, viginere_key_10) for text in texts_100],
	'affine': [bi_affine_cipher(text, affine_key) for text in texts_100],
	'random': [bi_uniform_distortion(100) for i in range(len(texts_100))],
	'reccurent': [bi_recurrent_sequence(100) for i in range(len(texts_100))]
}

text_1000_bi = {
	'viginere_1': [bi_vigenere_cipher(text, viginere_key_1) for text in texts_1000],
	'viginere_5': [bi_vigenere_cipher(text, viginere_key_5) for text in texts_1000],
	'viginere_10': [bi_vigenere_cipher(text, viginere_key_10) for text in texts_1000],
	'affine': [bi_affine_cipher(text, affine_key) for text in texts_1000],
	'random': [bi_uniform_distortion(1000) for i in range(len(texts_1000))],
	'reccurent': [bi_recurrent_sequence(1000) for i in range(len(texts_1000))]
}

text_10000_bi = {
	'viginere_1': [bi_vigenere_cipher(text, viginere_key_1) for text in texts_10000],
	'viginere_5': [bi_vigenere_cipher(text, viginere_key_5) for text in texts_10000],
	'viginere_10': [bi_vigenere_cipher(text, viginere_key_10) for text in texts_10000],
	'affine': [bi_affine_cipher(text, affine_key) for text in texts_10000],
	'random': [bi_uniform_distortion(10000) for i in range(len(texts_10000))],
	'reccurent': [bi_recurrent_sequence(10000) for i in range(len(texts_10000))]
}


In [152]:
def test_criteria_for_specific_distortion(criteria, text_array):
	def inner(*args):
		results = [criteria(text, *args) for text in text_array]

		return 1 - sum(results) / len(text_array)

	return inner


In [153]:
def test_criteria(criteria, text_array_dict):
	def inner(*args):
		for key in text_array_dict.keys():
			print('   ', key, 1 - test_criteria_for_specific_distortion(criteria, text_array_dict[key])(*args))

	return inner


# Criteria 1.0

## Real

In [154]:
print('L == 10')
print('   ', test_criteria_for_specific_distortion(mono_criteria_1_1, texts_10)())

print('L == 100')
print('   ', test_criteria_for_specific_distortion(mono_criteria_1_1, texts_100)())

print('L == 1000')
print('   ', test_criteria_for_specific_distortion(mono_criteria_1_1, texts_1000)())

print('L == 10000')
print('   ', test_criteria_for_specific_distortion(mono_criteria_1_1, texts_10000)())

L == 10
    0.1471
L == 100
    0.7924
L == 1000
    1.0
L == 10000
    1.0


## Monogram

In [155]:
print('L == 10')
test_criteria(mono_criteria_1_1, text_10_mono)()

print('L == 100')
test_criteria(mono_criteria_1_1, text_100_mono)()

print('L == 1000')
test_criteria(mono_criteria_1_1, text_1000_mono)()

print('L == 10000')
test_criteria(mono_criteria_1_1, text_10000_mono)()

L == 10
    viginere_1 0.3274
    viginere_5 0.21340000000000003
    viginere_10 0.1372
    affine 0.1936
    random 0.2643
    reccurent 0.28869999999999996
L == 100
    viginere_1 0.0
    viginere_5 0.0
    viginere_10 0.0
    affine 0.0
    random 0.0
    reccurent 0.014399999999999968
L == 1000
    viginere_1 0.0
    viginere_5 0.0
    viginere_10 0.0
    affine 0.0
    random 0.0
    reccurent 0.015800000000000036
L == 10000
    viginere_1 0.0
    viginere_5 0.0
    viginere_10 0.0
    affine 0.0
    random 0.0
    reccurent 0.02400000000000002


In [189]:
print('L == 10')
print('   ', test_criteria_for_specific_distortion(bi_criteria_1_1, texts_10)())

print('L == 100')
print('   ', test_criteria_for_specific_distortion(bi_criteria_1_1, texts_100)())

print('L == 1000')
print('   ', test_criteria_for_specific_distortion(bi_criteria_1_1, texts_1000)())

print('L == 10000')
print('   ', test_criteria_for_specific_distortion(bi_criteria_1_1, texts_10000)())

L == 10
    0.017199999999999993
L == 100
    0.1674
L == 1000
    0.8274
L == 10000
    1.0


## Bigram

In [156]:
print('L == 10')
test_criteria(bi_criteria_1_1, text_10_bi)()

print('L == 100')
test_criteria(bi_criteria_1_1, text_100_bi)()

print('L == 1000')
test_criteria(bi_criteria_1_1, text_1000_bi)()

print('L == 10000')
test_criteria(bi_criteria_1_1, text_10000_bi)()

L == 10
    viginere_1 0.16810000000000003
    viginere_5 0.09540000000000004
    viginere_10 0.10040000000000004
    affine 0.13650000000000007
    random 0.006399999999999961
    reccurent 0.10030000000000006
L == 100
    viginere_1 0.0
    viginere_5 0.0
    viginere_10 0.0
    affine 0.0
    random 0.0
    reccurent 0.00019999999999997797
L == 1000
    viginere_1 0.0
    viginere_5 0.0
    viginere_10 0.0
    affine 0.0
    random 0.0
    reccurent 0.00019999999999997797
L == 10000
    viginere_1 0.0
    viginere_5 0.0
    viginere_10 0.0
    affine 0.0
    random 0.0
    reccurent 0.0010000000000000009


# Criteria 1.1

## Real monograms

In [157]:
print('L == 10')
print('   ', test_criteria_for_specific_distortion(mono_criteria_1_1, texts_10)(1))

print('L == 100')
print('   ', test_criteria_for_specific_distortion(mono_criteria_1_1, texts_100)(3))

print('L == 1000')
print('   ', test_criteria_for_specific_distortion(mono_criteria_1_1, texts_1000)(4))

print('L == 10000')
print('   ', test_criteria_for_specific_distortion(mono_criteria_1_1, texts_10000)(4))

L == 10
    0.1471
L == 100
    0.08230000000000004
L == 1000
    0.5065
L == 10000
    0.997


## Distorted monogram

In [158]:
print('L == 10')
test_criteria(mono_criteria_1_1, text_10_mono)(1)

print('L == 100')
test_criteria(mono_criteria_1_1, text_100_mono)(3)

print('L == 1000')
test_criteria(mono_criteria_1_1, text_1000_mono)(4)

print('L == 10000')
test_criteria(mono_criteria_1_1, text_10000_mono)(4)

L == 10
    viginere_1 0.3274
    viginere_5 0.21340000000000003
    viginere_10 0.1372
    affine 0.1936
    random 0.2643
    reccurent 0.28869999999999996
L == 100
    viginere_1 0.18300000000000005
    viginere_5 0.011199999999999988
    viginere_10 0.0017000000000000348
    affine 0.013499999999999956
    random 0.008700000000000041
    reccurent 0.5734
L == 1000
    viginere_1 0.4536
    viginere_5 0.0
    viginere_10 0.0
    affine 0.0008000000000000229
    random 0.0
    reccurent 0.9059
L == 10000
    viginere_1 0.0030000000000000027
    viginere_5 0.0
    viginere_10 0.0
    affine 0.0
    random 0.0
    reccurent 0.898


## Real bigram

In [159]:
print('L == 10')
print('   ', test_criteria_for_specific_distortion(bi_criteria_1_1, texts_10)(1))

print('L == 100')
print('   ', test_criteria_for_specific_distortion(bi_criteria_1_1, texts_100)(10))

print('L == 1000')
print('   ', test_criteria_for_specific_distortion(bi_criteria_1_1, texts_1000)(50))

print('L == 10000')
print('   ', test_criteria_for_specific_distortion(bi_criteria_1_1, texts_10000)(100))

L == 10
    0.017199999999999993
L == 100
    0.0
L == 1000
    0.0
L == 10000
    0.0


## Distorted bigram

In [160]:
print('L == 10')
test_criteria(bi_criteria_1_1, text_10_bi)(1)

print('L == 100')
test_criteria(bi_criteria_1_1, text_100_bi)(10)

print('L == 1000')
test_criteria(bi_criteria_1_1, text_1000_bi)(50)

print('L == 10000')
test_criteria(bi_criteria_1_1, text_10000_bi)(100)

L == 10
    viginere_1 0.16810000000000003
    viginere_5 0.09540000000000004
    viginere_10 0.10040000000000004
    affine 0.13650000000000007
    random 0.006399999999999961
    reccurent 0.10030000000000006
L == 100
    viginere_1 0.01849999999999996
    viginere_5 0.00019999999999997797
    viginere_10 0.00029999999999996696
    affine 0.0031999999999999806
    random 0.0
    reccurent 0.007900000000000018
L == 1000
    viginere_1 0.0
    viginere_5 0.0
    viginere_10 0.0
    affine 0.0
    random 0.0
    reccurent 0.015900000000000025
L == 10000
    viginere_1 0.0
    viginere_5 0.0
    viginere_10 0.0
    affine 0.0
    random 0.0
    reccurent 0.06899999999999995


# Criteria 1.2

## Real monogram

In [161]:
print('L == 10')
print('   ', test_criteria_for_specific_distortion(mono_prohibited_frequencies_1, texts_10)(0.1))

print('L == 100')
print('   ', test_criteria_for_specific_distortion(mono_prohibited_frequencies_1, texts_100)(0.025))

print('L == 1000')
print('   ', test_criteria_for_specific_distortion(mono_prohibited_frequencies_1, texts_1000)(0.015))

print('L == 10000')
print('   ', test_criteria_for_specific_distortion(mono_prohibited_frequencies_1, texts_10000)(0.008))

L == 10
    0.1471
L == 100
    0.05259999999999998
L == 1000
    0.011399999999999966
L == 10000
    0.05800000000000005


## Distorted monogram

In [162]:
print('L == 10')
test_criteria(mono_prohibited_frequencies_1, text_10_mono)(0.1)

print('L == 100')
test_criteria(mono_prohibited_frequencies_1, text_100_mono)(0.025)

print('L == 1000')
test_criteria(mono_prohibited_frequencies_1, text_1000_mono)(0.015)

print('L == 10000')
test_criteria(mono_prohibited_frequencies_1, text_10000_mono)(0.008)

L == 10
    viginere_1 0.3274
    viginere_5 0.21340000000000003
    viginere_10 0.1372
    affine 0.1936
    random 0.2643
    reccurent 0.28869999999999996
L == 100
    viginere_1 0.03759999999999997
    viginere_5 0.0044999999999999485
    viginere_10 0.00019999999999997797
    affine 0.0024999999999999467
    random 0.01880000000000004
    reccurent 0.014399999999999968
L == 1000
    viginere_1 0.0
    viginere_5 0.0
    viginere_10 0.0
    affine 0.0
    random 0.0
    reccurent 0.015800000000000036
L == 10000
    viginere_1 0.0
    viginere_5 0.0
    viginere_10 0.0
    affine 0.0
    random 0.0
    reccurent 0.02400000000000002


## Real bigram

In [171]:
print('L == 10')
print('   ', test_criteria_for_specific_distortion(bi_prohibited_frequencies_1, texts_10)(0.1))

print('L == 100')
print('   ', test_criteria_for_specific_distortion(bi_prohibited_frequencies_1, texts_100)(0.0125))

print('L == 1000')
print('   ', test_criteria_for_specific_distortion(bi_prohibited_frequencies_1, texts_1000)(0.0035))

print('L == 10000')
print('   ', test_criteria_for_specific_distortion(bi_prohibited_frequencies_1, texts_10000)(0.001))

L == 10
    0.017199999999999993
L == 100
    0.0020999999999999908
L == 1000
    0.0020999999999999908
L == 10000
    0.01200000000000001


## Distorted bigram

In [194]:
print('L == 10')
test_criteria(bi_prohibited_frequencies_1, text_10_bi)(0.1)

print('L == 100')
test_criteria(bi_prohibited_frequencies_1, text_100_bi)(0.0125)

print('L == 1000')
test_criteria(bi_prohibited_frequencies_1, text_1000_bi)(0.0035)

print('L == 10000')
test_criteria(bi_prohibited_frequencies_1, text_10000_bi)(0.001)

L == 10
    viginere_1 0.16810000000000003
    viginere_5 0.09540000000000004
    viginere_10 0.10040000000000004
    affine 0.13650000000000007
    random 0.9612
    reccurent 0.10030000000000006
L == 100
    viginere_1 0.12680000000000002
    viginere_5 0.2379
    viginere_10 0.31590000000000007
    affine 0.1511
    random 0.7696
    reccurent 0.242
L == 1000
    viginere_1 0.0
    viginere_5 9.999999999998899e-05
    viginere_10 0.0014999999999999458
    affine 0.0
    random 0.3582000000000001
    reccurent 0.0017000000000000348
L == 10000
    viginere_1 0.0
    viginere_5 0.0
    viginere_10 0.0
    affine 0.0
    random 0.0
    reccurent 0.0010000000000000009


# Criteria 1.3

## Real monogram

In [173]:
print('L == 10')
print('   ', test_criteria_for_specific_distortion(mono_prohibited_frequencies_2, texts_10)(0.1))

print('L == 100')
print('   ', test_criteria_for_specific_distortion(mono_prohibited_frequencies_2, texts_100)(0.05))

print('L == 1000')
print('   ', test_criteria_for_specific_distortion(mono_prohibited_frequencies_2, texts_1000)(0.025))

print('L == 10000')
print('   ', test_criteria_for_specific_distortion(mono_prohibited_frequencies_2, texts_10000)(0.02))

L == 10
    0.00990000000000002
L == 100
    0.008199999999999985
L == 1000
    0.036900000000000044
L == 10000
    0.014000000000000012


## Distorted monogram

In [174]:
print('L == 10')
test_criteria(mono_prohibited_frequencies_2, text_10_mono)(0.1)

print('L == 100')
test_criteria(mono_prohibited_frequencies_2, text_100_mono)(0.05)

print('L == 1000')
test_criteria(mono_prohibited_frequencies_2, text_1000_mono)(0.025)

print('L == 10000')
test_criteria(mono_prohibited_frequencies_2, text_10000_mono)(0.02)

L == 10
    viginere_1 0.777
    viginere_5 0.578
    viginere_10 0.43209999999999993
    affine 0.5758
    random 0.6367
    reccurent 0.6177
L == 100
    viginere_1 0.06600000000000006
    viginere_5 0.0029000000000000137
    viginere_10 0.0
    affine 0.0030000000000000027
    random 0.009099999999999997
    reccurent 0.07689999999999997
L == 1000
    viginere_1 0.0
    viginere_5 0.0
    viginere_10 0.0
    affine 0.0
    random 0.0
    reccurent 0.015800000000000036
L == 10000
    viginere_1 0.0
    viginere_5 0.0
    viginere_10 0.0
    affine 0.0
    random 0.0
    reccurent 0.02400000000000002


## Real Bigram

In [175]:
print('L == 10')
print('   ', test_criteria_for_specific_distortion(bi_prohibited_frequencies_2, texts_10)(0.075))

print('L == 100')
print('   ', test_criteria_for_specific_distortion(bi_prohibited_frequencies_2, texts_100)(0.025))

print('L == 1000')
print('   ', test_criteria_for_specific_distortion(bi_prohibited_frequencies_2, texts_1000)(0.015))

print('L == 10000')
print('   ', test_criteria_for_specific_distortion(bi_prohibited_frequencies_2, texts_10000)(0.008))

L == 10
    0.017199999999999993
L == 100
    0.0022999999999999687
L == 1000
    0.0
L == 10000
    0.0


## Distorted Bigram

In [195]:
print('L == 10')
test_criteria(bi_prohibited_frequencies_2, text_10_bi)(0.075)

print('L == 100')
test_criteria(bi_prohibited_frequencies_2, text_100_bi)(0.025)

print('L == 1000')
test_criteria(bi_prohibited_frequencies_2, text_1000_bi)(0.015)

print('L == 10000')
test_criteria(bi_prohibited_frequencies_2, text_10000_bi)(0.008)

L == 10
    viginere_1 0.16810000000000003
    viginere_5 0.09540000000000004
    viginere_10 0.10040000000000004
    affine 0.13650000000000007
    random 0.04300000000000004
    reccurent 0.10030000000000006
L == 100
    viginere_1 0.0
    viginere_5 0.0
    viginere_10 0.0
    affine 0.0
    random 0.0
    reccurent 0.0006000000000000449
L == 1000
    viginere_1 0.0
    viginere_5 0.0
    viginere_10 0.0
    affine 0.0
    random 0.0
    reccurent 0.00019999999999997797
L == 10000
    viginere_1 0.0
    viginere_5 0.0
    viginere_10 0.0
    affine 0.0
    random 0.0
    reccurent 0.0010000000000000009


# Criteria 3.0

## Real monogram

In [177]:
print('L == 10')
print('   ', test_criteria_for_specific_distortion(mono_criteria_3_0, texts_10)(1.95))

print('L == 100')
print('   ', test_criteria_for_specific_distortion(mono_criteria_3_0, texts_100)(0.5))

print('L == 1000')
print('   ', test_criteria_for_specific_distortion(mono_criteria_3_0, texts_1000)(0.08))

print('L == 10000')
print('   ', test_criteria_for_specific_distortion(mono_criteria_3_0, texts_10000)(0.025))

L == 10
    0.043300000000000005
L == 100
    0.008600000000000052
L == 1000
    0.06369999999999998
L == 10000
    0.039000000000000035


## Distorted monogram

In [178]:
print('L == 10')
test_criteria(mono_criteria_3_0, text_10_mono)(1.95)

print('L == 100')
test_criteria(mono_criteria_3_0, text_100_mono)(0.5)

print('L == 1000')
test_criteria(mono_criteria_3_0, text_1000_mono)(0.08)

print('L == 10000')
test_criteria(mono_criteria_3_0, text_10000_mono)(0.025)

L == 10
    viginere_1 0.9567
    viginere_5 0.9787
    viginere_10 0.9731
    affine 0.9567
    random 0.982
    reccurent 0.9437
L == 100
    viginere_1 0.9914
    viginere_5 1.0
    viginere_10 1.0
    affine 0.9914
    random 1.0
    reccurent 0.7616
L == 1000
    viginere_1 0.9363
    viginere_5 0.0
    viginere_10 0.0
    affine 0.9363
    random 0.0
    reccurent 0.0
L == 10000
    viginere_1 0.961
    viginere_5 0.0
    viginere_10 0.0
    affine 0.961
    random 0.0
    reccurent 0.0


## Real bigram

In [192]:
print('L == 10')
print('   ', test_criteria_for_specific_distortion(bi_criteria_3_0, texts_10)(2))

print('L == 100')
print('   ', test_criteria_for_specific_distortion(bi_criteria_3_0, texts_100)(2))

print('L == 1000')
print('   ', test_criteria_for_specific_distortion(bi_criteria_3_0, texts_1000)(0.275))

print('L == 10000')
print('   ', test_criteria_for_specific_distortion(bi_criteria_3_0, texts_10000)(0.075))

L == 10
    1.0
L == 100
    0.0
L == 1000
    0.4899
L == 10000
    0.03600000000000003


## Distorted bigram

In [193]:
print('L == 10')
test_criteria(bi_criteria_3_0, text_10_bi)(2)

print('L == 100')
test_criteria(bi_criteria_3_0, text_100_bi)(2)

print('L == 1000')
test_criteria(bi_criteria_3_0, text_1000_bi)(0.275)

print('L == 10000')
test_criteria(bi_criteria_1_1, text_10000_bi)(0.075)

L == 10
    viginere_1 0.0
    viginere_5 0.0
    viginere_10 0.0
    affine 0.0
    random 0.0
    reccurent 0.0
L == 100
    viginere_1 1.0
    viginere_5 1.0
    viginere_10 1.0
    affine 1.0
    random 1.0
    reccurent 0.9996
L == 1000
    viginere_1 1.0
    viginere_5 0.7463
    viginere_10 0.0050000000000000044
    affine 1.0
    random 0.0
    reccurent 0.6483
L == 10000
    viginere_1 0.0
    viginere_5 0.0
    viginere_10 0.0
    affine 0.0
    random 0.0
    reccurent 0.0010000000000000009


# Criteria 5.1

## Real monogram

In [181]:
print('L == 10')
print('   ', test_criteria_for_specific_distortion(mono_empty_boxes, texts_10)(7, most_common_monograms))

print('L == 100')
print('   ', test_criteria_for_specific_distortion(mono_empty_boxes, texts_100)(0, most_common_monograms))

print('L == 1000')
print('   ', test_criteria_for_specific_distortion(mono_empty_boxes, texts_1000)(0, most_common_monograms))

print('L == 10000')
print('   ', test_criteria_for_specific_distortion(mono_empty_boxes, texts_10000)(0, most_common_monograms))

L == 10
    0.027900000000000036
L == 100
    0.038799999999999946
L == 1000
    0.0
L == 10000
    0.0


## Distorted Monogram

In [182]:
print('L == 10')
test_criteria(mono_empty_boxes, text_10_mono)(7, most_common_monograms)

print('L == 100')
test_criteria(mono_empty_boxes, text_100_mono)(0, most_common_monograms)

print('L == 1000')
test_criteria(mono_empty_boxes, text_1000_mono)(0, most_common_monograms)

print('L == 10000')
test_criteria(mono_empty_boxes, text_10000_mono)(0, most_common_monograms)

L == 10
    viginere_1 0.6617
    viginere_5 0.7166
    viginere_10 0.472
    affine 0.3595999999999999
    random 0.558
    reccurent 0.502
L == 100
    viginere_1 0.12480000000000002
    viginere_5 0.6978
    viginere_10 0.3435
    affine 0.05059999999999998
    random 0.6459
    reccurent 0.0
L == 1000
    viginere_1 0.9903
    viginere_5 1.0
    viginere_10 1.0
    affine 0.9848
    random 1.0
    reccurent 0.0
L == 10000
    viginere_1 1.0
    viginere_5 1.0
    viginere_10 1.0
    affine 1.0
    random 1.0
    reccurent 0.0


## Real bigram

In [183]:
print('L == 10')
print('   ', test_criteria_for_specific_distortion(bi_empty_boxes, texts_10)(47, most_common_bigrams_50))

print('L == 100')
print('   ', test_criteria_for_specific_distortion(bi_empty_boxes, texts_100)(30, most_common_bigrams_50))

print('L == 1000')
print('   ', test_criteria_for_specific_distortion(bi_empty_boxes, texts_1000)(10, most_common_bigrams_100))

print('L == 10000')
print('   ', test_criteria_for_specific_distortion(bi_empty_boxes, texts_10000)(0, most_common_bigrams_200))


L == 10
    0.3296
L == 100
    0.05569999999999997
L == 1000
    0.0
L == 10000
    0.009000000000000008


## Distorted Bigram

In [184]:
print('L == 10')
test_criteria(bi_empty_boxes, text_10_bi)(47, most_common_bigrams_50)

print('L == 100')
test_criteria(bi_empty_boxes, text_100_bi)(30, most_common_bigrams_50)

print('L == 1000')
test_criteria(bi_empty_boxes, text_1000_bi)(10, most_common_bigrams_100)

print('L == 10000')
test_criteria(bi_empty_boxes, text_10000_bi)(0, most_common_bigrams_200)

L == 10
    viginere_1 0.03649999999999998
    viginere_5 0.012399999999999967
    viginere_10 0.007499999999999951
    affine 0.016900000000000026
    random 0.07120000000000004
    reccurent 0.013000000000000012
L == 100
    viginere_1 0.0
    viginere_5 0.0
    viginere_10 0.0
    affine 0.0
    random 0.00039999999999995595
    reccurent 0.0
L == 1000
    viginere_1 0.0
    viginere_5 0.0
    viginere_10 0.0
    affine 0.0
    random 0.14539999999999997
    reccurent 0.0
L == 10000
    viginere_1 0.0
    viginere_5 0.05300000000000005
    viginere_10 0.613
    affine 0.0
    random 1.0
    reccurent 0.0


# Structural Criteria

## Real monogram

In [185]:
print('L == 10')
print('   ', test_criteria_for_specific_distortion(structure_criteria, texts_10)(0.001))

print('L == 100')
print('   ', test_criteria_for_specific_distortion(structure_criteria, texts_100)(0.025))

print('L == 1000')
print('   ', test_criteria_for_specific_distortion(structure_criteria, texts_1000)(0.15))

print('L == 10000')
print('   ', test_criteria_for_specific_distortion(structure_criteria, texts_10000)(0.25))

L == 10
    0.3791
L == 100
    0.06679999999999997
L == 1000
    0.0
L == 10000
    0.0


## Distorted monogram

In [186]:
print('L == 10')
test_criteria(structure_criteria, text_10_mono)(0.001)

print('L == 100')
test_criteria(structure_criteria, text_100_mono)(0.025)

print('L == 1000')
test_criteria(structure_criteria, text_1000_mono)(0.15)

print('L == 10000')
test_criteria(structure_criteria, text_10000_mono)(0.25)

L == 10
    viginere_1 0.6638
    viginere_5 0.5849
    viginere_10 0.6391
    affine 0.6427
    random 0.6171
    reccurent 0.6281
L == 100
    viginere_1 0.9269
    viginere_5 0.15659999999999996
    viginere_10 0.12009999999999998
    affine 0.9086
    random 0.046499999999999986
    reccurent 1.0
L == 1000
    viginere_1 1.0
    viginere_5 0.362
    viginere_10 0.0
    affine 0.9999
    random 0.0
    reccurent 1.0
L == 10000
    viginere_1 1.0
    viginere_5 0.0
    viginere_10 0.0
    affine 1.0
    random 0.0
    reccurent 1.0


## Distorted bigram

In [187]:
print('L == 10')
test_criteria(structure_criteria, text_10_bi)(0.001)

print('L == 100')
test_criteria(structure_criteria, text_100_bi)(0.025)

print('L == 1000')
test_criteria(structure_criteria, text_1000_bi)(0.15)

print('L == 10000')
test_criteria(structure_criteria, text_10000_bi)(0.25)

L == 10
    viginere_1 0.6304
    viginere_5 0.6037
    viginere_10 0.6234
    affine 0.6109
    random 0.8172
    reccurent 0.6094
L == 100
    viginere_1 0.6399
    viginere_5 0.0776
    viginere_10 0.0534
    affine 0.24839999999999995
    random 0.016800000000000037
    reccurent 0.19840000000000002
L == 1000
    viginere_1 0.2997000000000001
    viginere_5 0.0
    viginere_10 0.0
    affine 0.007600000000000051
    random 0.0
    reccurent 0.062000000000000055
L == 10000
    viginere_1 0.985
    viginere_5 0.0
    viginere_10 0.0
    affine 0.0050000000000000044
    random 0.0
    reccurent 1.0


# Definitely not random text

In [188]:
text = ''

for i in range(10000):
	text += alphabet_str[i % alphabet_length]

print('   ', mono_prohibited_frequencies_2(text, 0.008))
print('   ', bi_prohibited_frequencies_2(text, 0.02))

    0
    0
