# Imports

In [1]:
import re
import numpy as np
import math
import zlib


# Read text

In [2]:
def read_text(path):
    lines = ''

    with open(path, 'r', encoding='utf-8') as file:
        lines = file.read()

    return lines

def write_text(path, lines):
    with open(path, 'w', encoding='utf-8') as file:
        file.write(lines)


path = '.\\texts\\roksolana.txt'
# path = '.\\texts\\tiger_catchers.txt'
text = read_text(path).lower()

formatted_text = re.sub(u'[^а-яіґєї]|э|ы|ъ', '', text)
formatted_text = re.sub(u'ґ', 'г', formatted_text)

text_len = len(formatted_text)

write_text('.\\texts\\formatted_text.txt', formatted_text)

# Gathering monograms and bigrams statistics

In [3]:
alphabet_str = 'абвгдеєжзиіїйклмнопрстуфхцчшщьюя'
alphabet_length = len(alphabet_str)

alphabet_statistics = {}
alphabet_frequencies = {}

for letter in alphabet_str:
    alphabet_statistics[letter] = 0

for letter in formatted_text:
    alphabet_statistics[letter] += 1

for letter in alphabet_statistics.keys():
    alphabet_frequencies[letter] = alphabet_statistics[letter] / text_len

monogram_to_number = {}
for i in range(alphabet_length):
	monogram_to_number[alphabet_str[i]] = i


In [4]:
all_bigrams = []

bigrams_statistics = {}
bigrams_frequencies = {}

for first in alphabet_str:
    for second in alphabet_str:
        bigram = first + second

        all_bigrams.append(bigram)
        bigrams_statistics[bigram] = 0
        bigrams_frequencies[bigram] = 0

for index in range(text_len - 1):
    bigrams_statistics[formatted_text[index] + formatted_text[index + 1]] += 1

for bigram in bigrams_frequencies.keys():
    bigrams_frequencies[bigram] = bigrams_statistics[bigram] / (text_len - 1)

all_bigrams_length = len(all_bigrams)

bigram_to_number = {}
for i in range(all_bigrams_length):
	bigram_to_number[all_bigrams[i]] = i


## Entropy and Coincidence Index

In [5]:
monogram_entropy = -1 * sum([alphabet_frequencies[letter] * np.log2(alphabet_frequencies[letter]) for letter in alphabet_str])
bigram_entropy = -0.5 * sum([bigrams_frequencies[bigram] * np.log2(bigrams_frequencies[bigram]) if bigrams_frequencies[bigram] != 0 else 0 for bigram in all_bigrams])

coincidence_index = sum([
	alphabet_statistics[letter] * (alphabet_statistics[letter] - 1)
for letter in alphabet_str]) / text_len / (text_len - 1)


## Generating texts

In [6]:
texts_count = 10000

def generate_texts(text_size, texts_count, step):
	text_array = []

	index = 0
	for n in range(texts_count):
		text_array.append(formatted_text[index:(index + text_size)])
		index += step

	return text_array

texts_10 = generate_texts(10, 10000, 10)
texts_100 = generate_texts(100, 10000, 100)
texts_1000 = generate_texts(1000, 10000, 100)
texts_10000 = generate_texts(10000, 1000, 1000)


# Distortion algorithms

## Vigenere cipher

In [7]:
def mono_vigenere_cipher(text, key):
    key_length = len(key)

    ciphered_text = ''
    for index in range(len(text)):
        key_index = index % key_length
        ciphered_text += alphabet_str[(monogram_to_number[text[index]] + key[key_index]) % alphabet_length]

    return ciphered_text

def bi_vigenere_cipher(text, key):
    key_length = len(key)

    ciphered_text = ''

    for index in range(0, len(text), 2):
        key_index = (index // 2) % key_length
        ciphered_text += all_bigrams[(bigram_to_number[text[index:index+2]] + key[key_index]) % all_bigrams_length]

    return ciphered_text


## Affine cipher

In [8]:
def mono_affine_cipher(text, key):
	a, b = key
	ciphered_text = ''

	for letter in text:
		ciphered_letter_index = (a * monogram_to_number[letter] + b) % alphabet_length

		ciphered_text += alphabet_str[ciphered_letter_index]

	return ciphered_text


def bi_affine_cipher(text, key):
	a, b = key
	ciphered_text = ''

	for bigram in re.findall(r'..', text):
		ciphered_letter_index = (a * bigram_to_number[bigram] + b) % (all_bigrams_length)

		ciphered_text += all_bigrams[ciphered_letter_index]

	return ciphered_text


## Uniform distortion

In [9]:
def mono_uniform_distortion(size):
    return ''.join([alphabet_str[elem] for elem in np.random.randint(low=0, high=alphabet_length, size=size)])

def bi_uniform_distortion(size):
    return ''.join([all_bigrams[elem] for elem in np.random.randint(low=0, high=all_bigrams_length, size=size)])


## Recurrent sequence

In [10]:
def generate_random_lgram(high = alphabet_length, size = 1):
	return np.random.randint(low = 0, high = high, size = size)

def mono_recurrent_sequence(size):
	s_0, s_1 = generate_random_lgram(alphabet_length, 2)
	recurrent_sequence = alphabet_str[s_0] + alphabet_str[s_1]
	
	for i in range(2, size):
		prev_letter_index = monogram_to_number[recurrent_sequence[i - 1]]
		prev_prev_letter_index = monogram_to_number[recurrent_sequence[i - 2]]
		next_letter_index = (prev_letter_index + prev_prev_letter_index) % alphabet_length

		recurrent_sequence += alphabet_str[next_letter_index]

	return recurrent_sequence

def bi_recurrent_sequence(size):
	s_0, s_1 = generate_random_lgram(all_bigrams_length, 2)
	recurrent_sequence = all_bigrams[s_0] + all_bigrams[s_1]
	
	for i in range(2, size // 2):
		prev_letter_index = bigram_to_number[recurrent_sequence[-2:]]
		prev_prev_letter_index = bigram_to_number[recurrent_sequence[-4:-2]]

		next_bigram_index = (prev_letter_index + prev_prev_letter_index) % (all_bigrams_length)

		recurrent_sequence += all_bigrams[next_bigram_index]

	return recurrent_sequence


# Criterias

In [11]:
def get_prohibited_grams(grams, quartile):
	entries = grams.items()
	return dict(sorted(entries, key = lambda tuple: tuple[1], reverse=True)[math.floor(quartile * len(entries)):])

prohibited_monograms = get_prohibited_grams(alphabet_statistics, 0.9)
prohibited_monograms_keys = list(prohibited_monograms.keys())

prohibited_bigrams = get_prohibited_grams(bigrams_statistics, 0.75)
prohibited_bigrams_keys = list(prohibited_bigrams.keys())


### Criteria 1.1 (criteria 1.0 in parameters)

In [12]:
def mono_criteria_1_1(text, k_prohibited = 1):
    a_ap = set(text)

    k_counter = 0

    for elem in a_ap:
        if elem in prohibited_monograms_keys:
            k_counter += 1

    if (k_counter >= k_prohibited):
        return 0
    else:
        return 1 

def bi_criteria_1_1(text, k_prohibited = 1):
    bigrams_text = [text[i:i+2] for i in range(len(text) - 1)]
    a_ap = set(bigrams_text)

    k_counter = 0

    for elem in a_ap:
        if elem in prohibited_bigrams_keys:
            k_counter += 1

    if (k_counter >= k_prohibited):
        return 0
    else:
        return 1


### Criteria 1.2

In [13]:
def mono_prohibited_frequencies_1(text, limit):
	text_length = len(text)
	prohibited_frequencies = {}

	for monogram in prohibited_monograms_keys:
		prohibited_frequencies[monogram] = 0

	for letter in text:
		if letter in prohibited_monograms_keys:
			prohibited_frequencies[letter] += 1

	for monogram in prohibited_monograms_keys:
		prohibited_frequencies[monogram] /= text_length

		if (prohibited_frequencies[monogram] >= limit):
			return 0
		
	return 1

def bi_prohibited_frequencies_1(text, limit):
	text_length = len(text)
	prohibited_frequencies = {}

	for i in range(text_length - 1):
		bigram = text[i:i + 2]

		if bigram in prohibited_bigrams_keys:
			try:
				prohibited_frequencies[bigram] += 1
			except KeyError:
				prohibited_frequencies[bigram] = 1

	for bigram in prohibited_bigrams_keys:
		prohibited_frequencies[bigram] /= (text_length - 1)

		if (prohibited_frequencies[bigram] >= limit):
			return 0
		
	return 1


### Criteria 1.3

In [14]:
def mono_prohibited_frequencies_2(text, limit):
	text_length = len(text)
	prohibited_frequencies = {}

	for monogram in prohibited_monograms_keys:
		prohibited_frequencies[monogram] = 0

	for letter in text:
		if letter in prohibited_monograms_keys:
			prohibited_frequencies[letter] += 1

	freq_sum = sum(prohibited_frequencies.values()) / text_length

	if freq_sum > limit:
		return 0
		
	return 1

def bi_prohibited_frequencies_2(text, limit):
	text_length = len(text)
	prohibited_frequencies = {}

	for i in range(text_length - 1):
		bigram = text[i:i + 2]

		if bigram in prohibited_bigrams_keys:
			try:
				prohibited_frequencies[bigram] += 1
			except KeyError:
				prohibited_frequencies[bigram] = 1

	freq_sum = sum(prohibited_frequencies.values()) / (text_length - 1)

	if freq_sum > limit:
		return 0
		
	return 1

kek122121 = 5

try:
    kek122121 += 1
except NameError:
    kek122121 = 2


### Criteria 3.0

In [15]:
def get_monogram_distribution(text):
    text_length = len(text)
    stats = {}

    for letter in alphabet_str:
        stats[letter] = 0
    for elem in text:
        stats[elem] += 1
    for key in stats.keys():
        stats[key] /= text_length
    return stats

def get_bigram_distribution(text):
    text_length = len(text)
    stats = {}

    for bigram in all_bigrams:
        stats[bigram] = 0
            
    for i in range(len(text) - 1):
        stats[text[i] + text[i + 1]] += 1

    for key in stats.keys():
        stats[key] /= (text_length - 1)

    return stats

def get_specific_entropy(frequencies, l):
    specific_entropy = 0

    for frequency in frequencies.keys():
        if frequencies[frequency] != 0:
            specific_entropy -= frequencies[frequency] * np.log2(frequencies[frequency]) / l

    return specific_entropy

def mono_criteria_3_0(text, limit):
    text_specific_entropy = get_specific_entropy(get_monogram_distribution(text), 1)

    result = abs(monogram_entropy - text_specific_entropy)

    if result > limit:
        return 0
    else:
        return 1

def bi_criteria_3_0(text, limit):
    text_specific_entropy = get_specific_entropy(get_bigram_distribution(text), 2)

    result = abs(bigram_entropy - text_specific_entropy)

    if result > limit:
        return 0
    else:
        return 1


### Criteria 5.0

In [16]:
def get_most_common_grams(grams, count):
	entries = grams.items()
	sorted_entries = sorted(entries, key = lambda tuple: tuple[1], reverse=True)[:count]
	
	return list(map(lambda item: item[0], sorted_entries))


In [86]:
most_common_monograms = get_most_common_grams(alphabet_statistics, 10)
most_common_bigrams_50 = get_most_common_grams(bigrams_statistics, 50)
most_common_bigrams_100 = get_most_common_grams(bigrams_statistics, 100)
most_common_bigrams_200 = get_most_common_grams(bigrams_statistics, 200)

def mono_empty_boxes(text, limit, most_common_monograms):
	most_common_monograms_in_text = {}

	for monogram in most_common_monograms:
		most_common_monograms_in_text[monogram] = 0

	for letter in text:
		if letter in most_common_monograms:
			most_common_monograms_in_text[letter] += 1

	if len(list(filter(lambda monogram: True if monogram[1] == 0 else False, most_common_monograms_in_text.items()))) > limit:
		return 0

	return 1

def bi_empty_boxes(text, limit, most_common_bigrams):
	most_common_bigrams_in_text = {}

	for bigram in most_common_bigrams:
		most_common_bigrams_in_text[bigram] = 0

	for i in range(len(text) - 1):
		bigram = text[i:i + 2]

		if bigram in most_common_bigrams:
			most_common_bigrams_in_text[bigram] += 1

	if len(list(filter(lambda bigram: True if bigram[1] == 0 else False, most_common_bigrams_in_text.items()))) > limit:
		return 0

	return 1


### Structure criteria

In [18]:
def structure_criteria(text, limit = 0.25):
	text_len = len(text)
	random_text = mono_uniform_distortion(text)

	random_coef = text_len / len(zlib.compress(random_text.encode('utf-8')))
	real_coef = text_len / len(zlib.compress(text.encode('utf-8')))

	if abs(random_coef - real_coef) < limit:
		return 0

	return 1


# Distortion texts

## Monograms

In [19]:
viginere_key_1 = generate_random_lgram(alphabet_length)
viginere_key_5 = generate_random_lgram(alphabet_length, 5)
viginere_key_10 = generate_random_lgram(alphabet_length, 10)

affine_key = generate_random_lgram(alphabet_length, 2)

while affine_key[0] % 2 != 1:
	affine_key = generate_random_lgram(alphabet_length, 2)

text_10_mono = {
	'viginere_1': [mono_vigenere_cipher(text, viginere_key_1) for text in texts_10],
	'viginere_5': [mono_vigenere_cipher(text, viginere_key_5) for text in texts_10],
	'viginere_10': [mono_vigenere_cipher(text, viginere_key_10) for text in texts_10],
	'affine': [mono_affine_cipher(text, affine_key) for text in texts_10],
	'random': [mono_uniform_distortion(10) for i in range(len(texts_10))],
	'reccurent': [mono_recurrent_sequence(10) for i in range(len(texts_10))]
}

text_100_mono = {
	'viginere_1': [mono_vigenere_cipher(text, viginere_key_1) for text in texts_100],
	'viginere_5': [mono_vigenere_cipher(text, viginere_key_5) for text in texts_100],
	'viginere_10': [mono_vigenere_cipher(text, viginere_key_10) for text in texts_100],
	'affine': [mono_affine_cipher(text, affine_key) for text in texts_100],
	'random': [mono_uniform_distortion(100) for i in range(len(texts_100))],
	'reccurent': [mono_recurrent_sequence(100) for i in range(len(texts_100))]
}

text_1000_mono = {
	'viginere_1': [mono_vigenere_cipher(text, viginere_key_1) for text in texts_1000],
	'viginere_5': [mono_vigenere_cipher(text, viginere_key_5) for text in texts_10000],
	'viginere_10': [mono_vigenere_cipher(text, viginere_key_10) for text in texts_1000],
	'affine': [mono_affine_cipher(text, affine_key) for text in texts_1000],
	'random': [mono_uniform_distortion(1000) for i in range(len(texts_1000))],
	'reccurent': [mono_recurrent_sequence(1000) for i in range(len(texts_1000))]
}

text_10000_mono = {
	'viginere_1': [mono_vigenere_cipher(text, viginere_key_1) for text in texts_10000],
	'viginere_5': [mono_vigenere_cipher(text, viginere_key_5) for text in texts_10000],
	'viginere_10': [mono_vigenere_cipher(text, viginere_key_10) for text in texts_10000],
	'affine': [mono_affine_cipher(text, affine_key) for text in texts_10000],
	'random': [mono_uniform_distortion(10000) for i in range(len(texts_10000))],
	'reccurent': [mono_recurrent_sequence(10000) for i in range(len(texts_10000))]
}


## Bigrams

In [20]:
viginere_key_1 = generate_random_lgram(all_bigrams_length)
viginere_key_5 = generate_random_lgram(all_bigrams_length, 5)
viginere_key_10 = generate_random_lgram(all_bigrams_length, 10)

affine_key = generate_random_lgram(all_bigrams_length, 2)

while affine_key[0] % 2 != 1:
	affine_key = generate_random_lgram(all_bigrams_length, 2)

text_10_bi = {
	'viginere_1': [bi_vigenere_cipher(text, viginere_key_1) for text in texts_10],
	'viginere_5': [bi_vigenere_cipher(text, viginere_key_5) for text in texts_10],
	'viginere_10': [bi_vigenere_cipher(text, viginere_key_10) for text in texts_10],
	'affine': [bi_affine_cipher(text, affine_key) for text in texts_10],
	'random': [bi_uniform_distortion(10) for i in range(len(texts_10))],
	'reccurent': [bi_recurrent_sequence(10) for i in range(len(texts_10))]
}

text_100_bi = {
	'viginere_1': [bi_vigenere_cipher(text, viginere_key_1) for text in texts_100],
	'viginere_5': [bi_vigenere_cipher(text, viginere_key_5) for text in texts_100],
	'viginere_10': [bi_vigenere_cipher(text, viginere_key_10) for text in texts_100],
	'affine': [bi_affine_cipher(text, affine_key) for text in texts_100],
	'random': [bi_uniform_distortion(100) for i in range(len(texts_100))],
	'reccurent': [bi_recurrent_sequence(100) for i in range(len(texts_100))]
}

text_1000_bi = {
	'viginere_1': [bi_vigenere_cipher(text, viginere_key_1) for text in texts_1000],
	'viginere_5': [bi_vigenere_cipher(text, viginere_key_5) for text in texts_1000],
	'viginere_10': [bi_vigenere_cipher(text, viginere_key_10) for text in texts_1000],
	'affine': [bi_affine_cipher(text, affine_key) for text in texts_1000],
	'random': [bi_uniform_distortion(1000) for i in range(len(texts_1000))],
	'reccurent': [bi_recurrent_sequence(1000) for i in range(len(texts_1000))]
}

text_10000_bi = {
	'viginere_1': [bi_vigenere_cipher(text, viginere_key_1) for text in texts_10000],
	'viginere_5': [bi_vigenere_cipher(text, viginere_key_5) for text in texts_10000],
	'viginere_10': [bi_vigenere_cipher(text, viginere_key_10) for text in texts_10000],
	'affine': [bi_affine_cipher(text, affine_key) for text in texts_10000],
	'random': [bi_uniform_distortion(10000) for i in range(len(texts_10000))],
	'reccurent': [bi_recurrent_sequence(10000) for i in range(len(texts_10000))]
}


In [21]:
def test_criteria_for_specific_distortion(criteria, text_array):
	def inner(*args):
		results = [criteria(text, *args) for text in text_array]

		return sum(results) / len(text_array)

	return inner


In [22]:
def test_criteria(criteria, text_array_dict):
	def inner(*args):
		for key in text_array_dict.keys():
			print('   ', key, test_criteria_for_specific_distortion(criteria, text_array_dict[key])(*args))

	return inner


# Criteria 1.0

## Real

In [None]:
print('L == 10')
print('   ', test_criteria_for_specific_distortion(mono_criteria_1_1, texts_10)())

print('L == 100')
print('   ', test_criteria_for_specific_distortion(mono_criteria_1_1, texts_100)())

print('L == 1000')
print('   ', test_criteria_for_specific_distortion(mono_criteria_1_1, texts_1000)())

print('L == 10000')
print('   ', test_criteria_for_specific_distortion(mono_criteria_1_1, texts_10000)())

## Monogram

In [None]:
print('L == 10')
test_criteria(mono_criteria_1_1, text_10_mono)()

print('L == 100')
test_criteria(mono_criteria_1_1, text_100_mono)()

print('L == 1000')
test_criteria(mono_criteria_1_1, text_1000_mono)()

print('L == 10000')
test_criteria(mono_criteria_1_1, text_10000_mono)()

## Bigram

In [None]:
print('L == 10')
test_criteria(bi_criteria_1_1, text_10_bi)()

print('L == 100')
test_criteria(bi_criteria_1_1, text_100_bi)()

print('L == 1000')
test_criteria(bi_criteria_1_1, text_1000_bi)()

print('L == 10000')
test_criteria(bi_criteria_1_1, text_10000_bi)()

# Criteria 1.1

## Real monograms

In [None]:
print('L == 10')
print('   ', test_criteria_for_specific_distortion(mono_criteria_1_1, texts_10)(1))

print('L == 100')
print('   ', test_criteria_for_specific_distortion(mono_criteria_1_1, texts_100)(3))

print('L == 1000')
print('   ', test_criteria_for_specific_distortion(mono_criteria_1_1, texts_1000)(4))

print('L == 10000')
print('   ', test_criteria_for_specific_distortion(mono_criteria_1_1, texts_10000)(4))

## Distorted monogram

In [None]:
print('L == 10')
test_criteria(mono_criteria_1_1, text_10_mono)(1)

print('L == 100')
test_criteria(mono_criteria_1_1, text_100_mono)(3)

print('L == 1000')
test_criteria(mono_criteria_1_1, text_1000_mono)(4)

print('L == 10000')
test_criteria(mono_criteria_1_1, text_10000_mono)(4)

## Real bigram

In [None]:
print('L == 10')
print('   ', test_criteria_for_specific_distortion(bi_criteria_1_1, texts_10)(1))

print('L == 100')
print('   ', test_criteria_for_specific_distortion(bi_criteria_1_1, texts_100)(10))

print('L == 1000')
print('   ', test_criteria_for_specific_distortion(bi_criteria_1_1, texts_1000)(50))

print('L == 10000')
print('   ', test_criteria_for_specific_distortion(bi_criteria_1_1, texts_10000)(100))

## Distorted bigram

In [None]:
print('L == 10')
test_criteria(bi_criteria_1_1, text_10_bi)(1)

print('L == 100')
test_criteria(bi_criteria_1_1, text_100_bi)(10)

print('L == 1000')
test_criteria(bi_criteria_1_1, text_1000_bi)(50)

print('L == 10000')
test_criteria(bi_criteria_1_1, text_10000_bi)(100)

# Criteria 1.2

## Real monogram

In [None]:
print('L == 10')
print('   ', test_criteria_for_specific_distortion(mono_prohibited_frequencies_1, texts_10)(0.1))

print('L == 100')
print('   ', test_criteria_for_specific_distortion(mono_prohibited_frequencies_1, texts_100)(0.025))

print('L == 1000')
print('   ', test_criteria_for_specific_distortion(mono_prohibited_frequencies_1, texts_1000)(0.015))

print('L == 10000')
print('   ', test_criteria_for_specific_distortion(mono_prohibited_frequencies_1, texts_10000)(0.008))

## Distorted monogram

In [None]:
print('L == 10')
test_criteria(mono_prohibited_frequencies_1, text_10_mono)(0.1)

print('L == 100')
test_criteria(mono_prohibited_frequencies_1, text_100_mono)(0.025)

print('L == 1000')
test_criteria(mono_prohibited_frequencies_1, text_1000_mono)(0.015)

print('L == 10000')
test_criteria(mono_prohibited_frequencies_1, text_10000_mono)(0.008)

## Real bigram

In [None]:
print('L == 10')
print('   ', test_criteria_for_specific_distortion(bi_prohibited_frequencies_1, texts_10)(0.1))

print('L == 100')
print('   ', test_criteria_for_specific_distortion(bi_prohibited_frequencies_1, texts_100)(0.0125))

print('L == 1000')
print('   ', test_criteria_for_specific_distortion(bi_prohibited_frequencies_1, texts_1000)(0.0035))

print('L == 10000')
print('   ', test_criteria_for_specific_distortion(bi_prohibited_frequencies_1, texts_10000)(0.001))

## Distorted bigram

In [None]:
print('L == 10')
test_criteria(bi_prohibited_frequencies_1, text_10_bi)(0.1)

print('L == 100')
test_criteria(bi_prohibited_frequencies_1, text_100_bi)(0.0125)

print('L == 1000')
test_criteria(bi_prohibited_frequencies_1, text_1000_bi)(0.0035)

print('L == 10000')
test_criteria(bi_prohibited_frequencies_1, text_10000_bi)(0.001)

# Criteria 1.3

## Real monogram

In [None]:
print('L == 10')
print('   ', test_criteria_for_specific_distortion(mono_prohibited_frequencies_2, texts_10)(0.1))

print('L == 100')
print('   ', test_criteria_for_specific_distortion(mono_prohibited_frequencies_2, texts_100)(0.05))

print('L == 1000')
print('   ', test_criteria_for_specific_distortion(mono_prohibited_frequencies_2, texts_1000)(0.025))

print('L == 10000')
print('   ', test_criteria_for_specific_distortion(mono_prohibited_frequencies_2, texts_10000)(0.02))

## Distorted monogram

In [None]:
print('L == 10')
test_criteria(mono_prohibited_frequencies_2, text_10_mono)(0.1)

print('L == 100')
test_criteria(mono_prohibited_frequencies_2, text_100_mono)(0.05)

print('L == 1000')
test_criteria(mono_prohibited_frequencies_2, text_1000_mono)(0.025)

print('L == 10000')
test_criteria(mono_prohibited_frequencies_2, text_10000_mono)(0.02)

## Real Bigram

In [278]:
print('L == 10')
print('   ', test_criteria_for_specific_distortion(bi_prohibited_frequencies_2, texts_10)(0.075))

print('L == 100')
print('   ', test_criteria_for_specific_distortion(bi_prohibited_frequencies_2, texts_100)(0.025))

print('L == 1000')
print('   ', test_criteria_for_specific_distortion(bi_prohibited_frequencies_2, texts_1000)(0.015))

print('L == 10000')
print('   ', test_criteria_for_specific_distortion(bi_prohibited_frequencies_2, texts_10000)(0.008))

L == 10
    0.9828
L == 100
    0.9977
L == 1000


KeyboardInterrupt: 

## Distorted Bigram

In [279]:
print('L == 10')
test_criteria(bi_prohibited_frequencies_2, text_10_bi)(0.075)

print('L == 100')
test_criteria(bi_prohibited_frequencies_2, text_100_bi)(0.025)

print('L == 1000')
test_criteria(bi_prohibited_frequencies_2, text_1000_bi)(0.015)

print('L == 10000')
test_criteria(bi_prohibited_frequencies_2, text_10000_bi)(0.008)

L == 10
    viginere_1 0.079
    viginere_5 0.081
    viginere_10 0.0825
    affine 0.0663
    random 0.0384
    reccurent 0.0953
L == 100
    viginere_1 0.0


KeyboardInterrupt: 

# Criteria 3.0

## Real monogram

In [52]:
# print('L == 10')
# print('   ', test_criteria_for_specific_distortion(mono_criteria_3_0, texts_10)(1.95))

# print('L == 100')
# print('   ', test_criteria_for_specific_distortion(mono_criteria_3_0, texts_100)(0.1))

print('L == 1000')
print('   ', test_criteria_for_specific_distortion(mono_criteria_3_0, texts_1000)(0.08))

print('L == 10000')
print('   ', test_criteria_for_specific_distortion(mono_criteria_3_0, texts_10000)(0.025))

L == 1000
    0.9363
L == 10000
    0.961


## Distorted monogram

In [55]:
# print('L == 10')
# test_criteria(mono_criteria_3_0, text_10_mono)(1.95)

# print('L == 100')
# test_criteria(mono_criteria_3_0, text_100_mono)(0.1)

print('L == 1000')
test_criteria(mono_criteria_3_0, text_1000_mono)(0.08)

print('L == 10000')
test_criteria(mono_criteria_3_0, text_10000_mono)(0.025)

L == 1000
    viginere_1 0.9363
    viginere_5 0.0
    viginere_10 0.0
    affine 0.9363
    random 0.0
    reccurent 0.0
L == 10000
    viginere_1 0.961
    viginere_5 0.0
    viginere_10 0.0
    affine 0.961
    random 0.0
    reccurent 0.0


## Real bigram

In [None]:
print('L == 10')
print('   ', test_criteria_for_specific_distortion(bi_criteria_3_0, texts_10)(0.1))

print('L == 100')
print('   ', test_criteria_for_specific_distortion(bi_criteria_3_0, texts_100)(0.05))

print('L == 1000')
print('   ', test_criteria_for_specific_distortion(bi_criteria_3_0, texts_1000)(0.025))

print('L == 10000')
print('   ', test_criteria_for_specific_distortion(bi_criteria_3_0, texts_10000)(0.02))

## Distorted bigram

In [None]:
print('L == 10')
test_criteria(bi_criteria_3_0, text_10_bi)(1)

print('L == 100')
test_criteria(bi_criteria_3_0, text_100_bi)(10)

print('L == 1000')
test_criteria(bi_criteria_3_0, text_1000_bi)(50)

print('L == 10000')
test_criteria(bi_criteria_1_1, text_10000_bi)(100)

# Criteria 5.1

## Real monogram

In [81]:
print('L == 10')
print('   ', test_criteria_for_specific_distortion(mono_empty_boxes, texts_10)(7, most_common_monograms))

print('L == 100')
print('   ', test_criteria_for_specific_distortion(mono_empty_boxes, texts_100)(0, most_common_monograms))

print('L == 1000')
print('   ', test_criteria_for_specific_distortion(mono_empty_boxes, texts_1000)(0, most_common_monograms))

print('L == 10000')
print('   ', test_criteria_for_specific_distortion(mono_empty_boxes, texts_10000)(0, most_common_monograms))

L == 10
    0.9721
L == 100
    0.9612
L == 1000
    1.0
L == 10000
    1.0


## Distorted Monogram

In [80]:
print('L == 10')
test_criteria(mono_empty_boxes, text_10_mono)(7, most_common_monograms)

print('L == 100')
test_criteria(mono_empty_boxes, text_100_mono)(0, most_common_monograms)

print('L == 1000')
test_criteria(mono_empty_boxes, text_1000_mono)(0, most_common_monograms)

print('L == 10000')
test_criteria(mono_empty_boxes, text_10000_mono)(0, most_common_monograms)

L == 10
    viginere_1 0.4534
    viginere_5 0.6938
    viginere_10 0.5053
    affine 0.4709
    random 0.5675
    reccurent 0.5004
L == 100
    viginere_1 0.0054
    viginere_5 0.7038
    viginere_10 0.5192
    affine 0.1477
    random 0.6482
    reccurent 0.0
L == 1000
    viginere_1 0.5087
    viginere_5 1.0
    viginere_10 1.0
    affine 0.9988
    random 1.0
    reccurent 0.0
L == 10000
    viginere_1 0.997
    viginere_5 1.0
    viginere_10 1.0
    affine 1.0
    random 1.0
    reccurent 0.0


## Real bigram

In [92]:
# print('L == 10')
# print('   ', test_criteria_for_specific_distortion(bi_empty_boxes, texts_10)(15, most_common_bigrams_100))

print('L == 100')
print('   ', test_criteria_for_specific_distortion(bi_empty_boxes, texts_100)(30, most_common_bigrams_50))

print('L == 1000')
print('   ', test_criteria_for_specific_distortion(bi_empty_boxes, texts_1000)(10, most_common_bigrams_100))

print('L == 10000')
print('   ', test_criteria_for_specific_distortion(bi_empty_boxes, texts_10000)(0, most_common_bigrams_200))


L == 100
    0.9443
L == 1000
    1.0
L == 10000
    0.991


## Distorted Bigram

In [94]:
# print('L == 10')
# test_criteria(bi_empty_boxes, text_10_bi)(1)

print('L == 100')
test_criteria(bi_empty_boxes, text_100_bi)(30, most_common_bigrams_50)

print('L == 1000')
test_criteria(bi_empty_boxes, text_1000_bi)(10, most_common_bigrams_100)

print('L == 10000')
test_criteria(bi_empty_boxes, text_10000_bi)(0, most_common_bigrams_200)

L == 100
    viginere_1 0.0
    viginere_5 0.0
    viginere_10 0.0
    affine 0.0
    random 0.0002
    reccurent 0.0
L == 1000
    viginere_1 0.0
    viginere_5 0.0
    viginere_10 0.0
    affine 0.0
    random 0.1532
    reccurent 0.0
L == 10000
    viginere_1 0.0
    viginere_5 0.2
    viginere_10 0.525
    affine 0.0
    random 1.0
    reccurent 0.0


# Criteria ?.?

In [None]:
print('L == 10')
test_criteria(mono_prohibited_frequencies_1, text_10_mono)(3)

print('L == 100')
test_criteria(mono_prohibited_frequencies_1, text_100_mono)(2)

print('L == 1000')
test_criteria(mono_prohibited_frequencies_1, text_1000_mono)(1)

print('L == 10000')
test_criteria(mono_prohibited_frequencies_1, text_10000_mono)(1)

In [None]:
print('L == 10')
test_criteria(bi_criteria_1_1, text_10_bi)(1)

print('L == 100')
test_criteria(bi_criteria_1_1, text_100_bi)(10)

print('L == 1000')
test_criteria(bi_criteria_1_1, text_1000_bi)(50)

print('L == 10000')
test_criteria(bi_criteria_1_1, text_10000_bi)(100)