## Imports

In [None]:
import re
import numpy as np
import math


## Read text

In [None]:
def read_text(path):
    """Return the full content of the UTF-8 text file at ``path`` as one string."""
    with open(path, mode='r', encoding='utf-8') as source:
        return source.read()

def write_text(path, lines):
    """Write the string ``lines`` to ``path`` as UTF-8, replacing any previous content."""
    with open(path, mode='w', encoding='utf-8') as target:
        target.write(lines)


# Load the source text and lower-case it before any filtering.
path = '.\\texts\\roksolana.txt'
text = read_text(path).lower()

# Drop every character the pattern matches (anything outside the listed
# letters, plus э, ы and ъ), then fold ґ into г: the alphabet string used for
# the statistics has no separate ґ.
formatted_text = re.sub(u'[^а-яіґєї]|э|ы|ъ', '', text).replace(u'ґ', 'г')

text_len = len(formatted_text)

# Keep a copy of the cleaned text on disk.
write_text('.\\texts\\formatted_text.txt', formatted_text)

## Gathering monogram and bigram statistics

In [None]:
alphabet_str = 'абвгдеєжзиіїйклмнопрстуфхцчшщьюя'
alphabet_length = len(alphabet_str)

# Absolute number of occurrences of every alphabet letter, in alphabet order.
alphabet_statistics = dict.fromkeys(alphabet_str, 0)
for letter in formatted_text:
    # A character that is not in the alphabet raises KeyError here.
    alphabet_statistics[letter] += 1

# Relative frequency of every letter: its count divided by the text length.
alphabet_frequencies = {
    symbol: occurrences / text_len
    for symbol, occurrences in alphabet_statistics.items()
}


In [None]:
# Every ordered pair of alphabet letters; the first letter varies slowest, so
# the bigram at position i is alphabet_str[i // n] + alphabet_str[i % n].
all_bigrams = [first + second for first in alphabet_str for second in alphabet_str]

# Count overlapping bigrams: each letter paired with its right neighbour.
bigrams_statistics = dict.fromkeys(all_bigrams, 0)
for first, second in zip(formatted_text, formatted_text[1:]):
    bigrams_statistics[first + second] += 1

# A text of text_len letters contains text_len - 1 overlapping bigrams.
bigrams_frequencies = {
    pair: occurrences / (text_len - 1)
    for pair, occurrences in bigrams_statistics.items()
}


## Entropy and Coincidence Index

In [None]:
# Shannon entropy of the single-letter distribution, in bits per letter.
# Letters that never occur are skipped: np.log2(0) is -inf and 0 * -inf is
# NaN, which would turn the whole sum into NaN. Skipping them applies the
# usual convention 0 * log2(0) = 0.
entropy = -1 * sum(
    frequency * np.log2(frequency)
    for frequency in alphabet_frequencies.values()
    if frequency > 0
)

# Index of coincidence: probability that two letters drawn from the text
# without replacement are equal.
coincidence_index = sum(
    count * (count - 1) for count in alphabet_statistics.values()
) / text_len / (text_len - 1)


## Generating texts

In [None]:
texts_count = 10000


def generate_texts(text_size, texts_count, step=100, source=None):
    """Cut ``texts_count`` fragments of ``text_size`` letters out of a text.

    Fragment number ``i`` starts at offset ``i * step``, so fragments overlap
    whenever ``text_size`` exceeds ``step``.

    Args:
        text_size: Length of every fragment.
        texts_count: Number of fragments to produce.
        step: Distance between the starts of consecutive fragments.
        source: Text to cut from; defaults to the module-level
            ``formatted_text``.

    Returns:
        A list of ``texts_count`` strings. Fragments that reach past the end
        of the source are shorter than ``text_size`` (possibly empty), exactly
        as plain slicing produces them.
    """
    if source is None:
        source = formatted_text

    return [
        source[number * step:number * step + text_size]
        for number in range(texts_count)
    ]

# Sample sets for each fragment length. The three shorter lengths get 10000
# fragments each, the 10000-letter set only 1000.
texts_10 = generate_texts(text_size=10, texts_count=10000)
texts_100 = generate_texts(text_size=100, texts_count=10000)
texts_1000 = generate_texts(text_size=1000, texts_count=10000)
texts_10000 = generate_texts(text_size=10000, texts_count=1000)


## Vigenere cipher

In [None]:
def vigenere_cipher(text, key, alphabet=None):
    """Encrypt ``text`` with the Vigenere cipher under the repeating ``key``.

    Letter ``i`` of the text is shifted by the alphabet position of key
    letter ``i % len(key)``, modulo the alphabet size.

    Args:
        text: Plain text; every character must belong to the alphabet.
        key: Key string; every character must belong to the alphabet.
        alphabet: Sequence of distinct symbols; defaults to the keys of the
            module-level ``alphabet_statistics``.

    Returns:
        The ciphered text, the same length as ``text``.

    Raises:
        ValueError: If a character of ``text`` or ``key`` is not in the
            alphabet.
        ZeroDivisionError: If ``key`` is empty while ``text`` is not.
    """
    if alphabet is None:
        alphabet = list(alphabet_statistics)

    # One dictionary lookup per letter instead of a linear list.index scan.
    position = {letter: index for index, letter in enumerate(alphabet)}
    length = len(alphabet)
    key_length = len(key)

    ciphered = []
    for index, letter in enumerate(text):
        key_letter = key[index % key_length]
        try:
            shifted = position[letter] + position[key_letter]
        except KeyError as error:
            # list.index raised ValueError for unknown letters; keep that type.
            raise ValueError(
                '{!r} is not in the alphabet'.format(error.args[0])
            ) from None
        ciphered.append(alphabet[shifted % length])

    return ''.join(ciphered)

## Affine cipher

In [None]:
def mono_affine_cipher(text, key, alphabet=None):
    """Encrypt ``text`` letter by letter with the affine map ``a * x + b``.

    Each letter is replaced by the letter at position
    ``(a * x + b) % len(alphabet)``, where ``x`` is its own position.

    Args:
        text: Plain text; every character must belong to the alphabet.
        key: Pair ``(a, b)`` of integers.
        alphabet: Sequence of distinct symbols; defaults to the module-level
            ``alphabet_str``.

    Returns:
        The ciphered text, the same length as ``text``.

    Raises:
        ValueError: If a character of ``text`` is not in the alphabet.
    """
    if alphabet is None:
        alphabet = alphabet_str

    a, b = key
    size = len(alphabet)
    # One dictionary lookup per letter instead of a linear index() scan.
    position = {letter: index for index, letter in enumerate(alphabet)}

    try:
        return ''.join(
            alphabet[(a * position[letter] + b) % size] for letter in text
        )
    except KeyError as error:
        # index() raised ValueError for unknown letters; keep that type.
        raise ValueError(
            '{!r} is not in the alphabet'.format(error.args[0])
        ) from None


def bi_affine_cipher(text, key, alphabet=None):
    """Encrypt ``text`` bigram by bigram with the affine map ``a * x + b``.

    The text is split into consecutive non-overlapping pairs of letters. A
    pair has the number ``first * n + second`` (``n`` is the alphabet size),
    which is its position in the ``all_bigrams`` list built from
    ``alphabet_str``. The pair is replaced by the pair numbered
    ``(a * x + b) % n ** 2``.

    Args:
        text: Plain text; a trailing unpaired letter is dropped.
        key: Pair ``(a, b)`` of integers.
        alphabet: Sequence of distinct symbols; defaults to the module-level
            ``alphabet_str``.

    Returns:
        The ciphered text, ``len(text) // 2 * 2`` letters long.

    Raises:
        ValueError: If a paired character of ``text`` is not in the alphabet.
    """
    if alphabet is None:
        alphabet = alphabet_str

    a, b = key
    size = len(alphabet)
    modulus = size ** 2
    position = {letter: index for index, letter in enumerate(alphabet)}

    pieces = []
    # Stop before a dangling last letter so only complete pairs are taken.
    for start in range(0, len(text) - 1, 2):
        try:
            plain_index = position[text[start]] * size + position[text[start + 1]]
        except KeyError as error:
            # list.index raised ValueError for unknown bigrams; keep that type.
            raise ValueError(
                '{!r} is not in the alphabet'.format(error.args[0])
            ) from None

        cipher_index = (a * plain_index + b) % modulus
        pieces.append(alphabet[cipher_index // size] + alphabet[cipher_index % size])

    return ''.join(pieces)


## Uniform distortion

In [None]:
def mono_uniform_distortion(text, alphabet=None):
    """Return a random string of ``len(text)`` letters drawn uniformly.

    Only the length of ``text`` is used. Every letter is drawn independently
    from the alphabet with ``np.random.randint``.

    Args:
        text: Text whose length determines the length of the result.
        alphabet: Sequence of symbols to draw from; defaults to the keys of
            the module-level ``alphabet_statistics``.

    Returns:
        A random string of the same length as ``text``.
    """
    if alphabet is None:
        alphabet = list(alphabet_statistics)

    # randint already yields integers in [0, len(alphabet)), so no rounding
    # is needed before indexing.
    draws = np.random.randint(low=0, high=len(alphabet), size=len(text))
    return ''.join(alphabet[draw] for draw in draws)

def bi_uniform_distortion(text):
    """Return ``len(text) // 2`` bigrams drawn uniformly from ``all_bigrams``.

    Only the length of ``text`` is used; the drawn bigrams are concatenated
    into a single string.
    """
    bigram_count = len(text) // 2
    picks = np.random.randint(low=0, high=len(all_bigrams), size=bigram_count)

    return ''.join(all_bigrams[pick] for pick in picks)

## Recurrent sequence

In [None]:
def generateRandomLGram(high=None):
    """Return a uniformly random integer index in ``[0, high)``.

    Args:
        high: Exclusive upper bound; defaults to the module-level
            ``alphabet_length``, looked up when the function is called.

    Returns:
        A plain Python ``int``.
    """
    if high is None:
        high = alphabet_length

    # Ask for a scalar rather than a size-1 array: converting a 1-element
    # array to a scalar (as math.floor did) is deprecated in recent NumPy.
    return int(np.random.randint(low=0, high=high))

def mono_recurrent_sequence(text):
    """Return a letter sequence of ``len(text)`` built by an additive recurrence.

    The first two letters are random. Every following letter has the index
    ``(previous + one_before_previous) % alphabet_length`` in
    ``alphabet_str``. Only the length of ``text`` is used.

    Returns:
        A string of exactly ``len(text)`` letters. Inputs shorter than two
        letters get a correspondingly truncated result.
    """
    target_length = len(text)

    # Work on indices, so no letter has to be looked up again with index().
    # Both seeds are always drawn, even if the result is truncated below.
    indices = [generateRandomLGram(), generateRandomLGram()]
    while len(indices) < target_length:
        indices.append((indices[-1] + indices[-2]) % alphabet_length)

    return ''.join(alphabet_str[index] for index in indices[:target_length])

def bi_recurrent_sequence(text):
    """Return ``len(text) // 2`` bigrams built by an additive recurrence.

    The first two bigrams are random. Every following bigram has the index
    ``(previous + one_before_previous) % alphabet_length ** 2`` in
    ``all_bigrams``. Only the length of ``text`` is used.

    Returns:
        A string of ``len(text) // 2`` bigrams. Inputs shorter than four
        letters get a correspondingly truncated result.
    """
    modulus = alphabet_length ** 2
    bigram_count = len(text) // 2

    # Track bigram indices directly instead of searching all_bigrams for the
    # tail of the string on every step. Both seeds are always drawn, even if
    # the result is truncated below.
    indices = [generateRandomLGram(modulus), generateRandomLGram(modulus)]
    while len(indices) < bigram_count:
        indices.append((indices[-1] + indices[-2]) % modulus)

    return ''.join(all_bigrams[index] for index in indices[:bigram_count])

# Quick manual check: the argument has 6 letters, so the call produces a
# random sequence of 3 bigrams (6 letters). The return value is not stored.
bi_recurrent_sequence('лолкек')

# Criteria

### Criteria 1.0