In [28]:
import numpy as np
import re, textwrap
import math as m
import random

from collections import Counter
import nltk

import sys
import gzip
from base64 import b64encode, b64decode

**1. Предварительная обработка текста**

In [2]:
with open("Dreiser.txt", 'rb') as file: 
    TEXT = file.read().decode('utf-8')
    
pattern_1 = r'ґ'
pattern_2 = r'[^абвгдеєжзиіїйклмнопрстуфхцчшщьюя]'

TEXT = re.sub(pattern_1, 'г', TEXT.lower())
TEXT = re.sub(pattern_2, '', TEXT)

**2. Расчёт статистических данных текста**

In [4]:
ukr_clean_alphabet = 'абвгдеєжзиіїйклмнопрстуфхцчшщьюя'

In [5]:
def letter_counter(text):
    alphabet = dict.fromkeys(list(ukr_clean_alphabet), 0)
    
    for letter in text:
        alphabet[letter] += 1
    
    return alphabet

In [6]:
letter_counter(TEXT)

{'а': 115516,
 'б': 28941,
 'в': 84841,
 'г': 24462,
 'д': 52838,
 'е': 74034,
 'є': 5133,
 'ж': 15228,
 'з': 33899,
 'и': 87619,
 'і': 84161,
 'ї': 10210,
 'й': 23372,
 'к': 45594,
 'л': 52192,
 'м': 43822,
 'н': 90412,
 'о': 137885,
 'п': 39320,
 'р': 57405,
 'с': 56137,
 'т': 70682,
 'у': 48930,
 'ф': 2513,
 'х': 13910,
 'ц': 10704,
 'ч': 20649,
 'ш': 11445,
 'щ': 10298,
 'ь': 21038,
 'ю': 11669,
 'я': 32091}

In [7]:
N = 1416950

1) Частота букв в тексте

In [8]:
def dictionary_sorting(dictionary):
    return [(i, round(dictionary[i]/N, 5)) for i in sorted(dictionary.keys(), key=dictionary.get, reverse=True)]

dictionary_sorting(letter_counter(TEXT))

[('о', 0.09731),
 ('а', 0.08152),
 ('н', 0.06381),
 ('и', 0.06184),
 ('в', 0.05988),
 ('і', 0.0594),
 ('е', 0.05225),
 ('т', 0.04988),
 ('р', 0.04051),
 ('с', 0.03962),
 ('д', 0.03729),
 ('л', 0.03683),
 ('у', 0.03453),
 ('к', 0.03218),
 ('м', 0.03093),
 ('п', 0.02775),
 ('з', 0.02392),
 ('я', 0.02265),
 ('б', 0.02042),
 ('г', 0.01726),
 ('й', 0.01649),
 ('ь', 0.01485),
 ('ч', 0.01457),
 ('ж', 0.01075),
 ('х', 0.00982),
 ('ю', 0.00824),
 ('ш', 0.00808),
 ('ц', 0.00755),
 ('щ', 0.00727),
 ('ї', 0.00721),
 ('є', 0.00362),
 ('ф', 0.00177)]

In [9]:
def letter_frequency(dictionary):
    return [(i, round(dictionary[i]/N, 5)) for i in dictionary.keys()]

letter_frequency(letter_counter(TEXT))

[('а', 0.08152),
 ('б', 0.02042),
 ('в', 0.05988),
 ('г', 0.01726),
 ('д', 0.03729),
 ('е', 0.05225),
 ('є', 0.00362),
 ('ж', 0.01075),
 ('з', 0.02392),
 ('и', 0.06184),
 ('і', 0.0594),
 ('ї', 0.00721),
 ('й', 0.01649),
 ('к', 0.03218),
 ('л', 0.03683),
 ('м', 0.03093),
 ('н', 0.06381),
 ('о', 0.09731),
 ('п', 0.02775),
 ('р', 0.04051),
 ('с', 0.03962),
 ('т', 0.04988),
 ('у', 0.03453),
 ('ф', 0.00177),
 ('х', 0.00982),
 ('ц', 0.00755),
 ('ч', 0.01457),
 ('ш', 0.00808),
 ('щ', 0.00727),
 ('ь', 0.01485),
 ('ю', 0.00824),
 ('я', 0.02265)]

2) Частоты биграмм в тексте

In [10]:
def bigrams_counter(text):
    cnt = Counter(list(nltk.ngrams(text, n=2)))
    sort_dict = sorted(dict(cnt).items(), key = lambda x: x[1], reverse = True)
    
    return sort_dict

bigrams_counter(TEXT)

[(('н', 'а'), 19855),
 (('о', 'в'), 16670),
 (('в', 'і'), 15719),
 (('н', 'е'), 15150),
 (('т', 'и'), 15028),
 (('п', 'о'), 14106),
 (('р', 'о'), 13916),
 (('а', 'в'), 13369),
 (('г', 'о'), 12906),
 (('е', 'р'), 12255),
 (('с', 'т'), 12069),
 (('о', 'г'), 12050),
 (('л', 'а'), 11945),
 (('л', 'и'), 11648),
 (('т', 'а'), 11432),
 (('і', 'н'), 11229),
 (('в', 'и'), 10998),
 (('о', 'н'), 10875),
 (('в', 'о'), 10742),
 (('о', 'м'), 10737),
 (('и', 'в'), 10702),
 (('в', 'а'), 10571),
 (('п', 'р'), 10526),
 (('а', 'т'), 10353),
 (('а', 'л'), 10295),
 (('н', 'о'), 10004),
 (('н', 'і'), 9966),
 (('р', 'а'), 9660),
 (('е', 'н'), 9624),
 (('о', 'д'), 9594),
 (('о', 'б'), 9551),
 (('і', 'д'), 9465),
 (('д', 'о'), 9339),
 (('і', 'в'), 8883),
 (('к', 'о'), 8864),
 (('о', 'с'), 8803),
 (('и', 'н'), 8754),
 (('н', 'и'), 8679),
 (('з', 'а'), 8646),
 (('а', 'н'), 8513),
 (('и', 'т'), 8497),
 (('т', 'о'), 8434),
 (('с', 'я'), 8251),
 (('щ', 'о'), 7913),
 (('у', 'в'), 7758),
 (('и', 'с'), 7744),
 (('я', 

3) H1, H2

In [11]:
def entropy_letter(text):
    x = len(letter_frequency(letter_counter(text)))
    H1 = 0
    for i in range(x):
        H1 += (letter_frequency(letter_counter(text))[i][1] * m.log2(letter_frequency(letter_counter(text))[i][1])) * (-1)
    
    return H1
        
def entropy_bigrams(text):
    l = []
    for i in range(941):
        l.append(bigrams_counter(text)[i][1])
    
    x = 941 
    N_bigrams = 1416949
    
    H2 = 0
    for i in range(x):
        H2 += (((l[i]) / N_bigrams) * m.log2(((l[i]) / N_bigrams))) * (-1)
    
    return H2 * 0.5

In [12]:
entropy_letter(TEXT)

4.592872413230112

In [13]:
entropy_bigrams(TEXT)

4.216828505470758

4) I1, I2

In [14]:
def conformity_index_letter(text):
    L = len(text)
    return (1/(L * (L-1))) * sum(letter_counter(text)[i] * (letter_counter(text)[i] - 1) for i in ukr_clean_alphabet)

def conformity_index_bigrams(text):
    L = len(text)
    
    l = []
    for i in range(941):
        l.append(bigrams_counter(text)[i][1])
    
    I2 = 0
    for i in range(941):
        I2 += l[i] * (l[i] - 1)
    
    return I2 / (L * (L - 1))

In [15]:
conformity_index_letter(TEXT)

0.04928235830845431

In [16]:
conformity_index_bigrams(TEXT)

0.004381493632890787

**3. Вспомогательные функции**

In [17]:
#char ---> int
def text_to_array_of_num(text):
    return [ukr_clean_alphabet.find(i) for i in text]

#int ---> char
def array_of_numbers_to_text(num):
    return ''.join([ukr_clean_alphabet[i] for i in num])

In [519]:
def disjoint_bigrams(text):
    bigram = {}
    for i in range(m.ceil((len(text) - 1) / 2)):
        if ''.join(text[2*i : 2*i + 2]) in bigram:
            bigram[''.join(text[2*i : 2*i + 2])] += 1
        else:
            bigram[''.join(text[2*i : 2*i + 2])] = 1
            
    for i, j in bigram.items():
        bigram[i] = j / sum(bigram.values())
    
    
    return bigram

In [523]:
disjoint_bigrams(TEXT)

{'те': 0.00429796393662444,
 'од': 0.00688657977460801,
 'ор': 0.0043079083260792765,
 'др': 0.001426412833847082,
 'ай': 0.005248657918709924,
 'зе': 0.0006032590227308033,
 'ра': 0.006837693010390672,
 'ме': 0.0023555069869240722,
 'ри': 0.005242446522423644,
 'ка': 0.005412192228993554,
 'нс': 0.0005730371643025026,
 'ьк': 0.002701746456344744,
 'ат': 0.007741454389585696,
 'ге': 0.0001951207731173077,
 'ді': 0.0031821321310542346,
 'ят': 0.002370305516660705,
 'ео': 0.0006202005063038667,
 'до': 0.006911882240566761,
 'рд': 0.00045433889095735763,
 'йз': 0.0005360917606223507,
 'ер': 0.009210614833614883,
 'ам': 0.004404123621040376,
 'ик': 0.002496702661775823,
 'ан': 0.006469285793326917,
 'сь': 0.0031003769902233787,
 'тр': 0.003409238403227999,
 'аг': 0.002207484580872557,
 'ед': 0.002686335908206299,
 'ія': 0.0014241693853444548,
 'ро': 0.010830357068508479,
 'ма': 0.004641982442046147,
 'нп': 0.0008522694042582722,
 'ек': 0.0015038835533649165,
 'ла': 0.009518221767192268,
 '

In [360]:
def find_index(bigram):
    return ukr_clean_alphabet.find(bigram[0]) * len(ukr_clean_alphabet) + ukr_clean_alphabet.find(bigram[1])

In [370]:
find_index('до')

145

In [368]:
def bigram(index):
    return ''.join([ukr_clean_alphabet[index // 32], ukr_clean_alphabet[index % 32]])

In [369]:
bigram(145)

'до'

In [371]:
def conc(it, n):
    return zip(*([iter(it)] * n))

In [372]:
def text_coding_by_bigram(text):
    encode_text = []
    
    for bigram in conc(text, 2):
        encode_text.append(find_index(bigram))
    
    return encode_text

In [373]:
text_coding_by_bigram('арполдун')

[19, 593, 452, 720]

In [374]:
def text_decoding_by_bigram(text):
    decoding_text = ''
    
    for i in text:
        decoding_text += bigram(i)
    
    return decoding_text      

In [375]:
text_decoding_by_bigram([12, 34, 456, 23])

'айбвлзаф'

**4. Генерирование текстов**

In [18]:
def generate_text_samples(text, L, N):
    X = []
    end_of_random_number = len(text) - L - 1
    
    for i in range(N):
        pos_l = []
        position = np.random.randint(end_of_random_number)
        pos_l.append(position)
        pos_l.append(position + L)
        X.append(pos_l)
    
    text_list = []
    for i in range(len(X)):
        text_list.append(text[X[i][0]:X[i][1]])
    
    return text_list

In [87]:
texts_10_10000 = []
texts_10_10000  = generate_text_samples(TEXT, 10, 10000)

In [23]:
texts_100_10000 = []
texts_100_10000  = generate_text_samples(TEXT, 100, 10000)

In [24]:
texts_1000_10000 = []
texts_1000_10000 = generate_text_samples(TEXT, 1000, 10000)

In [25]:
texts_10000_1000 = []
texts_10000_1000  = generate_text_samples(TEXT, 10000, 1000)

**5. Изменение текста**

1) l = 1 (функции для монограмм)

- VIGENERE_encryption_letter (r = 1, 5, 10) +
- Afine_encryption_letter +
- uniform_letter +
- s_letter +

In [26]:
def mod(x, y, z):
    return (x + y) % z

def random_key(length):
    letters = 'абвгдеєжзиіїйклмнопрстуфхцчшщьюя'
    rand_string = ''.join(random.choice(letters) for i in range(length))
    
    return rand_string

def key_initialization():
    temp = []
    
    k1 = random_key(1)
    k5 = random_key(5)
    k10 = random_key(10)
    
    temp.append(text_to_array_of_num(k1))
    temp.append(text_to_array_of_num(k5))
    temp.append(text_to_array_of_num(k10))
                
    return temp

In [29]:
key_initialization()

[[9], [11, 20, 24, 11, 6], [21, 19, 4, 1, 9, 2, 20, 24, 6, 13]]

In [30]:
def VIGENERE_encryption_letter(text, key):
    text = text_to_array_of_num(text)
    cipher = []
    
    for i in range(len(text)):
        cipher.append(mod(text[i], key[i % len(key)], 32))
    
    return array_of_numbers_to_text(cipher)    

In [31]:
VIGENERE_encryption_letter(texts_100_10000[0], key_initialization()[0])

'кужзаькьлбдеяжбпєялкємлзбрпчьяплюролйблюжаюжпвнкрьояжифвйлбькяжкяжечйвеолюлшяжкмнлоплоіьдвілйрквюрбч'

In [32]:
def Afine_letter(text):
    encrypt_text = []
    a = np.random.randint(32)
    b = np.random.randint(32)
    
    new_text = text_to_array_of_num(text)
    
    for i in new_text:
        encrypt_text.append(mod(a * i, b, 32))
   
    return array_of_numbers_to_text(encrypt_text)

In [33]:
Afine_letter('апрлвлг')

'ахсзхзс'

In [34]:
def uniform_letter(L):
    list_of_rand = []
    for i in range(L):
        list_of_rand.append(np.random.randint(32))
    
    return array_of_numbers_to_text(list_of_rand)

In [36]:
uniform_letter(100)

'ьрубтсещхуіффахнньтиаіфрмюеурїїнчжчиившьіивжвоумюяикгбикптощмяййьянечтцлкиечлєфеоміофуьжнвеьвілифйдї'

In [392]:
def s_letter(L):
    m = 32
    s0 = np.random.randint(32)
    s1 = np.random.randint(32)
    
    Y = [s0, s1] + [0] * (L - 2)
    
    for i in range(2, L):
        Y[i] = (Y[i-1] + Y[i-2]) % m 
    
    return array_of_numbers_to_text(Y)

In [393]:
s_letter(10)

'аїїубфхмжу'

2) l = 2 (функции для биграмм) 

- VIGENERE_encryption_bigram (r = 1, 5, 10)
- Afine_bigram +
- uniform_bigram +
- s_bigram

In [379]:
def Afine_bigram(text):
    
    l = []
    
    a = np.random.randint(1024)
    b = np.random.randint(1024)
    
    encrypt_text = text_coding_by_bigram(text)
    
    for i in encrypt_text:
        l.append((a * i + b) % 1024)
    
    return text_decoding_by_bigram(l)   

In [380]:
Afine_bigram('аолдукштив')

'югетцїзшги'

In [381]:
def uniform_bigram(L):
    l = []
    
    for i in range(L // 2):
        l.append(np.random.randint(1024))
    
    return text_decoding_by_bigram(l)

In [382]:
uniform_bigram(10)

'зсьїпляянж'

In [394]:
def s_bigram(L):
    m = 1024
    
    s0 = np.random.randint(1024)
    s1 = np.random.randint(1024)
    
    Y = [s0, s1] + [0] * (L // 2 - 2)
    
    for i in range(2, L // 2):
        Y[i] = (Y[i-1] + Y[i-2]) % m 
    
    return text_decoding_by_bigram(Y)
    

In [397]:
s_bigram(10)

'длахеєеюїд'

**6. Реализация критериев (частые l-граммы)**

1) l = 1

- criterion_20:
> 1. criterion_20_vigenere_r1_letter +
> 2. criterion_20_vigenere_r5_letter +
> 3. criterion_20_vigenere_r10_letter +
> 4. criterion_20_afine_letter +
> 5. criterion_20_uniform_letter +
> 6. criterion_20_s_letter +

- criterion_21:
> 1. criterion_21_vigenere_r1_letter +
> 2. criterion_21_vigenere_r5_letter + 
> 3. criterion_21_vigenere_r10_letter +
> 4. criterion_21_afine_letter +
> 5. criterion_21_uniform_letter +
> 6. criterion_21_s_letter +

- criterion_22:
> 1. criterion_22_vigenere_r1_letter +
> 2. criterion_22_vigenere_r5_letter +
> 3. criterion_22_vigenere_r10_letter +
> 4. criterion_22_afine_letter +
> 5. criterion_22_uniform_letter +
> 6. criterion_22_s_letter +

- criterion_23:
> 1. criterion_23_vigenere_r1_letter +
> 2. criterion_23_vigenere_r5_letter + 
> 3. criterion_23_vigenere_r10_letter + 
> 4. criterion_23_afine_letter + 
> 5. criterion_23_uniform_letter + 
> 6. criterion_23_s_letter +

- criterion_conformity
> 1. criterion_conformity_vigenere_r1_letter
> 2. criterion_conformity_vigenere_r5_letter
> 3. criterion_conformity_vigenere_r10_letter
> 4. criterion_conformity_afine_letter
> 5. criterion_conformity_uniform_letter
> 6. criterion_conformity_s_letter

- criterion_50
> 1. criterion_50_vigenere_r1_letter +
> 2. criterion_50_vigenere_r5_letter +
> 3. criterion_50_vigenere_r10_letter +
> 4. criterion_50_afine_letter +
> 5. criterion_50_uniform_letter +
> 6. criterion_50_s_letter +


- criterion_structural
> 1. criterion_structural_vigenere_r1_letter
> 2. criterion_structural_vigenere_r5_letter
> 3. criterion_structural_vigenere_r10_letter
> 4. criterion_structural_afine_letter
> 5. criterion_structural_uniform_letter
> 6. criterion_structural_s_letter

**----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------**

In [40]:
A = ['о','а', 'н', 'и', 'в', 'і', 'е', 'т', 'р', 'с', 'д', 'л', 'у', 'к', 'м', 'п', 'з', 'я', 'б', 
         'г', 'й', 'ь', 'ч', 'ж', 'х', 'ю', 'ш','ц', 'щ', 'ї', 'є', 'ф']

criterion_20(1)

In [100]:
def criterion_20_vigenere_r1_letter(open_texts, A_frq_size):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    A_frq = A[:A_frq_size]
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(VIGENERE_encryption_letter(i, key_initialization()[0]))
        text_corrupt.append(l)
    
    for i in open_texts:
        tmp = list(i)
        if len(list(set(tmp) & set(A_frq))) == len(A_frq):
            H0 += 1
    
    for i in text_corrupt:
        tmp = list(i[0])
        if len(list(set(tmp) & set(A_frq))) != len(A_frq):
            H1 += 1
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [101]:
criterion_20_vigenere_r1_letter(texts_10_10000, 7)

(1.0, 0.0013)

In [102]:
criterion_20_vigenere_r1_letter(texts_100_10000, 15)

(0.9422, 0.8131)

In [103]:
criterion_20_vigenere_r1_letter(texts_1000_10000, 12)

(0.1323, 1.0)

In [104]:
criterion_20_vigenere_r1_letter(texts_10000_1000, 11)

(0.0, 1.0)

criterion_20(2)

In [105]:
def criterion_20_vigenere_r5_letter(open_texts, A_frq_size):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    A_frq = A[:A_frq_size]
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(VIGENERE_encryption_letter(i, key_initialization()[1]))
        text_corrupt.append(l)
    
    for i in open_texts:
        tmp = list(i)
        if len(list(set(tmp) & set(A_frq))) == len(A_frq):
            H0 += 1
    
    for i in text_corrupt:
        tmp = list(i[0])
        if len(list(set(tmp) & set(A_frq))) != len(A_frq):
            H1 += 1
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [106]:
criterion_20_vigenere_r5_letter(texts_10_10000, 7)

(1.0, 0.0013)

In [107]:
criterion_20_vigenere_r5_letter(texts_100_10000, 7)

(0.4045, 0.9852)

In [108]:
criterion_20_vigenere_r5_letter(texts_1000_10000, 10)

(0.0, 1.0)

In [109]:
criterion_20_vigenere_r5_letter(texts_10000_1000, 11)

(0.0, 1.0)

criterion_20(3)

In [110]:
def criterion_20_vigenere_r10_letter(open_texts, A_frq_size):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    A_frq = A[:A_frq_size]
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(VIGENERE_encryption_letter(i, key_initialization()[2]))
        text_corrupt.append(l)
    
    for i in open_texts:
        tmp = list(i)
        if len(list(set(tmp) & set(A_frq))) == len(A_frq):
            H0 += 1
    
    for i in text_corrupt:
        tmp = list(i[0])
        if len(list(set(tmp) & set(A_frq))) != len(A_frq):
            H1 += 1
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [111]:
criterion_20_vigenere_r10_letter(texts_10_10000, 7)

(1.0, 0.0013)

In [112]:
criterion_20_vigenere_r10_letter(texts_100_10000, 16)

(0.601, 0.7709)

In [113]:
criterion_20_vigenere_r10_letter(texts_1000_10000, 20)

(0.0, 1.0)

In [114]:
criterion_20_vigenere_r10_letter(texts_10000_1000, 30)

(0.0, 1.0)

criterion_20(4)

In [115]:
def criterion_20_afine_letter(open_texts, A_frq_size):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    A_frq = A[:A_frq_size]
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(Afine_letter(i))
        text_corrupt.append(l)
    
    for i in open_texts:
        tmp = list(i)
        if len(list(set(tmp) & set(A_frq))) == len(A_frq):
            H0 += 1
    
    for i in text_corrupt:
        tmp = list(i[0])
        if len(list(set(tmp) & set(A_frq))) != len(A_frq):
            H1 += 1
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [116]:
criterion_20_afine_letter(texts_10_10000, 6)

(1.0, 0.0077)

In [148]:
criterion_20_afine_letter(texts_100_10000, 9)

(0.9352, 0.9684)

In [118]:
criterion_20_afine_letter(texts_1000_10000, 25)

(0.6366, 0.999)

In [149]:
criterion_20_afine_letter(texts_10000_1000, 25)

(0.479, 1.0)

criterion_20(5)

In [120]:
def criterion_20_uniform_letter(open_texts, A_frq_size, L):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    A_frq = A[:A_frq_size]
    
    text_corrupt = []
    for i in range(len(open_texts)):
        l = []
        l.append(uniform_letter(L))
        text_corrupt.append(l)
    
    for i in open_texts:
        tmp = list(i)
        if len(list(set(tmp) & set(A_frq))) == len(A_frq):
            H0 += 1
    
    for i in text_corrupt:
        tmp = list(i[0])
        if len(list(set(tmp) & set(A_frq))) != len(A_frq):
            H1 += 1
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [121]:
criterion_20_uniform_letter(texts_10_10000, 6, 10)

(0.9998, 0.0077)

In [122]:
criterion_20_uniform_letter(texts_100_10000, 9, 100)

(0.3236, 0.9684)

In [123]:
criterion_20_uniform_letter(texts_1000_10000, 9, 1000)

(0.0, 1.0)

In [124]:
criterion_20_uniform_letter(texts_10000_1000, 9, 10000)

(0.0, 1.0)

criterion_20(6)

In [168]:
def criterion_20_s_letter(open_texts, A_frq_size, L):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    A_frq = A[:A_frq_size]
    
    text_corrupt = []
    for i in range(len(open_texts)):
        l = []
        l.append(s_letter(L))
        text_corrupt.append(l)
    
    for i in open_texts:
        tmp = list(i)
        if len(list(set(tmp) & set(A_frq))) == len(A_frq):
            H0 += 1
    
    for i in range(len(text_corrupt)):
        tmp = list(text_corrupt[i][0])
        if len(list(set(tmp) & set(A_frq))) != len(A_frq):
            H1 += 1
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [169]:
criterion_20_s_letter(texts_10_10000, 6, 10)

(1.0, 0.0077)

In [170]:
criterion_20_s_letter(texts_100_10000, 9, 100)

(1.0, 0.9684)

In [171]:
criterion_20_s_letter(texts_1000_10000, 10, 1000)

(1.0, 1.0)

In [172]:
criterion_20_s_letter(texts_10000_1000, 25, 10000)

(1.0, 1.0)

**------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------**

criterion_21(1)

In [188]:
def criterion_21_vigenere_r1_letter(open_texts, A_frq_size, kf):
    FP = 0
    FN = 0
    
    A = ['о','а', 'н', 'и', 'в', 'і', 'е', 'т', 'р', 'с', 'д', 'л', 'у', 'к', 'м', 'п', 'з', 'я', 'б', 
         'г', 'й', 'ь', 'ч', 'ж', 'х', 'ю', 'ш','ц', 'щ', 'ї', 'є', 'ф']
    
    A_frq = A[:A_frq_size]
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(VIGENERE_encryption_letter(i, key_initialization()[0]))
        text_corrupt.append(l)
        
    H1 = 0
    H0 = 0
    
    for i in open_texts:
        tmp = list(i)
        if len(list(set(tmp) & set(A_frq))) <= kf:
            H0 += 1
    
    for i in text_corrupt:
        tmp = list(i[0])
        if len(list(set(tmp) & set(A_frq))) > kf:
            H1 += 1
            
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [189]:
criterion_21_vigenere_r1_letter(texts_10_10000, 7, 2)

(0.2492, 0.1643)

In [193]:
criterion_21_vigenere_r1_letter(texts_100_10000, 7, 2)

(0.9983, 0.0)

In [194]:
criterion_21_vigenere_r1_letter(texts_1000_10000, 7, 2)

(1.0, 0.0)

In [195]:
criterion_21_vigenere_r1_letter(texts_10000_1000, 7, 2)

(1.0, 0.0)

criterion_21(2)

In [196]:
def criterion_21_vigenere_r5_letter(open_texts, A_frq_size, kf):
    FP = 0
    FN = 0
    
    A = ['о','а', 'н', 'и', 'в', 'і', 'е', 'т', 'р', 'с', 'д', 'л', 'у', 'к', 'м', 'п', 'з', 'я', 'б', 
         'г', 'й', 'ь', 'ч', 'ж', 'х', 'ю', 'ш','ц', 'щ', 'ї', 'є', 'ф']
    
    A_frq = A[:A_frq_size]
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(VIGENERE_encryption_letter(i, key_initialization()[1]))
        text_corrupt.append(l)
        
    H1 = 0
    H0 = 0
    
    for i in open_texts:
        tmp = list(i)
        if len(list(set(tmp) & set(A_frq))) <= kf:
            H0 += 1
    
    for i in text_corrupt:
        tmp = list(i[0])
        if len(list(set(tmp) & set(A_frq))) > kf:
            H1 += 1
            
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [197]:
criterion_21_vigenere_r5_letter(texts_10_10000, 7, 2)

(0.2808, 0.1643)

In [205]:
criterion_21_vigenere_r5_letter(texts_100_10000, 6, 5)

(0.6495, 0.0057)

In [207]:
criterion_21_vigenere_r5_letter(texts_1000_10000, 10, 9)

(1.0, 0.0)

In [204]:
criterion_21_vigenere_r5_letter(texts_10000_1000, 6, 2)

(1.0, 0.0)

In [208]:
def criterion_21_vigenere_r10_letter(open_texts, A_frq_size, kf):
    FP = 0
    FN = 0
    
    A = ['о','а', 'н', 'и', 'в', 'і', 'е', 'т', 'р', 'с', 'д', 'л', 'у', 'к', 'м', 'п', 'з', 'я', 'б', 
         'г', 'й', 'ь', 'ч', 'ж', 'х', 'ю', 'ш','ц', 'щ', 'ї', 'є', 'ф']
    
    A_frq = A[:A_frq_size]
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(VIGENERE_encryption_letter(i, key_initialization()[2]))
        text_corrupt.append(l)
        
    H1 = 0
    H0 = 0
    
    for i in open_texts:
        tmp = list(i)
        if len(list(set(tmp) & set(A_frq))) <= kf:
            H0 += 1
    
    for i in text_corrupt:
        tmp = list(i[0])
        if len(list(set(tmp) & set(A_frq))) > kf:
            H1 += 1
            
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [209]:
criterion_21_vigenere_r10_letter(texts_10_10000, 7, 2)

(0.2851, 0.1643)

In [211]:
criterion_21_vigenere_r10_letter(texts_100_10000, 9, 8)

(0.6061, 0.0316)

In [213]:
criterion_21_vigenere_r10_letter(texts_1000_10000, 9, 8)

(1.0, 0.0)

In [214]:
criterion_21_vigenere_r10_letter(texts_10000_1000, 9, 8)

(1.0, 0.0)

In [217]:
def criterion_21_afine_letter(open_texts, A_frq_size, kf):
    FP = 0
    FN = 0
    
    A = ['о','а', 'н', 'и', 'в', 'і', 'е', 'т', 'р', 'с', 'д', 'л', 'у', 'к', 'м', 'п', 'з', 'я', 'б', 
         'г', 'й', 'ь', 'ч', 'ж', 'х', 'ю', 'ш','ц', 'щ', 'ї', 'є', 'ф']
    
    A_frq = A[:A_frq_size]
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(Afine_letter(i))
        text_corrupt.append(l)
        
    H1 = 0
    H0 = 0
    
    for i in open_texts:
        tmp = list(i)
        if len(list(set(tmp) & set(A_frq))) <= kf:
            H0 += 1
    
    for i in text_corrupt:
        tmp = list(i[0])
        if len(list(set(tmp) & set(A_frq))) > kf:
            H1 += 1
            
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [218]:
criterion_21_afine_letter(texts_10_10000, 7, 2)

(0.177, 0.1643)

In [219]:
criterion_21_afine_letter(texts_100_10000, 7, 2)

(0.7639, 0.0)

In [220]:
criterion_21_afine_letter(texts_1000_10000, 10, 2)

(0.8133, 0.0)

In [221]:
criterion_21_afine_letter(texts_10000_1000, 10, 2)

(0.814, 0.0)

In [222]:
def criterion_21_uniform_letter(open_texts, A_frq_size, kf, L):
    FP = 0
    FN = 0
    
    A = ['о','а', 'н', 'и', 'в', 'і', 'е', 'т', 'р', 'с', 'д', 'л', 'у', 'к', 'м', 'п', 'з', 'я', 'б', 
         'г', 'й', 'ь', 'ч', 'ж', 'х', 'ю', 'ш','ц', 'щ', 'ї', 'є', 'ф']
    
    A_frq = A[:A_frq_size]
    
    text_corrupt = []
    for i in range(len(open_texts)):
        l = []
        l.append(uniform_letter(L))
        text_corrupt.append(l)
        
    H1 = 0
    H0 = 0
    
    for i in open_texts:
        tmp = list(i)
        if len(list(set(tmp) & set(A_frq))) <= kf:
            H0 += 1
    
    for i in text_corrupt:
        tmp = list(i[0])
        if len(list(set(tmp) & set(A_frq))) > kf:
            H1 += 1
            
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [223]:
criterion_21_uniform_letter(texts_10_10000, 7, 2, 10)

(0.2753, 0.1643)

In [225]:
criterion_21_uniform_letter(texts_100_10000, 10, 1, 100)

(1.0, 0.0)

In [226]:
criterion_21_uniform_letter(texts_1000_10000, 7, 2, 1000)

(1.0, 0.0)

In [227]:
criterion_21_uniform_letter(texts_10000_1000, 10, 5, 10000)

(1.0, 0.0)

In [228]:
def criterion_21_s_letter(open_texts, A_frq_size, kf, L):
    FP = 0
    FN = 0
    
    A = ['о','а', 'н', 'и', 'в', 'і', 'е', 'т', 'р', 'с', 'д', 'л', 'у', 'к', 'м', 'п', 'з', 'я', 'б', 
         'г', 'й', 'ь', 'ч', 'ж', 'х', 'ю', 'ш','ц', 'щ', 'ї', 'є', 'ф']
    
    A_frq = A[:A_frq_size]
    
    text_corrupt = []
    for i in range(len(open_texts)):
        l = []
        l.append(s_letter(L))
        text_corrupt.append(l)
        
    H1 = 0
    H0 = 0
    
    for i in open_texts:
        tmp = list(i)
        if len(list(set(tmp) & set(A_frq))) <= kf:
            H0 += 1
    
    for i in text_corrupt:
        tmp = list(i[0])
        if len(list(set(tmp) & set(A_frq))) > kf:
            H1 += 1
            
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [229]:
criterion_21_s_letter(texts_10_10000, 7, 2, 10)

(0.261, 0.1643)

In [230]:
criterion_21_s_letter(texts_100_10000, 7, 2, 100)

(0.8433, 0.0)

In [231]:
criterion_21_s_letter(texts_1000_10000, 7, 2, 1000)

(0.8451, 0.0)

In [232]:
criterion_21_s_letter(texts_10000_1000, 10, 2, 10000)

(0.853, 0.0)

**------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------**

criterion_22(1)

In [540]:
def criterion_22_vigenere_r1_letter(open_texts, A_frq_size, k):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    A_frq = A[:A_frq_size]
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(VIGENERE_encryption_letter(i, key_initialization()[0]))
        text_corrupt.append(l)
    
    for i in range(len(text_corrupt)):
        tmp = dictionary_sorting(letter_counter(text_corrupt[i][0]))
        for j in range(len(tmp[0])):
            if tmp[j][0] in A_frq and tmp[j][1] < k:
                H1 += 1
    
    for i in range(len(open_texts)):
        tmp = dictionary_sorting(letter_counter(open_texts[i]))
        for j in range(len(tmp[0])):
            if tmp[j][0] in A_frq and tmp[j][1] >= k:
                H0 += 1
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [541]:
criterion_22_vigenere_r1_letter(texts_10_10000, 10, 0.03)

(0.7364, 0.0)

In [236]:
criterion_22_vigenere_r1_letter(texts_100_10000, 10, 0.037)

(0.6487, 0.0)

In [237]:
criterion_22_vigenere_r1_letter(texts_1000_10000, 10, 0.04)

(0.6221, 0.0)

In [238]:
criterion_22_vigenere_r1_letter(texts_10000_1000, 10, 0.04)

(0.667, 0.0)

criterion_22(2)

In [240]:
def criterion_22_vigenere_r5_letter(open_texts, A_frq_size, k):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    A_frq = A[:A_frq_size]
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(VIGENERE_encryption_letter(i, key_initialization()[1]))
        text_corrupt.append(l)
    
    for i in range(len(text_corrupt)):
        tmp = dictionary_sorting(letter_counter(text_corrupt[i][0]))
        for j in range(len(tmp[0])):
            if tmp[j][0] in A_frq and tmp[j][1] < k:
                H1 += 1
    
    for i in range(len(open_texts)):
        tmp = dictionary_sorting(letter_counter(open_texts[i]))
        for j in range(len(tmp[0])):
            if tmp[j][0] in A_frq and tmp[j][1] >= k:
                H0 += 1
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [241]:
criterion_22_vigenere_r5_letter(texts_10_10000, 10, 0.03)

(0.7597, 0.0)

In [242]:
criterion_22_vigenere_r5_letter(texts_100_10000, 10, 0.03)

(0.666, 0.0)

In [243]:
criterion_22_vigenere_r5_letter(texts_1000_10000, 10, 0.03)

(0.6346, 0.0)

In [244]:
criterion_22_vigenere_r5_letter(texts_10000_1000, 10, 0.03)

(0.633, 0.0)

criterion_22(3)

In [245]:
def criterion_22_vigenere_r10_letter(open_texts, A_frq_size, k):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    A_frq = A[:A_frq_size]
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(VIGENERE_encryption_letter(i, key_initialization()[2]))
        text_corrupt.append(l)
    
    for i in range(len(text_corrupt)):
        tmp = dictionary_sorting(letter_counter(text_corrupt[i][0]))
        for j in range(len(tmp[0])):
            if tmp[j][0] in A_frq and tmp[j][1] < k:
                H1 += 1
    
    for i in range(len(open_texts)):
        tmp = dictionary_sorting(letter_counter(open_texts[i]))
        for j in range(len(tmp[0])):
            if tmp[j][0] in A_frq and tmp[j][1] >= k:
                H0 += 1
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [246]:
criterion_22_vigenere_r10_letter(texts_10_10000, 10, 0.03)

(0.7578, 0.0)

In [247]:
criterion_22_vigenere_r10_letter(texts_100_10000, 10, 0.03)

(0.673, 0.0)

In [248]:
criterion_22_vigenere_r10_letter(texts_1000_10000, 10, 0.03)

(0.6438, 0.0)

In [250]:
criterion_22_vigenere_r10_letter(texts_1000_10000, 10, 0.03)

(0.6403, 0.0)

criterion_22(4)

In [254]:
def criterion_22_afine_letter(open_texts, A_frq_size, k):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    A_frq = A[:A_frq_size]
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(Afine_letter(i))
        text_corrupt.append(l)
    
    for i in range(len(text_corrupt)):
        tmp = dictionary_sorting(letter_counter(text_corrupt[i][0]))
        for j in range(len(tmp[0])):
            if tmp[j][0] in A_frq and tmp[j][1] < k:
                H1 += 1
    
    for i in range(len(open_texts)):
        tmp = dictionary_sorting(letter_counter(open_texts[i]))
        for j in range(len(tmp[0])):
            if tmp[j][0] in A_frq and tmp[j][1] >= k:
                H0 += 1
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [255]:
criterion_22_afine_letter(texts_10_10000, 9, 0.03)

(0.6809, 0.0)

In [256]:
criterion_22_afine_letter(texts_100_10000, 9, 0.03)

(0.6056, 0.0)

In [260]:
criterion_22_afine_letter(texts_1000_10000, 9, 0.03)

(0.5842, 0.0)

In [261]:
criterion_22_afine_letter(texts_10000_1000, 9, 0.03)

(0.595, 0.0)

criteriin_22(5)

In [262]:
def criterion_22_uniform_letter(open_texts, A_frq_size, k, L):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    A_frq = A[:A_frq_size]
    
    text_corrupt = []
    for i in range(len(open_texts)):
        l = []
        l.append(uniform_letter(L))
        text_corrupt.append(l)
    
    for i in range(len(text_corrupt)):
        tmp = dictionary_sorting(letter_counter(text_corrupt[i][0]))
        for j in range(len(tmp[0])):
            if tmp[j][0] in A_frq and tmp[j][1] < k:
                H1 += 1
    
    for i in range(len(open_texts)):
        tmp = dictionary_sorting(letter_counter(open_texts[i]))
        for j in range(len(tmp[0])):
            if tmp[j][0] in A_frq and tmp[j][1] >= k:
                H0 += 1
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [263]:
criterion_22_uniform_letter(texts_10_10000, 9, 0.03, 10)

(0.7299, 0.0)

In [264]:
criterion_22_uniform_letter(texts_100_10000, 9, 0.03, 100)

(0.6368, 0.0)

In [265]:
criterion_22_uniform_letter(texts_1000_10000, 9, 0.03, 1000)

(0.5827, 0.0)

In [266]:
criterion_22_uniform_letter(texts_10000_1000, 9, 0.03, 10000)

(0.596, 0.0)

In [267]:
def criterion_22_s_letter(open_texts, A_frq_size, k, L):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    A_frq = A[:A_frq_size]
    
    text_corrupt = []
    for i in range(len(open_texts)):
        l = []
        l.append(s_letter(L))
        text_corrupt.append(l)
    
    for i in range(len(text_corrupt)):
        tmp = dictionary_sorting(letter_counter(text_corrupt[i][0]))
        for j in range(len(tmp[0])):
            if tmp[j][0] in A_frq and tmp[j][1] < k:
                H1 += 1
    
    for i in range(len(open_texts)):
        tmp = dictionary_sorting(letter_counter(open_texts[i]))
        for j in range(len(tmp[0])):
            if tmp[j][0] in A_frq and tmp[j][1] >= k:
                H0 += 1
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [268]:
criterion_22_s_letter(texts_10_10000, 9, 0.03, 10)

(0.6015, 0.0)

In [269]:
criterion_22_s_letter(texts_100_10000, 9, 0.03, 100)

(0.4781, 0.0)

In [270]:
criterion_22_s_letter(texts_1000_10000, 9, 0.03, 1000)

(0.4167, 0.0)

In [271]:
criterion_22_s_letter(texts_10000_1000, 9, 0.03, 10000)

(0.485, 0.0)

**----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------**

criterion_23(1)

In [281]:
def criterion_23_vigenere_r1_letter(open_texts, A_frq_size):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    A_frq = A[:A_frq_size]
    
    Kf = 0
    for i in range(10):
        Kf += dictionary_sorting(letter_counter(TEXT))[i][1]
        
    Hf_ot = []
    Hf_ct = []
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(VIGENERE_encryption_letter(i, key_initialization()[0]))
        text_corrupt.append(l)
    
    for i in range(len(text_corrupt)):
        tmp = dictionary_sorting(letter_counter(text_corrupt[i][0]))
        s = 0
        for j in range(len(tmp[0])):
            if tmp[j][0] in A_frq:
                s += tmp[j][1]
        
        Hf_ot.append(s)
    
    for i in range(len(open_texts)):
        if Hf_ot[i] < Kf:
            H1 += 1
        
    
    for i in range(len(open_texts)):
        tmp = dictionary_sorting(letter_counter(open_texts[i]))
        s = 0
        for j in range(len(tmp[0])):
            if tmp[j][0] in A_frq:
                s += tmp[j][1]
        
        Hf_ct.append(s)
    
    for i in range(len(open_texts)):
        if Hf_ct[i] >= Kf:
            H0 += 1
            
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [275]:
criterion_23_vigenere_r1_letter(texts_10_10000, 7)

(1.0, 0.0)

In [276]:
criterion_23_vigenere_r1_letter(texts_100_10000, 10)

(1.0, 0.0)

In [277]:
criterion_23_vigenere_r1_letter(texts_1000_10000, 10)

(1.0, 0.0)

In [282]:
criterion_23_vigenere_r1_letter(texts_10000_1000, 10)

(1.0, 0.0)

criterion_23(2)

In [283]:
def criterion_23_vigenere_r5_letter(open_texts, A_frq_size):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    A_frq = A[:A_frq_size]
    
    Kf = 0
    for i in range(10):
        Kf += dictionary_sorting(letter_counter(TEXT))[i][1]
        
    Hf_ot = []
    Hf_ct = []
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(VIGENERE_encryption_letter(i, key_initialization()[1]))
        text_corrupt.append(l)
    
    for i in range(len(text_corrupt)):
        tmp = dictionary_sorting(letter_counter(text_corrupt[i][0]))
        s = 0
        for j in range(len(tmp[0])):
            if tmp[j][0] in A_frq:
                s += tmp[j][1]
        
        Hf_ot.append(s)
    
    for i in range(len(open_texts)):
        if Hf_ot[i] < Kf:
            H1 += 1
        
    
    for i in range(len(open_texts)):
        tmp = dictionary_sorting(letter_counter(open_texts[i]))
        s = 0
        for j in range(len(tmp[0])):
            if tmp[j][0] in A_frq:
                s += tmp[j][1]
        
        Hf_ct.append(s)
    
    for i in range(len(open_texts)):
        if Hf_ct[i] >= Kf:
            H0 += 1
            
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [284]:
criterion_23_vigenere_r5_letter(texts_10_10000, 7)

(1.0, 0.0)

In [285]:
criterion_23_vigenere_r5_letter(texts_100_10000, 7)

(1.0, 0.0)

In [286]:
criterion_23_vigenere_r5_letter(texts_1000_10000, 10)

(1.0, 0.0)

In [287]:
criterion_23_vigenere_r5_letter(texts_10000_1000, 10)

(1.0, 0.0)

criterion_23(3)

In [288]:
def criterion_23_vigenere_r10_letter(open_texts, A_frq_size):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    A_frq = A[:A_frq_size]
    
    Kf = 0
    for i in range(10):
        Kf += dictionary_sorting(letter_counter(TEXT))[i][1]
        
    Hf_ot = []
    Hf_ct = []
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(VIGENERE_encryption_letter(i, key_initialization()[2]))
        text_corrupt.append(l)
    
    for i in range(len(text_corrupt)):
        tmp = dictionary_sorting(letter_counter(text_corrupt[i][0]))
        s = 0
        for j in range(len(tmp[0])):
            if tmp[j][0] in A_frq:
                s += tmp[j][1]
        
        Hf_ot.append(s)
    
    for i in range(len(open_texts)):
        if Hf_ot[i] < Kf:
            H1 += 1
        
    
    for i in range(len(open_texts)):
        tmp = dictionary_sorting(letter_counter(open_texts[i]))
        s = 0
        for j in range(len(tmp[0])):
            if tmp[j][0] in A_frq:
                s += tmp[j][1]
        
        Hf_ct.append(s)
    
    for i in range(len(open_texts)):
        if Hf_ct[i] >= Kf:
            H0 += 1
            
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [289]:
criterion_23_vigenere_r10_letter(texts_10_10000, 7)

(1.0, 0.0)

In [290]:
criterion_23_vigenere_r10_letter(texts_100_10000, 7)

(1.0, 0.0)

In [291]:
criterion_23_vigenere_r10_letter(texts_1000_10000, 7)

(1.0, 0.0)

In [292]:
criterion_23_vigenere_r10_letter(texts_10000_1000, 7)

(1.0, 0.0)

criterion_23(4)

In [297]:
def criterion_23_afine_letter(open_texts, A_frq_size):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    A_frq = A[:A_frq_size]
    
    Kf = 0
    for i in range(10):
        Kf += dictionary_sorting(letter_counter(TEXT))[i][1]
        
    Hf_ot = []
    Hf_ct = []
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(Afine_letter(i))
        text_corrupt.append(l)
    
    for i in range(len(text_corrupt)):
        tmp = dictionary_sorting(letter_counter(text_corrupt[i][0]))
        s = 0
        for j in range(len(tmp[0])):
            if tmp[j][0] in A_frq:
                s += tmp[j][1]
        
        Hf_ot.append(s)
    
    for i in range(len(open_texts)):
        if Hf_ot[i] < Kf:
            H1 += 1
        
    
    for i in range(len(open_texts)):
        tmp = dictionary_sorting(letter_counter(open_texts[i]))
        s = 0
        for j in range(len(tmp[0])):
            if tmp[j][0] in A_frq:
                s += tmp[j][1]
        
        Hf_ct.append(s)
    
    for i in range(len(open_texts)):
        if Hf_ct[i] >= Kf:
            H0 += 1
            
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [298]:
criterion_23_afine_letter(texts_10_10000, 7)

(1.0, 0.0)

In [299]:
criterion_23_afine_letter(texts_100_10000, 7)

(1.0, 0.0)

In [300]:
criterion_23_afine_letter(texts_1000_10000, 7)

(1.0, 0.0)

In [301]:
criterion_23_afine_letter(texts_10000_1000, 7)

(1.0, 0.0)

criterion_23(5)

In [302]:
def criterion_23_uniform_letter(open_texts, A_frq_size, L):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    A_frq = A[:A_frq_size]
    
    Kf = 0
    for i in range(10):
        Kf += dictionary_sorting(letter_counter(TEXT))[i][1]
        
    Hf_ot = []
    Hf_ct = []
    
    text_corrupt = []
    for i in range(len(open_texts)):
        l = []
        l.append(uniform_letter(L))
        text_corrupt.append(l)
    
    for i in range(len(text_corrupt)):
        tmp = dictionary_sorting(letter_counter(text_corrupt[i][0]))
        s = 0
        for j in range(len(tmp[0])):
            if tmp[j][0] in A_frq:
                s += tmp[j][1]
        
        Hf_ot.append(s)
    
    for i in range(len(open_texts)):
        if Hf_ot[i] < Kf:
            H1 += 1
        
    
    for i in range(len(open_texts)):
        tmp = dictionary_sorting(letter_counter(open_texts[i]))
        s = 0
        for j in range(len(tmp[0])):
            if tmp[j][0] in A_frq:
                s += tmp[j][1]
        
        Hf_ct.append(s)
    
    for i in range(len(open_texts)):
        if Hf_ct[i] >= Kf:
            H0 += 1
            
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [303]:
criterion_23_uniform_letter(texts_10_10000, 10, 10)

(1.0, 0.0)

In [304]:
criterion_23_uniform_letter(texts_100_10000, 10, 100)

(1.0, 0.0)

In [305]:
criterion_23_uniform_letter(texts_1000_10000, 10, 1000)

(1.0, 0.0)

In [306]:
criterion_23_uniform_letter(texts_10000_1000, 10, 10000)

(1.0, 0.0)

criterion_23(6)

In [307]:
def criterion_23_s_letter(open_texts, A_frq_size, L):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    A_frq = A[:A_frq_size]
    
    Kf = 0
    for i in range(10):
        Kf += dictionary_sorting(letter_counter(TEXT))[i][1]
        
    Hf_ot = []
    Hf_ct = []
    
    text_corrupt = []
    for i in range(len(open_texts)):
        l = []
        l.append(s_letter(L))
        text_corrupt.append(l)
    
    for i in range(len(text_corrupt)):
        tmp = dictionary_sorting(letter_counter(text_corrupt[i][0]))
        s = 0
        for j in range(len(tmp[0])):
            if tmp[j][0] in A_frq:
                s += tmp[j][1]
        
        Hf_ot.append(s)
    
    for i in range(len(open_texts)):
        if Hf_ot[i] < Kf:
            H1 += 1
        
    
    for i in range(len(open_texts)):
        tmp = dictionary_sorting(letter_counter(open_texts[i]))
        s = 0
        for j in range(len(tmp[0])):
            if tmp[j][0] in A_frq:
                s += tmp[j][1]
        
        Hf_ct.append(s)
    
    for i in range(len(open_texts)):
        if Hf_ct[i] >= Kf:
            H0 += 1
            
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [308]:
criterion_23_s_letter(texts_10_10000, 10, 10)

(1.0, 0.0)

In [309]:
criterion_23_s_letter(texts_100_10000, 10, 100)

(1.0, 0.0)

In [310]:
criterion_23_s_letter(texts_1000_10000, 10, 1000)

(1.0, 0.0)

In [311]:
criterion_23_s_letter(texts_10000_1000, 10, 10000)

(1.0, 0.0)

**----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------**

criterion_conformity(1)

In [578]:
def criterion_conformity_vigenere_r1_letter(open_texts, k_H):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    I1 = 0.04928235830845431
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(VIGENERE_encryption_letter(i, key_initialization()[0]))
        text_corrupt.append(l)
    
    for i in text_corrupt:
        if abs(conformity_index_letter(i[0]) - I1) > k_H:
            H1 += 1
    
    for i in open_texts:
        if abs(conformity_index_letter(i) - I1) <= k_H:
            H0 += 1
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [579]:
criterion_conformity_vigenere_r1_letter(texts_10_10000, 0.02)

(0.5682, 0.4318)

In [580]:
criterion_conformity_vigenere_r1_letter(texts_100_10000, 0.02)

(0.0015, 0.9985)

In [581]:
criterion_conformity_vigenere_r1_letter(texts_1000_10000, 0.02)

(0.0, 1.0)

In [582]:
criterion_conformity_vigenere_r1_letter(texts_10000_1000, 0.02)

(0.0, 1.0)

criterion_conformity(2)

In [583]:
def criterion_conformity_vigenere_r5_letter(open_texts, k_H):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    I1 = 0.04928235830845431
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(VIGENERE_encryption_letter(i, key_initialization()[1]))
        text_corrupt.append(l)
    
    for i in text_corrupt:
        if abs(conformity_index_letter(i[0]) - I1) > k_H:
            H1 += 1
    
    for i in open_texts:
        if abs(conformity_index_letter(i) - I1) <= k_H:
            H0 += 1
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [584]:
criterion_conformity_vigenere_r5_letter(texts_10_10000, 0.02)

(0.6487, 0.4318)

In [585]:
criterion_conformity_vigenere_r5_letter(texts_100_10000, 0.02)

(0.0271, 0.9985)

In [586]:
criterion_conformity_vigenere_r5_letter(texts_1000_10000, 0.02)

(0.0, 1.0)

In [587]:
criterion_conformity_vigenere_r5_letter(texts_10000_1000, 0.02)

(0.0, 1.0)

criterion_conformity(3)

In [588]:
def criterion_conformity_vigenere_r10_letter(open_texts, k_H):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    I1 = 0.04928235830845431
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(VIGENERE_encryption_letter(i, key_initialization()[2]))
        text_corrupt.append(l)
    
    for i in text_corrupt:
        if abs(conformity_index_letter(i[0]) - I1) > k_H:
            H1 += 1
    
    for i in open_texts:
        if abs(conformity_index_letter(i) - I1) <= k_H:
            H0 += 1
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [589]:
criterion_conformity_vigenere_r10_letter(texts_10_10000, 0.02)

(0.6628, 0.4318)

In [590]:
criterion_conformity_vigenere_r10_letter(texts_100_10000, 0.02)

(0.0868, 0.9985)

In [591]:
criterion_conformity_vigenere_r10_letter(texts_1000_10000, 0.02)

(0.0, 1.0)

In [592]:
criterion_conformity_vigenere_r10_letter(texts_10000_1000, 0.02)

(0.0, 1.0)

criterion_conformity(4)

In [593]:
def criterion_conformity_afine_letter(open_texts, k_H):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    I1 = 0.04928235830845431
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(Afine_letter(i))
        text_corrupt.append(l)
    
    for i in text_corrupt:
        if abs(conformity_index_letter(i[0]) - I1) > k_H:
            H1 += 1
    
    for i in open_texts:
        if abs(conformity_index_letter(i) - I1) <= k_H:
            H0 += 1
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [594]:
criterion_conformity_afine_letter(texts_10_10000, 0.02)

(0.659, 0.4318)

In [595]:
criterion_conformity_afine_letter(texts_100_10000, 0.02)

(0.4991, 0.9985)

In [596]:
criterion_conformity_afine_letter(texts_1000_10000, 0.02)

(0.5017, 1.0)

conformity_index(5)

In [597]:
def criterion_conformity_uniform_letter(open_texts, k_H, L):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    I1 = 0.04928235830845431
    
    text_corrupt = []
    for i in range(len(open_texts)):
        l = []
        l.append(uniform_letter(L))
        text_corrupt.append(l)
    
    for i in text_corrupt:
        if abs(conformity_index_letter(i[0]) - I1) > k_H:
            H1 += 1
    
    for i in open_texts:
        if abs(conformity_index_letter(i) - I1) <= k_H:
            H0 += 1
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [598]:
criterion_conformity_uniform_letter(texts_10_10000, 0.02, 10)

(0.6707, 0.4318)

In [602]:
criterion_conformity_uniform_letter(texts_100_10000, 0.02, 100)

(0.2097, 0.9985)

In [603]:
criterion_conformity_uniform_letter(texts_1000_10000, 0.02, 1000)

(0.0, 1.0)

In [605]:
criterion_conformity_uniform_letter(texts_10000_1000, 0.02, 10000)

(0.0, 1.0)

criterion_conformity(6)

In [606]:
def criterion_conformity_s_letter(open_texts, k_H, L):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    I1 = 0.04928235830845431
    
    text_corrupt = []
    for i in range(len(open_texts)):
        l = []
        l.append(s_letter(L))
        text_corrupt.append(l)
    
    for i in text_corrupt:
        if abs(conformity_index_letter(i[0]) - I1) > k_H:
            H1 += 1
    
    for i in open_texts:
        if abs(conformity_index_letter(i) - I1) <= k_H:
            H0 += 1
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [607]:
criterion_conformity_s_letter(texts_10_10000, 0.02, 10)

(0.699, 0.4318)

In [608]:
criterion_conformity_s_letter(texts_100_10000, 0.02, 100)

(0.2511, 0.9985)

In [609]:
criterion_conformity_s_letter(texts_1000_10000, 0.02, 1000)

(0.2483, 1.0)

In [610]:
criterion_conformity_s_letter(texts_10000_1000, 0.02, 10000)

(0.596, 1.0)

criterion_50(1)

In [313]:
def criterion_50_vigenere_r1_letter(open_texts, k_empt):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    

    B_prh = A[3:] # j = 2
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(VIGENERE_encryption_letter(i, key_initialization()[0]))
        text_corrupt.append(l)
    
    for i in range(len(text_corrupt)):
        tmp = set(text_corrupt[i][0])
        if len(set(B_prh) - tmp) < k_empt:
            H1 += 1
    
    for i in range(len(open_texts)):
        tmp = set(open_texts[i])
        if len(set(B_prh) - tmp) >= k_empt:
            H0 += 1
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN
    

In [315]:
criterion_50_vigenere_r1_letter(texts_10_10000, 1)

(0.0, 1.0)

In [316]:
criterion_50_vigenere_r1_letter(texts_100_10000, 1)

(0.0004, 1.0)

In [317]:
criterion_50_vigenere_r1_letter(texts_1000_10000, 1)

(0.6834, 0.3493)

In [318]:
criterion_50_vigenere_r1_letter(texts_10000_1000, 1)

(1.0, 0.0)

criterion_50(2)

In [319]:
def criterion_50_vigenere_r5_letter(open_texts, k_empt):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    

    B_prh = A[3:] # j = 2
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(VIGENERE_encryption_letter(i, key_initialization()[1]))
        text_corrupt.append(l)
    
    for i in range(len(text_corrupt)):
        tmp = set(text_corrupt[i][0])
        if len(set(B_prh) - tmp) < k_empt:
            H1 += 1
    
    for i in range(len(open_texts)):
        tmp = set(open_texts[i])
        if len(set(B_prh) - tmp) >= k_empt:
            H0 += 1
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [321]:
criterion_50_vigenere_r5_letter(texts_10_10000, 2)

(0.0, 1.0)

In [322]:
criterion_50_vigenere_r5_letter(texts_100_10000, 2)

(0.371, 0.9979)

In [323]:
criterion_50_vigenere_r5_letter(texts_1000_10000, 2)

(1.0, 0.0194)

In [324]:
criterion_50_vigenere_r5_letter(texts_10000_1000, 2)

(1.0, 0.0)

criterion_50(3)

In [325]:
def criterion_50_vigenere_r10_letter(open_texts, k_empt):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    

    B_prh = A[3:] # j = 2
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(VIGENERE_encryption_letter(i, key_initialization()[2]))
        text_corrupt.append(l)
    
    for i in range(len(text_corrupt)):
        tmp = set(text_corrupt[i][0])
        if len(set(B_prh) - tmp) < k_empt:
            H1 += 1
    
    for i in range(len(open_texts)):
        tmp = set(open_texts[i])
        if len(set(B_prh) - tmp) >= k_empt:
            H0 += 1
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [326]:
criterion_50_vigenere_r10_letter(texts_10_10000, 2)

(0.0, 1.0)

In [327]:
criterion_50_vigenere_r10_letter(texts_100_10000, 2)

(0.5122, 0.9979)

In [328]:
criterion_50_vigenere_r10_letter(texts_1000_10000, 2)

(1.0, 0.0194)

In [329]:
criterion_50_vigenere_r10_letter(texts_10000_1000, 2)

(1.0, 0.0)

criterion_50(4)

In [330]:
def criterion_50_afine_letter(open_texts, k_empt):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    

    B_prh = A[3:] # j = 2
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(Afine_letter(i))
        text_corrupt.append(l)
    
    for i in range(len(text_corrupt)):
        tmp = set(text_corrupt[i][0])
        if len(set(B_prh) - tmp) < k_empt:
            H1 += 1
    
    for i in range(len(open_texts)):
        tmp = set(open_texts[i])
        if len(set(B_prh) - tmp) >= k_empt:
            H0 += 1
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [332]:
criterion_50_afine_letter(texts_10_10000, 2)

(0.0, 1.0)

In [333]:
criterion_50_afine_letter(texts_100_10000, 2)

(0.0026, 0.9979)

In [334]:
criterion_50_afine_letter(texts_1000_10000, 2)

(0.4902, 0.0194)

In [335]:
criterion_50_afine_letter(texts_10000_1000, 2)

(0.522, 0.0)

criterion_50(5)

In [341]:
def criterion_50_uniform_letter(open_texts, k_empt, L):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    

    B_prh = A[3:] # j = 2
    
    text_corrupt = []
    for i in range(len(open_texts)):
        l = []
        l.append(uniform_letter(L))
        text_corrupt.append(l)
    
    for i in range(len(text_corrupt)):
        tmp = set(text_corrupt[i][0])
        if len(set(B_prh) - tmp) < k_empt:
            H1 += 1
    
    for i in range(len(open_texts)):
        tmp = set(open_texts[i])
        if len(set(B_prh) - tmp) >= k_empt:
            H0 += 1
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [342]:
criterion_50_uniform_letter(texts_10_10000, 1, 10)

(0.0, 1.0)

In [343]:
criterion_50_uniform_letter(texts_100_10000, 1, 100)

(0.2763, 1.0)

In [344]:
criterion_50_uniform_letter(texts_1000_10000, 1, 1000)

(1.0, 0.3493)

In [345]:
criterion_50_uniform_letter(texts_10000_1000, 1, 10000)

(1.0, 0.0)

criterion_50(s_letter)

In [355]:
def criterion_50_s_letter(open_texts, k_empt, L):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    

    B_prh = A[2:] # j = 2
    
    text_corrupt = []
    for i in range(len(open_texts)):
        l = []
        l.append(s_letter(L))
        text_corrupt.append(l)
    
    for i in range(len(text_corrupt)):
        tmp = set(text_corrupt[i][0])
        if len(set(B_prh) - tmp) < k_empt:
            H1 += 1
    
    for i in range(len(open_texts)):
        tmp = set(open_texts[i])
        if len(set(B_prh) - tmp) >= k_empt:
            H0 += 1
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [347]:
criterion_50_s_letter(texts_10_10000, 1, 10)

(0.0, 1.0)

In [349]:
criterion_50_s_letter(texts_100_10000, 1, 100)

(0.0, 1.0)

In [350]:
criterion_50_s_letter(texts_1000_10000, 1, 1000)

(0.0, 0.3493)

In [357]:
criterion_50_s_letter(texts_10000_1000, 1, 10000)

(0.0, 0.0)

2) l = 2

- criterion_20:
> 1. criterion_20_vigenere_r1_bigram +
> 2. criterion_20_vigenere_r5_bigram +
> 3. criterion_20_vigenere_r10_bigram +
> 4. criterion_20_afine_bigram +
> 5. criterion_20_uniform_bigram +
> 6. criterion_20_s_bigram +

- criterion_21:
> 1. criterion_21_vigenere_r1_bigram +
> 2. criterion_21_vigenere_r5_bigram +
> 3. criterion_21_vigenere_r10_bigram +
> 4. criterion_21_afine_bigram +
> 5. criterion_21_uniform_bigram +
> 6. criterion_21_s_bigram +

- criterion_22:
> 1. criterion_22_vigenere_r1_bigram 
> 2. criterion_22_vigenere_r5_bigram 
> 3. criterion_22_vigenere_r10_bigram
> 4. criterion_22_afine_bigram
> 5. criterion_22_uniform_bigram 
> 6. criterion_22_s_bigram 

- criterion_23:
> 1. criterion_23_vigenere_r1_bigram 
> 2. criterion_23_vigenere_r5_bigram 
> 3. criterion_23_vigenere_r10_bigram
> 4. criterion_23_afine_bigram  
> 5. criterion_23_uniform_bigram 
> 6. criterion_23_s_bigram

- criterion_conformity
> 1. criterion_conformity_vigenere_r1_bigram
> 2. criterion_conformity_vigenere_r5_bigram
> 3. criterion_conformity_vigenere_r10_bigram
> 4. criterion_conformity_afine_bigram
> 5. criterion_conformity_uniform_bigram
> 6. criterion_conformity_s_bigram

- criterion_50
> 1. criterion_50_vigenere_r1_bigram 
> 2. criterion_50_vigenere_r5_bigram 
> 3. criterion_50_vigenere_r10_bigram 
> 4. criterion_50_afine_bigram
> 5. criterion_50_uniform_bigram 
> 6. criterion_50_s_bigram


- criterion_structural
> 1. criterion_structural_vigenere_r1_bigram
> 2. criterion_structural_vigenere_r5_bigram
> 3. criterion_structural_vigenere_r10_bigram
> 4. criterion_structural_afine_bigram
> 5. criterion_structural_uniform_bigram
> 6. criterion_structural_s_bigram

In [398]:
B = ['на', 'ов', 'ві', 'не', 'ти', 'по', 'ро', 'ав', 'го', 'ер', 'ст', 'ог', 'ла', 'ли', 'та', 'ін', 'ви', 'он', 'во', 'ом']

criterion_20(1)

In [446]:
def criterion_20_vigenere_r1_bigram(open_texts, B_frq_size):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    B_frq = B[:B_frq_size]
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(VIGENERE_encryption_letter(i, key_initialization()[0]))
        text_corrupt.append(l)
    
    for i in open_texts:
        l = []
        for j in range(len(i) // 2):
            l.append(i[2*j:2*j+2])
        
        if len(list(set(l) & set(B_frq))) == len(B_frq):
            H1 += 1
    
    for i in text_corrupt:
        l = []
        for j in range(len(i[0]) // 2):
            l.append(i[0][2*j:2*j+2])
            
        if len(list(set(l) & set(B_frq))) != len(B_frq):
            H0 += 1
        
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN 
    

In [447]:
criterion_20_vigenere_r1_bigram(texts_10_10000, 9)

(0.0, 1.0)

In [448]:
criterion_20_vigenere_r1_bigram(texts_100_10000, 9)

(0.0002, 1.0)

In [449]:
criterion_20_vigenere_r1_bigram(texts_1000_10000, 9)

(0.922, 0.9721)

In [450]:
criterion_20_vigenere_r1_bigram(texts_10000_1000, 9)

(1.0, 0.959)

criterion_20(2)

In [452]:
def criterion_20_vigenere_r5_bigram(open_texts, B_frq_size):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    B_frq = B[:B_frq_size]
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(VIGENERE_encryption_letter(i, key_initialization()[1]))
        text_corrupt.append(l)
    
    for i in open_texts:
        l = []
        for j in range(len(i) // 2):
            l.append(i[2*j:2*j+2])
        
        if len(list(set(l) & set(B_frq))) == len(B_frq):
            H1 += 1
    
    for i in text_corrupt:
        l = []
        for j in range(len(i[0]) // 2):
            l.append(i[0][2*j:2*j+2])
            
        if len(list(set(l) & set(B_frq))) != len(B_frq):
            H0 += 1
        
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN 

In [453]:
criterion_20_vigenere_r5_bigram(texts_10_10000, 9)

(0.0, 1.0)

In [454]:
criterion_20_vigenere_r5_bigram(texts_100_10000, 9)

(0.0002, 1.0)

In [455]:
criterion_20_vigenere_r5_bigram(texts_1000_10000, 9)

(0.922, 0.9992)

In [456]:
criterion_20_vigenere_r5_bigram(texts_10000_1000, 9)

(1.0, 0.707)

criterion_20(3)

In [457]:
def criterion_20_vigenere_r10_bigram(open_texts, B_frq_size):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    B_frq = B[:B_frq_size]
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(VIGENERE_encryption_letter(i, key_initialization()[2]))
        text_corrupt.append(l)
    
    for i in open_texts:
        l = []
        for j in range(len(i) // 2):
            l.append(i[2*j:2*j+2])
        
        if len(list(set(l) & set(B_frq))) == len(B_frq):
            H1 += 1
    
    for i in text_corrupt:
        l = []
        for j in range(len(i[0]) // 2):
            l.append(i[0][2*j:2*j+2])
            
        if len(list(set(l) & set(B_frq))) != len(B_frq):
            H0 += 1
        
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN 

In [461]:
criterion_20_vigenere_r10_bigram(texts_10_10000, 9)

(0.0, 1.0)

In [462]:
criterion_20_vigenere_r10_bigram(texts_100_10000, 9)

(0.0002, 1.0)

In [463]:
criterion_20_vigenere_r10_bigram(texts_1000_10000, 9)

(0.922, 0.9996)

In [464]:
criterion_20_vigenere_r10_bigram(texts_10000_1000, 9)

(1.0, 0.731)

criterion_20(4)

In [416]:
def criterion_20_afine_bigram(open_texts, B_frq_size):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    B_frq = B[:B_frq_size]
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(Afine_bigram(i))
        text_corrupt.append(l)
    
    for i in open_texts:
        l = []
        for j in range(len(i) // 2):
            l.append(i[2*j:2*j+2])
        
        if len(list(set(l) & set(B_frq))) == len(B_frq):
            H1 += 1
    
    for i in text_corrupt:
        l = []
        for j in range(len(i[0]) // 2):
            l.append(i[0][2*j:2*j+2])
            
        if len(list(set(l) & set(B_frq))) != len(B_frq):
            H0 += 1
        
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN    

In [423]:
criterion_20_afine_bigram(texts_10_10000, 9)

(0.0, 1.0)

In [425]:
criterion_20_afine_bigram(texts_100_10000, 9)

(0.0002, 1.0)

In [426]:
criterion_20_afine_bigram(texts_1000_10000, 9)

(0.922, 1.0)

In [427]:
criterion_20_afine_bigram(texts_10000_1000, 9)

(1.0, 0.999)

criterion_20(5)

In [430]:
def criterion_20_uniform_bigram(open_texts, B_frq_size, L):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    B_frq = B[:B_frq_size]
    
    text_corrupt = []
    for i in range(len(open_texts)):
        l = []
        l.append(uniform_bigram(L))
        text_corrupt.append(l)
    
    for i in open_texts:
        l = []
        for j in range(len(i) // 2):
            l.append(i[2*j:2*j+2])
        
        if len(list(set(l) & set(B_frq))) == len(B_frq):
            H1 += 1
    
    for i in text_corrupt:
        l = []
        for j in range(len(i[0]) // 2):
            l.append(i[0][2*j:2*j+2])
            
        if len(list(set(l) & set(B_frq))) != len(B_frq):
            H0 += 1
        
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [431]:
criterion_20_uniform_bigram(texts_10_10000, 9, 10)

(0.0, 1.0)

In [432]:
criterion_20_uniform_bigram(texts_100_10000, 9, 100)

(0.0002, 1.0)

In [433]:
criterion_20_uniform_bigram(texts_1000_10000, 9, 1000)

(0.922, 0.9998)

In [434]:
criterion_20_uniform_bigram(texts_10000_1000, 9, 10000)

(1.0, 0.073)

In [437]:
def criterion_20_s_bigram(open_texts, B_frq_size, L):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    B_frq = B[:B_frq_size]
    
    text_corrupt = []
    for i in range(len(open_texts)):
        l = []
        l.append(s_bigram(L))
        text_corrupt.append(l)
    
    for i in open_texts:
        l = []
        for j in range(len(i) // 2):
            l.append(i[2*j:2*j+2])
        
        if len(list(set(l) & set(B_frq))) == len(B_frq):
            H1 += 1
    
    for i in text_corrupt:
        l = []
        for j in range(len(i[0]) // 2):
            l.append(i[0][2*j:2*j+2])
            
        if len(list(set(l) & set(B_frq))) != len(B_frq):
            H0 += 1
        
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [438]:
criterion_20_s_bigram(texts_10_10000, 9, 10)

(0.0, 1.0)

In [439]:
criterion_20_s_bigram(texts_100_10000, 9, 100)

(0.0002, 1.0)

In [440]:
criterion_20_s_bigram(texts_1000_10000, 9, 1000)

(0.922, 1.0)

In [443]:
criterion_20_s_bigram(texts_10000_1000, 15, 10000)

(1.0, 1.0)

criterion_21(1)

In [470]:
def criterion_21_vigenere_r1_bigram(open_texts, B_frq_size, kf):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    B_frq = B[:B_frq_size]
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(VIGENERE_encryption_letter(i, key_initialization()[0]))
        text_corrupt.append(l)
    
    for i in open_texts:
        l = []
        for j in range(len(i) // 2):
            l.append(i[2*j:2*j+2])
        
        if len(list(set(l) & set(B_frq))) <= kf:
            H1 += 1
    
    for i in text_corrupt:
        l = []
        for j in range(len(i[0]) // 2):
            l.append(i[0][2*j:2*j+2])
            
        if len(list(set(l) & set(B_frq))) > kf:
            H0 += 1
    
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [471]:
criterion_21_vigenere_r1_bigram(texts_10_10000, 9, 2)

(0.9941, 0.0005)

In [472]:
criterion_21_vigenere_r1_bigram(texts_100_10000, 9, 2)

(0.2015, 0.0311)

In [473]:
criterion_21_vigenere_r1_bigram(texts_1000_10000, 9, 2)

(0.0, 0.3463)

In [474]:
criterion_21_vigenere_r1_bigram(texts_10000_1000, 9, 2)

(0.0, 0.925)

criterion_21(2)

In [475]:
def criterion_21_vigenere_r5_bigram(open_texts, B_frq_size, kf):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    B_frq = B[:B_frq_size]
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(VIGENERE_encryption_letter(i, key_initialization()[1]))
        text_corrupt.append(l)
    
    for i in open_texts:
        l = []
        for j in range(len(i) // 2):
            l.append(i[2*j:2*j+2])
        
        if len(list(set(l) & set(B_frq))) <= kf:
            H1 += 1
    
    for i in text_corrupt:
        l = []
        for j in range(len(i[0]) // 2):
            l.append(i[0][2*j:2*j+2])
            
        if len(list(set(l) & set(B_frq))) > kf:
            H0 += 1
    
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [476]:
criterion_21_vigenere_r5_bigram(texts_10_10000, 9, 2)

(0.9941, 0.0)

In [477]:
criterion_21_vigenere_r5_bigram(texts_100_10000, 9, 2)

(0.2015, 0.0072)

In [478]:
criterion_21_vigenere_r5_bigram(texts_1000_10000, 9, 2)

(0.0, 0.6326)

In [479]:
criterion_21_vigenere_r5_bigram(texts_10000_1000, 9, 2)

(0.0, 1.0)

criterion_21(3)

In [480]:
def criterion_21_vigenere_r10_bigram(open_texts, B_frq_size, kf):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    B_frq = B[:B_frq_size]
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(VIGENERE_encryption_letter(i, key_initialization()[2]))
        text_corrupt.append(l)
    
    for i in open_texts:
        l = []
        for j in range(len(i) // 2):
            l.append(i[2*j:2*j+2])
        
        if len(list(set(l) & set(B_frq))) <= kf:
            H1 += 1
    
    for i in text_corrupt:
        l = []
        for j in range(len(i[0]) // 2):
            l.append(i[0][2*j:2*j+2])
            
        if len(list(set(l) & set(B_frq))) > kf:
            H0 += 1
    
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [481]:
criterion_21_vigenere_r10_bigram(texts_10_10000, 9, 2)

(0.9941, 0.0)

In [482]:
criterion_21_vigenere_r10_bigram(texts_100_10000, 9, 2)

(0.2015, 0.008)

In [483]:
criterion_21_vigenere_r10_bigram(texts_1000_10000, 9, 2)

(0.0, 0.6294)

In [484]:
criterion_21_vigenere_r10_bigram(texts_10000_1000, 9, 2)

(0.0, 1.0)

criterion_21(4)

In [485]:
def criterion_21_afine_bigram(open_texts, B_frq_size, kf):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    B_frq = B[:B_frq_size]
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(Afine_bigram(i))
        text_corrupt.append(l)
    
    for i in open_texts:
        l = []
        for j in range(len(i) // 2):
            l.append(i[2*j:2*j+2])
        
        if len(list(set(l) & set(B_frq))) <= kf:
            H1 += 1
    
    for i in text_corrupt:
        l = []
        for j in range(len(i[0]) // 2):
            l.append(i[0][2*j:2*j+2])
            
        if len(list(set(l) & set(B_frq))) > kf:
            H0 += 1
    
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [486]:
criterion_21_afine_bigram(texts_10_10000, 9, 2)

(0.9941, 0.0)

In [487]:
criterion_21_afine_bigram(texts_100_10000, 9, 2)

(0.2015, 0.0066)

In [488]:
criterion_21_afine_bigram(texts_1000_10000, 9, 2)

(0.0, 0.2948)

In [489]:
criterion_21_afine_bigram(texts_10000_1000, 9, 2)

(0.0, 0.768)

criterion_21(5)

In [491]:
def criterion_21_uniform_bigram(open_texts, B_frq_size, kf, L):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    B_frq = B[:B_frq_size]
    
    text_corrupt = []
    for i in range(len(open_texts)):
        l = []
        l.append(uniform_bigram(L))
        text_corrupt.append(l)
    
    for i in open_texts:
        l = []
        for j in range(len(i) // 2):
            l.append(i[2*j:2*j+2])
        
        if len(list(set(l) & set(B_frq))) <= kf:
            H1 += 1
    
    for i in text_corrupt:
        l = []
        for j in range(len(i[0]) // 2):
            l.append(i[0][2*j:2*j+2])
            
        if len(list(set(l) & set(B_frq))) > kf:
            H0 += 1
    
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [492]:
criterion_21_uniform_bigram(texts_10_10000, 9, 2, 10)

(0.9941, 0.0)

In [493]:
criterion_21_uniform_bigram(texts_100_10000, 9, 2, 100)

(0.2015, 0.0072)

In [494]:
criterion_21_uniform_bigram(texts_1000_10000, 9, 2, 1000)

(0.0, 0.7486)

In [495]:
criterion_21_uniform_bigram(texts_10000_1000, 9, 2, 10000)

(0.0, 1.0)

criterion_21(6)

In [496]:
def criterion_21_s_bigram(open_texts, B_frq_size, kf, L):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    B_frq = B[:B_frq_size]
    
    text_corrupt = []
    for i in range(len(open_texts)):
        l = []
        l.append(s_bigram(L))
        text_corrupt.append(l)
    
    for i in open_texts:
        l = []
        for j in range(len(i) // 2):
            l.append(i[2*j:2*j+2])
        
        if len(list(set(l) & set(B_frq))) <= kf:
            H1 += 1
    
    for i in text_corrupt:
        l = []
        for j in range(len(i[0]) // 2):
            l.append(i[0][2*j:2*j+2])
            
        if len(list(set(l) & set(B_frq))) > kf:
            H0 += 1
    
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [497]:
criterion_21_s_bigram(texts_10_10000, 9, 2, 10)

(0.9941, 0.0)

In [498]:
criterion_21_s_bigram(texts_100_10000, 9, 2, 100)

(0.2015, 0.0093)

In [499]:
criterion_21_s_bigram(texts_1000_10000, 9, 2, 1000)

(0.0, 0.5876)

In [500]:
criterion_21_s_bigram(texts_10000_1000, 9, 2, 10000)

(0.0, 0.949)

criterion_22(1) !!!!!!

In [552]:
def criterion_22_vigenere_r1_bigram(open_texts, B_frq_size):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    B_frq = B[:B_frq_size]
    
    text_corrupt = []
    for i in open_texts:
        l = []
        l.append(VIGENERE_encryption_letter(i, key_initialization()[0]))
        text_corrupt.append(l)
    
    for i in range(len(text_corrupt)):
        tmp = disjoint_bigrams(text_corrupt[i][0])
        for t, v in tmp.items():
            if tmp[t] in B_frq and tmp[v] < disjoint_bigrams(TEXT)[0][tmp[t]][1]:
                H1 += 1
    
    for i in range(len(open_texts)):
        tmp = disjoint_bigrams(open_texts[i])
        for t, v in tmp.items():
            if tmp[t] in B_frq and tmp[v] < disjoint_bigrams(TEXT)[0][tmp[t]][1]:
                H0 += 1
    
    FP = H1 / len(open_texts)
    FN = H0 / len(open_texts)
    
    return FP, FN

In [553]:
criterion_22_vigenere_r1_bigram(texts_10_10000, 9)

(0.0, 0.0)

In [546]:
criterion_22_vigenere_r1_bigram(texts_100_10000, 9)

(0.0, 0.0)

In [547]:
criterion_22_vigenere_r1_bigram(texts_1000_10000, 9)

(0.0, 0.0)

criterion_23(1)

In [556]:
def criterion_23_vigenere_r1_bigram(B_frq_size):
    FP = 0
    FN = 0
    
    H1 = 0
    H0 = 0
    
    B_frq = B[:B_frq_size]
    
    Kf = 0
    d = disjoint_bigrams(TEXT)
    for k, v in d.items():
        if d[k] in B_frq:
            Kf += d[v]
    
    return Kf
    
    

In [558]:
criterion_23_vigenere_r1_bigram(20)

0

In [399]:
t = 'авблагоу'

In [400]:
print(list(t))

['а', 'в', 'б', 'л', 'а', 'г', 'о', 'у']


In [402]:
l = []
for i in range(len(t) // 2):
    l.append(t[2*i:2*i + 2])

print(l)
    

['ав', 'бл', 'аг', 'оу']
