## imports

In [1]:
!pip install pycld2 



In [11]:
import math

import itertools
import sys
import textwrap

import pycld2 as cld2

## Vigenère Cipher's

In [12]:
def vigenere_cipher_unicode(text: str, key: str, encode=False): return ''.join([chr((ord(text[i]) - ((-1) if encode else 1) * ord(key[i % len(key)])) % 1114112) for i in range(len(text))])
def vigenere_cipher_alphabet(text: str, key: str, alphabet=None, encode=False): return ''.join([alphabet[(alphabet.index(text[i]) - ((-1) if encode else 1) * alphabet.index(key[i % len(key)])) % len(alphabet)] for i in range(len(text))])

## test

alphabet, key, plaintext  

In [13]:
def normalize(text, alphabet):
    return ''.join([ch.lower() for ch in text if ch.lower() in alphabet])

alphabet = "abcdefghijklmnopqrstuvwxyz \n"

key = normalize('somebody', alphabet)

plaintext = normalize("""but you didnt have to cut me off
make out like it never happened and that we were nothing
and i dont even need your love
but you treat me like a stranger and that feels so rough
no you didnt have to stoop so low
have your friends collect your records and then change your number
i guess that i dont need that though
now youre just somebody that i used to know
now youre just somebody that i used to know
now youre just somebody that i used to know""", alphabet)

In [14]:
print(f'KEY:\n{key}\n')
print(f'PLAINTEXT:\n{plaintext}\n')

cipher = vigenere_cipher_alphabet(plaintext, key, alphabet, encode=True)
print(f'ENCODE:\n{cipher}\n')
print(f"DECODE:\n{vigenere_cipher_alphabet(cipher, key, alphabet)}\n")

KEY:
somebody

PLAINTEXT:
but you didnt have to cut me off
make out like it never happened and that we were nothing
and i dont even need your love
but you treat me like a stranger and that feels so rough
no you didnt have to stoop so low
have your friends collect your records and then change your number
i guess that i dont need that though
now youre just somebody that i used to know
now youre just somebody that i used to know
now youre just somebody that i used to know

ENCODE:
tgdczaxwvwprumkylskxpmfqjmyi
aibr mofmrqjmxmlsbejmziwsuwzo
tf
h
qozh
fkyjmgi
ihnwmzsuvljynmremlwvazx
syadmzifrbuegbcmayarpex
krqqfbibfbiwmxmlsbyqedvb
jahmmremwdsfkjfsooqe csaxcznzs
krqqruhofbdshqcuaboja t
erwbagdioyaqk ysmin szhtmfkbzqgum
kkdkvfqrnvekeorbpzszcdvdjyskapguwdgyffdcequeitebpzodcjmgkdfkrfsgwjvmx
fkkkutdoazwoaevfmmqifkwp hzericuvdpqwkytsgwjakooazxdagczaxnwmvytfboe qfpr
wjvmx
wbqispcuabgdagdoazwoaevfmmqifkwp hzericuvdpqwkytsgwjakooaz

DECODE:
but you didnt have to cut me off
make out like it never happ

## Kasiski examination


In [15]:
import re
import math

def kasiski(s, min_num = 3):
    out = ''

    matches = []
    found = {}
    for k in range(min_num, len(s) // 2):
        found[k] = {}
        shouldbreak = True
        for i in range(0, len(s) - k):
            v = s[i:(i+k)]
            if v not in found[k]:
                found[k][v] = 1
            else:
                found[k][v] += 1
                shouldbreak = False

        if shouldbreak:
            break

        for v in found[k]:
            if found[k][v] > 2:
                matches.append(v)

    print("Length  Count        Word")
    print("======  =====  ==========")
    keylens = []
    for v in matches:
        k = len(v)
        if k not in keylens:
            keylens.extend(range(k, k * 4, k))
        print(("%6d  %5d  %10s" % (k, found[k][v], v)))

    return out, keylens

In [16]:
out, keylens = kasiski(cipher)
print(out)

Length  Count        Word
     3      3         sgw
     3      3         gwj
     3      3         oaz
     4      3        sgwj



## solve vigenere

In [17]:
ENGLISH_FREQ = (0.0749, 0.0129, 0.0354, 0.0362, 0.1400, 0.0218, 0.0174, 0.0422, 0.0665, 0.0027, 0.0047,
                0.0357, 0.0339, 0.0674, 0.0737, 0.0243, 0.0026, 0.0614, 0.0695, 0.0985, 0.0300, 0.0116,
                0.0169, 0.0028, 0.0164, 0.0004)

def compare_freq(text, alphabet):
    if not text:
        return None
    text = [t for t in text]
    freq = [0] * (len(alphabet))
    total = float(len(text))
    for l in text:
        freq[alphabet.index(l)] += 1
    return sum(abs(f / total - E) for f, E in zip(freq, ENGLISH_FREQ))

def find_keys_vigenere(text, keylens=range(3, 20), alphabet=None):
    best_keys = []

    text_letters = [c for c in text.lower()]

    for key_length in keylens:
        key = [None] * key_length
        for key_index in range(key_length):
            letters = "".join(itertools.islice(text_letters, key_index, None, key_length))
            shifts = []
            for key_char in alphabet:
                shifts.append(
                    (compare_freq(vigenere_cipher_alphabet(letters, key_char, alphabet), 
                                  alphabet), 
                        key_char)
                )
            key[key_index] = min(shifts, key=lambda x: x[0])[1]
        best_keys.append("".join(key))
    best_keys.sort(key=lambda key: compare_freq(vigenere_cipher_alphabet(letters, key_char, alphabet), alphabet))
    return best_keys


In [18]:
best_keys = find_keys_vigenere(cipher, keylens, alphabet=alphabet)

In [19]:
for pkey in best_keys:
    decoded = vigenere_cipher_alphabet(cipher, pkey, alphabet)
    _, _, _, detected_language = cld2.detect(decoded,  returnVectors=True)
    if detected_language[0][3] == 'en':
        print(f'===KEY:{pkey}===\n{decoded}\n')

===KEY:somebody===
but you didnt have to cut me off
make out like it never happened and that we were nothing
and i dont even need your love
but you treat me like a stranger and that feels so rough
no you didnt have to stoop so low
have your friends collect your records and then change your number
i guess that i dont need that though
now youre just somebody that i used to know
now youre just somebody that i used to know
now youre just somebody that i used to know

