# 🔓 Five Ways to Crack a Vigenère Cipher
Brought to you by The Mad Doctor ("madness")

This notebook reviews five methods for breaking the Vigenère cipher. Each section contains the theory and working Python code.

## 🧠 Part 0: Setup

In [1]:
import math
import random
from itertools import product
from math import sqrt, log

ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

def char_to_index(c): return ALPHABET.index(c)
def index_to_char(i): return ALPHABET[i % 26]

## 🔐 Part 1: Vigenère Cipher Implementation

In [4]:
def encrypt(plaintext, key):
    return ''.join(
        ALPHABET[(char_to_index(p) + char_to_index(key[i % len(key)])) % 26]
        for i, p in enumerate(plaintext)
    )

def decrypt(ciphertext, key):
    return ''.join(
        ALPHABET[(char_to_index(c) - char_to_index(key[i % len(key)])) % 26]
        for i, c in enumerate(ciphertext)
    )

In [48]:
plaintext = "YOUREVERYSERIOUSIMAGINATIONTHERESOMETHINGMORETOBETALKEDOFISEEWHATYOUREABOUTYOUMAYHAVEGAINEDMYCONFIDENCEBUTYOUWILLNEVERMYCONSENTYOUSEEKTOENTRAPMEWITHWORDSOFHONOR"
key="KEY"
ciphertext=encrypt(plaintext, key)
ciphertext

'ISSBITOVWCIPSSSCMKKKGXERSSLDLCBIQYQCDLGXKKYVCDSZOXYVOCNSDSWCOAFKXWYYPOEZYYRISSWEWRETOKYSRCNQWMSLPMBORAOFSDCMEAGVPLOZCBQWMSLCILDCMEWCOORYILDVYZQCGMRRAMBHQYJFYRMB'

In [49]:
decrypt(ciphertext, key)

'YOUREVERYSERIOUSIMAGINATIONTHERESOMETHINGMORETOBETALKEDOFISEEWHATYOUREABOUTYOUMAYHAVEGAINEDMYCONFIDENCEBUTYOUWILLNEVERMYCONSENTYOUSEEKTOENTRAPMEWITHWORDSOFHONOR'

## 📊 Part 2: Frequency Tables

In [37]:
def build_monogram_freq(text):
    freqs = [0]*26
    for char in text:
        freqs[char_to_index(char)] += 1
    total = sum(freqs)
    return [f / total for f in freqs]

In [38]:
def build_tetragram_freq(text):
    freqs = [0] * (26**4)
    for i in range(len(text) - 3):
        idx = (char_to_index(text[i]) * 26**3 +
               char_to_index(text[i+1]) * 26**2 +
               char_to_index(text[i+2]) * 26 +
               char_to_index(text[i+3]))
        freqs[idx] += 1
    total = sum(freqs)
    return [f / total for f in freqs]

## 📈 Part 3: Fitness Function

In [39]:
def fitness(text, tetrafrequencies):
    result = 0
    for i in range(len(text) - 3):
        idx = (char_to_index(text[i]) * 26**3 +
               char_to_index(text[i+1]) * 26**2 +
               char_to_index(text[i+2]) * 26 +
               char_to_index(text[i+3]))
        y = tetrafrequencies[idx]
        result += log(y) if y > 0 else -15
    return result / (len(text) - 3)

## 📏 Part 4: Index of Coincidence (IoC)

In [40]:
def index_of_coincidence(text):
    counts = [0]*26
    for char in text:
        counts[char_to_index(char)] += 1
    N = sum(counts)
    numer = sum(f * (f-1) for f in counts)
    return 26 * numer / (N * (N - 1)) if N > 1 else 0

def estimate_period(ciphertext, max_period=20):
    for period in range(1, max_period+1):
        slices = [''] * period
        for i in range(len(ciphertext)):
            slices[i % period] += ciphertext[i]
        avg_ioc = sum(index_of_coincidence(s) for s in slices) / period
        if avg_ioc > 1.6:
            return period
    return None

## 🧨 Part 5: Five Attack Methods

### 🔍 Method 1: Brute Force

In [41]:
def brute_force(ciphertext, period, tetrafrequencies):
    for key_tuple in product(ALPHABET, repeat=period):
        key = ''.join(key_tuple)
        pt = decrypt(ciphertext, key)
        if fitness(pt, tetrafrequencies) > -10:
            return key, pt
    return None, None

### 📖 Method 2: Dictionary Attack

In [42]:
def dictionary_attack(ciphertext, wordlist, tetrafrequencies):
    for key in wordlist:
        pt = decrypt(ciphertext, key)
        if fitness(pt, tetrafrequencies) > -10:
            return key, pt
    return None, None

### 🔐 Method 3: Crib Attack

In [43]:
def crib_attack(ciphertext, crib):
    for i in range(len(ciphertext) - len(crib)):
        piece = ciphertext[i:i+len(crib)]
        decrypted_piece = decrypt(piece, crib)
        print(f"{i}: {decrypted_piece}")

### 🔁 Method 4: Variational Method

In [44]:
def variational_attack(ciphertext, period, tetrafrequencies, threshold=-10):
    key = ['A'] * period
    fit = -99
    while fit < threshold:
        K = key[:]
        x = random.randrange(period)
        for i in range(26):
            K[x] = ALPHABET[i]
            pt = decrypt(ciphertext, ''.join(K))
            F = fitness(pt, tetrafrequencies)
            if F > fit:
                key, fit = K[:], F
    return ''.join(key), decrypt(ciphertext, ''.join(key))

### 📐 Method 5: Statistics-Only Attack

In [45]:
def cosangle(x, y):
    numerator = sum(xi * yi for xi, yi in zip(x, y))
    return numerator / (sqrt(sum(xi**2 for xi in x)) * sqrt(sum(yi**2 for yi in y)))

In [46]:
def statistics_only_attack(ciphertext, period, monofrequencies):
    slices = [''] * period
    for i in range(len(ciphertext)):
        slices[i % period] += ciphertext[i]

    key = ['A'] * period
    for i in range(period):
        freqs = [0] * 26
        for c in slices[i]:
            freqs[char_to_index(c)] += 1
        total = sum(freqs)
        freqs = [f / total for f in freqs]
        for j in range(26):
            rotated = freqs[j:] + freqs[:j]
            if cosangle(rotated, monofrequencies) > 0.9:
                key[i] = ALPHABET[j]
                break
    return ''.join(key), decrypt(ciphertext, ''.join(key))

## 🧪 Example Usage

In [50]:
# Sample ciphertext and training text required
ciphertext = "ISSBITOVWCIPSSSCMKKKGXERSSLDLCBIQYQCDLGXKKYVCDSZOXYVOCNSDSWCOAFKXWYYPOEZYYRISSWEWRETOKYSRCNQWMSLPMBORAOFSDCMEAGVPLOZCBQWMSLCILDCMEWCOORYILDVYZQCGMRRAMBHQYJFYRMB"
#training_text = "YOURLONGENGLISHTEXTHERE"
training_text = load_clean_text("Pride_and_Prejudice.txt")
monofrequencies = build_monogram_freq(training_text)
tetrafrequencies = build_tetragram_freq(training_text)

period = estimate_period(ciphertext)
key, plaintext = statistics_only_attack(ciphertext, period, monofrequencies)
print(f"Key: {key}\nPlaintext: {plaintext}")

Key: KAA
Plaintext: YSSRITEVWSIPISSSMKAKGNERISLTLCRIQOQCTLGNKKOVCTSZEXYLOCDSDIWCEAFAXWOYPEEZOYRYSSMEWHETEKYIRCDQWCSLFMBERAEFSTCMUAGLPLEZCRQWCSLSILTCMUWCEOROILTVYPQCWMRHAMRHQOJFORMR


In [51]:
# Load your ciphertext here
#ciphertext = "..."
# Load frequency tables from English training text (e.g., "Pride and Prejudice")
monofrequencies = build_monogram_freq(training_text)
tetrafrequencies = build_tetragram_freq(training_text)

# Estimate period
period = estimate_period(ciphertext)

# Try various attacks
key, plaintext = statistics_only_attack(ciphertext, period, monofrequencies)
print(f"Key: {key}\nPlaintext: {plaintext}")


Key: KAA
Plaintext: YSSRITEVWSIPISSSMKAKGNERISLTLCRIQOQCTLGNKKOVCTSZEXYLOCDSDIWCEAFAXWOYPEEZOYRYSSMEWHETEKYIRCDQWCSLFMBERAEFSTCMUAGLPLEZCRQWCSLSILTCMUWCEOROILTVYPQCWMRHAMRHQOJFORMR


In [27]:
def clean_text(input_file, output_file):
    with open(input_file, "r", encoding="utf-8") as f:
        raw_text = f.read()

    # Strip header/footer
    start = raw_text.find("*** START OF THE PROJECT GUTENBERG EBOOK")
    end = raw_text.find("*** END OF THE PROJECT GUTENBERG EBOOK")
    text = raw_text[start:end]

    # Keep only uppercase letters
    clean = ''.join(filter(str.isalpha, text)).upper()

    with open(output_file, "w", encoding="utf-8") as f:
        f.write(clean)

clean_text("Pride_and_Prejudice.txt", "Pride_and_Prejudice_clean.txt")


In [28]:
def load_clean_text(filepath):
    with open(filepath, "r", encoding="utf-8") as f:
        raw_text = f.read()

    # Keep only uppercase letters
    clean = ''.join(filter(str.isalpha, raw_text)).upper()
    return clean

In [33]:
training_text = load_clean_text("Pride_and_Prejudice.txt")

monofrequencies = build_monogram_freq(training_text)
tetrafrequencies = build_tetragram_freq(training_text)


In [34]:
# Try various attacks
key, plaintext = statistics_only_attack(ciphertext, period, monofrequencies)
print(f"Key: {key}\nPlaintext: {plaintext}")

Key: AAA
Plaintext: ISSBGGZLCBXCHXFOVC


In [52]:
brute_force(ciphertext, period, tetrafrequencies)

('KEY',
 'YOUREVERYSERIOUSIMAGINATIONTHERESOMETHINGMORETOBETALKEDOFISEEWHATYOUREABOUTYOUMAYHAVEGAINEDMYCONFIDENCEBUTYOUWILLNEVERMYCONSENTYOUSEEKTOENTRAPMEWITHWORDSOFHONOR')