In [1]:
class VignereCipher:
    """Vigenere cipher over the 26 uppercase letters A-Z.

    Each message letter is shifted along ``plaintextSpace`` by the alphabet
    position of the corresponding key letter; the key repeats cyclically.
    Messages are upper-cased before processing. A message character that is
    not in ``plaintextSpace`` makes ``str.index`` raise ``ValueError``, so
    callers should strip non-letters first.
    """
    plaintextSpace = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

    def __init__(self, secretKey):
        """Store the upper-cased key and its length.

        Raises:
            ValueError: if ``secretKey`` is empty (an empty key would later
                fail with a modulo-by-zero while indexing the key).
        """
        if not secretKey:
            raise ValueError("secretKey must be a non-empty string")
        self.key = secretKey.upper()
        # Measure the stored key, not the raw argument, so that key and
        # length can never disagree.
        self.length = len(self.key)

    def __repr__(self):
        """Return (rather than print) the textual form of the cipher."""
        return "Secret Key: %s" % self.key

    def shiftLetters(self, shift, letter, right=True):
        """Shift ``letter`` by the alphabet position of the key letter ``shift``.

        Args:
            shift: key letter; its offset is ``ord(shift) - ord('A')``.
            letter: a character of ``plaintextSpace``.
            right: True shifts forward (encrypt), False backward (decrypt).

        Returns:
            The shifted letter, wrapping around the alphabet.

        Raises:
            ValueError: if ``letter`` is not in ``plaintextSpace``.
        """
        offset = ord(shift) - ord('A')
        if not right:
            offset = -offset
        position = self.plaintextSpace.index(letter) + offset
        return self.plaintextSpace[position % len(self.plaintextSpace)]

    def _applyKey(self, message, right):
        """Shift every letter of the upper-cased message by the repeating key."""
        return ''.join(
            self.shiftLetters(self.key[position % self.length], letter, right)
            for position, letter in enumerate(message.upper())
        )

    def encryptMessage(self, message):
        """Return the ciphertext for ``message`` (upper-cased first)."""
        return self._applyKey(message, True)

    def decryptMessage(self, message):
        """Return the plaintext for the ciphertext ``message`` (upper-cased first)."""
        return self._applyKey(message, False)

        

In [2]:
# Round-trip check: decrypting an encrypted message should give back the
# original text, upper-cased (encryptMessage upper-cases its input).
test1 = VignereCipher("CRYPTO")
test1.decryptMessage(test1.encryptMessage("thisisamessage"))

'THISISAMESSAGE'

In [3]:
# Reference letter frequencies (in percent) used as the "typical English"
# distribution, stored as [lowercase letter, percentage] rows.
LETTER_FREQ = [
    ['a', 8.167], ['b', 1.492], ['c', 2.782], ['d', 4.253], ['e', 12.70],
    ['f', 2.228], ['g', 2.015], ['h', 6.094], ['i', 6.966], ['j', 0.153],
    ['k', 0.772], ['l', 4.025], ['m', 2.406], ['n', 6.749], ['o', 7.507],
    ['p', 1.929], ['q', 0.095], ['r', 5.987], ['s', 6.327], ['t', 9.056],
    ['u', 2.758], ['v', 0.978], ['w', 2.360], ['x', 0.150], ['y', 1.974],
    ['z', 0.074],
]

# The same table as (UPPERCASE letter, percentage rounded to 4 places)
# tuples, sorted by letter.
LETTER_FREQUENCY = sorted(
    (letter.upper(), round(percent, 4)) for letter, percent in LETTER_FREQ
)

In [23]:
from collections import Counter

class CrackVignere:
    """Guess the key of a Vigenere-encrypted message by frequency analysis.

    The ciphertext is split into interleaved groups, one per key position.
    Each group's letter frequencies are rank-matched against the reference
    table ``letterFrequency`` to score candidate key lengths and to derive
    the per-position shift.
    """
    letterFrequency = LETTER_FREQUENCY
    capsletters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

    def __init__(self, message):
        """Keep ``message`` (used exactly as given) as the ciphertext."""
        self.ciphertext = message

    def lengthProbability(self, length):
        """
        Score how well a candidate key ``length`` fits the ciphertext.

        Note: Requires ciphertext be long enough to have a decent sample of letters
        1. Divide ciphertext letters into l groups G0,G1,...,Gl-1
            where ith ciphertext letter is placed in group Gi mod l
        2. Compare the letter frequencies of each group with the reference
            distribution in ``letterFrequency``

        Despite the name, the returned value is an error, not a probability:
        the mean over all groups of the squared-difference error computed by
        ``distributionError``. Lower means a better fit.

        Raises:
            ValueError: if ``length`` is smaller than 1.
        """
        # Possibly use Kolmogorov-Smirnov Test, though message size may be too small
        if length < 1:
            raise ValueError("length must be a positive integer")

        grouped = self.groupList(self.ciphertext, length)
        errors = [
            self.distributionError(self.letterFrequency,
                                   self.relativeFrequency(group))[1]
            for group in grouped
        ]
        return sum(errors) / float(len(errors))

    def groupList(self, lst, size):
        """Split ``lst`` into ``size`` interleaved slices (item i goes to slice i % size)."""
        return [lst[start::size] for start in range(size)]

    def relativeFrequency(self, sample_dist):
        """Return ``[(symbol, percentage)]`` for the symbols occurring in ``sample_dist``.

        Symbols that do not occur are not listed; an empty sample gives ``[]``.
        """
        frequency = Counter(sample_dist)
        # values()/items() exist on both Python 2 and 3 (iter* is 2-only).
        total_count = sum(frequency.values())
        return [(symbol, 100.0 * float(count) / total_count)
                for symbol, count in frequency.items()]

    def distributionError(self, alst1, alst2):
        """Rank-match two ``(symbol, frequency)`` lists and measure their distance.

        Both lists are sorted by frequency and paired rank by rank. When the
        lists differ in length they are aligned at the most frequent end, and
        every unpaired entry is compared against a frequency of 0. For lists
        of equal length the result is the plain rank-by-rank pairing.

        Returns:
            ``(mapper, dist)`` where ``mapper`` is the list of paired symbols
            ``(symbol_from_alst1, symbol_from_alst2)`` in ascending frequency
            order and ``dist`` is the sum of squared frequency differences.
        """
        a1 = sorted(alst1, key=lambda x: x[1])
        a2 = sorted(alst2, key=lambda x: x[1])
        paired = min(len(a1), len(a2))
        # Explicit split points: a negative slice such as a1[-0:] would
        # return the whole list instead of an empty one.
        cut1 = len(a1) - paired
        cut2 = len(a2) - paired
        matched = list(zip(a1[cut1:], a2[cut2:]))

        dist = sum((left[1] - right[1]) ** 2 for left, right in matched)
        dist += sum(entry[1] ** 2 for entry in a1[:cut1])
        dist += sum(entry[1] ** 2 for entry in a2[:cut2])
        return [(left[0], right[0]) for left, right in matched], dist

    def guessKeyLength(self, minLength=2, maxLength=20):
        """Rank candidate key lengths, best fit first.

        Args:
            minLength: smallest length tried (inclusive).
            maxLength: upper bound of the lengths tried (exclusive).

        Returns:
            Sorted list of ``(error rounded to 3 places, length)`` tuples.
        """
        return sorted((round(self.lengthProbability(length), 3), length)
                      for length in range(minLength, maxLength))

    def guessSecretKey(self):
        """Print the three best key lengths and return the key guessed for the best.

        For each key position the group's letters are rank-matched against
        ``letterFrequency``; the most common (cipher - reference) letter
        offset is taken as that position's key letter.
        """
        top3lengths = self.guessKeyLength()[:3]
        # Single %-formatted argument prints identically on Python 2 and 3.
        print("Top 3 lengths: %s" % (top3lengths,))

        toplength = top3lengths[0][1]
        grouped = self.groupList(self.ciphertext, toplength)

        keyLetters = []
        for group in grouped:
            mapper, _error = self.distributionError(
                self.relativeFrequency(group), self.letterFrequency)
            keyLetters.append(self.capsletters[int(self.modeLetterDiff(mapper))])
        return ''.join(keyLetters)

    def modeLetterDiff(self, mapper):
        """Return the most common ``(ord(first) - ord(second)) % 26`` over ``mapper``.

        Raises:
            ValueError: if ``mapper`` is empty.
        """
        differences = [(ord(pair[0]) - ord(pair[1])) % 26 for pair in mapper]
        if not differences:
            raise ValueError("mapper is empty: no letters to compare")
        return Counter(differences).most_common(1)[0][0]

        

In [24]:
capsletters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

# `with` closes the file even if reading raises.
with open('letters.txt', 'r') as letters_file:
    raw_text = letters_file.read()

# Keep only A-Z after upper-casing. ''.join(...) yields a string on both
# Python 2 and 3 (filter() on a string returns an iterator on Python 3,
# which cannot be sliced or passed to .upper()).
sample_msg = ''.join(ch for ch in raw_text.upper() if ch in capsletters)

vignere = VignereCipher("wankerdanker")
cipher = vignere.encryptMessage(sample_msg)

In [25]:
# Spot-check the first ten letters: plaintext, ciphertext, and that
# ciphertext decrypted again.
sample_msg[:10], cipher[:10], vignere.decryptMessage(cipher[:10])

('LETTERSBYT', 'HEGDIIVBLD', 'LETTERSBYT')

In [26]:
# Build the cracker from the ciphertext only; it is not given the key.
cracker = CrackVignere(cipher)

In [27]:
# Prints the three best-scoring key lengths, then returns the key guessed
# for the best one.
cracker.guessSecretKey()

Top 3 lengths: [(2.003, 12), (8.059, 6), (8.105, 18)]


'WANKERDANKER'

In [28]:
# Nicely done.

#### Stuff that is Rough

In [None]:
# Scratch: offset from 'J' to 'Z', reduced modulo the 26-letter alphabet.
(ord('Z')  - ord('J')) % 26

In [11]:
# Hand-pasted list of 26 letter pairs. The second letters run from the rarest
# to the most common letter of LETTER_FREQ, so this looks like a `mapper`
# returned by distributionError(group, LETTER_FREQUENCY) -- TODO confirm.
a = [('B', 'Z'), ('S', 'Q'), ('L', 'X'), ('Z', 'J'), ('M', 'K'), ('X', 'V'), ('D', 'B'), ('I', 'P'), ('Y', 'Y'), ('A', 'G'), ('R', 'F'), ('O', 'W'), ('H', 'M'), ('W', 'U'), ('E', 'C'), ('F', 'L'), ('N', 'D'), ('J', 'R'), ('T', 'H'), ('U', 'S'), ('P', 'N'), ('C', 'I'), ('K', 'O'), ('Q', 'A'), ('V', 'T'), ('G', 'E')]

In [12]:
# Most common (first - second) letter offset, modulo 26, over the pairs in `a`.
Counter((ord(first) - ord(second)) % 26 for first, second in a).most_common(1)

[(2, 11)]

In [None]:
def relativeFrequency(sample_dist):
    """Return ``[(symbol, percentage)]`` for the symbols occurring in ``sample_dist``.

    Percentages are relative to the total number of items. Symbols that do
    not occur are not listed; an empty sample gives ``[]``.
    """
    frequency = Counter(sample_dist)
    # values()/items() work on Python 2 and 3; itervalues()/iteritems()
    # only exist on Python 2.
    total_count = sum(frequency.values())
    return [(symbol, 100.0 * float(count) / total_count)
            for symbol, count in frequency.items()]

def distributionError(alst1, alst2):
    """Rank-match two ``(symbol, frequency)`` lists and measure their distance.

    Both lists are sorted by frequency and paired rank by rank. When the
    lists differ in length they are aligned at the most frequent end, and
    every unpaired entry is compared against a frequency of 0 (previously a
    shorter first list raised IndexError and a shorter second list was
    paired with the rarest entries of the first). For lists of equal length
    the result is the plain rank-by-rank pairing.

    Returns:
        ``(mapper, dist)`` where ``mapper`` is the list of paired symbols
        ``(symbol_from_alst1, symbol_from_alst2)`` in ascending frequency
        order and ``dist`` is the sum of squared frequency differences.
    """
    a1 = sorted(alst1, key=lambda x: x[1])
    a2 = sorted(alst2, key=lambda x: x[1])
    paired = min(len(a1), len(a2))
    # Explicit split points: a negative slice such as a1[-0:] would return
    # the whole list instead of an empty one.
    cut1 = len(a1) - paired
    cut2 = len(a2) - paired
    matched = list(zip(a1[cut1:], a2[cut2:]))

    dist = sum((left[1] - right[1]) ** 2 for left, right in matched)
    dist += sum(entry[1] ** 2 for entry in a1[:cut1])
    dist += sum(entry[1] ** 2 for entry in a2[:cut2])
    return [(left[0], right[0]) for left, right in matched], dist

In [None]:
# Every 6th ciphertext letter starting at index 0, i.e. the first group
# under an assumed key length of 6.
# NOTE(review): the stride 6 is hard-coded; presumably left over from an
# experiment with a 6-letter key -- TODO confirm.
letterC = cipher[0::6]
# Percentage of each letter within that group.
cShiftFreq = relativeFrequency(letterC)
# Rank-match the reference table against the group's frequencies.
mapper, error = distributionError(LETTER_FREQUENCY, cShiftFreq)

In [None]:
# Display the letter pairs returned by distributionError.
mapper

In [None]:
# Display the per-letter percentages of the sampled group.
cShiftFreq

In [None]:
# Six hand-pasted lists of 26 letter pairs. In every list the first letters
# run from the rarest to the most common letter of LETTER_FREQ, so these look
# like `mapper` results of distributionError(LETTER_FREQUENCY, groupFreq),
# one per key position of a 6-letter key -- TODO confirm provenance.
guesses = [
[('Z', 'H'), ('Q', 'Y'), ('X', 'R'), ('J', 'F'), ('K', 'S'), ('V', 'D'), ('B', 'J'), ('P', 'O'), ('Y', 'E'), ('G', 'G'), ('F', 'X'), ('W', 'U'), ('M', 'N'), ('U', 'C'), ('C', 'K'), ('L', 'L'), ('D', 'T'), ('R', 'P'), ('H', 'Z'), ('S', 'A'), ('N', 'V'), ('I', 'I'), ('O', 'Q'), ('A', 'W'), ('T', 'B'), ('E', 'M')],
[('Z', 'N'), ('Q', 'E'), ('X', 'X'), ('J', 'L'), ('K', 'Y'), ('V', 'J'), ('B', 'P'), ('P', 'U'), ('Y', 'K'), ('G', 'M'), ('F', 'D'), ('W', 'A'), ('M', 'T'), ('U', 'I'), ('C', 'Q'), ('L', 'R'), ('D', 'Z'), ('R', 'V'), ('H', 'F'), ('S', 'G'), ('N', 'B'), ('I', 'O'), ('O', 'W'), ('A', 'C'), ('T', 'H'), ('E', 'S')],
[('Z', 'U'), ('Q', 'L'), ('X', 'E'), ('J', 'S'), ('K', 'F'), ('V', 'Q'), ('B', 'B'), ('P', 'W'), ('Y', 'R'), ('G', 'T'), ('F', 'K'), ('W', 'H'), ('M', 'A'), ('U', 'P'), ('C', 'X'), ('L', 'Y'), ('D', 'G'), ('R', 'C'), ('H', 'M'), ('S', 'N'), ('N', 'I'), ('I', 'V'), ('O', 'D'), ('A', 'J'), ('T', 'O'), ('E', 'Z')],
[('Z', 'M'), ('Q', 'D'), ('X', 'W'), ('J', 'K'), ('K', 'X'), ('V', 'I'), ('B', 'O'), ('P', 'T'), ('Y', 'J'), ('G', 'L'), ('F', 'C'), ('W', 'S'), ('M', 'Z'), ('U', 'H'), ('C', 'P'), ('L', 'Q'), ('D', 'Y'), ('R', 'U'), ('H', 'E'), ('S', 'F'), ('N', 'A'), ('I', 'N'), ('O', 'V'), ('A', 'B'), ('T', 'G'), ('E', 'R')],
[('Z', 'J'), ('Q', 'A'), ('X', 'T'), ('J', 'H'), ('K', 'U'), ('V', 'F'), ('B', 'L'), ('P', 'Q'), ('Y', 'G'), ('G', 'I'), ('F', 'Z'), ('W', 'W'), ('M', 'P'), ('U', 'E'), ('C', 'M'), ('L', 'N'), ('D', 'V'), ('R', 'R'), ('H', 'B'), ('S', 'C'), ('N', 'X'), ('I', 'K'), ('O', 'S'), ('A', 'Y'), ('T', 'D'), ('E', 'O')], 
[('Z', 'W'), ('Q', 'N'), ('X', 'G'), ('J', 'U'), ('K', 'H'), ('V', 'S'), ('B', 'Y'), ('P', 'D'), ('Y', 'T'), ('G', 'V'), ('F', 'M'), ('W', 'J'), ('M', 'C'), ('U', 'R'), ('C', 'Z'), ('L', 'A'), ('D', 'I'), ('R', 'E'), ('H', 'O'), ('S', 'P'), ('N', 'K'), ('I', 'X'), ('O', 'F'), ('A', 'L'), ('T', 'Q'), ('E', 'B')]
]

In [None]:
# Print the mean letter offset of every pasted mapping.
# NOTE: `mld` is defined in a later cell; that cell has to be run first.
for guess in guesses:
    # print(...) with one argument behaves the same on Python 2 and 3.
    print(mld(guess))

In [None]:
def mld(mapper):
    """Return the mean of ``(ord(first) - ord(second)) % 26`` over ``mapper``.

    The mean is computed directly instead of through a ``mean`` helper,
    which no visible cell imports or defines.

    Args:
        mapper: sequence of two-letter pairs.

    Returns:
        The mean offset as a float.

    Raises:
        ValueError: if ``mapper`` is empty.
    """
    differences = [(ord(pair[0]) - ord(pair[1])) % 26 for pair in mapper]
    if not differences:
        raise ValueError("mapper must contain at least one letter pair")
    return sum(differences) / float(len(differences))

In [None]:
# NOTE(review): `x` is not assigned in any visible cell, so this raises
# NameError on a fresh kernel -- TODO confirm what it was meant to inspect.
mld(x)

In [None]:
# Print the 0-based alphabet index of each letter of "CIPHER".
for i in "CIPHER":
    # print(...) with one argument behaves the same on Python 2 and 3.
    print(ord(i) - ord('A'))

In [None]:
# Scratch: with a positive modulus Python's % is never negative, so this is 22.
-4 % 26