# Vim Sensei

Make Vim discoverable!

In [None]:
from collections import Counter
from pprint import pprint

In [None]:
# Caret-notation lookup table for the ASCII control characters.
#
# Each key is the *escaped spelling* of a control byte: a literal backslash,
# the letter "x" and two lowercase hex digits (four characters in total), not
# the control character itself. Each value is "^" followed by the character
# whose code is the control code with bit 0x40 flipped, e.g. 0x01 -> "^A",
# 0x1b -> "^[" and 0x7f -> "^?".
ascii_control_codes = {
    '\\x{:02x}'.format(code): '^' + chr(code ^ 0x40)
    for code in list(range(0x20)) + [0x7f]
}

def human_readable(token):
    """Return the caret-notation form of ``token`` when it has one.

    A token that is a key of ``ascii_control_codes`` is replaced by the value
    stored for it; any other token is handed back unchanged.
    """
    return ascii_control_codes.get(token, token)

In [None]:
# Tally how often each token, each pair of consecutive tokens and each triple
# of consecutive tokens occurs in vim.log (one token per line).
frequencies_for_tokens = Counter()
frequencies_for_bigrams = Counter()
frequencies_for_trigrams = Counter()

# The two most recently seen tokens; None means "nothing seen at this distance yet".
preprevious_token = previous_token = None

with open('vim.log', 'r') as log_file:
    # Drop the newline from each line and map escaped control codes to caret
    # notation lazily, so the log is still processed one line at a time.
    readable_tokens = (
        human_readable(raw_line.replace('\n', '')) for raw_line in log_file
    )
    for token in readable_tokens:
        frequencies_for_tokens[token] += 1
        if previous_token is not None:
            # N-grams are keyed by plain string concatenation of their tokens.
            bigram = previous_token + token
            frequencies_for_bigrams[bigram] += 1
            # The older slot is only ever filled from the newer one, so it can
            # be set only when the newer one is set as well.
            if preprevious_token is not None:
                frequencies_for_trigrams[preprevious_token + bigram] += 1
        # Slide the window forward by one token.
        preprevious_token, previous_token = previous_token, token

In [15]:
# Show the ten most frequent single tokens with their counts.
pprint(frequencies_for_tokens.most_common(10))

[('j', 53763),
 ('k', 38341),
 ('w', 27019),
 ('n', 14119),
 (':', 13744),
 ('0', 11727),
 ('l', 11093),
 ('z', 10013),
 (' ', 9536),
 ('g', 8082)]


In [16]:
# Show the ten most frequent bigrams (two consecutive tokens joined) with their counts.
pprint(frequencies_for_bigrams.most_common(10))

[('jj', 31268),
 ('kk', 21010),
 ('ww', 9193),
 ('nn', 7571),
 ('0w', 6993),
 ('^D^D', 5367),
 ('jk', 4747),
 ('kj', 3719),
 ('zz', 3692),
 ('ll', 3049)]


In [17]:
# Show the ten most frequent trigrams (three consecutive tokens joined) with their counts.
pprint(frequencies_for_trigrams.most_common(10))

[('jjj', 21271),
 ('kkk', 13747),
 ('nnn', 4991),
 ('www', 4924),
 ('^D^D^D', 3937),
 ('jjjjj', 2558),
 ('jjk', 2001),
 ('jkk', 1842),
 ('0w:', 1808),
 ('jjjj', 1803)]


## TODO

1. Break up clumped tokens