https://github.com/mokelly/wabbit_wappa/blob/master/examples/capitalization_demo.py

In [30]:
# -*- coding: utf-8 -*-
from __future__ import print_function, division, absolute_import, unicode_literals

"""
Demonstrate Wabbit Wappa by learning to tell capital letters from lowercase.
by Michael J.T. O'Kelly, 2014-04-02
"""

import string
import random
import time

from wabbit_wappa import *


# Number of letters drawn per example by get_example().  The value is odd,
# so uppercase and lowercase counts can never tie.
NUM_SAMPLES = 9


def get_example(num_samples=None):
    """Make an example for training and testing.

    Draws distinct letters at random (without replacement) from
    ``string.ascii_letters`` and labels the draw by its majority case.

    Args:
        num_samples: How many letters to draw.  When omitted (``None``),
            the module-level ``NUM_SAMPLES`` is used; it is looked up at
            call time.

    Returns:
        A tuple ``(label, features)`` where ``label`` is +1 if capital
        letters are the strict majority and -1 otherwise (ties included),
        and ``features`` is a list of the sampled letters.

    Raises:
        ValueError: Propagated from ``random.sample`` when ``num_samples``
            is negative or larger than the number of available letters.
    """
    if num_samples is None:
        num_samples = NUM_SAMPLES
    features = random.sample(string.ascii_letters, num_samples)
    # Every sampled character is an ASCII letter, so whatever is not
    # uppercase is lowercase; a single counting pass is enough.
    num_capitalized = sum(1 for letter in features if letter.isupper())
    num_lowercase = len(features) - num_capitalized
    label = 1 if num_capitalized > num_lowercase else -1
    return (label, features)


# Launch the learner and echo the call that created it.
print("Start a Vowpal Wabbit learner in logistic regression mode")
vw = VW(loss_function='logistic')
print("""vw = VW(loss_function='logistic')""")
# Show the command line used for the VW process.
print("VW command:", vw.command)
print()

print("Now generate 10 training examples, feeding them to VW one by one.")
for example_index in range(10):
    label, features = get_example()
    # Pick the description that matches the label before reporting it.
    message = ("Label {}: {} is mostly uppercase" if label > 0
               else "Label {}: {} is mostly lowercase")
    print(message.format(label, features))
    vw.send_example(label, features=features)
print()

print("How well trained is our model?  Let's make 100 tests.")
num_tests = 100
num_good_tests = 0
for trial in range(num_tests):
    label, features = get_example()
    # Only the features go to the model; the label is withheld.
    prediction = vw.get_prediction(features).prediction
    # The floating-point prediction counts as correct when its sign agrees
    # with the label's, i.e. when their product is positive.
    num_good_tests += 1 if label * prediction > 0 else 0
print("Correctly predicted", num_good_tests, "out of", num_tests)
print()

print("We can go on training, without restarting the process.  Let's train on 1,000 more examples.")
# Keep feeding fresh labelled examples to the same learner.
for step in range(1000):
    label, features = get_example()
    vw.send_example(label, features=features)
print()

print("Now how good are our predictions?")
num_tests = 100
num_good_tests = 0
for trial in range(num_tests):
    label, features = get_example()
    # Only the features go to the model; the label is withheld.
    prediction = vw.get_prediction(features).prediction
    # Score a hit when the floating-point prediction points the same way
    # as the label (positive product).
    num_good_tests += 1 if label * prediction > 0 else 0
print("Correctly predicted", num_good_tests, "out of", num_tests)
print()
# Save the trained model to a file, then close the first learner.
print("We can save the model at any point in the process.")
filename = 'capitalization.saved.model'
print("Saving now to", filename)
vw.save_model(filename)
vw.close()
print()

# Start a second learner, handing it the saved file via the 'i' argument.
print("We can reload our model using the 'i' argument:")
vw2 = VW(loss_function='logistic', i=filename)
print("""vw2 = VW(loss_function='logistic', i=filename)""")
reloaded_command = vw2.command
print("VW command:", reloaded_command)

print("How fast can we train and test?")
num_examples = 10000
# Build every example up front so that generating them is not part of
# what gets measured.
examples = [get_example() for example_index in range(num_examples)]
print("Training on", num_examples, "examples...")

Start a Vowpal Wabbit learner in logistic regression mode
vw = VW(loss_function='logistic')
VW command: vw --save_resume --quiet --loss_function logistic --predictions /dev/stdout

Now generate 10 training examples, feeding them to VW one by one.
Label -1: ['d', 'u', 'k', 'y', 'M', 'A', 'V', 'm', 'L'] is mostly lowercase
Label -1: ['e', 'n', 'y', 'I', 's', 'q', 'z', 'L', 'G'] is mostly lowercase
Label 1: ['Y', 'A', 'D', 'e', 'm', 'O', 'y', 'Z', 'k'] is mostly uppercase
Label -1: ['s', 'n', 'J', 'x', 'E', 'i', 'f', 'd', 'S'] is mostly lowercase
Label 1: ['i', 'E', 'b', 'd', 'p', 'I', 'K', 'D', 'F'] is mostly uppercase
Label 1: ['f', 'C', 'Y', 'g', 'W', 'O', 'I', 'M', 'V'] is mostly uppercase
Label -1: ['H', 'y', 'U', 'a', 'i', 'b', 'f', 's', 'g'] is mostly lowercase
Label -1: ['p', 'C', 'v', 'A', 'd', 'k', 'Q', 'K', 'y'] is mostly lowercase
Label -1: ['O', 'I', 'c', 'G', 'd', 'o', 'f', 'l', 'y'] is mostly lowercase
Label -1: ['q', 'W', 'k', 'M', 'R', 'V', 'd', 'i', 'u'] is mostly lowerc

In [7]:
# Display the list of (label, features) tuples held in `examples`.
examples

[(1, ['E', 'e', 'F', 'p', 'c', 'V', 'x', 'G', 'A']),
 (1, ['U', 'D', 'S', 'd', 'H', 'a', 'k', 'e', 'G']),
 (-1, ['w', 'u', 'd', 'T', 'W', 'k', 'N', 'r', 'b']),
 (1, ['W', 'Z', 'C', 'M', 'z', 'N', 'o', 'f', 'u']),
 (1, ['I', 'c', 'S', 'E', 'y', 'r', 'B', 't', 'X']),
 (-1, ['k', 'w', 'T', 'a', 'b', 'A', 'Z', 'D', 'c']),
 (1, ['N', 'n', 't', 'Q', 'l', 'J', 'i', 'S', 'X']),
 (1, ['F', 'X', 'y', 'Z', 'C', 'c', 'b', 'L', 'f']),
 (1, ['L', 'E', 'o', 'P', 'W', 'T', 'S', 's', 'd']),
 (-1, ['s', 'r', 'b', 'j', 'm', 'T', 'y', 'q', 'd']),
 (-1, ['A', 'o', 'Z', 'p', 'u', 'x', 'e', 'N', 'U']),
 (1, ['A', 'T', 'Y', 'g', 'e', 'F', 'q', 'J', 'v']),
 (1, ['H', 's', 't', 'B', 'J', 'V', 'E', 'e', 'S']),
 (1, ['R', 'F', 't', 'S', 'a', 'c', 'V', 'j', 'K']),
 (1, ['Q', 'U', 'C', 'k', 'c', 'e', 'p', 'O', 'I']),
 (-1, ['q', 'C', 'b', 'p', 'y', 'H', 'T', 'o', 'v']),
 (1, ['I', 'b', 'q', 'L', 'T', 'W', 'J', 'v', 'N']),
 (-1, ['t', 'R', 's', 'w', 'E', 'z', 'o', 'M', 'T']),
 (1, ['S', 'L', 'n', 't', 'I', 'C', 'v',

In [17]:
# Walk the whole list; once the loop ends, `label` and `features` hold the
# values of the final example only.
# NOTE(review): the prints sit outside the loop, so just that last example
# is shown -- confirm this is intended rather than a missing indent.
for example in examples:
    label, features = example
for value in (label, features):
    print(value)

-1
['k', 'L', 'x', 'v', 'p', 'h', 't', 'r', 'w']


In [22]:
# Generate one example and show its label, then its letters.
l, f = get_example()
for value in (l, f):
    print(value)

-1
['X', 'y', 'x', 'C', 'K', 'c', 'u', 'M', 'a']


In [33]:
# Feed 1,000 freshly generated labelled examples to the reloaded learner.
for step in range(1000):
    label, features = get_example()
    vw2.send_example(label, features=features)

EOF: End Of File (EOF). Exception style platform.
<pexpect.pty_spawn.spawn object at 0x7f3b8c81de10>
command: /usr/local/bin/vw
args: [u'/usr/local/bin/vw', u'-i', u'capitalization.saved.model', u'--save_resume', u'--quiet', u'--loss_function', u'logistic', u'--predictions', u'/dev/stdout']
searcher: None
buffer (last 100 chars): ''
before (last 100 chars): ''
after: <class 'pexpect.exceptions.EOF'>
match: None
match_index: None
exitstatus: 0
flag_eof: True
pid: 6879
child_fd: 55
closed: False
timeout: 30
delimiter: <class 'pexpect.exceptions.EOF'>
logfile: None
logfile_read: None
logfile_send: None
maxread: 2000
ignorecase: False
searchwindowsize: None
delaybeforesend: 0
delayafterclose: 0.1
delayafterterminate: 0.1

In [31]:
# Record the start time just before the training loop (presumably compared
# against a later time.time() call outside this cell -- TODO confirm).
start_time = time.time()
for label, features in examples:
    # Turning off parse_result mode speeds up training when we
    # don't care about the result of each example.  Nothing is printed
    # inside this loop, so console output does not add to the timing.
    vw2.send_example(label, features=features, parse_result=False)


-1 ['f', 't', 'Q', 'c', 'C', 'h', 'M', 's', 'J']
1 ['D', 'U', 'O', 'H', 'L', 's', 'd', 'n', 'R']


EOF: End Of File (EOF). Exception style platform.
<pexpect.pty_spawn.spawn object at 0x7f3b8c81de10>
command: /usr/local/bin/vw
args: [u'/usr/local/bin/vw', u'-i', u'capitalization.saved.model', u'--save_resume', u'--quiet', u'--loss_function', u'logistic', u'--predictions', u'/dev/stdout']
searcher: None
buffer (last 100 chars): ''
before (last 100 chars): ''
after: <class 'pexpect.exceptions.EOF'>
match: None
match_index: None
exitstatus: 0
flag_eof: True
pid: 6879
child_fd: 55
closed: False
timeout: 30
delimiter: <class 'pexpect.exceptions.EOF'>
logfile: None
logfile_read: None
logfile_send: None
maxread: 2000
ignorecase: False
searchwindowsize: None
delaybeforesend: 0
delayafterclose: 0.1
delayafterterminate: 0.1