In [1]:
from __future__ import absolute_import, division, print_function
import numpy as np
import os
import sys
sys.path.append(os.path.abspath('./'))
import casimir.data.named_entity_recognition as ner  # noqa: E402
import casimir.optim as optim  # noqa: E402

In [2]:
# Paths to the three NER data splits, given relative to the current working directory.
train_file, dev_file, test_file = (
    'data/ner/eng.train',
    'data/ner/eng.testa',
    'data/ner/eng.testb',
)

# Fail fast if a split is missing. The checks run in train -> dev -> test order,
# so the assertion message names the first file that could not be found.
assert os.path.isfile(train_file), \
    'train_file {} does not exist'.format(train_file)
assert os.path.isfile(dev_file), \
    'dev_file {} does not exist'.format(dev_file)
assert os.path.isfile(test_file), \
    'test_file {} does not exist'.format(test_file)

In [3]:
# Step 1: Build one IFO object per data split (train / dev / test) from the input files.
# NOTE(review): "IFO" presumably means incremental first-order oracle -- confirm against the casimir docs.
train_ifo, dev_ifo, test_ifo = ner.create_ner_ifo_from_data(
    train_file, dev_file, test_file,
    smoothing_coefficient=10.0,
    num_highest_scores=5,
)

# Step 2: Regularization and starting point.
# The L2 coefficient is 0.1 divided by len(train_ifo); it is printed so the value shows up in the cell output.
l2reg = 0.1 / len(train_ifo)
print('l2reg:', l2reg)
l2penalty = optim.L2Penalty(l2reg)

# The dimension is hard-coded: all features are hashed onto these dimensions.
dim = 2**16 - 1
model = np.zeros(dim)  # all-zeros array of length `dim`, used as the initial model

# Step 3: Define the optimizer settings and run the optimization.
num_passes = 10

# Casimir-SVRG, constant smoothing (defined here but not the one passed to optimize_ifo below)
optim_options_1 = {
    'grad_lipschitz_parameter': 32.0,
    'initial_smoothing_coefficient': 10.0,
    'warm_start': 'prev-iterate',
}

# Casimir-SVRG, decaying smoothing (the settings passed to optimize_ifo below)
optim_options_2 = {
    'learning_rate': 2e-2,
    'initial_smoothing_coefficient': 2.0,
    'initial_moreau_coefficient': l2reg,
    'warm_start': 'extrapolation',
}

# Run optimization. The dev split is supplied; test_ifo is passed as None, so the
# test split built in Step 1 is not handed to the optimizer in this call.
# `model` is rebound to the returned model and `logs` holds the second return value.
model, logs = optim.optimize_ifo(
    model, train_ifo,
    algorithm='casimir_svrg',
    dev_ifo=dev_ifo,
    test_ifo=None,
    reg_penalties=[l2penalty],
    num_passes=num_passes,
    termination_criterion=None,
    seed=25,
    logging=True,
    verbose=True,
    optim_options=optim_options_2,
)

l2reg: 7.1219998575600035e-06
Epoch		Function		Dev_function	Dev_evaluation		Time
0.00		14.50188733		15.803692		0.019034		56.06


  average, "true nor predicted", 'F-score is', len(true_sum)


1.00		4.44737810		5.101916		0.620004		209.03
2.00		2.85205273		3.282753		0.711367		192.88
3.00		2.45146871		2.983348		0.729741		200.78
4.00		1.95302028		2.553582		0.746138		197.81
5.00		1.68331250		2.386698		0.756444		200.51
6.00		1.50544692		2.289160		0.762231		199.76
7.00		1.39805106		2.239929		0.767289		201.69
8.00		1.32876999		2.214173		0.769398		201.74
9.00		1.28147001		2.204602		0.772379		201.56
10.00		1.24629827		2.201327		0.772924		198.66
[[0, 14.501887329962253, 15.803692307692307, 0.01903370017292171, 56.05784797668457], [1, 4.447378098873719, 5.101915706138777, 0.6200035294846673, 209.03260374069214], [2, 2.852052727227906, 3.2827530112038774, 0.7113667971641784, 192.8810167312622], [3, 2.4514687083512543, 2.9833478940404707, 0.7297408803470422, 200.78296780586243], [4, 1.9530202849435019, 2.553582391069772, 0.7461378227028524, 197.81188893318176], [5, 1.6833124992135724, 2.3866976133212527, 0.7564435903837362, 200.5123734474182], [6, 1.505446924641066, 2.2891599289775617, 0

In [6]:
# Display the shape of the current `model` array as the cell's output.
np.shape(model)

(65535,)