In [0]:
!apt-get install --no-install-recommends openmpi-bin libopenmpi-dev libopencv-dev python3-opencv python-opencv && ln -sf /usr/lib/x86_64-linux-gnu/libmpi_cxx.so /usr/lib/x86_64-linux-gnu/libmpi_cxx.so.1 && ln -sf /usr/lib/x86_64-linux-gnu/openmpi/lib/libmpi.so /usr/lib/x86_64-linux-gnu/openmpi/lib/libmpi.so.12 && ln -sf /usr/lib/x86_64-linux-gnu/libmpi.so /usr/lib/x86_64-linux-gnu/libmpi.so.12 && pip install cntk


In [2]:
import math
import numpy as np
import os
from __future__ import print_function # Use a function definition from future version (say 3.x from 2.7 interpreter)

import cntk as C
import cntk.tests.test_utils
cntk.tests.test_utils.set_device_from_pytest_env() # (only needed for our build system)
C.cntk_py.set_fixed_random_seed(1) # fix a random seed for CNTK components



In [0]:
from google.colab import files

In [7]:
files.upload()

Saving train.pair.tok.ctf.csv to train.pair.tok.ctf.csv
Saving valid.pair.tok.ctf.csv to valid.pair.tok.ctf.csv
Saving vocab_A.wl.csv to vocab_A.wl.csv
Saving vocab_Q.wl.csv to vocab_Q.wl.csv


{'train.pair.tok.ctf.csv': b'0\t|S0 1160:1\t|S1 956:1\r\n0\t|S0 569:1\t|S1 958:1\r\n0\t|S0 806:1\t|S1 55:1\r\n0\t|S0 357:1\t|S1 956:1\r\n0\t|S0 746:1\t|S1 787:1\r\n0\t|S0 455:1\t|S1 7:1\r\n0\t|S0 532:1\t\r\n1\t|S0 1154:1\t|S1 356:1\r\n1\t|S0 80:1\t|S1 7:1\r\n1\t|S0 61:1\t|S1 356:1\r\n1\t|S0 1052:1\t|S1 356:1\r\n1\t|S0 700:1\t|S1 687:1\r\n1\t|S0 357:1\t|S1 356:1\r\n1\t|S0 504:1\t\r\n1\t|S0 126:1\t\r\n1\t|S0 549:1\t\r\n2\t|S0 1157:1\t|S1 47:1\r\n2\t|S0 337:1\t|S1 375:1\r\n2\t|S0 357:1\t|S1 242:1\r\n2\t|S0 803:1\t|S1 876:1\r\n2\t\t|S1 889:1\r\n2\t\t|S1 920:1\r\n3\t|S0 1160:1\t|S1 93:1\r\n3\t|S0 569:1\t|S1 98:1\r\n3\t|S0 663:1\t|S1 97:1\r\n3\t|S0 1070:1\t|S1 93:1\r\n3\t|S0 357:1\t|S1 96:1\r\n3\t\t|S1 98:1\r\n4\t|S0 1160:1\t|S1 356:1\r\n4\t|S0 1140:1\t|S1 7:1\r\n4\t|S0 357:1\t|S1 356:1\r\n4\t|S0 549:1\t|S1 356:1\r\n4\t|S0 967:1\t|S1 687:1\r\n4\t|S0 1021:1\t|S1 171:1\r\n5\t|S0 1154:1\t|S1 685:1\r\n5\t|S0 1112:1\t|S1 691:1\r\n5\t|S0 321:1\t|S1 315:1\r\n5\t|S0 357:1\t|S1 315:1\r\n5\t|S0 467:1\

In [0]:
#train.pair.tok.ctf.csv
#valid.pair.tok.ctf.csv
#vocab_A.wl.csv
#vocab_A.wl.csv

In [0]:
import pandas as pd

In [0]:
train_file  = pd.read_csv('train.pair.tok.ctf.csv')


In [0]:
validation_file = pd.read_csv('valid.pair.tok.ctf.csv')

In [0]:
# Define the vocabulary size (QRY-stands for question and ANS stands for answer)
QRY_SIZE = 1204
ANS_SIZE = 1019



In [0]:
# Create the containers for input feature (x) and the label (y)
qry = C.sequence.input_variable(QRY_SIZE)
ans = C.sequence.input_variable(ANS_SIZE)

In [0]:
# Create the containers for input feature (x) and the label (y)
axis_qry = C.Axis.new_unique_dynamic_axis('axis_qry')
qry = C.sequence.input_variable(QRY_SIZE, sequence_axis=axis_qry)

axis_ans = C.Axis.new_unique_dynamic_axis('axis_ans')
ans = C.sequence.input_variable(ANS_SIZE, sequence_axis=axis_ans)

In [0]:
EMB_DIM   = 25 # Embedding dimension
HIDDEN_DIM = 50 # LSTM dimension
DSSM_DIM = 25 # Dense layer dimension  
NEGATIVE_SAMPLES = 5
DROPOUT_RATIO = 0.2

In [0]:
def create_model(qry, ans):
    with C.layers.default_options(initial_state=0.1):
        qry_vector = C.layers.Sequential([
            C.layers.Embedding(EMB_DIM, name='embed'),
            C.layers.Recurrence(C.layers.LSTM(HIDDEN_DIM), go_backwards=False),
            C.sequence.last,
            C.layers.Dense(DSSM_DIM, activation=C.relu, name='q_proj'),
            C.layers.Dropout(DROPOUT_RATIO, name='dropout qdo1'),
            C.layers.Dense(DSSM_DIM, activation=C.tanh, name='q_enc')
        ])
        
        ans_vector = C.layers.Sequential([
            C.layers.Embedding(EMB_DIM, name='embed'),
            C.layers.Recurrence(C.layers.LSTM(HIDDEN_DIM), go_backwards=False),
            C.sequence.last,
            C.layers.Dense(DSSM_DIM, activation=C.relu, name='a_proj'),
            C.layers.Dropout(DROPOUT_RATIO, name='dropout ado1'),
            C.layers.Dense(DSSM_DIM, activation=C.tanh, name='a_enc')
        ])

    return {
        'query_vector': qry_vector(qry),
        'answer_vector': ans_vector(ans)
    }

# Create the model and store reference in `network` dictionary
network = create_model(qry, ans)

network['query'], network['axis_qry'] = qry, axis_qry
network['answer'], network['axis_ans'] = ans, axis_ans

In [0]:
def create_loss(vector_a, vector_b):
    qry_ans_similarity = C.cosine_distance_with_negative_samples(vector_a, \
                                                                 vector_b, \
                                                                 shift=1, \
                                                                 num_negative_samples=5)
    return 1 - qry_ans_similarity

In [0]:
# Model parameters
MAX_EPOCHS = 5
EPOCH_SIZE = 10000
MINIBATCH_SIZE = 50

In [0]:
# Create trainer
def create_trainer(reader, network):
    
    # Setup the progress updater
    progress_writer = C.logging.ProgressPrinter(tag='Training', num_epochs=MAX_EPOCHS)

    # Set learning parameters
    lr_per_sample     = [0.0015625]*20 + \
                        [0.00046875]*20 + \
                        [0.00015625]*20 + \
                        [0.000046875]*10 + \
                        [0.000015625]
    lr_schedule       = C.learning_parameter_schedule_per_sample(lr_per_sample, \
                                                 epoch_size=EPOCH_SIZE)
    mms               = [0]*20 + [0.9200444146293233]*20 + [0.9591894571091382]
    mm_schedule       = C.learners.momentum_schedule(mms, \
                                                     epoch_size=EPOCH_SIZE, \
                                                     minibatch_size=MINIBATCH_SIZE)
    l2_reg_weight     = 0.0002

    model = C.combine(network['query_vector'], network['answer_vector'])

    #Notify the network that the two dynamic axes are indeed same
    query_reconciled = C.reconcile_dynamic_axes(network['query_vector'], network['answer_vector'])
  
    network['loss'] = create_loss(query_reconciled, network['answer_vector'])
    network['error'] = None

    print('Using momentum sgd with no l2')
    dssm_learner = C.learners.momentum_sgd(model.parameters, lr_schedule, mm_schedule)

    network['learner'] = dssm_learner
 
    print('Using local learner')
    # Create trainer
    return C.Trainer(model, (network['loss'], network['error']), network['learner'], progress_writer)  

In [22]:
# Instantiate the trainer
trainer = create_trainer(train_file, network)

Using momentum sgd with no l2
Using local learner


In [0]:
# Train 
def do_train(network, trainer, train_source):
    # define mapping from intput streams to network inputs
    input_map = {
        network['query']: train_source.query,
        network['answer']: train_source.answer
        } 

    t = 0
    for epoch in range(MAX_EPOCHS):         # loop over epochs
        epoch_end = (epoch+1) * EPOCH_SIZE
        while t < epoch_end:                # loop over minibatches on the epoch
            data = train_source.next_minibatch(MINIBATCH_SIZE, input_map= input_map)  # fetch minibatch
            trainer.train_minibatch(data)               # update model with it
            t += MINIBATCH_SIZE

        trainer.summarize_training_progress()

In [28]:
do_train(network, trainer, train_file)

AttributeError: ignored