In [7]:
import tensorflow as tf 
import numpy as np 
import sys
import codecs
from random import randint
import datetime
from sklearn.utils import shuffle
import pickle
import os



In [67]:
def createTrainMatrixes(conversationFileName,wList,maxLen):
    convDict =np.load(conversationFileName).item()
    #print(list(convDict.items())[0:100])
    numExamples = len(convDict)
    xTrain = np.zeros((numExamples, maxLen), dtype='int32')
    yTrain = np.zeros((numExamples, maxLen), dtype='int32')
    
    for index,(key,value) in enumerate(convDict.items()):
        encoderMessage = np.full((maxLen),wList.index('<pad>'),dtype = 'int32')
        decoderMessage = np.full((maxLen),wList.index('<pad>'),dtype = 'int32')
        
        keySplit = key.split()
        valueSplit = value.split()
        keyCount = len(keySplit)
        valueCount = len(valueSplit)
        #print(keyCount)
        if(keyCount > (maxLen -1) or valueCount > (maxLen - 1) or valueCount == 0 or keyCount == 0):
            continue
        for keyIndex, word in enumerate(keySplit):
            try:
                encoderMessage[keyIndex] = wList.index(word)
            except ValueError:
                encoderMessage[keyIndex] = 0
            
        encoderMessage[keyIndex + 1] = wList.index('<EOS>')
        
        for valueIndex,word in enumerate(valueSplit):
            try:
                decoderMessage[valueIndex] = wList.index(word)
            except ValueError:
                decoderMessage[valueIndex] = 0
        
        decoderMessage[valueIndex+1]=wList.index('<EOS>')
        xTrain[index] = encoderMessage
        yTrain[index] = decoderMessage
    
    yTrain = yTrain[~np.all(yTrain == 0, axis=1)]
    xTrain = xTrain[~np.all(xTrain == 0, axis=1)]
    numExamples = xTrain.shape[0]
    
    return numExamples,xTrain,yTrain

In [77]:
def getBatch(localXTrain,localYTrain,batch_size,maxLen):
    num = randint(0,numTrainingExamples - batch_size - 1)
    arr = localXTrain[num:num + batch_size]
    labels = localYTrain[num:num + batch_size]
    
    reversedList = list(arr)
    
    for index,example in enumerate(reversedList):
        reversedList[index] = list(reversed(example))
    
    laggedLabels = []
    EOStokenindex = wordList.index('<EOS>')
    padTokenIndex = wordList.index('<pad>')
    for example in labels:
        eosFound = np.argwhere(example == EOStokenindex)[0]
        shiftedExample = np.roll(example,1)
        shiftedExample[0] = EOStokenindex
        
        if(eosFound != (maxLen-1)):
            shiftedExample[eosFound+1]=padTokenIndex
        laggedLabels.append(shiftedExample)
    
    reversedList = np.asarray(reversedList).T.tolist()
    labels = labels.T.tolist()
    laggedLabels = np.asarray(laggedLabels).T.tolist()
    
    return reversedList, labels, laggedLabels

In [4]:
def translateToSentences(inputs, wList, encoder=False):
	EOStokenIndex = wList.index('<EOS>')
	padTokenIndex = wList.index('<pad>')
	numStrings = len(inputs[0])
	numLengthOfStrings = len(inputs)
	listOfStrings = [''] * numStrings
	for mySet in inputs:
		for index,num in enumerate(mySet):
			if (num != EOStokenIndex and num != padTokenIndex):
				if (encoder):
					# Encodings are in reverse!
					listOfStrings[index] = wList[num] + " " + listOfStrings[index]
				else:
					listOfStrings[index] = listOfStrings[index] + " " + wList[num]
	listOfStrings = [string.strip() for string in listOfStrings]
	return listOfStrings

In [5]:
def getTestInput(inputMessage, wList, maxLen):
	encoderMessage = np.full((maxLen), wList.index('<pad>'), dtype='int32')
	inputSplit = inputMessage.lower().split()
	for index,word in enumerate(inputSplit):
		try:
			encoderMessage[index] = wList.index(word)
		except ValueError:
			continue
	encoderMessage[index + 1] = wList.index('<EOS>')
	encoderMessage = encoderMessage[::-1]
	encoderMessageList=[]
	for num in encoderMessage:
		encoderMessageList.append([num])
	return encoderMessageList

In [6]:
def idsToSentence(ids, wList):
    EOStokenIndex = wList.index('<EOS>')
    padTokenIndex = wList.index('<pad>')
    myStr = ""
    listOfResponses=[]
    for num in ids:
        if (num[0] == EOStokenIndex or num[0] == padTokenIndex):
            listOfResponses.append(myStr)
            myStr = ""
        else:
            myStr = myStr + wList[num[0]] + " "
    if myStr:
        listOfResponses.append(myStr)
    listOfResponses = [i for i in listOfResponses if i]
    return listOfResponses

In [14]:
from collections import Counter

def processDataset(filename1,filename2):
    openedFile1 = codecs.open(filename1, 'r','utf-8')
    openedFile2 = codecs.open(filename2, 'r','utf-8')
    allLines1 = openedFile1.readlines()
    allLines2 = openedFile2.readlines()
    myStr = ""
    for line in allLines1:
        myStr += line
    for line in allLines2:
        myStr += line
    finalDict = Counter(myStr.split())
    return myStr, finalDict

In [30]:
def read_data(filename):
    with codecs.open(filename, 'r','utf-8') as myfile:
        data=myfile.read().split()
        return data
words = read_data("questions.txt")
words.extend(read_data("responses.txt"))
print(len(words))
print(words[100:11000])
wordList = words

459207
['пива', 'да', 'ахха', 'работают', 'наушники', 'а', 'завтра', 'может', 'доедем', 'вместе', 'конференция', 'в', 'школа', 'саня', 'ты', 'как', 'а', 'в', 'школе', 'ты', 'есть', 'мы', '30', 'жранем', 'дааааа', 'аххха', 'я', 'коварен', 'это', 'да', 'чего', 'чгк', 'нет', 'саааш', 'ты', 'знаешь', 'что', 'будет', 'на', 'контрольной', 'по', 'химии', 'в', 'смысле', 'определение', 'кррешетки', 'или', 'определение', 'типа', 'решетки', 'как', 'определить', 'тип', 'решетки', 'блиииин', 'спасибо', 'спасибо', 'что', 'ж', 'ты', 'в', 'контактах', 'такая', 'погода', 'ты', 'на', 'даче', 'что', 'так', 'я', 'в', 'москве', 'вчера', 'салют', 'видели', 'близко-близко)', 'московский)', 'что', 'как', 'ты', 'видел', 'из', 'окна', 'московский', 'салют', 'ты', 'пьян', 'как', 'ты', 'видел', 'из', 'питера', 'московский', 'салют', 'в', 'окно', 'да', 'там', 'джинсы', 'вибрировали', 'от', 'залпа', 'что', 'смеешься', 'от', 'ударной', 'волны', '13', 'прости', '13', 'не', 'тебе', 'спасибо', 'санек', 'как', 'раз', 'в

459207


In [68]:
batchSize = 24
maxEncoderLength = 15
maxDecoderLength = maxEncoderLength
lstmUnits = 112
embeddingDim = lstmUnits
numLayersLSTM = 3
numIterations = 500000

vocabSize = len(wordList)

wordVectors = np.load('final_embeddings.npy')
wordVecDimensions = wordVectors.shape[1]

padVector = np.zeros((1,wordVecDimensions),dtype='int32')
EOSVector = np.zeros((1,wordVecDimensions),dtype='int32')

wordVectors = np.concatenate((wordVectors,padVector), axis=0)
wordVectors = np.concatenate((wordVectors,EOSVector), axis=0)

wordList.append('<pad>')
wordList.append('<EOS>')

vocabSize += 2

numTrainingExamples, xTrain, yTrain = createTrainMatrixes('conversationDictionary.npy', wordList, maxEncoderLength)
np.save('Seq2SeqXTrain.npy', xTrain)
np.save('Seq2SeqYTrain.npy', yTrain)
print ('Finished creating training matrixes')



Finished creating training matrixes


In [82]:
tf.reset_default_graph()

#Input data
encoderInputs = [tf.placeholder(tf.int32, shape=(None,)) for i in range(maxEncoderLength)]
decoderLabels = [tf.placeholder(tf.int32, shape=(None,)) for i in range(maxDecoderLength)]
decoderInputs = [tf.placeholder(tf.int32, shape=(None,)) for i in range(maxDecoderLength)]
feedPrevious = tf.placeholder(tf.bool)

encoderLSTM = tf.nn.rnn_cell.BasicLSTMCell(lstmUnits,state_is_tuple=True)

decoderOutputs, decoderFinalState = tf.contrib.legacy_seq2seq.embedding_rnn_seq2seq(
                                        encoderInputs, decoderInputs, 
                                            encoderLSTM, vocabSize, vocabSize, 
                                                embeddingDim, feed_previous=feedPrevious)

decoderPrediction = tf.argmax(decoderOutputs,2)

lossWeights = [tf.ones_like(l,dtype=tf.float32) for l in decoderLabels]
loss = tf.contrib.legacy_seq2seq.sequence_loss(decoderOutputs,decoderLabels,
                                               lossWeights,vocabSize)
optimizer = tf.train.AdamOptimizer(1e-4).minimize(loss)


In [83]:
num_steps = 100001

sess = tf.Session()
saver = tf.train.Saver()
# If you're loading in a saved model, use the following
#saver.restore(sess, tf.train.latest_checkpoint('models/'))
sess.run(tf.global_variables_initializer())

# Uploading results to Tensorboard
tf.summary.scalar('Loss', loss)
merged = tf.summary.merge_all()
logdir = "tensorboard/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + "/"
writer = tf.summary.FileWriter(logdir, sess.graph)

# Some test strings that we'll use as input at intervals during training
encoderTestStrings = ["привет",
					"лол",
					"как дела?",
					"что делаешь?",
					"как ты?"
					]

zeroVector = np.zeros((1), dtype='int32')

for i in range(numIterations):

	encoderTrain, decoderTargetTrain, decoderInputTrain = getBatch(xTrain, yTrain, batchSize, maxEncoderLength)
	feedDict = {encoderInputs[t]: encoderTrain[t] for t in range(maxEncoderLength)}
	feedDict.update({decoderLabels[t]: decoderTargetTrain[t] for t in range(maxDecoderLength)})
	feedDict.update({decoderInputs[t]: decoderInputTrain[t] for t in range(maxDecoderLength)})
	feedDict.update({feedPrevious: False})

	curLoss, _, pred = sess.run([loss, optimizer, decoderPrediction], feed_dict=feedDict)
	
	if (i % 500 == 0):
		print('Current loss:{0} at iteration {1}'.format(curloss,i))
		summary = sess.run(merged, feed_dict=feedDict)
		writer.add_summary(summary, i)
	if (i % 1000 == 0 and i != 0):
		num = randint(0,len(encoderTestStrings) - 1)
		print (encoderTestStrings[num])
		inputVector = getTestInput(encoderTestStrings[num], wordList, maxEncoderLength);
		feedDict = {encoderInputs[t]: inputVector[t] for t in range(maxEncoderLength)}
		feedDict.update({decoderLabels[t]: zeroVector for t in range(maxDecoderLength)})
		feedDict.update({decoderInputs[t]: zeroVector for t in range(maxDecoderLength)})
		feedDict.update({feedPrevious: True})
		ids = (sess.run(decoderPrediction, feed_dict=feedDict))
		print (idsToSentence(ids, wordList))

	if (i % 10000 == 0 and i != 0):
		savePath = saver.save(sess, "models/pretrained_seq2seq.ckpt", global_step=i)

ResourceExhaustedError: OOM when allocating tensor with shape[112,459247]
	 [[Node: embedding_rnn_seq2seq/embedding_rnn_decoder/rnn_decoder/output_projection_wrapper/kernel/Adam_1/Assign = Assign[T=DT_FLOAT, _class=["loc:@embedding_rnn_seq2seq/embedding_rnn_decoder/rnn_decoder/output_projection_wrapper/kernel"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](embedding_rnn_seq2seq/embedding_rnn_decoder/rnn_decoder/output_projection_wrapper/kernel/Adam_1, embedding_rnn_seq2seq/embedding_rnn_decoder/rnn_decoder/output_projection_wrapper/kernel/Adam/Initializer/zeros)]]

Caused by op 'embedding_rnn_seq2seq/embedding_rnn_decoder/rnn_decoder/output_projection_wrapper/kernel/Adam_1/Assign', defined at:
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\ipykernel\kernelapp.py", line 478, in start
    self.io_loop.start()
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tornado\ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\ipykernel\kernelbase.py", line 281, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\ipykernel\kernelbase.py", line 232, in dispatch_shell
    handler(stream, idents, msg)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\ipykernel\kernelbase.py", line 397, in execute_request
    user_expressions, allow_stdin)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\ipykernel\ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\IPython\core\interactiveshell.py", line 2728, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\IPython\core\interactiveshell.py", line 2850, in run_ast_nodes
    if self.run_code(code, result):
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\IPython\core\interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-82-e3266ee5e5d5>", line 21, in <module>
    optimizer = tf.train.AdamOptimizer(1e-4).minimize(loss)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\training\optimizer.py", line 353, in minimize
    name=name)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\training\optimizer.py", line 474, in apply_gradients
    self._create_slots([_get_variable_for(v) for v in var_list])
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\training\adam.py", line 137, in _create_slots
    self._zeros_slot(v, "v", self._name)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\training\optimizer.py", line 796, in _zeros_slot
    named_slots[_var_key(var)] = slot_creator.create_zeros_slot(var, op_name)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\training\slot_creator.py", line 174, in create_zeros_slot
    colocate_with_primary=colocate_with_primary)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\training\slot_creator.py", line 148, in create_slot_with_initializer
    dtype)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\training\slot_creator.py", line 67, in _create_slot_var
    validate_shape=validate_shape)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 1203, in get_variable
    constraint=constraint)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 1092, in get_variable
    constraint=constraint)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 425, in get_variable
    constraint=constraint)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 394, in _true_getter
    use_resource=use_resource, constraint=constraint)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 805, in _get_single_variable
    constraint=constraint)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\ops\variables.py", line 213, in __init__
    constraint=constraint)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\ops\variables.py", line 346, in _init_from_args
    validate_shape=validate_shape).op
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\ops\state_ops.py", line 276, in assign
    validate_shape=validate_shape)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\ops\gen_state_ops.py", line 56, in assign
    use_locking=use_locking, name=name)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\framework\ops.py", line 2956, in create_op
    op_def=op_def)
  File "C:\Users\User\Anaconda3\envs\tensorflow-gpu\lib\site-packages\tensorflow\python\framework\ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[112,459247]
	 [[Node: embedding_rnn_seq2seq/embedding_rnn_decoder/rnn_decoder/output_projection_wrapper/kernel/Adam_1/Assign = Assign[T=DT_FLOAT, _class=["loc:@embedding_rnn_seq2seq/embedding_rnn_decoder/rnn_decoder/output_projection_wrapper/kernel"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](embedding_rnn_seq2seq/embedding_rnn_decoder/rnn_decoder/output_projection_wrapper/kernel/Adam_1, embedding_rnn_seq2seq/embedding_rnn_decoder/rnn_decoder/output_projection_wrapper/kernel/Adam/Initializer/zeros)]]
