In [None]:
import os
os.environ['THEANO_FLAGS'] = 'floatX=float32,device=gpu'
#os.environ['THEANO_FLAGS'] = 'floatX=float32,device=gpu,optimizer=fast_compile'
os.environ["PATH"] += os.pathsep + "/usr/local/cuda/bin/"
#import sys
#sys.path.append('/data/fs4/home/bradh/')
import theano
import numpy as np
import random
from copy import copy

import learningfunctions

import blocks
from blocks.bricks import Linear, Softmax, Softplus, NDimensionalSoftmax, BatchNormalizedMLP, \
                                Rectifier, Logistic, Tanh, MLP
from blocks.bricks.recurrent import GatedRecurrent, LSTM
from blocks.bricks.parallel import Fork
from blocks.initialization import Constant, IsotropicGaussian, Identity, Uniform
from blocks.bricks.cost import BinaryCrossEntropy, CategoricalCrossEntropy
from blocks.filter import VariableFilter
from blocks.roles import PARAMETER
from blocks.graph import ComputationGraph
from six.moves import cPickle as pickle

import theano
from theano import tensor as T

In [None]:
import sys
# Raise the interpreter's recursion limit to 5000.
# NOTE(review): presumably required so the compiled Theano functions can be
# pickled by the training loop without hitting RecursionError — confirm.
sys.setrecursionlimit(5000)

In [None]:
# Output directory used by the training loop: the pickled classifierTrain /
# gradientFun snapshots and the costs.txt log are written here.
# NOTE(review): hard-coded absolute path; nothing visible in this notebook
# creates the directory, so it has to exist before training starts.
MODEL_DIR = "/local_data/kylez/altair_runs/fn0_dev"

In [None]:
# Toy corpus for smoke tests. A later cell rebinds `data` to the function
# bodies extracted from sample.py, so these strings only survive if that cell
# is skipped.
data = ['this is data', 'we like data', 'once upon a time']

In [None]:
from redbaron import RedBaron

# Read sample.py once: `lines` is used to slice out function bodies and
# `source` (the same text joined back together) is what gets parsed.
with open("sample.py", "r") as f:
    lines = f.readlines()
source = "".join(lines)

# Parse the module and collect the source text of every function definition.
red = RedBaron(source)
data = []
for fn_node in red.findAll("DefNode"):
    # Only the first element of each bounding-box corner tuple is used; it is
    # treated as a 1-based line number when indexing into `lines` below.
    starting_line = fn_node.absolute_bounding_box.top_left.to_tuple()[0]
    ending_line = fn_node.absolute_bounding_box.bottom_right.to_tuple()[0]
    # NOTE(review): the slice stops one line before `ending_line`; presumably
    # the bounding box ends on the line after the function body — confirm
    # against RedBaron, otherwise the last line of each function is dropped.
    fn_lines = lines[starting_line-1:ending_line-1]
    # Trailing blank lines / whitespace are stripped from each snippet.
    data.append("".join(fn_lines).rstrip())

In [None]:
#data = data[:3]
# Keep only the first 20 characters of every snippet.
data = [snippet[:20] for snippet in data]

In [None]:
# Display the snippets that will be fed to the encoder (sanity check).
data

In [None]:
def clip_norm(g, c, n):
    '''Rescale the gradient g when the norm n reaches the threshold c.

    g is the gradient, c is the clipping threshold and n is the norm.
    When c is not positive no clipping is applied and g is returned as is.
    '''
    if not c > 0:
        return g
    # Wherever n >= c the gradient is scaled by c/n; elsewhere it passes through.
    return T.switch(T.ge(n, c), g*c/n, g)
def clip_norms(gs, c):
    '''Clip every gradient in gs against their joint norm.

    The norm is the square root of the summed squared entries of all
    gradients; each gradient is then passed through clip_norm with
    threshold c. Returns the clipped gradients as a list, in order.
    '''
    squared_sums = [T.sum(g**2) for g in gs]
    norm = T.sqrt(sum(squared_sums))
    clipped = []
    for g in gs:
        clipped.append(clip_norm(g, c, norm))
    return clipped

In [None]:
# UNK token will be second to last dimension
# EOS token will always be the last dimension
# If desired_length is not specified, desired_length will be len(document)
# If len(document) < desired_length, add an EOS token and pad with zero vectors to reach desired_length
# If len(document) > desired_length, truncate to desired_length
def encode_document(document, desired_length=-1, min_unicode_idx=0, max_unicode_idx=128):
    '''One-hot encode document character by character.

    Every character whose code point lies in [min_unicode_idx, max_unicode_idx)
    gets its own column; anything else is mapped to the UNK column (second to
    last). When the document is shorter than desired_length an EOS marker
    (last column) follows the text and the remaining rows stay all-zero.
    Longer documents are truncated. desired_length == -1 means "use
    len(document)".

    Returns an array of shape (desired_length, 1, vocabulary size), where the
    vocabulary size is max_unicode_idx - min_unicode_idx + 2.
    '''
    if desired_length == -1:
        desired_length = len(document)

    vocab_size = max_unicode_idx - min_unicode_idx + 2  # +2 for UNK and EOS tokens
    unk_column = max_unicode_idx - min_unicode_idx
    eos_column = unk_column + 1

    one_hot = np.zeros((desired_length, vocab_size))
    kept = document[:desired_length]
    for row, char in enumerate(kept):
        code_point = ord(char)
        if min_unicode_idx <= code_point < max_unicode_idx:
            column = code_point - min_unicode_idx
        else:
            column = unk_column
        one_hot[row, column] = 1

    # EOS is only written when there is a free row after the text.
    if len(document) < desired_length:
        one_hot[len(kept), eos_column] = 1
    #encoded[len(document[:desired_length]), EOS_IDX-min_unicode_idx] = 1
    return one_hot.reshape(one_hot.shape[0], 1, one_hot.shape[1])

# By default, desired_length will be the length of the longest document in documents.
def encode_documents(documents, desired_length=-1, min_unicode_idx=0, max_unicode_idx=128):
    '''Encode every document with encode_document and stack the results.

    With desired_length == -1 the length of the longest document is used, so
    all encodings share one length. Returns np.array of the per-document
    encodings, in input order.
    '''
    if desired_length == -1:
        desired_length = max(len(document) for document in documents)
    return np.array([
        encode_document(document, desired_length, min_unicode_idx, max_unicode_idx)
        for document in documents
    ])

# encoded must be one-hot, encoded via encode_document()
def decode_document(encoded, min_unicode_idx=0, max_unicode_idx=128, unk_decode_idx=32):
    '''Turn a one-hot array produced by encode_document back into a string.

    The active column of each non-zero entry (third axis of encoded) is
    shifted by min_unicode_idx to recover the code point. The UNK code is
    rendered as chr(unk_decode_idx); decoding stops at the first EOS code.
    '''
    unk_code = max_unicode_idx
    eos_code = max_unicode_idx + 1
    chars = []
    for column in np.nonzero(encoded)[2]:
        code_point = column + min_unicode_idx
        if code_point == eos_code:
            break
        if code_point == unk_code:
            code_point = unk_decode_idx
        chars.append(chr(code_point))
    return "".join(chars)

def decode_documents(encodeds, min_unicode_idx=0, max_unicode_idx=128):
    '''Decode each entry of encodeds with decode_document.

    Returns the decoded strings as a list, in input order. The UNK
    replacement character is decode_document's default.
    '''
    return [decode_document(encoded, min_unicode_idx, max_unicode_idx)
            for encoded in encodeds]

In [None]:
def one_hot_conversion(predictions):
    '''Replace each prediction vector by a one-hot vector at its argmax.

    predictions is indexed as [document, step, 0, class]. The result has the
    same shape, is zero everywhere and holds a 1 at the arg-max class of
    every (document, step) pair.
    '''
    converted = np.zeros(predictions.shape)
    n_docs, n_steps = predictions.shape[:2]
    for doc_idx in range(n_docs):
        for step_idx in range(n_steps):
            winner = np.argmax(predictions[doc_idx, step_idx, 0])
            converted[doc_idx, step_idx, 0, winner] = 1
    return converted

In [None]:
# Symbolic 4-D network input. The training loop feeds it the output of
# encode_documents, i.e. arrays indexed as [document, position, 0, one-hot column].
X = T.tensor4('X')
# NOTE(review): rnnType is not read anywhere in the visible cells.
rnnType = 'gru'
# Width of one encoded character: 128 code points + UNK + EOS with the
# default encode_document range. Must be kept in sync with min/max_unicode.
dimIn = 130
# Hidden size of the encoder GRU layers.
dim = 20
# Standard deviation shared by all Gaussian weight initialisers below.
wtstd = 0.5
rnnbias_init = Constant(0.0)
rnnwt_init = IsotropicGaussian(wtstd)
linewt_init = IsotropicGaussian(wtstd)
line_bias = Constant(1.0)

# Optimisation schedule: base learning rate, multiplicative decay factor,
# how often (in training-loop iterations) the decay is applied, and how often
# the compiled functions are pickled.
lr = 0.0001
decay = 0.9
decay_itr = 15000
pickle_itr = 10000
# Shared variable so the training loop can change the rate via set_value().
learning_rate = theano.shared(np.array(lr, dtype=theano.config.floatX))
learning_decay = np.array(decay, dtype=theano.config.floatX)

# Threshold handed to clip_norms and to learningfunctions.Learning (c=...).
clippings = 1000

In [None]:
def encoding_step(data_in, fork, rnn):
    '''Apply fork followed by rnn to every slice of data_in along its first axis.

    theano.scan iterates over data_in; for each slice the two outputs of
    fork.apply are passed to rnn.apply. Returns the scan outputs (the
    updates returned by scan are discarded).
    '''
    def apply_layer(sequence):
        linear_in, gate_in = fork.apply(sequence)
        return rnn.apply(linear_in, gate_in)

    outputs, _ = theano.scan(apply_layer, data_in)
    return outputs

In [None]:
# Encoding Step
# The 'gates' output of every fork is dimMultiplier times as wide as its
# 'linear' output.
dimMultiplier = 2

relu = Rectifier()
# Layer 1: consumes the one-hot input (dimIn) and produces dim-wide states.
# NOTE(review): all GRU bricks are named 'gru' and all forks 'fork'; confirm
# that the duplicate brick names are intentional.
rnn = GatedRecurrent(dim=dim, weights_init = rnnwt_init, biases_init = rnnbias_init, name = 'gru', activation=relu)
fork = Fork(output_names=['linear', 'gates'],
            name='fork', input_dim=dimIn, output_dims=[dim, dim * dimMultiplier],
            weights_init = linewt_init, biases_init = line_bias)

hEnc = encoding_step(X, fork, rnn)

# Layer 2: dim -> dim, stacked on the output of layer 1.
rnn2 = GatedRecurrent(dim=dim, weights_init = rnnwt_init, biases_init = rnnbias_init, name = 'gru', activation=relu)
fork2 = Fork(output_names=['linear', 'gates'],
            name='fork', input_dim=dim, output_dims=[dim, dim * dimMultiplier],
            weights_init = linewt_init, biases_init = line_bias)

hEnc2 = encoding_step(hEnc, fork2, rnn2)

# Layer 3: dim -> dim, stacked on the output of layer 2.
rnn3 = GatedRecurrent(dim=dim, weights_init = rnnwt_init, biases_init = rnnbias_init, name = 'gru', activation=relu)
fork3 = Fork(output_names=['linear', 'gates'],
            name='fork', input_dim=dim, output_dims=[dim, dim * dimMultiplier],
            weights_init = linewt_init, biases_init = line_bias)

hEnc3 = encoding_step(hEnc2, fork3, rnn3)

# Maps the dim-wide encoder summary to the decoder's dimIn-wide input space.
forkD = Fork(output_names=['linear', 'gates'],
            name='fork', input_dim=dim, output_dims=[dimIn, dimIn * dimMultiplier],
            weights_init = linewt_init, biases_init = line_bias)

# Keep only the last axis-1 entry of the top encoder layer and restore the
# two singleton axes. The trailing size is `dim` (previously a literal 20,
# which silently broke the graph whenever dim was changed).
h4decoder = hEnc3[:,-1,:,:].reshape((-1, 1, 1, dim))
# Only the 'linear' output of forkD is used; the 'gates' output is dropped.
h4reshape, _ = forkD.apply(h4decoder)

In [None]:
# Decoding Step
# The decoder GRU works at the input width (dimIn). No activation is passed,
# so the brick's default is used (the encoder layers pass relu explicitly).
rnn4 = GatedRecurrent(dim=dimIn, weights_init = rnnwt_init, biases_init = rnnbias_init, name = 'gru')
#TOTHINK: transform before the decoder or after
fork4 = Fork(output_names=['linear', 'gates'],
            name='fork', input_dim=dimIn, output_dims=[dimIn, dimIn * dimMultiplier], 
            weights_init = linewt_init, biases_init = line_bias)

# Decoder input: h4reshape comes first along axis 1, followed by X without
# its last axis-1 entry, so position t of the decoder sees input t-1.
# NOTE(review): presumably this gives the same axis-1 length as X — confirm
# that h4reshape has length 1 on that axis.
targets = T.concatenate((h4reshape, X[:,:-1, :,:]), axis=1)
# encoding_step is reused here to run fork4 + rnn4 over the decoder input.
hDec = encoding_step(targets, fork4, rnn4)

In [None]:
# Every brick that owns parameters, grouped by type.
forks = [fork, fork2, fork3, fork4, forkD]
rnns = [rnn, rnn2, rnn3, rnn4]

# The loop variables are deliberately NOT called `fork` / `rnn`: reusing those
# names rebinds the module-level first-layer bricks to the last list element,
# so re-running this cell would build the lists without the first-layer
# bricks and silently skip their initialisation.
for fork_brick in forks:
    fork_brick.initialize()
for rnn_brick in rnns:
    rnn_brick.initialize()

In [None]:
# Hand-written softmax: exp(hDec) normalised by its sum over the last two axes.
# NOTE(review): exp() is applied to the raw values without subtracting the
# maximum first, so this can overflow if hDec ever becomes large — confirm
# the value range of hDec or switch to a stabilised softmax.
predTargets = T.exp(hDec)/T.sum(T.exp(hDec), axis=(3,2), keepdims=True)
#precost = -X.squeeze*T.log(predTargets.squeeze()) - (1-X.squeeze())*T.log(1-predTargets.squeeze())

#ADDLATER: beam search
# Cross-entropy of the predictions against the one-hot input X, summed over
# axis 1 and then averaged over the remaining entries to give a scalar cost.
cost = T.mean(T.sum(T.nnet.categorical_crossentropy(predTargets, X), axis = 1))

In [None]:
# Collect every variable in the cost graph that carries the PARAMETER role.
cg = ComputationGraph([cost])
params = VariableFilter(roles = [PARAMETER])(cg.variables)

###To check gradients for explosion/shrinkage
print('compiling graph you talented soul')
gradients = T.grad(cost, params)
# Rescale the gradients with clip_norms, using `clippings` as the threshold.
gradients = clip_norms(gradients, clippings)
# Diagnostic function returning the clipped gradients. predTargets is listed
# as an explicit input, so callers must pass a value for it alongside X.
gradientFun = theano.function([X, predTargets], gradients, allow_input_downcast=True)
print('finished gradientFun')

# The parameter updates come from the project-local learningfunctions module.
# NOTE(review): Learning / Adam are not visible here — presumably an Adam
# optimiser that clips at c=clippings; confirm against learningfunctions.
learning = learningfunctions.Learning(cost,params,learning_rate,l1=0.,l2=0.,maxnorm=0.,c=clippings)
updates = learning.Adam() 

# Training function: each call returns [cost, predTargets] and applies `updates`.
classifierTrain = theano.function([X], [cost, predTargets], 
                                  updates=updates, allow_input_downcast=True)
print('finished classifierTrain')
#classifierPredict = theano.function([X], [softoutClass, attEncpred, attContextpred], allow_input_downcast=True)
# Prediction-only function: returns predTargets and is compiled without updates.
classifierPredict = theano.function([X], predTargets, allow_input_downcast=True)
print('finished classifierPredict')

In [None]:
# Show the value of the first shared variable of classifierTrain; this cell
# sits before the training loop, so it can be compared with the same
# inspection at the end of the notebook.
classifierTrain.get_shared()[0].get_value()

In [None]:
# Upper bound for the training loop's epoch counter.
num_epochs = 100000
# Epoch the training loop starts from. The loop updates it as it runs, so
# re-running this cell resets training progress back to 0.
cur_epoch = 0

In [None]:
# Code-point range handed to the encoder/decoder helpers; together with the
# UNK and EOS columns it has to match dimIn.
min_unicode = 0
max_unicode = 128
encoded = encode_documents(data, min_unicode_idx=min_unicode, max_unicode_idx=max_unicode)
for epoch in range(cur_epoch, num_epochs):
    # One optimisation step on the whole corpus.
    result_cost, result_predTargets = classifierTrain(encoded)
    if epoch % 1000 == 0:
        # The one-hot conversion is a Python-level double loop and its result
        # is only printed, so it is computed on logging epochs only.
        result_converted = one_hot_conversion(result_predTargets)
        print("%s: %s" % (result_cost, decode_documents(result_converted, min_unicode_idx=min_unicode, max_unicode_idx=max_unicode)))
        grads = gradientFun(encoded, result_predTargets)
        for gra in grads:
            print('  gradient norms: ', np.linalg.norm(gra))
    # Skip epoch 0: 0 % decay_itr == 0 would otherwise decay the learning rate
    # right after the very first update instead of after decay_itr epochs.
    if epoch > 0 and epoch % decay_itr == 0:
        learning_rate.set_value(learning_rate.get_value() * learning_decay)
    if epoch % pickle_itr == 0:
        # Snapshot the compiled functions and append the current cost to the log.
        with open(os.path.join(MODEL_DIR, "classifierTrain.%s.mdl" % epoch), "wb") as f:
            pickle.dump(classifierTrain, f, protocol=pickle.HIGHEST_PROTOCOL)
        with open(os.path.join(MODEL_DIR, "gradientFun.%s.mdl" % epoch), "wb") as f:
            pickle.dump(gradientFun, f, protocol=pickle.HIGHEST_PROTOCOL)
        with open(os.path.join(MODEL_DIR, "costs.txt"), "a") as f:
            f.write("%s\n" % result_cost)
    # Record the NEXT epoch to run, so that re-running this cell after an
    # interruption resumes where training stopped instead of repeating the
    # last completed epoch.
    cur_epoch = epoch + 1

In [None]:
# Show the value of the first shared variable of classifierTrain again; this
# cell sits after the training loop, for comparison with the earlier inspection.
classifierTrain.get_shared()[0].get_value()