In [1]:
import tensorflow as tf
import numpy as np

from data import DataCorpus
from btgym.research.casual_conv.networks import conv_1d_casual_encoder

%matplotlib inline
import seaborn
import matplotlib.pyplot as plt

  from ._conv import register_converters as _register_converters


In [8]:
# Build the training graph from scratch: data pipeline -> causal conv encoder
# -> dense trunk -> two heads (regression on 'target_sum', 2-way classification
# on 'target_flag'), plus loss, clipped-gradient Adam step and an accuracy metric.
tf.reset_default_graph()

data = DataCorpus(
    'data/sparse_array_train.npz',
    'data/sparse_array_test.npz',
    batch_size=64,
    cv_fraction=0.2,
    train_repeats=100,
    full_shuffle=True,
    log_transform=True
)

train_learn_rate = 1e-4

# Look up the batch op once and reuse it for both features and targets.
# First element carries the 'features' dict entry, last element the targets.
train_batch = data.next_train_batch_op
train_features = train_batch[0]['features']
train_targets = train_batch[-1]

with tf.variable_scope(name_or_scope='model', reuse=False):
    # The two positional `None` arguments are passed through unchanged;
    # their meaning is defined by conv_1d_casual_encoder (not visible here).
    hidden = conv_1d_casual_encoder(
        train_features,
        None,
        None,
        conv_1d_num_filters=32,
        conv_1d_filter_size=2,
        conv_1d_activation=tf.nn.elu,
        reuse=False,
    )

    hidden = tf.layers.flatten(hidden)

    hidden = tf.layers.dense(
        inputs=hidden,
        units=256,
        activation=tf.nn.elu,
    )

    # Regression head. The (misspelled) name is kept because the training
    # cell fetches `predicted_traget_sum` by this exact name.
    predicted_traget_sum = tf.layers.dense(
        inputs=hidden,
        units=1,
        activation=tf.nn.elu,
    )

    # Classification head.
    # NOTE(review): ELU is applied to the logits; softmax cross-entropy is
    # normally fed a linear (activation=None) layer -- confirm this is intended.
    predicted_flag_logits = tf.layers.dense(
        inputs=hidden,
        units=2,
        activation=tf.nn.elu,
    )

    # Only variables created under the current ('model') scope are trained.
    var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, tf.get_variable_scope().name)

    # 'target_sum' gets a trailing axis so it lines up with the units=1 output.
    regress_loss = tf.losses.mean_squared_error(
        labels=train_targets['target_sum'][..., None],
        predictions=predicted_traget_sum
    )

    class_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=predicted_flag_logits,
            labels=train_targets['target_flag'],
        )
    )

    loss = regress_loss + class_loss

    # Clip the global gradient norm at 40.0 before applying the update.
    grads, _ = tf.clip_by_global_norm(
        tf.gradients(loss, var_list),
        40.0
    )

    grads_and_vars = list(zip(grads, var_list))

    optimizer = tf.train.AdamOptimizer(train_learn_rate, epsilon=1e-5)

    train_op = optimizer.apply_gradients(grads_and_vars)

    # Accuracy is only meaningful for the classification head. It was
    # previously computed on the continuous 'target_sum' regression values,
    # where exact float equality practically never holds.
    # Assumes each 'target_flag' row is a one-hot / per-class score vector
    # (it is used as softmax cross-entropy labels above) -- TODO confirm.
    accuracy = tf.metrics.accuracy(
        labels=tf.argmax(train_targets['target_flag'], axis=-1),
        predictions=tf.argmax(predicted_flag_logits, axis=-1),
    )

    combined_op = [train_op, loss, accuracy]
    
    

[2018-05-26 19:33:26.400850] INFO: DataCorpus: train data shape: (893, 512, 25)
[2018-05-26 19:33:26.563175] INFO: DataCorpus: test data shape: (891, 512, 25)
[2018-05-26 19:33:26.638669] INFO: DataCorpus: cv_fraction: 0.2, train_size: 714, cv size: 179


In [6]:
# Inspect the static shape of the 'target_flag' tensor from the last element
# of the training batch op.
target_flag_tensor = data.next_train_batch_op[-1]['target_flag']
target_flag_tensor.shape

TensorShape([Dimension(None), Dimension(2)])

In [9]:
# Train until the input pipeline is exhausted, printing the loss and a
# target<--prediction listing for the current batch every 200 steps.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    for ep in range(1):
        print('Fit attempt: ', ep)
        data.reset()

        # Iterate through the training set until it signals end-of-data.
        print('Train:')
        step = 0
        while True:
            try:
                targets, predictions, train_result, batch_loss = sess.run([
                    data.next_train_batch_op[-1]['target_sum'],
                    predicted_traget_sum,
                    train_op,
                    loss,
                ])
            except tf.errors.OutOfRangeError:
                print('Train EOD')
                break

            if step % 200 == 0:
                print('\n{}: mean_loss: {}'.format(step, batch_loss))

                # Predictions carry a trailing unit axis; take column 0 to
                # pair them element-wise with the targets.
                for target, predicted in zip(targets, predictions[:, 0]):
                    print('{}<--{}'.format(target, predicted))

            step += 1

print('Done')

Fit attempt:  0
Train:

0: mean_loss: 103.47559356689453
11.921854019165039<--1.2831323146820068
0.0<--0.8150356411933899
13.60656452178955<--0.2888767123222351
12.612622261047363<--1.4555948972702026
12.719679832458496<---0.3298313617706299
1.2556160688400269<--2.4053874015808105
10.540872573852539<--2.4802467823028564
0.0<---0.3305782079696655
10.430431365966797<--1.388880968093872
0.0<---0.5334077477455139
8.265907287597656<---0.38409680128097534
10.239399909973145<---0.5860074758529663
12.536955833435059<--1.78446364402771
12.3842191696167<--1.4334683418273926
11.034383773803711<--0.8690291047096252
12.686492919921875<--1.6714341640472412
11.116933822631836<---0.6106808185577393
0.0<---0.8604185581207275
12.014841079711914<---0.9769929647445679
10.902576446533203<--1.3135185241699219
13.611640930175781<--0.6061166524887085
0.0<--0.1139059066772461
12.082633018493652<---0.463015615940094
0.0<--0.6592423319816589
10.813395500183105<---0.7555115222930908
10.918180465698242<--0.9758582


800: mean_loss: 0.2741929292678833
12.646967887878418<--13.12875747680664
11.377877235412598<--11.573738098144531
11.01646900177002<--10.776954650878906
10.461586952209473<--10.950248718261719
12.643160820007324<--13.204402923583984
10.198444366455078<--9.616561889648438
0.0<--0.7027947902679443
11.815631866455078<--11.399055480957031
10.122087478637695<--9.896461486816406
8.265907287597656<--8.758218765258789
0.0<--0.03014354035258293
0.0<---0.9777609705924988
10.824005126953125<--11.15264892578125
12.206846237182617<--12.070042610168457
0.0<---0.07486093044281006
0.0<--0.25906839966773987
12.538615226745605<--12.646903991699219
12.160640716552734<--12.198808670043945
0.0<--0.05962614342570305
0.0<---0.9740332365036011
8.74409008026123<--8.939849853515625
12.45239543914795<--12.090806007385254
13.020060539245605<--13.83243179321289
11.937068939208984<--12.12384033203125
12.679953575134277<--12.917388916015625
3.4339871406555176<--5.836841583251953
11.156265258789062<--11.162344932556

In [None]:
# Scratch check: 100 modulo 100 (evaluates to 0).
100 % 100

In [None]:


# Fresh graph holding only a streaming ROC-AUC metric fed through placeholders.
tf.reset_default_graph()

data_pl = tf.placeholder(tf.float32)    # fed as the metric's predictions
labels_pl = tf.placeholder(tf.float32)  # fed as the metric's labels

# Metric settings collected in one place; positional args are (labels, predictions).
auc_options = dict(
    weights=None,
    num_thresholds=200,
    curve='ROC',
    summation_method='trapezoidal',
    name='AUC',
)
auc = tf.metrics.auc(labels_pl, data_pl, **auc_options)

In [None]:
# Feed 100 random batches through the streaming AUC metric and record the
# value reported after each batch in `seq`.
# NOTE: this cell rebinds the global `data` (bound to the DataCorpus in the
# model-building cell) to a NumPy array; the histogram cell below reads
# `data` and `labels` as left by the final iteration.
batch_size = 32

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    seq = []
    for batch_idx in range(100):
        # Uniform scores in [0, 1), shaped (batch_size, 1).
        # Alternative score distributions tried earlier:
        #   np.absolute(np.random.normal(1, 1, size=batch_size)[..., None]), then data /= data.max()
        #   np.random.beta(5, 0.5, size=batch_size)[..., None]
        data = np.random.uniform(0, 1, size=batch_size)[:, None]

        # Binary labels, 1 with probability 0.9.
        labels = np.random.binomial(1, 0.9, batch_size)[:, None]

        # Shift scores of positive examples up by 0.2, then rescale so the
        # batch maximum is 1.
        data = data + 0.2 * labels
        data = data / data.max()

        # `auc` is the pair of ops returned by tf.metrics.auc; keep the value
        # produced by the last one.
        auc_outputs = sess.run(auc, feed_dict={data_pl: data, labels_pl: labels})
        seq.append(auc_outputs[-1])

In [None]:
# Histogram of the per-batch AUC values collected in `seq`.
plt.figure(num=0)

auc_samples = np.asarray(seq)
_ = plt.hist(auc_samples, bins=50)

In [None]:
# Overlay histograms of whatever `data` and `labels` currently hold
# (the AUC simulation cell leaves its final batch in them).
plt.figure(num=1)

for sample in (data, labels):
    _ = plt.hist(sample, bins=50)

In [10]:
# Round trip exp(log(10)): recovers 10 up to floating-point error.
log_of_ten = np.log(10)
np.exp(log_of_ten)

10.000000000000002