In [2]:
import sys
import pandas as pd
import numpy  as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
#######################
def batchgen(batchsize):

    def getbatch(x,y):
        assert (len(x) == len(y)), "dimension mismatch"
        for i in range(0, len(y), batchsize):
            yield x[i:i+batchsize], y[i:i+batchsize],
    return getbatch

In [3]:
%load_ext autoreload
%autoreload 2

In [4]:
class vardict(dict):
    def __getattr__(self, name):
        return self[name]

    def __setattr__(self,name, val):
        self[name] = val

    def __getstate__(self):
        return self.__dict__.items()

    def __setstate__(self, items):
        for key, val in items:
            self.__dict__[key] = val

In [5]:
def summary_dict(summary_str, summary_proto = None):
    "convert summary string to a dictionary"
    if summary_proto is None:
        summary_proto = tf.Summary()
    summary_proto.ParseFromString(summary_str)
    summaries = {}
    for val in summary_proto.value:
        # Assuming all summaries are scalars.
        summaries[val.tag] = val.simple_value
    return summaries

In [11]:
from tflearn import tflearn

class tflasso(tflearn):
    def _create_network(self):
        self.vars = vardict()
        self.vars.x = tf.placeholder("float", shape=[None, self.xlen])
        self.vars.y = tf.placeholder("float", shape=[None, 1])

        #def fully_connected():
            
        # Create Model
        self.parameters["W1"] = tf.Variable(tf.truncated_normal([1, self.xlen], stddev=0.1), name="weight")
        self.parameters["b1"] = tf.Variable(tf.constant(0.1, shape=[1, 1]), name="bias")
        
        
        self.vars.y_predicted = tf.matmul( self.vars.x, tf.transpose(self.W1)) + self.b1
        self.saver = tf.train.Saver()
        
    def _create_loss(self):
        # Minimize the squared errors
        l2_loss = tf.reduce_mean(tf.pow( self.vars.y_predicted - self.vars.y, 2))
        l2_sy = tf.scalar_summary( "L2_loss", l2_loss )
        "Lasso penalty"
        l1_penalty = tf.reduce_sum((tf.abs(tf.concat(1, [self.W1,self.b1], name = "l1" ) )) )
        l1p_sy =  tf.scalar_summary( "L1_penalty" , l1_penalty )
        "total"
        tot_loss = l2_loss + self.ALPHA * l1_penalty
        tot_loss_sy =  tf.scalar_summary( "loss" , tot_loss )
        "R2"
        _, y_var = tf.nn.moments(self.vars.y, [0,1])
        rsq =  1 - l2_loss / y_var
        rsq_sy = tf.scalar_summary( "R2", rsq)
        return tot_loss

In [7]:
datafile = "../../data/atac_tss_800_1.h5"

with pd.HDFStore(datafile) as store:
    print(store.groups())
    y_ = store["y"]
    X_ = store["X"]

[/X (Group) ''
  children := ['axis1_label0' (Array), 'axis1_level1' (Array), 'block0_values' (Array), 'axis0_level1' (Array), 'axis1_label1' (Array), 'block0_items_level0' (VLArray), 'axis0_level0' (VLArray), 'block0_items_level1' (Array), 'axis1_level0' (Array), 'axis0_label0' (Array), 'block0_items_label1' (Array), 'block0_items_label0' (Array), 'axis0_label1' (Array)], /y (Group) ''
  children := ['index_level1' (Array), 'values' (Array), 'index_level0' (Array), 'index_label0' (Array), 'index_label1' (Array)]]


In [8]:
""" transform data """
sys.path.append("..")
from transform_tss import safelog, sumstrands, groupcolumns

feature_step = 100
select = list(feature_step * np.arange(-2,3,1))

Xgr = groupcolumns(X_, step = feature_step, select = select)

X, y = safelog(Xgr, y_)

from sklearn.preprocessing import PolynomialFeatures
pf3 = PolynomialFeatures(degree=3)
X3 = pf3.fit_transform(X)
trainsamples = 4000
train_X, train_Y = X3[:trainsamples], y[:trainsamples].as_matrix()

In [9]:
def get_labels(pf):
    return list(pf._combinations(10, degree=pf.degree,
                          interaction_only=pf.interaction_only,
                          include_bias = pf.include_bias))

In [10]:
tfl = tflasso(ALPHA = 2e-1, checkpoint_dir = "./cubiclasso/", dropout = None)
tfl.fit( train_X, train_Y , load = True)

loading a session
training epochs: 10600 ... 15600, saving each 100' epoch
Epoch: 10600	
	train	R2: 0.3950	loss: 13.6292	L2_loss: 10.5626	L1_penalty: 15.3332
Epoch: 10700	
	train	R2: 0.3951	loss: 13.6015	L2_loss: 10.5608	L1_penalty: 15.2033
 66%|██████▌   | 66/100 [00:05<00:02, 12.66it/s]

17.4581682381


KeyboardInterrupt: 

In [38]:
load = 0
g = tf.Graph()
with g.as_default():
    tfl._create_network()

    tot_loss = tfl._create_loss()
    summary_op = tf.merge_all_summaries()
    sess_config = tf.ConfigProto(inter_op_parallelism_threads=tfl.NUM_CORES,
                               intra_op_parallelism_threads= tfl.NUM_CORES)
    # Initializing the variables
    init = tf.initialize_all_variables()

    with tf.Session(config = sess_config) as sess:
        if load:
            tfl._load_(sess)
        else:
            sess.run(init)

        feed_dict={ tfl.vars.x: train_X, }
        if tfl.dropout:
            feed_dict[ tfl.vars.keep_prob] = 0.5

        y_predicted = sess.run( tfl.vars.y_predicted,
                        feed_dict = feed_dict )
        if y is not None:
            feed_dict[ tfl.vars.y ] = np.reshape(y, [-1, 1])
            tfl.summary_proto = tf.Summary()
            print(tfl.summary_proto.value)
            #summary_str = sess.run(summary_op, feed_dict=feed_dict)
            #summary_d = summary_dict(summary_str, tfl.summary_proto)


[]


In [34]:
tfl.summary_proto.DESCRIPTOR.fields

{}

In [19]:
ts, r2s = list(zip( *tfl.r2_progress ))
plt.plot(ts, r2s)
plt.xlabel("epoch")
plt.ylabel("R^2")
plt.ylim([0, 0.1* np.ceil(10*max(r2s))])
pass

r2_progress not found


TypeError: type object argument after * must be a sequence, not NoneType

In [13]:
test_X, test_Y = X3[trainsamples:], y[trainsamples:].as_matrix()

In [18]:
tfl = tflasso(checkpoint_dir = "./cubiclasso/", dropout = False)
tfl.transform( test_X, test_Y, load = True)

print( tfl.loss )
r2 = 1- tfl.loss/test_Y.var()
r2

loading a session


TypeError: Fetch argument None of None has invalid type <class 'NoneType'>, must be a string or Tensor. (Can not convert a NoneType into a Tensor or Operation.)

In [None]:
pf3.powers_()

In [None]:
W1 = tfl.get_params()["W1"][0]
ncoef = len(W1)
xlabels = np.array( get_labels(pf3) )

forder = np.array([len(x) for x in xlabels])


fig, axs = plt.subplots(3,figsize = (14, 5))
fig.subplots_adjust(hspace=.5)
for nn in range(3):
    valid =( forder == (nn+1))
    print(sum(valid))
    x_ =  np.arange(ncoef)[valid]
    y_ = np.log10( abs(W1[valid]) )
    axs[nn].scatter(x_, y_ )
    axs[nn].scatter( x_[y_>-3], y_[y_>-3], 25, "r" )
    #axs[nn].stem( x_[y_>-3], y_[y_>-3], markerfmt = "ro" )
    if nn < 2:
        axs[nn].set_xticks(x_ )
        axs[nn].set_xticklabels([repr(x) for x in xlabels[valid]], rotation = 90)
    else:
        axs[nn].set_xticks(x_[::4] )
        axs[nn].set_xticklabels([repr(x) for x in xlabels[valid][::4]], rotation = 90)
pass

In [None]:
plt.stem?

In [None]:
W1 = tfl.get_params()["W1"][0]
print(len(W1))
plt.stem( np.arange(len(W1)), np.log10( abs(W1)) )
plt.stem( np.arange(len(W1))[np.log10(W1)>-3], np.log10(W1)[np.log10(W1)>-3], markerfmt = "ro" )