In [1]:
import sys
import pandas as pd
import numpy  as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
#######################
def batchgen(batchsize):

    def getbatch(x,y):
        assert (len(x) == len(y)), "dimension mismatch"
        for i in range(0, len(y), batchsize):
            yield x[i:i+batchsize], y[i:i+batchsize],
    return getbatch

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
class vardict(dict):
    def __getattr__(self, name):
        return self[name]

    def __setattr__(self,name, val):
        self[name] = val

    def __getstate__(self):
        return self.__dict__.items()

    def __setstate__(self, items):
        for key, val in items:
            self.__dict__[key] = val

In [4]:
def summary_dict(summary_str, summary_proto = None):
    "convert summary string to a dictionary"
    if summary_proto is None:
        summary_proto = tf.Summary()
    summary_proto.ParseFromString(summary_str)
    summaries = {}
    for val in summary_proto.value:
        # Assuming all summaries are scalars.
        summaries[val.tag] = val.simple_value
    return summaries

In [17]:
from tflearn import tflearn

class tflasso(tflearn):
    def _create_network(self):
        self.vars = vardict()
        self.vars.xx = tf.placeholder("float", shape=[None, self.xlen])
        self.vars.yy = tf.placeholder("float", shape=[None, 1])

        #def fully_connected():
            
        # Create Model
        self.parameters["W1"] = tf.Variable(tf.truncated_normal([1, self.xlen], stddev=0.1), name="weight")
        self.parameters["b1"] = tf.Variable(tf.constant(0.1, shape=[1, 1]), name="bias")
        
        
        self.vars.y_predicted = tf.matmul( self.vars.xx, tf.transpose(self.W1)) + self.b1
        self.saver = tf.train.Saver()
        

In [6]:
datafile = "../../data/atac_tss_800_1.h5"

with pd.HDFStore(datafile) as store:
    print(store.groups())
    y_ = store["y"]
    X_ = store["X"]

[/X (Group) ''
  children := ['axis1_label1' (Array), 'axis1_label0' (Array), 'block0_items_label1' (Array), 'axis0_level1' (Array), 'block0_items_level1' (Array), 'axis0_label1' (Array), 'block0_items_label0' (Array), 'axis0_level0' (VLArray), 'block0_items_level0' (VLArray), 'axis1_level1' (Array), 'axis0_label0' (Array), 'block0_values' (Array), 'axis1_level0' (Array)], /y (Group) ''
  children := ['index_level1' (Array), 'index_level0' (Array), 'values' (Array), 'index_label1' (Array), 'index_label0' (Array)]]


In [7]:
""" transform data """
sys.path.append("..")
from transform_tss import safelog, sumstrands, groupcolumns

feature_step = 100
select = list(feature_step * np.arange(-2,3,1))

Xgr = groupcolumns(X_, step = feature_step, select = select)

X, y = safelog(Xgr, y_)

from sklearn.preprocessing import PolynomialFeatures
pf3 = PolynomialFeatures(degree=3)
X3 = pf3.fit_transform(X)
trainsamples = 4000
train_X, train_Y = X3[:trainsamples], y[:trainsamples].as_matrix()

In [8]:
def get_labels(pf):
    return list(pf._combinations(10, degree=pf.degree,
                          interaction_only=pf.interaction_only,
                          include_bias = pf.include_bias))

In [11]:
tfl = tflasso(ALPHA = 2e-1, checkpoint_dir = "./cubiclasso/", dropout = None)
tfl.fit( train_X, train_Y , load = True)

loading a session
training epochs: 2800 ... 7800, saving each 100' epoch
Epoch: 2801	tot loss= 11.4907	L2 loss= 11.4907	R^2= 0.3418
Epoch: 2901	tot loss= 11.4585	L2 loss= 11.4585	R^2= 0.3437
Epoch: 3001	tot loss= 11.4280	L2 loss= 11.4280	R^2= 0.3454
Epoch: 3101	tot loss= 11.3988	L2 loss= 11.3988	R^2= 0.3471
Epoch: 3201	tot loss= 11.3711	L2 loss= 11.3711	R^2= 0.3487
Epoch: 3301	tot loss= 11.3445	L2 loss= 11.3445	R^2= 0.3502
Epoch: 3401	tot loss= 11.3192	L2 loss= 11.3192	R^2= 0.3516
Epoch: 3501	tot loss= 11.2949	L2 loss= 11.2949	R^2= 0.3530
Epoch: 3601	tot loss= 11.2716	L2 loss= 11.2716	R^2= 0.3544
Epoch: 3701	tot loss= 11.2493	L2 loss= 11.2493	R^2= 0.3556
Epoch: 3801	tot loss= 11.2278	L2 loss= 11.2278	R^2= 0.3569
Epoch: 3901	tot loss= 11.2072	L2 loss= 11.2072	R^2= 0.3581
Epoch: 4001	tot loss= 11.1874	L2 loss= 11.1874	R^2= 0.3592
Epoch: 4101	tot loss= 11.1683	L2 loss= 11.1683	R^2= 0.3603
Epoch: 4201	tot loss= 11.1499	L2 loss= 11.1499	R^2= 0.3613
Epoch: 4301	tot loss= 11.1321	L2 loss= 11.

17.4581682381


KeyboardInterrupt: 

In [19]:
ts, r2s = list(zip( *tfl.r2_progress ))
plt.plot(ts, r2s)
plt.xlabel("epoch")
plt.ylabel("R^2")
plt.ylim([0, 0.1* np.ceil(10*max(r2s))])
pass

r2_progress not found


TypeError: type object argument after * must be a sequence, not NoneType

In [13]:
test_X, test_Y = X3[trainsamples:], y[trainsamples:].as_matrix()

In [18]:
tfl = tflasso(checkpoint_dir = "./cubiclasso/", dropout = False)
tfl.transform( test_X, test_Y, load = True)

print( tfl.loss )
r2 = 1- tfl.loss/test_Y.var()
r2

loading a session


TypeError: Fetch argument None of None has invalid type <class 'NoneType'>, must be a string or Tensor. (Can not convert a NoneType into a Tensor or Operation.)

In [None]:
pf3.powers_()

In [None]:
W1 = tfl.get_params()["W1"][0]
ncoef = len(W1)
xlabels = np.array( get_labels(pf3) )

forder = np.array([len(x) for x in xlabels])


fig, axs = plt.subplots(3,figsize = (14, 5))
fig.subplots_adjust(hspace=.5)
for nn in range(3):
    valid =( forder == (nn+1))
    print(sum(valid))
    x_ =  np.arange(ncoef)[valid]
    y_ = np.log10( abs(W1[valid]) )
    axs[nn].scatter(x_, y_ )
    axs[nn].scatter( x_[y_>-3], y_[y_>-3], 25, "r" )
    #axs[nn].stem( x_[y_>-3], y_[y_>-3], markerfmt = "ro" )
    if nn < 2:
        axs[nn].set_xticks(x_ )
        axs[nn].set_xticklabels([repr(x) for x in xlabels[valid]], rotation = 90)
    else:
        axs[nn].set_xticks(x_[::4] )
        axs[nn].set_xticklabels([repr(x) for x in xlabels[valid][::4]], rotation = 90)
pass

In [None]:
plt.stem?

In [None]:
W1 = tfl.get_params()["W1"][0]
print(len(W1))
plt.stem( np.arange(len(W1)), np.log10( abs(W1)) )
plt.stem( np.arange(len(W1))[np.log10(W1)>-3], np.log10(W1)[np.log10(W1)>-3], markerfmt = "ro" )