In [1]:
# Train xgboost by epoch. 
# Start with default weights, train XGBoost, produce accuracy
# Run epoch again, but default weight will change, train XGBoost, product accuracy
# continue

In [2]:
import keras
from keras.utils.np_utils import to_categorical

import tensorflow as tf

import xgboost as xgb

import pandas as pd
import numpy as np

In [3]:
data = pd.read_csv('./DATA/sensorless_data.csv')
data.head()

Unnamed: 0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,...,F40,F41,F42,F43,F44,F45,F46,F47,F48,TARGET
0,-3.0146e-07,8.2603e-06,-1.2e-05,-2e-06,-1.4386e-06,-2.1e-05,0.031718,0.03171,0.031721,-0.032963,...,-0.63308,2.9646,8.1198,-1.4961,-1.4961,-1.4961,-1.4996,-1.4996,-1.4996,1
1,2.9132e-06,-5.2477e-06,3e-06,-6e-06,2.7789e-06,-4e-06,0.030804,0.03081,0.030806,-0.03352,...,-0.59314,7.6252,6.169,-1.4967,-1.4967,-1.4967,-1.5005,-1.5005,-1.5005,1
2,-2.9517e-06,-3.184e-06,-1.6e-05,-1e-06,-1.5753e-06,1.7e-05,0.032877,0.03288,0.032896,-0.029834,...,-0.63252,2.7784,5.3017,-1.4983,-1.4983,-1.4982,-1.4985,-1.4985,-1.4985,1
3,-1.3226e-06,8.8201e-06,-1.6e-05,-5e-06,-7.2829e-07,4e-06,0.02941,0.029401,0.029417,-0.030156,...,-0.62289,6.5534,6.2606,-1.4963,-1.4963,-1.4963,-1.4975,-1.4975,-1.4976,1
4,-6.8366e-08,5.6663e-07,-2.6e-05,-6e-06,-7.9406e-07,1.3e-05,0.030119,0.030119,0.030145,-0.031393,...,-0.6301,4.5155,9.5231,-1.4958,-1.4958,-1.4958,-1.4959,-1.4959,-1.4959,1


### Format input data

In [4]:
x_cols = list(data.columns[:-1])
X_data = data[x_cols].copy()
# Adding 0 for easy reshaping
X_data['F49'] = 0
X_data.head()

Unnamed: 0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,...,F40,F41,F42,F43,F44,F45,F46,F47,F48,F49
0,-3.0146e-07,8.2603e-06,-1.2e-05,-2e-06,-1.4386e-06,-2.1e-05,0.031718,0.03171,0.031721,-0.032963,...,-0.63308,2.9646,8.1198,-1.4961,-1.4961,-1.4961,-1.4996,-1.4996,-1.4996,0
1,2.9132e-06,-5.2477e-06,3e-06,-6e-06,2.7789e-06,-4e-06,0.030804,0.03081,0.030806,-0.03352,...,-0.59314,7.6252,6.169,-1.4967,-1.4967,-1.4967,-1.5005,-1.5005,-1.5005,0
2,-2.9517e-06,-3.184e-06,-1.6e-05,-1e-06,-1.5753e-06,1.7e-05,0.032877,0.03288,0.032896,-0.029834,...,-0.63252,2.7784,5.3017,-1.4983,-1.4983,-1.4982,-1.4985,-1.4985,-1.4985,0
3,-1.3226e-06,8.8201e-06,-1.6e-05,-5e-06,-7.2829e-07,4e-06,0.02941,0.029401,0.029417,-0.030156,...,-0.62289,6.5534,6.2606,-1.4963,-1.4963,-1.4963,-1.4975,-1.4975,-1.4976,0
4,-6.8366e-08,5.6663e-07,-2.6e-05,-6e-06,-7.9406e-07,1.3e-05,0.030119,0.030119,0.030145,-0.031393,...,-0.6301,4.5155,9.5231,-1.4958,-1.4958,-1.4958,-1.4959,-1.4959,-1.4959,0


In [5]:
len(X_data)

58509

In [6]:
X = X_data.to_numpy().reshape((len(X_data),7,7,1))
X.shape

(58509, 7, 7, 1)

In [7]:
X[0].shape

(7, 7, 1)

In [8]:
data['TARGET'] = data['TARGET'] - 1
y = data['TARGET'].to_numpy()

In [9]:
y = to_categorical(y, num_classes=data['TARGET'].nunique())

### Randomizing dataset

In [10]:
idxs = np.arange(len(X))
samples = np.random.choice(idxs,size=10000)

X_rand = X[samples]
y_rand = y[samples]

### CNN Model

In [11]:
# Only contains layers up to flatten
# Xgboost will take us to final 11 logits (probailities)
model = keras.models.Sequential([
    keras.layers.Conv2D(256, (2,2), strides=1, activation='relu', input_shape=(7,7,1)),
    keras.layers.Conv2D(256, (2,2), strides=1, activation='relu'),
    keras.layers.Flatten()
])

In [12]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 6, 6, 256)         1280      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 5, 5, 256)         262400    
_________________________________________________________________
flatten (Flatten)            (None, 6400)              0         
Total params: 263,680
Trainable params: 263,680
Non-trainable params: 0
_________________________________________________________________


### Combined model attempt

In [13]:
optimizer = keras.optimizers.Adam()
loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True)

In [14]:
params = {'objective':'multi:softprob', 'num_class':11}

In [21]:
train_dataset = tf.data.Dataset.from_tensor_slices((X_rand[:10000], y_rand[:10000]))
train_dataset = train_dataset.batch(1)

In [44]:
xs = X_rand[:10000]
ys = y_rand[:10000]
features = model(xs, training=True)
labels = [list(r).index(0) for r in ys]

dtrain = xgb.DMatrix(features.numpy(), label=labels)
bst = xgb.train(params, dtrain)
logits = bst.predict(dtrain)

tf_xs = tf.convert_to_tensor(xs)
tf_ys = tf.convert_to_tensor(ys)
tf_logits = tf.convert_to_tensor(logits)
with tf.GradientTape() as tape:
    tape.watch(model.trainable_weights)
    # Run the forward pass of the layer.
    # The operations that the layer applies
    # to its inputs are going to be recorded
    # on the GradientTape
#     features = model(xs, training=True)  # Logits for this minibatch

    # Compute the loss value for this minibatch.
    loss_value = loss_fn(tf_ys, tf_logits)

# Use the gradient tape to automatically retrieve
# the gradients of the trainable variables with respect to the loss.
grads = tape.gradient(loss_value, model.trainable_weights)

# Run one step of gradient descent by updating
# the value of the variables to minimize the loss.
optimizer.apply_gradients(zip(grads, model.trainable_weights))



ValueError: No gradients provided for any variable: ['conv2d/kernel:0', 'conv2d/bias:0', 'conv2d_1/kernel:0', 'conv2d_1/bias:0'].

In [47]:
tape.gradient?

[1;31mSignature:[0m
[0mtape[0m[1;33m.[0m[0mgradient[0m[1;33m([0m[1;33m
[0m    [0mtarget[0m[1;33m,[0m[1;33m
[0m    [0msources[0m[1;33m,[0m[1;33m
[0m    [0moutput_gradients[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0munconnected_gradients[0m[1;33m=[0m[1;33m<[0m[0mUnconnectedGradients[0m[1;33m.[0m[0mNONE[0m[1;33m:[0m [1;34m'none'[0m[1;33m>[0m[1;33m,[0m[1;33m
[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m
Computes the gradient using operations recorded in context of this tape.

Args:
  target: a list or nested structure of Tensors or Variables to be
    differentiated.
  sources: a list or nested structure of Tensors or Variables. `target`
    will be differentiated against elements in `sources`.
  output_gradients: a list of gradients, one for each element of
    target. Defaults to None.
  unconnected_gradients: a value which can either hold 'none' or 'zero' and
    alters the value which will be returned if

In [111]:
# for (x_batch_train, y_batch_train) in train_dataset:
#     with tf.GradientTape() as tape:
#         # Run the forward pass of the layer.
#         # The operations that the layer applies
#         # to its inputs are going to be recorded
#         # on the GradientTape
#         features = model(x_batch_train, training=True)  # Logits for this minibatch
#         labels = [list(r).index(0) for r in y_batch_train]

#         dtrain = xgb.DMatrix(features.numpy(), label=labels)
#         bst = xgb.train(params, dtrain)
#         logits = tf.convert_to_tensor(bst.predict(dtrain))

#         # Compute the loss value for this minibatch.
#         loss_value = loss_fn(y_batch_train, logits)

#     # Use the gradient tape to automatically retrieve
#     # the gradients of the trainable variables with respect to the loss.
#     grads = tape.gradient(loss_value, model.trainable_weights)

#     # Run one step of gradient descent by updating
#     # the value of the variables to minimize the loss.
#     optimizer.apply_gradients(zip(grads, model.trainable_weights))

In [92]:
tape.watched_variables()

(<tf.Variable 'conv2d_2/kernel:0' shape=(2, 2, 1, 256) dtype=float32, numpy=
 array([[[[-0.07284722,  0.04625756, -0.03468293, ...,  0.04178733,
           -0.05144799,  0.04714624]],
 
         [[-0.04565594, -0.00010972, -0.0097011 , ...,  0.05431872,
            0.01638418,  0.0102551 ]]],
 
 
        [[[-0.04973967,  0.06152815,  0.05775816, ..., -0.06755853,
            0.04159236,  0.01956882]],
 
         [[ 0.04163671,  0.05185356,  0.03649052, ...,  0.03251676,
            0.0119964 , -0.00093066]]]], dtype=float32)>,
 <tf.Variable 'conv2d_2/bias:0' shape=(256,) dtype=float32, numpy=
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 

In [72]:
logits

array([[0.94750625, 0.00535458, 0.00523769, ..., 0.00523769, 0.00523769,
        0.00523769],
       [0.9476097 , 0.00524588, 0.00523827, ..., 0.00523827, 0.00523827,
        0.00523827],
       [0.9476097 , 0.00524588, 0.00523827, ..., 0.00523827, 0.00523827,
        0.00523827],
       ...,
       [0.9476097 , 0.00524588, 0.00523827, ..., 0.00523827, 0.00523827,
        0.00523827],
       [0.94750625, 0.00535458, 0.00523769, ..., 0.00523769, 0.00523769,
        0.00523769],
       [0.9476097 , 0.00524588, 0.00523827, ..., 0.00523827, 0.00523827,
        0.00523827]], dtype=float32)

In [68]:
loss_value

<tf.Tensor: shape=(), dtype=float32, numpy=2.5291567>

In [58]:
y_rand[:10000].shape

(10000, 11)

In [None]:
epochs = 2
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))

    # Iterate over the batches of the dataset.
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):

        # Open a GradientTape to record the operations run
        # during the forward pass, which enables auto-differentiation.
        with tf.GradientTape() as tape:

            # Run the forward pass of the layer.
            # The operations that the layer applies
            # to its inputs are going to be recorded
            # on the GradientTape.
            logits = model(x_batch_train, training=True)  # Logits for this minibatch

            # Compute the loss value for this minibatch.
            loss_value = loss_fn(y_batch_train, logits)

        # Use the gradient tape to automatically retrieve
        # the gradients of the trainable variables with respect to the loss.
        grads = tape.gradient(loss_value, model.trainable_weights)

        # Run one step of gradient descent by updating
        # the value of the variables to minimize the loss.
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

        # Log every 200 batches.
        if step % 200 == 0:
            print(
                "Training loss (for one batch) at step %d: %.4f"
                % (step, float(loss_value))
            )
            print("Seen so far: %s samples" % ((step + 1) * 64))
