In [1]:
import pandas as pd
import numpy as np

## Get Some Data

In [2]:
# Load the merged view/pay/weather table; the first two CSV columns
# (shown as shop_id and time_stamp in the rendered output) form the row MultiIndex.
data = pd.read_csv('dataset/df_view_pay_with_month_week_aqi_weather.csv', index_col=[0, 1])
# Preview only the leading rows instead of rendering the whole frame.
data.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,view_cnt,pay_cnt,weekday,month,aqi,weather,wind,tmp
shop_id,time_stamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,2015-10-10,0,188,5,10,64,0.0,0.0,17.0
1,2015-10-11,0,272,6,10,68,6.0,0.0,16.5
1,2015-10-12,0,257,0,10,85,1.0,0.0,17.5
1,2015-10-13,0,239,1,10,84,5.0,0.0,17.5
1,2015-10-14,0,277,2,10,143,0.0,0.0,18.0
1,2015-10-15,0,227,3,10,164,6.0,0.0,19.0
1,2015-10-16,0,242,4,10,135,1.0,0.0,20.5
1,2015-10-17,0,293,5,10,78,5.0,0.0,20.0
1,2015-10-18,0,291,6,10,66,5.0,0.0,20.0
1,2015-10-19,0,256,0,10,67,0.0,0.0,19.5


In [3]:
def data_generator(config, df):
    """Yield one epoch of randomly windowed mini-batches.

    The series IDs found in the first index level of ``df`` are shuffled once
    and cut into ``len(IDs) // batch_size`` disjoint batches, so no ID is
    served twice within one pass (IDs that do not fill a final batch are
    dropped). For every ID in a batch, ``num_steps`` consecutive rows are
    taken at a random offset as features; the labels are the ``pay_cnt``
    column over the same window shifted one row ahead.

    Args:
        config: object exposing integer attributes ``batch_size`` and
            ``num_steps``.
        df: DataFrame whose first index level identifies a series and which
            has a ``pay_cnt`` column. Every series needs at least
            ``num_steps + 1`` rows.

    Yields:
        tuple ``(features, labels, batch_ID_list)`` where ``features`` has
        shape ``(batch_size, num_steps, n_columns)``, ``labels`` has shape
        ``(batch_size, num_steps, 1)`` and ``batch_ID_list`` holds the IDs in
        the same order as the first axis of both arrays.

    Raises:
        AssertionError: if ``batch_size`` exceeds the number of IDs, or a
            series is too short to supply ``num_steps`` features plus the
            one-step-ahead label.
    """
    batch_size = config.batch_size
    num_steps = config.num_steps
    # Read the IDs from the index values rather than ``index.levels``: levels
    # keep entries that were filtered out of ``df`` and would make ``df.loc`` fail.
    ID_list = list(df.index.get_level_values(0).unique())
    m = len(ID_list)
    assert batch_size <= m, 'batch_size({}) > sample number({}), lower batch_size!'.format(batch_size, m)
    # Shuffle once per epoch. Reshuffling inside the loop would let an ID land
    # in several batches while other IDs are never served.
    np.random.shuffle(ID_list)
    for batch_id in range(m // batch_size):
        batch_ID_list = ID_list[batch_id*batch_size:(batch_id+1)*batch_size]
        _features, _labels = [], []
        for ID in batch_ID_list:
            each = df.loc[ID].reset_index(drop=True)
            # Position of the last row; the label window ends one row after
            # the feature window, so the features may start no later than
            # ``_m - num_steps``.
            _m = each.shape[0] - 1
            assert num_steps <= _m, 'lower num_steps!'
            # randint's upper bound is exclusive, hence the +1.
            _s_idx = np.random.randint(0, _m - num_steps + 1)
            _e_idx = _s_idx + num_steps
            _features.append(each.iloc[_s_idx:_e_idx].values)
            _labels.append(each['pay_cnt'].values[_s_idx+1:_e_idx+1].reshape(-1, 1))

        features = np.stack(_features)
        labels = np.stack(_labels)
        yield features, labels, batch_ID_list
    

In [4]:
class TrainConfig(object):
    """Settings read by the batch generator and the graph-building cells."""
    feature_size = 8   # size of the last axis of the input placeholder
    lstm_size = 70     # handed to the LSTM cell constructor
    num_steps = 30     # rows per sampled window
    batch_size = 10    # series per mini-batch


class TestConfig(TrainConfig):
    """Inherits every TrainConfig setting except the window length, which is one step."""
    num_steps = 1


train_config, test_config = TrainConfig(), TestConfig()

In [5]:
# Pull a single batch to check the generator's output shapes.
# The built-in next() works on Python 2 and 3; the .next() method exists only on Python 2 generators.
f, l, id_list = next(data_generator(train_config, data))
print(f.shape)
print(l.shape)
print(id_list)

(10, 30, 8)
(10, 30, 1)
[891, 43, 483, 674, 1110, 1210, 1872, 1109, 13, 344]


## Play Around With TensorFlow

In [6]:
import tensorflow as tf

In [7]:
# tf.InteractiveSession()

In [8]:
# Alias for the active configuration; the graph cell reads the LSTM size and
# the zero-state batch size through this name.
config = train_config

In [26]:
tf.reset_default_graph()

# Batch-major input windows: [batch, num_steps, feature_size]; the batch axis is left dynamic.
# Every size is read from `config` so that rebinding it cannot leave the graph half-updated.
x = tf.placeholder(tf.float32, shape=[None, config.num_steps, config.feature_size])

# cell = tf.contrib.rnn.BasicLSTMCell(config.lstm_size)
cell = tf.contrib.rnn.LSTMCell(config.lstm_size)
# Only consumed by the dynamic_rnn variant below; static_rnn is given `dtype` instead.
initial_state = cell.zero_state(config.batch_size, tf.float32)


## dynamic rnn
# outputs, state = tf.nn.dynamic_rnn(cell, x, initial_state=initial_state, time_major=False)
# outputs = outputs
# final_state = state

## static rnn
# static_rnn takes a Python list of num_steps tensors, each (batch_size, feature_size),
# so split the input along its time axis (axis 1).
_x = tf.unstack(x, num=config.num_steps, axis=1)
outputs, state = tf.contrib.rnn.static_rnn(cell, _x, dtype=tf.float32)
# Join the per-step outputs back into one batch-major tensor: (batch_size, num_steps, lstm_size).
outputs = tf.stack(outputs, axis=1)
final_state = state



> NOTE: **dynamic rnn** (`tf.nn.dynamic_rnn`) is preferred over the static version.

In [28]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # The built-in next() works on Python 2 and 3; the .next() method exists only on Python 2 generators.
    inputs, _, _ = next(data_generator(train_config, data))
    print('inputs.shape: [batch_size, num_steps, feature_size] = {}'.format(inputs.shape))
    # Evaluate the per-step outputs and the final LSTM state for this one batch.
    output, state = sess.run([outputs, final_state], feed_dict={x: inputs})
    print('output.shape: [batch_size, num_steps, lstm_size] = {}'.format(output.shape))
    print('''final_state tuple(c_state, m_state),
          c or m_state.shape = [batch_size, lstm_size] = {}'''.format(state[1].shape))
#     last_out = output[:, -1, :]
#     print last_out.shape
#     print (last_out == state[1])

inputs.shape: [batch_size, num_steps, feature_size] = (10, 30, 8)
output.shape: [batch_size, num_steps, lstm_size] = (10, 30, 70)
final_state tuple(c_state, m_state),
          c or m_state.shape = [batch_size, lstm_size] = (10, 70)


In [None]:
# tf.contrib.rnn.static_rnn()