In [1]:
import pandas as pd

In [2]:
import tensorflow as tf
import tensorflow.contrib.slim as slim

  from ._conv import register_converters as _register_converters


In [3]:
import numpy as np

### Step1: Read Data

In [4]:
data = pd.read_csv('data/data.csv')

In [5]:
data = data[['Customer_ID','Age','APE', 'DeltaValue', 'Premium','Product']]

In [6]:
data = data.sort_values(by=['Customer_ID','Age'])

In [7]:
data.head()

Unnamed: 0,Customer_ID,Age,APE,DeltaValue,Premium,Product
49,0,9,37,-0.93557,53,1
25,0,16,20,1.895494,17,8
27,0,19,24,-0.130597,17,5
32,0,31,37,-0.702042,55,0
9,0,49,26,0.147721,26,1


### Step2: one hot coding:

In [8]:
products = pd.get_dummies(data=data['Product'])

In [9]:
products.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
49,0,1,0,0,0,0,0,0,0,0
25,0,0,0,0,0,0,0,0,1,0
27,0,0,0,0,0,1,0,0,0,0
32,1,0,0,0,0,0,0,0,0,0
9,0,1,0,0,0,0,0,0,0,0


### Step4 :normalization

In [10]:
features = ['Age','APE', 'DeltaValue', 'Premium']

In [11]:
for feature in features:
    max_value = max(data[feature])
    min_value = min(data[feature])
    data[feature] = data[feature].apply(lambda x:(x-min_value)/(max_value-min_value))

### Step 4:  Split the data into X and Y in Numpy Format

In [12]:
data_X = data[['Customer_ID','Age','APE', 'DeltaValue', 'Premium']]

In [13]:
data_X_1 = data_X.groupby(['Customer_ID']).apply(lambda X:X[['Age','APE', 'DeltaValue', 'Premium']].values,).reset_index()

In [14]:
data_X_1.rename(columns={0:'Sequence'},inplace =True)

In [15]:
data_X_1.head()

Unnamed: 0,Customer_ID,Sequence
0,0,"[[0.1038961038961039, 0.8947368421052632, 0.31..."
1,1,"[[0.07792207792207792, 0.15789473684210525, 0...."
2,2,"[[0.22077922077922077, 1.0, 0.5251309215519462..."
3,3,"[[0.0, 0.9473684210526315, 0.5630322725339292,..."
4,4,"[[0.025974025974025976, 0.6842105263157895, 0...."


In [16]:
X_Tensor = data_X_1['Sequence'].values

In [17]:
data_Y = data[['Customer_ID']]

In [18]:
data_Y = pd.concat([data_Y,products],axis =1)

In [19]:
data_Y.head()

Unnamed: 0,Customer_ID,0,1,2,3,4,5,6,7,8,9
49,0,0,1,0,0,0,0,0,0,0,0
25,0,0,0,0,0,0,0,0,0,1,0
27,0,0,0,0,0,0,1,0,0,0,0
32,0,1,0,0,0,0,0,0,0,0,0
9,0,0,1,0,0,0,0,0,0,0,0


In [20]:
list(data_Y)

['Customer_ID', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [21]:
data_Y_1 = data_Y.groupby(['Customer_ID']).apply(lambda X:X[[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]].values,).reset_index()

In [22]:
data_Y_1.rename(columns={0:'Sequence'},inplace =True)

In [23]:
data_Y_1.head()

Unnamed: 0,Customer_ID,Sequence
0,0,"[[0, 1, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, ..."
1,1,"[[0, 0, 0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 0, ..."
2,2,"[[0, 0, 0, 0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, ..."
3,3,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, ..."
4,4,"[[0, 0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 0, ..."


In [24]:
Y_Tensor = data_Y_1['Sequence'].values

### Step 5: Model Construction

In [27]:
X_Tensor[0].shape

(9, 4)

In [28]:
Y_Tensor[0].shape

(9, 10)

In [29]:
input_ph = tf.placeholder(shape=[None,1,4], dtype=tf.float32, name='input')
target_ph = tf.placeholder(shape=[None,10], dtype=tf.float32, name='target')

In [30]:
### function to create a LSTM Cell
def build_lstm(num_units, num_layers, batch_size, keep_prob=1):
    def build_cell(num_units, keep_prob):
        cell = tf.nn.rnn_cell.LSTMCell(num_units,reuse=tf.AUTO_REUSE)
        cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=keep_prob)
        
        return cell
    cell = tf.nn.rnn_cell.MultiRNNCell([build_cell(num_units, keep_prob) for _ in range(num_layers)])
    
    ##  创建一个初始状态
    init_state = cell.zero_state(batch_size, tf.float32)
    
    return cell, init_state

In [31]:
### dynamic to run the lstm cell recusively, 一次运行， 就直接 得到最后的结果，不用反复的 对模型feed inputdata
def lstm(x, num_units, num_layers, batch_size, init_state=None, keep_prob=1, time_major=True, scope='lstm', reuse=tf.AUTO_REUSE):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        cell, zero_state = build_lstm(num_units, num_layers, batch_size, keep_prob)
        if init_state is not None:
            out, final_state = tf.nn.dynamic_rnn(cell, x, initial_state=init_state, time_major=time_major)
        else:
            out, final_state = tf.nn.dynamic_rnn(cell, x, initial_state=zero_state, time_major=time_major)
    return out, final_state

In [32]:
def rnn_classify(inputs, rnn_units=100, rnn_layers=2, batch_size=1, keep_prob=1,num_class = 10):

    rnn_out, rnn_state = lstm(inputs, rnn_units, rnn_layers, batch_size, keep_prob=keep_prob,)
    
    #rnn_out (Sequence,batch_size=1,num_unites)
    
    net = slim.fully_connected(rnn_out, num_outputs=num_class, activation_fn=None, scope='classification')

    net = slim.flatten(net)
    

    return net
    

In [33]:
out = rnn_classify(input_ph, batch_size=1, keep_prob=1,num_class=10)

Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.flatten instead.


In [34]:
loss = tf.losses.softmax_cross_entropy(logits=out, onehot_labels=target_ph)

Instructions for updating:
Use tf.cast instead.


In [35]:
#### defind optimizer
lr = 0.001
optimizer = tf.train.AdamOptimizer(lr)
train_op = optimizer.minimize(loss)

In [36]:
### create session to run the flow
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

In [37]:
for i in range(1000):
    loss_list = []
    for (Y,X) in zip(Y_Tensor,X_Tensor):
        sess.run(train_op,feed_dict={target_ph:Y.reshape(-1,10),input_ph:X.reshape(-1,1,4)})
        loss_train = sess.run(loss,feed_dict={input_ph:X.reshape(-1,1,4), target_ph:Y.reshape(-1,10)})
#         print(loss_train)
        loss_list.append(loss_train)
    if i%100==99:
        print('STEP {}: train_loss: {:.4f}'.format(i+1,np.mean(loss_list)))

STEP 100: train_loss: 0.7941
STEP 200: train_loss: 0.3651
STEP 300: train_loss: 0.2372
STEP 400: train_loss: 0.1431
STEP 500: train_loss: 0.0858
STEP 600: train_loss: 0.0447
STEP 700: train_loss: 0.0200
STEP 800: train_loss: 0.0088
STEP 900: train_loss: 0.0041
STEP 1000: train_loss: 0.0022


In [38]:
### predict the latest time purchase
predict_result = []
real_result = []
for (Y,X) in zip(Y_Tensor,X_Tensor):
    real_result.append(np.argmax(Y[-1])) ### 
    pred = sess.run(out,feed_dict={input_ph:X.reshape(-1,1,4)})
    predict_result.append(np.argmax(pred[-1])) ### add predict result

In [39]:
predict_result ### predition result: last time which products cusotmer purchase 

[3, 9, 1, 5, 1, 6]

In [40]:
real_result #real result: last time which products cusotmer purchase 

[3, 9, 1, 5, 1, 6]

In [41]:
data.groupby('Customer_ID').tail(1) ### last time which product cusotmer purchase 

Unnamed: 0,Customer_ID,Age,APE,DeltaValue,Premium,Product
16,0,1.0,0.421053,0.355996,1.0,3
13,1,0.935065,0.052632,0.438736,0.357143,9
42,2,1.0,0.684211,0.269634,0.010204,1
45,3,0.714286,0.210526,0.417837,0.530612,5
34,4,0.805195,0.947368,0.0,0.836735,1
12,5,0.571429,0.842105,0.534093,0.959184,6
