## 1. Reading data and creating a recomender Matrix

#### 1.1 Read the CSV

In [7]:
import pandas as pd
import os

os.environ['KMP_DUPLICATE_LIB_OK']='True'
df = pd.read_csv('ratings_Apps_for_Android_processed.csv')
# df.columns = ['user','item','rating']

num_items = df.item.nunique()
num_users = df.user.nunique()    
print("Unique users : {} ,and Unique apps: {}".format(num_users, num_items))   
df.head()

Unique users : 368104 ,and Unique apps: 11045


Unnamed: 0,user,item,rating
0,AMNRMCC3IHXSD,B004AGCR1K,1.0
1,A249RKULWQDFA0,B004AGCR1K,1.0
2,A21BZ7ERKIVFFB,B004AGCR1K,1.0
3,A5L95BPFYPCPN,B004ALJIOE,5.0
4,A2KM7J2DNEG61C,B004ALJIOE,5.0


#### 1.2 Normalising the reviews from 0-5 to 0-1

In [8]:
from sklearn import preprocessing
r = df['rating'].values.astype(float)
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(r.reshape(-1,1))
df_normalized = pd.DataFrame(x_scaled)
df['rating'] = df_normalized

#### 1.3 Converting the dataframe to a matrix.
with users as rows and the different apps as columns and values as ratings.
Fill all non available ratings as zeros.

In [9]:
import numpy as np
from tqdm import tqdm

small_db = True
if small_db:
    _u = list(df.user.unique())
    df = df.loc[df['user'].isin(_u[0:10000])]
    matrix = df.pivot(index='user', columns='item', values='rating')
    matrix.fillna(0, inplace=True)
    users = matrix.index.tolist()
    items = matrix.columns.tolist()
    matrix = matrix.values
else:
    print('Takes more than 20 Mins!!!')
    items = list(df.item.unique())
    users = list(df.user.unique())
    matrix = np.zeros([num_users,num_items])
    for i in tqdm(range(0,df.shape[0])):
        _t = df.loc[i]
        matrix[users.index(_t['user']),items.index(_t['item'])]  = _t['rating']
num_items = df.item.nunique()
num_users = df.user.nunique() 

## 2. Building a tensorflow model

#### 2.1 Buidling a tensorflow model architecture

In [10]:
import numpy as np
import tensorflow as tf
import os

num_input = num_items
num_hidden_1 = 30
num_hidden_2 = 10

EXPORT_DIR = './model'

if os.path.exists(EXPORT_DIR):
    shutil.rmtree(EXPORT_DIR)
    
X = tf.placeholder(tf.float32, [None, num_input],name='input_tensor')
weights = {
    'encoder_h1': tf.Variable(tf.random_normal([num_input, num_hidden_1], dtype=tf.float32), name='encoder_h1'),
    'encoder_h2': tf.Variable(tf.random_normal([num_hidden_1, num_hidden_2], dtype=tf.float32),name='encoder_h2'),
    'decoder_h1': tf.Variable(tf.random_normal([num_hidden_2, num_hidden_1], dtype=tf.float32),name='decoder_h1'),
    'decoder_h2': tf.Variable(tf.random_normal([num_hidden_1, num_input], dtype=tf.float32),name='decoder_h2'),
}

biases = {
    'encoder_b1': tf.Variable(tf.random_normal([num_hidden_1], dtype=tf.float32),name='encoder_b1'),
    'encoder_b2': tf.Variable(tf.random_normal([num_hidden_2], dtype=tf.float32),name='encoder_b2'),
    'decoder_b1': tf.Variable(tf.random_normal([num_hidden_1], dtype=tf.float32),name='decoder_b1'),
    'decoder_b2': tf.Variable(tf.random_normal([num_input], dtype=tf.float32),name='decoder_b2'),
}

# Building the encoder
def encoder(x):
    # Encoder Hidden layer with sigmoid activation #1
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['encoder_h1']), biases['encoder_b1']),name='encoder_layer1')
    # Encoder Hidden layer with sigmoid activation #2
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['encoder_h2']), biases['encoder_b2']),name='encoder_layer2')
    return layer_2

# Building the decoder
def decoder(x):
    # Decoder Hidden layer with sigmoid activation #1
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['decoder_h1']), biases['decoder_b1']),name='decoder_layer1')
    # Decoder Hidden layer with sigmoid activation #2
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['decoder_h2']), biases['decoder_b2']),name='decoder_layer2')
    return layer_2


# Construct model
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)


# Prediction
y_pred = decoder_op


# Targets are the input data.
y_true = X

# Define loss and optimizer, minimize the squared error

loss = tf.losses.mean_squared_error(y_true, y_pred)
optimizer = tf.train.RMSPropOptimizer(0.03).minimize(loss)

# Define evaluation metrics

eval_x = tf.placeholder(tf.int32, )
eval_y = tf.placeholder(tf.int32, )
pre, pre_op = tf.metrics.precision(labels=eval_x, predictions=eval_y)



#### 2.2 Train the model and save the model

In [11]:
init = tf.global_variables_initializer()
# local_init = tf.local_variables_initializer()
epochs = 25
batch_size = 256

with tf.Session() as sess:
    sess.run(init)

    num_batches = int(matrix.shape[0] / batch_size)
    matrix = np.array_split(matrix, num_batches)

    for i in range(epochs):

        avg_cost = 0

        for batch in matrix:
            _, l = sess.run([optimizer, loss], feed_dict={X: batch})
            avg_cost += l

        avg_cost /= num_batches

        print("Epoch: {} Loss: {}".format(i + 1, avg_cost))
        
    WC1 = weights['encoder_h1'].eval(sess)
    BC1 = biases['encoder_b1'].eval(sess)
    WC2 = weights['encoder_h2'].eval(sess)
    BC2 = biases['encoder_b2'].eval(sess)
    WD1 = weights['decoder_h1'].eval(sess)
    BD1 = biases['decoder_b1'].eval(sess)
    WD2 = weights['decoder_h2'].eval(sess)
    BD2 = biases['decoder_b2'].eval(sess)
    saver = tf.train.Saver()
    save_path = saver.save(sess, "./Seekers_Model")
matrix = np.concatenate(matrix, axis=0)

Epoch: 1 Loss: 0.3990599581828484
Epoch: 2 Loss: 0.3960302479756184
Epoch: 3 Loss: 0.37613681187996495
Epoch: 4 Loss: 0.2938448270926109
Epoch: 5 Loss: 0.1187096135929609
Epoch: 6 Loss: 0.03362015357766396
Epoch: 7 Loss: 0.01737565131714711
Epoch: 8 Loss: 0.014089030715135427
Epoch: 9 Loss: 0.010965578090877105
Epoch: 10 Loss: 0.009747261921755778
Epoch: 11 Loss: 0.008894290321339399
Epoch: 12 Loss: 0.008364836804759808
Epoch: 13 Loss: 0.00834521030386289
Epoch: 14 Loss: 0.007562704933568453
Epoch: 15 Loss: 0.0068592444324913696
Epoch: 16 Loss: 0.006075209233527765
Epoch: 17 Loss: 0.005717557986290791
Epoch: 18 Loss: 0.0053570067080167625
Epoch: 19 Loss: 0.005111882152656714
Epoch: 20 Loss: 0.005106533065629311
Epoch: 21 Loss: 0.005101367413329008
Epoch: 22 Loss: 0.0050962816279094955
Epoch: 23 Loss: 0.005089629548960007
Epoch: 24 Loss: 0.00483060669965851
Epoch: 25 Loss: 0.004618493255036764


#### 2.3 Predicting 

In [12]:
print('Prediction')
def make_singlePred(sample):
    with tf.Session() as sess:
        sess.run(init)
        preds = sess.run(decoder_op, feed_dict={X: samples})

        return preds
samples = np.zeros([1,num_input])


print(make_singlePred(samples))
    
samples = np.zeros([1,num_input])
samples[0,num_input-1]=0.6
# the user gave num_input-1 app a rating of 0.6*5=3 star
print(make_singlePred(samples))


Prediction
[[0.6973344  0.03454772 0.9281022  ... 0.05060022 0.98643965 0.02912048]]
[[0.06257072 0.5338691  0.23451154 ... 0.0204625  0.05798892 0.00115483]]


#### 2.4 Freeze model for android development

In [13]:
def freeze_graph(model_dir, output_node_names):
    checkpoint = tf.train.get_checkpoint_state(model_dir)
    input_checkpoint = checkpoint.model_checkpoint_path
    absolute_model_dir = "/".join(input_checkpoint.split('/')[:-1])
    output_graph = absolute_model_dir + "/frozen_model.pb"
    clear_devices = True
    with tf.Session(graph=tf.Graph()) as sess:
        saver = tf.train.import_meta_graph(input_checkpoint + '.meta', clear_devices=clear_devices)
        saver.restore(sess, input_checkpoint)
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,tf.get_default_graph().as_graph_def(),output_node_names.split(",")) 
        with tf.gfile.GFile(output_graph, "wb") as f:
            f.write(output_graph_def.SerializeToString())
    return output_graph_def

_ = freeze_graph('.', 'decoder_layer2')

INFO:tensorflow:Restoring parameters from ./Seekers_Model
INFO:tensorflow:Froze 8 variables.
INFO:tensorflow:Converted 8 variables to const ops.


#### 2.5 Convert the model from tensorflow to tflite using toco

In [14]:
print('Below Please replace --input_shape=1,#### with --input_shape=1,{}'.format(num_input))

Below Please replace --input_shape=1,#### with --input_shape=1,11045


In [15]:
!toco --graph_def_file=frozen_model.pb --output_file=optimized_graph.lite   --input_format=TENSORFLOW_GRAPHDEF   --output_format=TFLITE   --input_shape=1,11045 --input_array="input_tensor" --output_array="decoder_layer2" --input_data_type=FLOAT --inference_type=FLOAT

2018-11-18 04:47:22.084158: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
