In [2]:
# Import the necessary packages.
import tensorflow as tf
import numpy as np  
import matplotlib.pyplot as plt
import pandas as pd

### Data Preparation.

In [4]:
# Load the two raw tables from CSV: anime metadata and the per-user ratings.
anime_csv_path = 'data/anime.csv'
rating_csv_path = 'data/rating.csv'
anime_data = pd.read_csv(anime_csv_path)
rating_data = pd.read_csv(rating_csv_path)

In [40]:
# Build the user x anime rating matrix (rows: user_id, columns: anime_id, cells: rating).
# (user, anime) pairs that have no rating come out of the pivot as NaN.
# Repeated (user_id, anime_id) rows are dropped first, keeping the first occurrence
# (the original note says the duplicates caused an error).
pair_columns = ['user_id', 'anime_id']
rating_data = rating_data.drop_duplicates(subset=pair_columns, keep='first')
ratings_long = rating_data.reset_index()
rating_matrix = ratings_long.pivot(index='user_id', columns='anime_id', values='rating')

In [46]:
# Replace every missing cell (NaN) with the sentinel -1, which marks an unknown rating.
unknown_rating = -1
rating_matrix = rating_matrix.fillna(value=unknown_rating)

In [75]:
# Shape of the rating matrix: (number of user_id rows, number of anime_id columns).
rating_matrix.shape

(73515, 11200)

In [86]:
# Preview the first three user rows of the rating matrix.
rating_matrix.head(3)

anime_id,1,5,6,7,8,15,16,17,18,19,...,34283,34324,34325,34349,34358,34367,34412,34475,34476,34519
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
2,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
3,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0


In [87]:
# Count the users whose row holds only the -1 "unknown" sentinel, i.e. users
# without a single known rating. A non-zero result means such rows exist.
#
# A row sums to -(number of columns) exactly when all of its entries are -1,
# provided no entry is below -1 (assumed for the raw ratings -- TODO confirm).
# The column count is read from the matrix instead of hard-coding 11200, and the
# row sums are computed with one vectorized call instead of a Python lambda per row.
num_anime = rating_matrix.shape[1]
all_unknown_rows = rating_matrix.sum(axis=1) == -num_anime
int(all_unknown_rows.sum())

3915

In [80]:
# Hold out the first 10000 user rows for validation; every remaining row is training data.
validation_rows = 10000
validation_matrix = rating_matrix.iloc[:validation_rows]
train_matrix = rating_matrix.iloc[validation_rows:]

### Network Structure.

In [3]:
# Layer sizes of the autoencoder, registered as TensorFlow flags and read through FLAGS.
# num_visible (11200) equals the number of columns reported for rating_matrix.
flags = tf.app.flags
FLAGS = flags.FLAGS

flags.DEFINE_integer(
    'num_visible', 11200,
    'Number of visible neurons (Number of movies the users rated.)')
flags.DEFINE_integer(
    'num_hidden_1', 1280,
    'Number of hidden layer_1 neurons.)')
flags.DEFINE_integer(
    'num_hidden_2', 128,
    'Number of hidden layer_2 neurons.)')

In [4]:
# Trainable parameters of the deep autoencoder.
# Layer widths: num_visible -> num_hidden_1 -> num_hidden_2 -> num_hidden_1 -> num_visible.
# Only the three sigmoid layers get a bias; no bias is created for weight_4.
# NOTE(review): variable names created by tf.get_variable are normally governed by
# tf.variable_scope -- confirm that tf.name_scope prefixes them as intended.
def _normal_weight(name, fan_in, fan_out):
    """Create a (fan_in, fan_out) weight matrix initialised from N(mean=0.0, stddev=0.05)."""
    return tf.get_variable(
        name=name,
        shape=(fan_in, fan_out),
        initializer=tf.random_normal_initializer(mean=0.0, stddev=0.05))


def _zero_bias(name, size):
    """Create a bias vector of length `size` initialised to zeros."""
    return tf.get_variable(name=name, shape=[size],
                           initializer=tf.zeros_initializer())


n_visible = FLAGS.num_visible
n_hidden_1 = FLAGS.num_hidden_1
n_hidden_2 = FLAGS.num_hidden_2

with tf.name_scope('weights'):
    weight_1 = _normal_weight('weight_1', n_visible, n_hidden_1)
    weight_2 = _normal_weight('weight_2', n_hidden_1, n_hidden_2)
    weight_3 = _normal_weight('weight_3', n_hidden_2, n_hidden_1)
    weight_4 = _normal_weight('weight_4', n_hidden_1, n_visible)
with tf.name_scope('biases'):
    bias_1 = _zero_bias('bias_1', n_hidden_1)
    bias_2 = _zero_bias('bias_2', n_hidden_2)
    bias_3 = _zero_bias('bias_3', n_hidden_1)

In [5]:
# Forward pass of the autoencoder.
# `ratings` is a float32 batch of user rows with FLAGS.num_visible columns each.
ratings = tf.placeholder(dtype=tf.float32,
                         shape=[None, FLAGS.num_visible],
                         name='input_ratings')


def _sigmoid_layer(inputs, weight, bias):
    """Return sigmoid(inputs * weight + bias) for one fully connected layer."""
    pre_activation = tf.nn.bias_add(tf.matmul(inputs, weight), bias)
    return tf.nn.sigmoid(pre_activation)


with tf.name_scope('inference'):
    # Encoder: visible -> hidden_1 -> hidden_2.
    infer_1 = _sigmoid_layer(ratings, weight_1, bias_1)
    infer_2 = _sigmoid_layer(infer_1, weight_2, bias_2)
    # Decoder: hidden_2 -> hidden_1 -> visible; the last layer is linear and has no bias.
    infer_3 = _sigmoid_layer(infer_2, weight_3, bias_3)
    output = tf.matmul(infer_3, weight_4)

In [8]:
# Separate known ratings (training labels) from unknown ones, which are encoded as -1.
is_unknown = tf.equal(ratings, -1.0)

# Indicator mask: 1.0 where the rating is known, 0.0 where it is unknown.
# (The previous tf.where had its branches swapped, so it marked the unknown
# entries and the count below was the number of *unknown* ratings.)
mask = tf.cast(tf.logical_not(is_unknown), dtype=tf.float32)

# Count the number of training labels (known ratings) in the batch.
# NOTE: this is 0 for a batch without any known rating; a loss that divides by it
# then evaluates 0/0.
num_train_labels = tf.reduce_sum(mask)

# Mask output with -1 at the unknown positions so that (output - ratings) is
# exactly 0 there and only known ratings contribute to the squared error.
# tf.where(condition, x, y) takes x where the condition is true, so the -1 fill
# must be the first branch and the prediction the second; the original order
# kept predictions at unknown entries and overwrote them at known ones.
output = tf.where(is_unknown, tf.zeros_like(output) - 1.0, output)

In [9]:
# Loss of the network: sum of squared differences between `output` and `ratings`,
# divided by `num_train_labels`.
with tf.name_scope('loss'):
    residual = tf.subtract(output, ratings)
    squared_error_sum = tf.reduce_sum(tf.square(residual))
    loss = tf.div(squared_error_sum, num_train_labels)

In [103]:
# Scratch check: evaluate the tensor `b` and print its value.
# NOTE: `b` is defined in the scratch cell positioned below this one (In [102]);
# that cell has to run first or this one raises NameError.
with tf.Session() as sess:
    # Parenthesised form prints the same text on Python 2 and is valid on Python 3.
    print(sess.run(b))

[1 1]


In [102]:
# Scratch experiment: build a tensor shaped like `a` whose entries are all 1
# by adding 1 to zeros_like(a).
a = [1, 2]
zeros_like_a = tf.zeros_like(a)
b = tf.add(zeros_like_a, 1)