In [4533]:
import pandas as pd
import numpy as np
import tensorflow as tf
import tf_agents
import os
import random
from collections import defaultdict

from tf_agents.environments import py_environment
from tf_agents.environments import tf_environment
from tf_agents.environments import tf_py_environment

from tf_agents.trajectories import time_step as ts
from tf_agents.specs import array_spec
from tf_agents.specs import tensor_spec


import tensorflow_probability as tfp
from tf_agents.utils import nest_utils

In [4]:
ROOT_DIR = os.getcwd()
DATA_DIR = os.path.join(ROOT_DIR, "data")

In [5]:
#Loading datasets
# read dat file
ratings_list = [i.strip().split("::") for i in open(os.path.join(DATA_DIR,'ratings.dat'), 'r').readlines()]
users_list = [i.strip().split("::") for i in open(os.path.join(DATA_DIR,'users.dat'), 'r').readlines()]
movies_list = [i.strip().split("::") for i in open(os.path.join(DATA_DIR,'movies.dat'),encoding='latin-1').readlines()]

# Craete DataFrame
ratings_df = pd.DataFrame(ratings_list, columns = ['UserID', 'MovieID', 'Rating', 'Timestamp'], dtype = np.uint32)
movies_df = pd.DataFrame(movies_list, columns = ['MovieID', 'Title', 'Genres'])
movies_df['MovieID'] = movies_df['MovieID'].apply(pd.to_numeric)
users_df = pd.DataFrame(users_list, columns=['UserID','Gender','Age','Occupation','Zip-code'])

In [3701]:
EMBEDDING_DIM = 100
STATE_SIZE = 10

### Embedding

In [3918]:
users_num = ratings_df["UserID"].max() + 1
items_num = ratings_df["MovieID"].max() + 1

embedding_network = UserMovieEmbedding(users_num, items_num, EMBEDDING_DIM)
embedding_network([np.zeros((1,)),np.zeros((1,))])
embedding_network.load_weights('save_weights/user_movie_embedding_case4.h5')

items_ids = np.array(range(items_num))
movie_embedding = embedding_network.get_layer('movie_embedding')(items_ids)

### Environment

In [3919]:
class UserMovieEmbedding(tf.keras.Model):
    def __init__(self, len_users, len_movies, embedding_dim):
        super(UserMovieEmbedding, self).__init__()
        self.m_u_input = tf.keras.layers.InputLayer(name='input_layer', input_shape=(2,))
        # embedding
        self.u_embedding = tf.keras.layers.Embedding(name='user_embedding', input_dim=len_users, output_dim=embedding_dim)
        self.m_embedding = tf.keras.layers.Embedding(name='movie_embedding', input_dim=len_movies, output_dim=embedding_dim)
        # dot product
        self.m_u_merge = tf.keras.layers.Dot(name='movie_user_dot', normalize=False, axes=1)
        # output
        self.m_u_fc = tf.keras.layers.Dense(1, activation='sigmoid')
        
    def call(self, x):
        x = self.m_u_input(x)
        uemb = self.u_embedding(x[0])
        memb = self.m_embedding(x[1])
        m_u = self.m_u_merge([memb, uemb])
        return self.m_u_fc(m_u)

In [3920]:
class DRRAveStateRepresentation(tf.keras.Model):
    def __init__(self, embedding_dim):
        super(DRRAveStateRepresentation, self).__init__()
        self.embedding_dim = embedding_dim
        self.wav = tf.keras.layers.Conv1D(1, 1, 1)
        self.concat = tf.keras.layers.Concatenate()
        self.flatten = tf.keras.layers.Flatten()
        
    def call(self, x):
        items_eb = tf.transpose(x[1], perm=(0,2,1))/self.embedding_dim
        wav = self.wav(items_eb)
        wav = tf.transpose(wav, perm=(0,2,1))
        wav = tf.squeeze(wav, axis=1)
        user_wav = tf.keras.layers.multiply([x[0], wav])
        concat = self.concat([x[0], user_wav, wav])
        return self.flatten(concat)

In [4663]:
class RS_Env(py_environment.PyEnvironment):
    def __init__(self, ratings_df, embedding_dim, state_size, embedding_network):
        self.users_num = ratings_df["UserID"].max() + 1
        self.items_num = ratings_df["MovieID"].max() + 1
        self.ratings_df = ratings_df
        self.pos_ratings_df = ratings_df.loc[ratings_df["Rating"] >= 4]
        self.embedding_dim = embedding_dim
        self.embedding_network = embedding_network
        self.state_size = state_size
        
            
        self._action_spec = array_spec.ArraySpec(shape = (1, ), dtype = np.int64, name = "action")
        self._observation_spec = array_spec.ArraySpec(shape = (1,3*self.embedding_dim, ), dtype = np.float32, name = "state_representation")
        
        
        self.valid_users = self._generate_valid_user()
        
        # reset env
        self.user_id = np.random.choice(self.valid_users, size = 1).item()
        
        self.user_df = self.ratings_df.loc[self.ratings_df["UserID"] == self.user_id]
        self.movie_rate_dict = defaultdict(lambda: -0.5, zip(self.user_df["MovieID"], self.user_df["Rating"]))
        self.state_items_ids = self.user_df.loc[self.user_df["Rating"] >= 4, "MovieID"].head(self.state_size).values
        self.user_items = self.user_df["MovieID"].values
        
        self.recommended_items = self.state_items_ids.copy()
        

        user_eb = self.embedding_network.get_layer('user_embedding')(np.array(self.user_id))
        state_items_eb = self.embedding_network.get_layer('movie_embedding')(np.array(self.state_items_ids))
        
        
        self.srm_ave = DRRAveStateRepresentation(self.embedding_dim)
        self._state = np.array(self.srm_ave([np.expand_dims(user_eb, axis=0), np.expand_dims(state_items_eb, axis=0)]))
        
        for x in self.recommended_items:
            self.movie_rate_dict[x] = -0.5
            
        self._episode_ended = False
        
        
    def action_spec(self):
        return self._action_spec
    
    def observation_spec(self):
        return self._observation_spec
    
    def _reset(self):        
        
        self.user_id = np.random.choice(self.valid_users, size = 1).item()
        
        self.user_df = self.ratings_df.loc[self.ratings_df["UserID"] == self.user_id]
        self.movie_rate_dict = defaultdict(lambda: -0.5, zip(self.user_df["MovieID"], self.user_df["Rating"]))
        self.state_items_ids = self.user_df.loc[self.user_df["Rating"] >= 4, "MovieID"].head(self.state_size).values
        self.user_items = self.user_df["MovieID"].values
        
        self.recommended_items = self.state_items_ids.copy()
        

        user_eb = self.embedding_network.get_layer('user_embedding')(np.array(self.user_id))
        state_items_eb = self.embedding_network.get_layer('movie_embedding')(np.array(self.state_items_ids))
        
        
        self.srm_ave = DRRAveStateRepresentation(self.embedding_dim)
        self._state = np.array(self.srm_ave([np.expand_dims(user_eb, axis=0), np.expand_dims(state_items_eb, axis=0)]))
        
        for x in self.recommended_items:
            self.movie_rate_dict[x] = -0.5
            
        self._episode_ended = False
        
        return ts.restart(self._state)
        
        
    def _generate_valid_user(self):
        temp = self.ratings_df.loc[ratings_df["Rating"] >= 4].groupby(["UserID"])["Rating"].count()
        valid_users = temp.loc[temp >= self.state_size].index
        
        return valid_users
    
    def _step(self, action):
        
        if self._episode_ended:
            return self.reset()    
        
        
        if action in self.user_items:
            if action not in self.recommended_items:
                rate = self.user_df.loc[self.user_df["MovieID"] == action, "Rating"].values
                reward = (rate-3)/2
                if reward > 0:
                    self.state_items_ids = np.append(self.state_items_ids[1:], values = action)
                    state_items_eb = self.embedding_network.get_layer('movie_embedding')(np.array(self.state_items_ids))
                    self._state = self.srm_ave([np.expand_dims(user_eb, axis=0), np.expand_dims(state_items_eb, axis=0)])
            else:
                reward = 0
        else:
            reward = -0.5
        
        self.recommended_items = np.unique(np.append(self.recommended_items, action))
        
        
        if len(self.recommended_items) > 20:
            self._episode_ended = True
            
        if self._episode_ended:
            return ts.termination(self._state, reward)
        else:
            return ts.transition(self._state, reward, discount = 0.9)

In [4666]:
rs_env = RS_Env(ratings_df, embedding_dim = 100, state_size = 10, embedding_network = embedding_network)

In [4667]:
tf_rs_env = tf_py_environment.TFPyEnvironment(rs_env)

### Actor

In [4615]:
from tf_agents.networks import network

In [4616]:
# actor_net  = tf_agents.agents.ddpg.actor_network.ActorNetwork(
#     input_tensor_spec = tf_rs_env.observation_spec(), 
#     output_tensor_spec = tf_rs_env.action_spec(),  # --> Only float actions are supported by this network.
#     fc_layer_params=[128, 128, 100],
#     activation_fn = tf.nn.relu,
#     name = "ActorNetwork"
# )

In [4617]:
items_ids = np.array(range(items_num))
movie_embedding = embedding_network.get_layer('movie_embedding')(items_ids)

In [4682]:
class ActorNetwork(network.Network):
    def __init__(self, 
                 observation_spec,
                 action_spec,
                 embedding_dim,
                 hidden_dim,
                 items_num,
                 movie_embedding,
                 name):
        
        super(ActorNetwork, self).__init__(input_tensor_spec=observation_spec, state_spec=(), name=name)
        
        self.movie_embedding = movie_embedding
        self.items_num = items_num
        self._action_spec = action_spec
#         self._network_output_spec = None
        self.observation_spec = observation_spec
        
#         self.model = tf.keras.Sequential([
#             tf.keras.layers.InputLayer(name='input_layer', input_shape=(3*embedding_dim,)),
#             tf.keras.layers.Dense(hidden_dim, activation='relu'),
#             tf.keras.layers.Dense(hidden_dim, activation='relu'),
#             tf.keras.layers.Dense(embedding_dim, activation='tanh')
#         ])

        self.model = tf_agents.networks.Sequential([tf.keras.layers.InputLayer(name='input_layer', input_shape=(3*embedding_dim,)),
                                                    tf.keras.layers.Dense(hidden_dim, activation='relu'),
                                                    tf.keras.layers.Dense(hidden_dim, activation='relu'),
                                                    tf.keras.layers.Dense(embedding_dim, activation='tanh')
                                                   ])
                                                   
        
        
        self.recommended_items = []
    
    def create_variables(self, input_tensor_spec=None, **kwargs):
        """Force creation of the network's variables.
        Return output specs.
        Args:
          input_tensor_spec: (Optional).  Override or provide an input tensor spec
            when creating variables.
          **kwargs: Other arguments to `network.call()`, e.g. `training=True`.
        Returns:
          Output specs - a nested spec calculated from the outputs (excluding any
          batch dimensions).  If any of the output elements is a tfp `Distribution`,
          the associated spec entry returned is `None`.
        Raises:
          ValueError: If no `input_tensor_spec` is provided, and the network did
            not provide one during construction.
        """
        if self._network_output_spec is not None:
            if self._network_output_spec.shape != []:
                print(f"_network_output_spec : {self._network_output_spec}")
                return self._network_output_spec
    
        if self._input_tensor_spec is None:
            print(f"_input_tensor_spec : {self._input_tensor_spec}")
            self._input_tensor_spec = input_tensor_spec
        input_tensor_spec = self._input_tensor_spec
        
        if input_tensor_spec is None:
            raise ValueError(
              "Unable to create_variables: no input_tensor_spec provided, and "
              "Network did not define one.")

        random_input = tensor_spec.sample_spec_nest(
            input_tensor_spec, outer_dims=(1,))
        
        initial_state = self.get_initial_state(batch_size=1)
        step_type = tf.fill((1,), time_step.StepType.FIRST)
        outputs = self.__call__(
                    random_input,
                    step_type=step_type,
                    network_state=initial_state,
                    **kwargs)
        print(outputs)
        
#         def _calc_unbatched_spec(x):
#             if isinstance(x, tfp.distributions.Distribution):
#                 parameters = distribution_utils.get_parameters(x)
#                 parameter_specs = _convert_to_spec_and_remove_singleton_batch_dim(parameters, outer_ndim=1)

#                 return distribution_utils.DistributionSpecV2(event_shape=x.event_shape, dtype=x.dtype, parameters=parameter_specs)
#             else:
#                 return nest_utils.remove_singleton_batch_spec_dim(tf.type_spec_from_value(x), outer_ndim=1)

#         self._network_output_spec = tf.nest.map_structure(_calc_unbatched_spec, outputs[0])

        self._network_output_spec = tf.type_spec_from_value(outputs[0])
        return self._network_output_spec        
    
    
    def call(self, observations, step_type = (), network_state = ()):
        action_score, network_state = self.model(observations)
        action_score = tf.reshape(action_score, (1,100))
        
        items_ids = np.array(range(self.items_num))
        
#         items_ebs = self.embedding_network.get_layer('movie_embedding')(items_ids)
        items_ebs = self.movie_embedding
        action_score = tf.transpose(action_score, perm=(1,0))
        
        item_idx = np.argmax(tf.keras.backend.dot(items_ebs, action_score))
        
        action = int(items_ids[item_idx])
        self.recommended_items.append(action)
        
        return tf.nest.pack_sequence_as(self._action_spec, [tf.convert_to_tensor(np.array([action]))]), network_state 
#         return tf.convert_to_tensor(np.array([action])), network_state


In [4683]:
actor_net = ActorNetwork(tf_rs_env.observation_spec(), tf_rs_env.action_spec(), embedding_dim = EMBEDDING_DIM, hidden_dim = 128, items_num = items_num, movie_embedding = movie_embedding, name = "ActorNetwork2323")

In [4684]:
target_actor_net = ActorNetwork(tf_rs_env.observation_spec(), tf_rs_env.action_spec(), embedding_dim = EMBEDDING_DIM, hidden_dim = 128, items_num = items_num, movie_embedding = movie_embedding, name = "TargetActorNetwork2323")

### Critic

In [4685]:
critic_net = tf_agents.agents.ddpg.critic_network.CriticNetwork(
    input_tensor_spec = (tf_rs_env.observation_spec(), tf_rs_env.action_spec()),
    observation_fc_layer_params = [100],
    joint_fc_layer_params = [128, 128],
    activation_fn = tf.nn.relu,
    output_activation_fn = tf.nn.relu,
    name='CriticNetwork'
)

In [4686]:
target_critic_net = tf_agents.agents.ddpg.critic_network.CriticNetwork(
    input_tensor_spec = (tf_rs_env.observation_spec(), tf_rs_env.action_spec()),
    observation_fc_layer_params = [100],
    joint_fc_layer_params = [128, 128],
    activation_fn = tf.nn.relu,
    output_activation_fn = tf.nn.relu,
    name='TargetCriticNetwork'
)

In [4687]:
ACTOR_LEARNIG_RATE = 0.001
CRITIC_LEARNIG_RATE = 0.001

In [4689]:
tf_ddpg_agent = tf_agents.agents.DdpgAgent(time_step_spec = tf_rs_env.time_step_spec(),
                                           action_spec = tf_rs_env.action_spec(),
                                           actor_network = actor_net,
                                           critic_network = critic_net,
                                           actor_optimizer = tf.keras.optimizers.Adam(learning_rate=ACTOR_LEARNIG_RATE),
                                           critic_optimizer = tf.keras.optimizers.Adam(learning_rate=CRITIC_LEARNIG_RATE),
#                                            target_actor_network = target_actor_net,
#                                            target_critic_network = target_critic_net,
                                           target_update_tau = 0.001,
                                           target_update_period = 1,
                                           gamma = 0.9
                                           )

_network_output_spec : TensorSpec(shape=(1,), dtype=tf.int64, name=None)
(<tf.Tensor: shape=(1,), dtype=int64, numpy=array([2657])>, ())
_network_output_spec : TensorSpec(shape=(1,), dtype=tf.int64, name=None)
_network_output_spec : TensorSpec(shape=(1,), dtype=tf.int64, name=None)


ValueError: OU Noise is applicable only to continuous actions.
  In call to configurable 'DdpgAgent' (<class 'tf_agents.agents.ddpg.ddpg_agent.DdpgAgent'>)

In [None]:
driver(tf_ddpg_agent)