In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

import pandas as pd
import random
from gym import spaces

import warnings
warnings.filterwarnings("ignore")

import torch
import torch.nn as nn
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common.policies import ActorCriticPolicy


In [2]:
movie_features_path = 'data/movie_features.csv'
rating_features_path = 'data/ratings_features.csv'

In [3]:
pd.read_csv(movie_features_path).drop('Unnamed: 0',axis=1).head(5)

Unnamed: 0,movieId,title,genres,tag,encoded_movie_ids,desc,desc_embedding_glove
0,4,Waiting to Exhale (1995),comedy drama romance,"['chick flick', 'revenge', 'characters', 'chic...",0,"Waiting to Exhale (1995) | comedy, drama, roma...",[-3.90065998e-01 2.85064846e-01 1.03168570e-...
1,11,"American President, The (1995)",comedy drama romance,"['president', 'president', 'us president', 'wh...",1,"American President, The (1995) | comedy, drama...",[ 5.35968468e-02 2.33869895e-01 2.60694951e-...
2,16,Casino (1995),crime drama,"['de niro in pink', 'joe pesci', 'martin scors...",2,"Casino (1995) | crime, drama | de niro in pink...",[-0.09848612 -0.4099722 -0.08234926 -0.131738...
3,30,Shanghai Triad (Yao a yao yao dao waipo qiao) ...,crime drama,"['gong li', 'yimou zhang', 'zhang yimou']",3,Shanghai Triad (Yao a yao yao dao waipo qiao) ...,[ 0.09402635 0.07983646 0.02374954 -0.276408...
4,36,Dead Man Walking (1995),crime drama,"['nun', 'death row', 'capital punishment', 'co...",4,"Dead Man Walking (1995) | crime, drama | nun, ...",[ 2.41189212e-01 -4.62112427e-02 1.89653516e-...


In [4]:
pd.read_csv(rating_features_path).drop('Unnamed: 0',axis=1).head(50)

Unnamed: 0,userId,movieId,rating,timestamp,encoded_user_ids
0,7,3062,3.0,2002-01-16 18:10:54,0
1,7,4308,4.0,2002-01-16 18:17:06,0
2,7,4339,4.0,2002-01-16 18:21:26,0
3,7,16,3.0,2002-01-16 18:22:58,0
4,7,2028,5.0,2002-01-16 18:24:41,0
5,7,4349,3.0,2002-01-16 18:27:31,0
6,7,2384,3.0,2002-01-16 18:29:03,0
7,7,1932,3.0,2002-01-16 18:33:18,0
8,7,2427,3.0,2002-01-16 18:36:28,0
9,7,750,5.0,2002-01-16 18:44:19,0


In [15]:
class User:
    def __init__(self, userId, encodedUserId, movieRating):
        self.userId = userId
        self.encodedUserId = encodedUserId
        self.movieRating = movieRating

    def __str__(self):
        user_info = f"Encoded UserId: {self.encodedUserId}\nUserId: {self.userId}\nMovies Rating: {self.movieRating}\n"
        return user_info
    
    def get_user_rating(self, movie_id):
        df = self.movieRating[self.movieRating['movieId']==movie_id]
        if df and len(df)>0:
            rating = df['rating'].tolist()[0]
        else:
            return 0.0
        

class Movie:
    def __init__(self, movieId, encodedMovieId, title, genre, tag, desc, descEmbedding):
        self.movieId = movieId
        self.encodedMovieId = encodedMovieId
        self.title = title
        self.genre = genre
        self.tag = tag
        self.desc = desc
        self.descEmbedding = descEmbedding
    
    def __str__(self):
        movie_info = f"\nEncoded MovieId: {self.encodedMovieId}\nMovieId: {self.movieId}\nTitle: {self.title}\nGenre: {self.genre}\nTag: {self.tag}"
        return movie_info
    


class Recommender():

    def __init__(self, movie_features_path,ratings_features_path):
        super(Recommender, self).__init__()
        self.movies_data,self.users_data,self.ratings_data = self.load_features(movie_features_path,
                                                                                ratings_features_path)
        
        self.movies_map,self.users_map = self.get_list_of_objects(self.movies_data,
                                                                  self.users_data)
        
        self.sequences = self.get_sequences(self.movies_data,self.ratings_data)  

        self.state_sequences = self.get_user_sequence_combinations()
        

    def load_features(self,movie_features_path,ratings_features_path):
        movies = pd.read_csv(movie_features_path).drop(['Unnamed: 0'], axis=1)
        ratings = pd.read_csv(ratings_features_path).drop(['Unnamed: 0','timestamp'], axis=1)
        
        users = ratings.groupby(['userId','encoded_user_ids']).apply(lambda x: list(zip(x['movieId'], x['rating']))).reset_index()
        users.rename(columns = {0:'movie_rating'}, inplace = True)

        return movies,users,ratings
    
    def get_list_of_objects(self,movies_data, user_data):
        users_map = {};movies_map = {}
        

        for row in user_data.itertuples():
            user_id = row.userId
            encoded_user_id = row.encoded_user_ids
            movie_rating = row.movie_rating
            user = User(user_id, encoded_user_id, movie_rating)
            users_map[user_id]=user

        for row in movies_data.itertuples():
            movie_id = row.movieId
            encoded_movie_id = row.encoded_movie_ids
            title = row.title
            genre = row.genres
            tags = row.tag
            desc = row.desc
            desc_embedding = row.desc_embedding_glove
            movie = Movie(movie_id, encoded_movie_id, title, genre, tags, desc, desc_embedding)
            movies_map[movie_id]=movie
        
        return movies_map, users_map
    

    def get_sequences(self,moviesdf, ratingsdf):

        # Assuming you have a list of all movie IDs
        all_movies = moviesdf.movieId.unique().tolist()
        
        # Assuming you have encoded_user_ids in ratingsdf
        all_users = ratingsdf.userId.unique().tolist()

        user_objects = []
    
        for user_id in all_users:
            user_ratings = ratingsdf[ratingsdf['userId'] == user_id]
            # Extracting user details
            user_details = {
                'user_id': user_id,                
                'ratings_df': user_ratings
            }
            user_movies_watched = user_ratings['movieId'].tolist()
            user_movie_objects = []
            for movie_id in user_movies_watched:
                movie_details = moviesdf[moviesdf['movieId'] == movie_id]
                # Extracting movie details
                movie_object = {
                    'movie_id': movie_id,
                    'genre': movie_details['genres'].iloc[0],
                    'tag': movie_details['tag'].iloc[0],
                    'title': movie_details['title'].iloc[0],
                    'desc': movie_details['desc'].iloc[0],
                    'desc_embedding': movie_details['desc_embedding_glove'].iloc[0]
                }
                user_movie_objects.append(movie_object)
            user_objects.append({
                'user_details': user_details,
                'movie_objects': user_movie_objects
            })
        
        # Creating sequences
        sequence_dict = {}
        
        for user_obj in user_objects:
            user_id = user_obj['user_details']['user_id']
            user_ratings = user_obj['user_details']['ratings_df']
            movie_seq = user_ratings['movieId'].tolist()
            # Take n as initially watched movies
            n = 5
            # Take m as number of recommended movies
            m = 5
            # Neg pos ratio
            k = 2
            sequence_lst = []
            while n + m <= len(movie_seq):
                movie_watched_bucket = movie_seq[:n]
                movie_to_be_watched_bucket = movie_seq[n:n+m]
                common_pos_samples = list(set(movie_watched_bucket).union(set(movie_to_be_watched_bucket)))
                total_neg_samples = [mv for mv in all_movies if mv not in movie_seq]
                num_neg_samples = min(k * (n+m), len(total_neg_samples))
                selected_neg_samples = random.sample(total_neg_samples, num_neg_samples)
                selected_mix_samples = selected_neg_samples + common_pos_samples
                random.shuffle(selected_mix_samples)
                sequence_lst.append({'pos_samples': movie_watched_bucket,
                                    'mix_samples': selected_mix_samples,
                                    'next_pos_samples': movie_to_be_watched_bucket})
                n = n + m
            sequence_dict[user_id] = sequence_lst

        # Convert sequence_dict to DataFrame
        df = pd.DataFrame(sequence_dict.items(), columns=['user_id', 'sequences'])

        #df.to_csv(datapath+'final/'+'final_sequences.csv', index=False)

        return df


    def get_user_sequence_combinations(self):
        user_seq_lst = []
        for indx,row in self.sequences:    

            user_id = row['user_id']
            user_ob = self.users_map[user_id]
            sequence_lst = row['sequences']
            for seq in sequences:
                row_dict = {}
                pos_neg_samples = seq['mix_samples']  
                row_dict['movie_indices'] = np.array([self.movies_map[movie_id].encodedMovieId for movie_id in  pos_neg_samples],dtype="float")
                row_dict['movie_embeddings'] = np.array([self.movies_map[movie_id].desc_embedding for movie_id in pos_neg_samples],dtype="float")
                row_dict['movie_ratings'] =  np.array([user_ob.get_user_rating(movie_id) for movie_id in pos_neg_samples],dtype="float")
                row_dict['user_indices'] = np.array([user_ob.encodedUserId for movie_id in  pos_neg_samples],dtype="float")
                user_seq_lst.append(row_dict)

        return user_seq_lst
        


In [9]:
recommender = Recommender(movie_features_path,rating_features_path)

TypeError: Recommender.get_user_sequence_combinations() takes 0 positional arguments but 1 was given

In [None]:
recommender.sequences

In [12]:

class RecommendationPolicyCNN(nn.Module):
    def __init__(self, num_users, num_movies, user_embedding_dim,
                 movie_embedding_dim, mlp_dims, conv_out_channels, kernel_size):
        super(RecommendationPolicyCNN, self).__init__()
        self.num_users = num_users
        self.num_movies = num_movies
        self.user_embedding_dim = user_embedding_dim
        self.movie_embedding_dim = movie_embedding_dim
        self.conv_out_channels = conv_out_channels

        # User Embeddings
        self.user_embedding_gmf = nn.Embedding(num_users, user_embedding_dim)
        self.user_embedding_mlp = nn.Embedding(num_users, user_embedding_dim)

        # Convolutional layer for movie embeddings
        self.conv1d = nn.Conv1d(in_channels=1, out_channels=conv_out_channels, kernel_size=kernel_size, stride=1, padding=(kernel_size - 1) // 2)

        # GMF
        self.gmf_fc = nn.Linear(conv_out_channels, user_embedding_dim)

        # MLP
        mlp_input_dim = user_embedding_dim + conv_out_channels  # Concatenated dimension for user and movie embeddings
        self.mlp_fc_layers = nn.ModuleList([nn.Linear(mlp_input_dim, mlp_dims[0])])
        for in_size, out_size in zip(mlp_dims[:-1], mlp_dims[1:]):
            self.mlp_fc_layers.append(nn.Linear(in_size, out_size))

        # Final prediction layer outputs a score for each movie
        self.output_fc = nn.Linear(user_embedding_dim + mlp_dims[-1], 1)  # Adjusted to output a single score

    def forward(self, user_ids, movie_ids, movie_embeddings, ratings):
        # Get user embeddings
        user_embed_gmf = self.user_embedding_gmf(user_ids)  # Shape: [batch_size, user_embedding_dim]
        user_embed_mlp = self.user_embedding_mlp(user_ids)
    
        # Process movie embeddings through Conv1D
        movie_embeddings = movie_embeddings.unsqueeze(1)  # Shape: [batch_size, 1, movie_embedding_dim]
        conv_out = F.relu(self.conv1d(movie_embeddings))  # Shape after Conv1D: [batch_size, conv_out_channels, L]
    
        # Assuming we want to collapse the last dimension to align with user_embed_gmf for element-wise multiplication
        conv_out_flattened = conv_out.mean(dim=2)  # Averaging across the last dimension, Shape: [batch_size, conv_out_channels]
    
        # GMF path
        gmf_out = self.gmf_fc(conv_out_flattened)  # Ensure this operation results in Shape: [batch_size, user_embedding_dim]
        gmf_out = gmf_out * user_embed_gmf  # Now this should work as both tensors have a shape of [batch_size, user_embedding_dim]
    
        # MLP path
        mlp_input = torch.cat((user_embed_mlp, conv_out_flattened), dim=1)
        mlp_out = mlp_input
        for layer in self.mlp_fc_layers:
            mlp_out = F.relu(layer(mlp_out))
    
        # Combine GMF and MLP outputs
        combined_features = torch.cat((gmf_out, mlp_out), dim=1)
        scores = self.output_fc(combined_features).squeeze()
    
        # Adjust scores for visited and unvisited movies
        scores[ratings > 0] = float('-inf')  # Invalidate scores for visited movies
    
        # Get top 5 unvisited movie IDs based on scores
        _, top_indices = torch.topk(scores, 5)
        top_movie_ids = movie_ids[top_indices]
    
        return top_movie_ids

# Example of how to initialize and train the model with your environment `env`:
# from stable_baselines3 import PPO

# # Example usage
# num_users = 15000
# num_movies = 200
# user_embedding_dim=8
# movie_embedding_dim = 300
# mlp_dims = [512, 256, 128]
# conv_out_channels = 128
# kernel_size = 3


# # Assuming user_ids, movie_ids, movie_embeddings, and ratings tensors are defined elsewhere
# model = RecommendationPolicyCNN(num_users, num_movies, user_embedding_dim,
#                                  movie_embedding_dim, mlp_dims, conv_out_channels, kernel_size)


# user_ids = torch.tensor(np.array([24 for i in range(0,10)]))
# movie_ids = torch.tensor(np.array([7,14,21,28,35,42,49,56,63,70]))
# movie_embeddings = torch.randn(10, movie_embedding_dim) 
# ratings =  torch.tensor(np.array([5,0,0,4,0,2,0,3,0,5]))

# top_5_movie_ids = model(user_ids, movie_ids, movie_embeddings, ratings)
# top_5_movie_ids

In [13]:

class CustomRecommendationFeaturesExtractor(BaseFeaturesExtractor):
    """
    Feature extractor that adapts the input observation space to the inputs expected by RecommendationPolicyCNN.
    """
    def __init__(self, observation_space: spaces.Dict, num_users, num_movies, user_embedding_dim, movie_embedding_dim, mlp_dims, conv_out_channels, kernel_size):
        super().__init__(observation_space, features_dim=1)  # The features_dim parameter is arbitrary here
        
        self.recommendation_policy_cnn = RecommendationPolicyCNN(
            num_users=num_users,
            num_movies=num_movies,
            user_embedding_dim=user_embedding_dim,
            movie_embedding_dim=movie_embedding_dim,
            mlp_dims=mlp_dims,
            conv_out_channels=conv_out_channels,
            kernel_size=kernel_size
        )

    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        # Extract components from the observations dictionary
        
        user_indices = torch.tensor(observations["user_indices"])
        movie_embeddings = torch.tensor(observations["movie_embeddings"])
        movie_ratings = torch.tensor(observations["movie_ratings"])
        movie_indices = torch.tensor(observations["movie_indices"])
        
      
        features = self.recommendation_policy_cnn(user_indices, movie_indices,movie_embeddings,ratings)
        return features

class CustomActorCriticPolicy(ActorCriticPolicy):
    """
    Custom Actor-Critic policy that uses the recommendation model's output as features for action and value prediction.
    """
    def __init__(self, observation_space: spaces.Dict, action_space: spaces.Discrete, lr_schedule, features_extractor_kwargs, **kwargs):
        super(CustomActorCriticPolicy, self).__init__(observation_space, action_space, lr_schedule, 
                                                      features_extractor_class=CustomRecommendationFeaturesExtractor,
                                                      features_extractor_kwargs=features_extractor_kwargs,
                                                      **kwargs)

        # The action_net predicts which movie to recommend (action_space.n options)
        self.action_net = nn.Linear(self.features_extractor.features_dim, action_space.n)
        # The value_net estimates the value of each state
        self.value_net = nn.Linear(self.features_extractor.features_dim, 1)

    def _forward(self, obs: torch.Tensor, deterministic: bool = False) -> torch.Tensor:
        features = self.extract_features(obs)        
        return self.action_net(features), self.value_net(features)

    def extract_features(self, obs: torch.Tensor) -> torch.Tensor:
        return self.features_extractor(obs)


In [None]:
import gym
from gym import spaces

class RecommenderEnv(gym.Env):
    def __init__(self, recommender):
        super(RecommenderEnv, self).__init__()
        self.recommender = recommender
        self.current_user_index = 0
        self.current_sequence_index = 0
        self.num_reco = 5
        
        # Example of action and observation space definitions
        # These should be tailored to your specific requirements
        self.action_space = spaces.Discrete(len(self.num_reco)) # Assuming actions are selecting among movies
        self.observation_space = spaces.Dict({
            "user_indices": spaces.Box(low=0, high=len(recommender.users_map), shape=(1,), dtype=int),
            "movie_indices": spaces.Box(low=0, high=len(recommender.movies_map), shape=(len(recommender.movies_map),), dtype=int),
            "movie_embeddings": spaces.Box(low=-float('inf'), high=float('inf'), shape=(len(recommender.movies_map), recommender.movie_embedding_dim), dtype=float),
            "ratings": spaces.Box(low=0, high=5, shape=(len(recommender.movies_map),), dtype=float) # Assuming ratings are 0-5
        })

    def step(self, action):
        # To Implement logic to use action and update the environment's state
        # For example, select a movie based on action and update user's seen list

        done = False

        # Logic to iterate through users and their sequences
        if self.current_sequence_index < len(self.recommender.sequences) - 1:
            self.current_sequence_index += 1
        else:
            self.current_user_index += 1
            self.current_sequence_index = 0
        
        if self.current_user_index >= len(self.recommender.users_map):
            done = True # End of episode

        # Placeholder for reward calculation and state update
        reward = 0
        state = self._get_current_state()
        
        return state, reward, done, {}

    def reset(self):
        # Reset the environment state to the beginning
        self.current_user_index = 0
        self.current_sequence_index = 0
        return self._get_current_state()
    
    def _get_current_state(self):
        # Implement logic to return the current state based on the current user and sequence index
        # This is where you'd extract the current user's pos_neg_samples, ratings, and movie_embeddings
        state = None # Placeholder
        return state

    def render(self, mode='human'):
        # Optional for visualization
        pass

    def close(self):
        # Optional cleanup
        pass


In [None]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

def make_recommender_env(X, y, num_env):
    """
    Utility function for creating the vectorized environment.

    :param X: The input sequences.
    :param y: The target values.
    :param num_env: Number of parallel environments to create.
    :return: A vectorized Gym environment.
    """
    def _init():
        return RecommenderEnv(recommender)
    # Note: Removed the `env_id` parameter from the call
    return make_vec_env(_init, n_envs=num_env)

In [None]:
model = PPO(CustomActorCriticPolicy, env, verbose=1)
model.learn(total_timesteps=10000)