In [26]:
import numpy as np

# Helper functions for sigmoid and gradient
def sigmoid(x):
    # Clip values to prevent overflow
    x = np.clip(x, -500, 500)
    return 1 / (1 + np.exp(-x))

# FPMC Model
class FPMC:
    def __init__(self, num_users, num_items, num_factors, learning_rate=0.001, reg=10):
        self.num_users = num_users
        self.num_items = num_items
        self.num_factors = num_factors
        self.learning_rate = learning_rate
        self.reg = reg
        
        # Latent factor matrices for users and items
        self.U = np.random.normal(scale=1./num_factors, size=(num_users, num_factors))  # User preferences
        self.V = np.random.normal(scale=1./num_factors, size=(num_items, num_factors))  # Item latent vectors
        self.H = np.random.normal(scale=1./num_factors, size=(num_items, num_factors))  # Item transition vectors

    def predict(self, u, i, j):
        """
        Predict the score for user u interacting with item j after item i.
        P(u, j | i) = <U_u, V_j> + <V_i, H_j>
        """
        user_preference = np.dot(self.U[u], self.V[j])  # User preferences for item j
        item_transition = np.dot(self.V[i], self.H[j])   # Sequential transition from i to j
        return user_preference + item_transition
    
    def train(self, user_item_seq, num_epochs=10):
        """
        Train the FPMC model using pairwise ranking optimization.
        
        :param user_item_seq: List of tuples (user, current_item, next_item)
        :param num_epochs: Number of training epochs
        """
        for epoch in range(num_epochs):
            total_loss = 0
            for (u, i, j) in user_item_seq:
                # Random negative sample (a wrong next item)
                k = np.random.randint(self.num_items)
                while k == j:
                    k = np.random.randint(self.num_items)
                
                # Predict the scores for correct and incorrect next items
                correct_score = self.predict(u, i, j)
                incorrect_score = self.predict(u, i, k)
                
                # Compute the loss and gradients using sigmoid ranking loss
                delta = sigmoid(correct_score - incorrect_score)
                total_loss += -np.log(delta)
                
                # Update user, item, and transition factors using gradient descent
                self.U[u] += self.learning_rate * (delta * (self.V[j] - self.V[k]) - self.reg * self.U[u])
                self.V[j] += self.learning_rate * (delta * (self.U[u] + self.V[i] - self.H[k]) - self.reg * self.V[j])
                self.V[k] += self.learning_rate * (-delta * self.U[u] - self.reg * self.V[k])
                self.H[j] += self.learning_rate * (delta * self.V[i] - self.reg * self.H[j])
                self.H[k] += self.learning_rate * (-delta * self.V[i] - self.reg * self.H[k])
                self.V[i] += self.learning_rate * (delta * (self.H[j] - self.H[k]) - self.reg * self.V[i])

            print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {total_loss:.4f}')
    
    def recommend(self, u, i, top_n=5):
        """
        Recommend top N items for user u after interacting with item i.
        """
        scores = [self.predict(u, i, j) for j in range(self.num_items)]
        top_items = np.argsort(scores)[-top_n:][::-1]
        return top_items

# Example usage:
num_users = 100  # Number of users in the dataset
num_items = 1000  # Number of items in the dataset
num_factors = 10  # Latent factor size

# Initialize FPMC model
fpmc_model = FPMC(num_users, num_items, num_factors)

# Example user-item sequence for training (user_id, current_item_id, next_item_id)
user_item_seq = [(0, 10, 15), (1, 20, 25), (0, 15, 18), (2, 30, 35)]  # Replace with actual user-item interactions

# Train the model
fpmc_model.train(user_item_seq, num_epochs=5)

# Recommend next items for user 0 after interacting with item 10
recommendations = fpmc_model.recommend(0, 10, top_n=5)
print(f'Recommended items: {recommendations}')


Epoch 1/5, Loss: 2.8051
Epoch 2/5, Loss: 2.8036
Epoch 3/5, Loss: 2.7655
Epoch 4/5, Loss: 2.7888
Epoch 5/5, Loss: 2.7560
Recommended items: [487 577 849 105 324]


In [31]:
import numpy as np

def sigmoid(x):
    # Clip values to prevent overflow
    x = np.clip(x, -500, 500)
    return 1 / (1 + np.exp(-x))

def clip_gradients(gradient, threshold=1):
    # Clip the gradients to avoid explosion
    norm = np.linalg.norm(gradient, ord=2)
    if norm > threshold:
        gradient = gradient * threshold / norm
    return gradient

class FPMC:
    def __init__(self, num_users, num_items, num_factors, learning_rate=0.001, reg=10):
        self.num_users = num_users
        self.num_items = num_items
        self.num_factors = num_factors
        self.learning_rate = learning_rate
        self.reg = reg
        
        # Latent factor matrices for users and items
        self.U = np.random.normal(scale=1./num_factors, size=(num_users, num_factors))  # User preferences
        self.V = np.random.normal(scale=1./num_factors, size=(num_items, num_factors))  # Item latent vectors
        self.H = np.random.normal(scale=1./num_factors, size=(num_items, num_factors))  # Item transition vectors

    def predict(self, u, i, j):
        """Predict the score for user u interacting with item j after item i."""
        user_preference = np.dot(self.U[u], self.V[j])  # User preferences for item j
        item_transition = np.dot(self.V[i], self.H[j])   # Sequential transition from i to j
        return user_preference + item_transition
    
    def train(self, user_item_seq, num_epochs=10):
        """Train the FPMC model using pairwise ranking optimization."""
        for epoch in range(num_epochs):
            total_loss = 0
            for (u, i, j) in user_item_seq:
                # Random negative sample (a wrong next item)
                k = np.random.randint(self.num_items)
                while k == j:
                    k = np.random.randint(self.num_items)
                
                # Predict the scores for correct and incorrect next items
                correct_score = self.predict(u, i, j)
                incorrect_score = self.predict(u, i, k)
                
                # Compute the loss and gradients using sigmoid ranking loss
                delta = sigmoid(correct_score - incorrect_score)
                delta = np.clip(delta, 1e-10, 1-1e-10)  # Prevent log(0)
                total_loss += -np.log(delta)
                
                # Update user, item, and transition factors using gradient descent
                self.U[u] += self.learning_rate * clip_gradients(delta * (self.V[j] - self.V[k]) - self.reg * self.U[u])
                self.V[j] += self.learning_rate * clip_gradients(delta * (self.U[u] + self.V[i] - self.H[k]) - self.reg * self.V[j])
                self.V[k] += self.learning_rate * clip_gradients(-delta * self.U[u] - self.reg * self.V[k])
                self.H[j] += self.learning_rate * clip_gradients(delta * self.V[i] - self.reg * self.H[j])
                self.H[k] += self.learning_rate * clip_gradients(-delta * self.V[i] - self.reg * self.H[k])
                self.V[i] += self.learning_rate * clip_gradients(delta * (self.H[j] - self.H[k]) - self.reg * self.V[i])

            print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {total_loss:.4f}')
    
    def recommend(self, u, i, top_n=5):
        """Recommend top N items for user u after interacting with item i."""
        scores = [self.predict(u, i, j) for j in range(self.num_items)]
        top_items = np.argsort(scores)[-top_n:][::-1]
        return top_items

# Example usage of the FPMC class would involve initializing the class with user and item counts, training on sequences of user-item interactions, and using the model to make recommendations.


In [32]:
import pandas as pd

# Load the MovieLens dataset (ratings and movies)
ratings = pd.read_csv('/home/manishn/recommend/data/ml-latest-small/ratings.csv')
movies = pd.read_csv('/home/manishn/recommend/data/ml-latest-small/movies.csv')

# Merge the ratings and movies data
merged_df = pd.merge(ratings, movies, on='movieId')
merged_df = merged_df[['userId', 'movieId', 'title', 'genres', 'rating', 'timestamp']]

# Sort the data by userId and timestamp to simulate the sequence of interactions
merged_df = merged_df.sort_values(by=['userId', 'timestamp'])

# Display the first few rows to check
print(merged_df.head())


       userId  movieId                                              title  \
5540        1      804                               She's the One (1996)   
7557        1     1210  Star Wars: Episode VI - Return of the Jedi (1983)   
10740       1     2018                                       Bambi (1942)   
13084       1     2628   Star Wars: Episode I - The Phantom Menace (1999)   
13823       1     2826                           13th Warrior, The (1999)   

                         genres  rating  timestamp  
5540             Comedy|Romance     4.0  964980499  
7557    Action|Adventure|Sci-Fi     5.0  964980499  
10740  Animation|Children|Drama     5.0  964980523  
13084   Action|Adventure|Sci-Fi     4.0  964980523  
13823  Action|Adventure|Fantasy     4.0  964980523  


In [33]:
# Create mappings for userId and movieId
unique_users = merged_df['userId'].unique()
unique_movies = merged_df['movieId'].unique()

# Create dictionaries to map original IDs to sequential indices
user_id_map = {original_id: idx for idx, original_id in enumerate(unique_users)}
movie_id_map = {original_id: idx for idx, original_id in enumerate(unique_movies)}

# Apply the mapping to the dataset
merged_df['userId'] = merged_df['userId'].map(user_id_map)
merged_df['movieId'] = merged_df['movieId'].map(movie_id_map)

# Now userId and movieId are sequential, starting from 0


In [34]:
# Prepare the data: Create user-item sequences
user_item_sequences = []

# Iterate through each user's rating sequence
for user_id, group in merged_df.groupby('userId'):
    movie_sequence = list(group['movieId'])  # Get the sequence of movies watched by the user
    for i in range(len(movie_sequence) - 1):  # Create transitions
        current_movie = movie_sequence[i]
        next_movie = movie_sequence[i + 1]
        user_item_sequences.append((user_id, current_movie, next_movie))

# Example user-item sequence  (user_id, current_item_id, next_item_id)
print(user_item_sequences[:5])  # Output the first 5 transitions


[(0, 0, 1), (0, 1, 2), (0, 2, 3), (0, 3, 4), (0, 4, 5)]


In [35]:
# Initialize the FPMC model with the correct number of users and movies
num_users = merged_df['userId'].nunique()  # Number of unique users
num_items = merged_df['movieId'].nunique()  # Number of unique movies

fpmc_model = FPMC(num_users=num_users, num_items=num_items, num_factors=200)

# Train the model with the user-item sequences
fpmc_model.train(user_item_sequences, num_epochs=30)



Epoch 1/30, Loss: 69471.3905
Epoch 2/30, Loss: 69471.3062
Epoch 3/30, Loss: 69471.3040
Epoch 4/30, Loss: 69471.3354
Epoch 5/30, Loss: 69471.3352
Epoch 6/30, Loss: 69471.3393
Epoch 7/30, Loss: 69471.3426
Epoch 8/30, Loss: 69471.3467
Epoch 9/30, Loss: 69471.3499
Epoch 10/30, Loss: 69471.3544
Epoch 11/30, Loss: 69471.3570
Epoch 12/30, Loss: 69471.3591
Epoch 13/30, Loss: 69471.3625
Epoch 14/30, Loss: 69471.3630
Epoch 15/30, Loss: 69471.3639
Epoch 16/30, Loss: 69471.3649
Epoch 17/30, Loss: 69471.3660
Epoch 18/30, Loss: 69471.3664
Epoch 19/30, Loss: 69471.3671
Epoch 20/30, Loss: 69471.3677
Epoch 21/30, Loss: 69471.3679
Epoch 22/30, Loss: 69471.3682
Epoch 23/30, Loss: 69471.3684
Epoch 24/30, Loss: 69471.3686
Epoch 25/30, Loss: 69471.3688
Epoch 26/30, Loss: 69471.3688
Epoch 27/30, Loss: 69471.3690
Epoch 28/30, Loss: 69471.3690
Epoch 29/30, Loss: 69471.3691
Epoch 30/30, Loss: 69471.3691


In [36]:
# Create reverse mappings to map indices back to original movieId and titles
reverse_movie_id_map = {idx: original_id for original_id, idx in movie_id_map.items()}
movie_id_to_title = dict(zip(merged_df['movieId'], merged_df['title']))
# Function to get original movieId and title from the recommended indices
def get_movie_recommendations(user_id, current_movie_id, top_n=5):
    # Get the recommended movie indices
    recommended_movie_indices = fpmc_model.recommend(user_id, current_movie_id, top_n=top_n)
    
    # Map back to original movieId and title
    recommended_movies = [(reverse_movie_id_map[movie_idx], movie_id_to_title[reverse_movie_id_map[movie_idx]]) 
                          for movie_idx in recommended_movie_indices]
    
    return recommended_movies
# Example: Recommend movies for user 0 after watching movie with index 10 (based on sequential mapping)
user_id = 0  # Example user
current_movie_id = 10  # Example current movie (index in sequential mapping)

# Get the top 5 recommendations
recommended_movies = get_movie_recommendations(user_id, current_movie_id, top_n=5)

# Print the original movieId and title for the recommended movies
print("Recommended movies for user", user_id, "after movie", movie_id_to_title[reverse_movie_id_map[current_movie_id]] + ":")
for movie_id, title in recommended_movies:
    print(f"Movie ID: {movie_id}, Title: {title}")


KeyError: 182293