In [5]:
import json
from scipy.sparse import csr_matrix

# Load the data splits
with open('train_data.json', 'r') as json_file:
    train_data = json.load(json_file)

# Function to create the user-movie matrix with debug prints
def create_user_movie_matrix(data):
    rows = []
    cols = []
    vals = []

    user_ids = set()
    movie_ids = set()

    for user_id, ratings in data.items():
        for rating in ratings['ratings']:
            rows.append(user_id)
            cols.append(rating['movie_id'])
            vals.append(rating['rating'])
            user_ids.add(user_id)
            movie_ids.add(rating['movie_id'])

    #print("Unique user IDs:", user_ids)
    #print("Unique movie IDs:", movie_ids)
    
    user_index = {user_id: idx for idx, user_id in enumerate(user_ids)}
    movie_index = {movie_id: idx for idx, movie_id in enumerate(movie_ids)}
    
    #print("User Index Mapping:", user_index)
    #print("Movie Index Mapping:", movie_index)

    row_indices = [user_index[user_id] for user_id in rows]
    col_indices = [movie_index[movie_id] for movie_id in cols]

    user_movie_matrix = csr_matrix((vals, (row_indices, col_indices)), shape=(len(user_index), len(movie_index)))

    #print("Shape of user_movie_matrix:", user_movie_matrix.shape)
    #print("Non-zero entries in user_movie_matrix:", user_movie_matrix.nnz)

    return user_movie_matrix, user_index, movie_index

# Create the user-movie matrix for training data
train_matrix, train_user_index, train_movie_index = create_user_movie_matrix(train_data)

# Function to get users who reviewed a specific movie with debug prints
def get_users_who_reviewed(movie_id, user_movie_matrix, movie_index, user_index):
    print(f"Debug: Looking up users who reviewed movie ID {movie_id}")
    
    if movie_id in movie_index:
        movie_idx = movie_index[movie_id]
        #print(f"Debug: Index for movie ID {movie_id} is {movie_idx}")
        
        user_indices = user_movie_matrix[:, movie_idx].nonzero()[0]
        #print(f"Debug: User indices who reviewed movie {movie_id}:", user_indices)
        
        user_ids = [list(user_index.keys())[list(user_index.values()).index(idx)] for idx in user_indices]
        #print(f"Debug: User IDs who reviewed movie {movie_id}:", user_ids)
        
        return user_ids
    else:
        print(f"Movie ID {movie_id} not found in movie_index")
        return []

# Function to find neighboring users who have reviewed the same movies as the specified user with debug prints
def find_neighboring_users(user_id, user_index, user_movie_matrix, movie_index):
    if user_id in user_index:
        user_idx = user_index[user_id]
        #print(f"Debug: Index for user ID {user_id} is {user_idx}")
        user_rated_movies = user_movie_matrix[user_idx, :].nonzero()[1]
        #print(f"Debug: Movie indices rated by user {user_id}:", user_rated_movies)
        
        neighboring_users = set()
        for movie_idx in user_rated_movies:
            movie_id = list(movie_index.keys())[list(movie_index.values()).index(movie_idx)]
            users_reviewed = get_users_who_reviewed(movie_id, user_movie_matrix, movie_index, user_index)
            neighboring_users.update(users_reviewed)
        
        # Remove the user_id itself from the neighbors
        neighboring_users.discard(user_id)
        
        return list(neighboring_users)
    else:
        print(f"User ID {user_id} not found in user_index")
        return []


In [6]:
def print_first_n_rows(sparse_matrix, n=5):
    dense_matrix = sparse_matrix[:n].toarray()
    for i, row in enumerate(dense_matrix):
        print(f"Row {i}: {row}")

# Print the first 5 rows of the training matrix
print_first_n_rows(train_matrix, 5)

Row 0: [5 0 1 ... 0 0 0]
Row 1: [0 0 0 ... 0 0 0]
Row 2: [2 0 0 ... 0 0 0]
Row 3: [4 0 0 ... 0 0 0]
Row 4: [0 0 0 ... 0 0 0]


In [7]:
# Example usage: Get users who reviewed a specific movie in the training set
example_movie_id = 2622  # Replace with actual movie_id from train_data
if example_movie_id in train_movie_index:
    users_reviewed = get_users_who_reviewed(example_movie_id, train_matrix, train_movie_index, train_user_index)
    print(f"Users who reviewed the movie {example_movie_id}: {users_reviewed}")
else:
    print(f"Movie ID {example_movie_id} not found in training data")
    
# Example usage: Find neighboring users for a specific user
example_user_id = '2622'  # Replace with actual user_id from train_data
if example_user_id in train_user_index:
    neighbors = find_neighboring_users(example_user_id, train_user_index, train_matrix, train_movie_index)
    print(f"Neighboring users for {example_user_id}: {neighbors}")
else:
    print(f"User ID {example_user_id} not found in training data")

Debug: Looking up users who reviewed movie ID 2622
Users who reviewed the movie 2622: ['2611', '1483', '1680', '5282', '3675', '3297', '2761', '1930', '4005', '4700', '3072', '5198', '19', '879', '774', '1218', '4224', '310', '2010', '1602', '2344', '3274', '1015', '1678', '5054', '983', '3154', '4879', '166', '2212', '2988', '2103', '2878', '5378', '4299', '3913', '817', '5366', '3229', '155', '657', '546', '1016', '2129', '3232', '1436', '1557', '5151', '1450', '3465', '2063', '5752', '4042', '4371', '4336', '4827', '5653', '5786', '5246', '1101', '4957', '4439', '5011', '3654', '3026', '1181', '3479', '5734', '705', '850', '5893', '955', '4934', '3607', '4345', '3032', '1926', '4675', '78', '1203', '1726', '1494', '274', '1556', '214', '4868', '1246', '5063', '195', '754', '5106', '5374', '4271', '2939', '2962', '4797', '4580', '3311', '1086', '202', '5015', '5634', '5450', '4995', '3624', '83', '5787', '4436', '3007', '3664', '1051', '218', '3416', '2298', '3420', '2872', '2470', '

In [25]:
print(train_movie_index)

{1: 0, 2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 32: 31, 33: 32, 34: 33, 35: 34, 36: 35, 37: 36, 38: 37, 39: 38, 40: 39, 41: 40, 42: 41, 43: 42, 44: 43, 45: 44, 46: 45, 47: 46, 48: 47, 49: 48, 50: 49, 52: 50, 53: 51, 54: 52, 55: 53, 56: 54, 57: 55, 58: 56, 59: 57, 60: 58, 61: 59, 62: 60, 63: 61, 64: 62, 65: 63, 66: 64, 67: 65, 68: 66, 69: 67, 70: 68, 71: 69, 72: 70, 73: 71, 74: 72, 75: 73, 76: 74, 77: 75, 78: 76, 79: 77, 80: 78, 81: 79, 82: 80, 83: 81, 84: 82, 85: 83, 86: 84, 87: 85, 88: 86, 89: 87, 90: 88, 92: 89, 93: 90, 94: 91, 95: 92, 96: 93, 97: 94, 98: 95, 99: 96, 100: 97, 101: 98, 102: 99, 103: 100, 104: 101, 105: 102, 106: 103, 107: 104, 108: 105, 110: 106, 111: 107, 112: 108, 113: 109, 114: 110, 116: 111, 117: 112, 118: 113, 119: 114, 120: 115, 121: 116, 122: 117, 123: 118, 124: 119, 125: 120, 126: 

In [10]:
def pgraph_rag_neighbors_ratings_only(user_id, movie_id, user_movie_matrix, user_index, movie_index):
    # Retrieve the movie index
    movie_idx = movie_index.get(movie_id)
    if movie_idx is None:
        return []
    
    # Get the user indices who reviewed the movie
    user_indices = user_movie_matrix[:, movie_idx].nonzero()[0]
    
    # Get the corresponding user IDs and their ratings for the movie
    neighbor_ratings = [(list(user_index.keys())[list(user_index.values()).index(idx)], user_movie_matrix[idx, movie_idx]) for idx in user_indices]
    
    return neighbor_ratings

# Example usage
example_user_id = '2622'  # Replace with an actual user_id
example_movie_id = 2622           # Replace with an actual movie_id
neighbors_ratings = pgraph_rag_neighbors_ratings_only(example_user_id, example_movie_id, train_matrix, train_user_index, train_movie_index)
print(f"Ratings by neighbors for movie {example_movie_id}: {neighbors_ratings}")


Ratings by neighbors for movie 2622: [('2611', 2), ('1483', 4), ('1680', 4), ('5282', 4), ('3675', 3), ('3297', 3), ('2761', 3), ('1930', 3), ('4005', 2), ('4700', 4), ('3072', 3), ('5198', 2), ('19', 5), ('879', 3), ('774', 3), ('1218', 5), ('4224', 2), ('310', 4), ('2010', 4), ('1602', 4), ('2344', 5), ('3274', 3), ('1015', 4), ('1678', 3), ('5054', 3), ('983', 3), ('3154', 5), ('4879', 3), ('166', 2), ('2212', 1), ('2988', 3), ('2103', 3), ('2878', 3), ('5378', 4), ('4299', 5), ('3913', 2), ('817', 4), ('5366', 3), ('3229', 2), ('155', 2), ('657', 4), ('546', 4), ('1016', 4), ('2129', 2), ('3232', 2), ('1436', 4), ('1557', 5), ('5151', 3), ('1450', 2), ('3465', 3), ('2063', 3), ('5752', 3), ('4042', 3), ('4371', 3), ('4336', 1), ('4827', 3), ('5653', 4), ('5786', 2), ('5246', 2), ('1101', 1), ('4957', 4), ('4439', 3), ('5011', 4), ('3654', 2), ('3026', 5), ('1181', 3), ('3479', 5), ('5734', 3), ('705', 2), ('850', 2), ('5893', 4), ('955', 4), ('4934', 3), ('3607', 4), ('4345', 3), (

In [11]:
def pgraph_rag_neighbors_ratings_and_user_ratings(user_id, movie_id, user_movie_matrix, user_index, movie_index):
    # Retrieve the ratings from neighbors
    neighbor_ratings = pgraph_rag_neighbors_ratings_only(user_id, movie_id, user_movie_matrix, user_index, movie_index)
    
    # Retrieve the user's own ratings
    user_idx = user_index.get(user_id)
    if user_idx is None:
        return neighbor_ratings
    
    user_ratings = [(movie, user_movie_matrix[user_idx, movie_index[movie]]) for movie in movie_index.keys() if user_movie_matrix[user_idx, movie_index[movie]] != 0]
    
    # Combine both sets of ratings
    combined_ratings = neighbor_ratings + user_ratings
    
    return combined_ratings

# Example usage
combined_ratings = pgraph_rag_neighbors_ratings_and_user_ratings(example_user_id, example_movie_id, train_matrix, train_user_index, train_movie_index)
print(f"Combined ratings for user {example_user_id} and movie {example_movie_id}: {combined_ratings}")


Combined ratings for user 2622 and movie 2622: [('2611', 2), ('1483', 4), ('1680', 4), ('5282', 4), ('3675', 3), ('3297', 3), ('2761', 3), ('1930', 3), ('4005', 2), ('4700', 4), ('3072', 3), ('5198', 2), ('19', 5), ('879', 3), ('774', 3), ('1218', 5), ('4224', 2), ('310', 4), ('2010', 4), ('1602', 4), ('2344', 5), ('3274', 3), ('1015', 4), ('1678', 3), ('5054', 3), ('983', 3), ('3154', 5), ('4879', 3), ('166', 2), ('2212', 1), ('2988', 3), ('2103', 3), ('2878', 3), ('5378', 4), ('4299', 5), ('3913', 2), ('817', 4), ('5366', 3), ('3229', 2), ('155', 2), ('657', 4), ('546', 4), ('1016', 4), ('2129', 2), ('3232', 2), ('1436', 4), ('1557', 5), ('5151', 3), ('1450', 2), ('3465', 3), ('2063', 3), ('5752', 3), ('4042', 3), ('4371', 3), ('4336', 1), ('4827', 3), ('5653', 4), ('5786', 2), ('5246', 2), ('1101', 1), ('4957', 4), ('4439', 3), ('5011', 4), ('3654', 2), ('3026', 5), ('1181', 3), ('3479', 5), ('5734', 3), ('705', 2), ('850', 2), ('5893', 4), ('955', 4), ('4934', 3), ('3607', 4), ('43

In [12]:
def pgraph_multi_rag(user_id, movie_id, user_movie_matrix, user_index, movie_index):
    # Retrieve the ratings from neighbors
    neighbor_ratings = pgraph_rag_neighbors_ratings_only(user_id, movie_id, user_movie_matrix, user_index, movie_index)
    
    # Retrieve the user's own ratings
    user_idx = user_index.get(user_id)
    if user_idx is None:
        return neighbor_ratings, []
    
    user_ratings = [(movie, user_movie_matrix[user_idx, movie_index[movie]]) for movie in movie_index.keys() if user_movie_matrix[user_idx, movie_index[movie]] != 0]
    
    return neighbor_ratings, user_ratings

# Example usage
neighbors_ratings, user_ratings = pgraph_multi_rag(example_user_id, example_movie_id, train_matrix, train_user_index, train_movie_index)
print(f"Neighbor ratings for movie {example_movie_id}: {neighbors_ratings}")
print(f"User {example_user_id}'s ratings: {user_ratings}")


Neighbor ratings for movie 2622: [('2611', 2), ('1483', 4), ('1680', 4), ('5282', 4), ('3675', 3), ('3297', 3), ('2761', 3), ('1930', 3), ('4005', 2), ('4700', 4), ('3072', 3), ('5198', 2), ('19', 5), ('879', 3), ('774', 3), ('1218', 5), ('4224', 2), ('310', 4), ('2010', 4), ('1602', 4), ('2344', 5), ('3274', 3), ('1015', 4), ('1678', 3), ('5054', 3), ('983', 3), ('3154', 5), ('4879', 3), ('166', 2), ('2212', 1), ('2988', 3), ('2103', 3), ('2878', 3), ('5378', 4), ('4299', 5), ('3913', 2), ('817', 4), ('5366', 3), ('3229', 2), ('155', 2), ('657', 4), ('546', 4), ('1016', 4), ('2129', 2), ('3232', 2), ('1436', 4), ('1557', 5), ('5151', 3), ('1450', 2), ('3465', 3), ('2063', 3), ('5752', 3), ('4042', 3), ('4371', 3), ('4336', 1), ('4827', 3), ('5653', 4), ('5786', 2), ('5246', 2), ('1101', 1), ('4957', 4), ('4439', 3), ('5011', 4), ('3654', 2), ('3026', 5), ('1181', 3), ('3479', 5), ('5734', 3), ('705', 2), ('850', 2), ('5893', 4), ('955', 4), ('4934', 3), ('3607', 4), ('4345', 3), ('303

In [8]:
val_matrix, val_user_index, val_movie_index = create_user_movie_matrix(val_data)
test_matrix, test_user_index, test_movie_index = create_user_movie_matrix(test_data)