In [1]:
import pickle
import networkx as nx
from node2vec import Node2Vec
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from collections import defaultdict
import copy

In [2]:
with open(r"./graphs/bipartite_graph.pickle", "rb") as input_file:
        bipartite_graph = pickle.load(input_file)

In [3]:
print(bipartite_graph)

Graph with 10527 nodes and 13021382 edges


In [27]:
## Link Prediction with Jaccard's coefficient:
def link_prediction(bipartite_graph: nx.Graph, N: int):
    user_nodes = [node for node in bipartite_graph.nodes() if bipartite_graph.nodes[node]['bipartite'] == 1]
    preds_jaccard = nx.jaccard_coefficient(bipartite_graph)

    # For each user, compute the top N recommendations
    recommendations = dict()

    for u, v, p in preds_jaccard:
        if u in user_nodes and bipartite_graph.nodes[v]['bipartite'] == 0:
            if u not in recommendations:
                recommendations[u] = [(v, p)]
            else:
                recommendations[u].append((v, p))

    # Sort recommendations for each user and keep only the top N
    for u in recommendations:
        recommendations[u] = sorted(recommendations[u], key=lambda x: x[1], reverse=True)[:N]
    return recommendations


In [28]:
link_prediction(bipartite_graph, 3)

{'David O': [(332, 0.08421052631578947),
  (397, 0.08),
  (185, 0.07954545454545454)],
 'Dev V S': [(538, 0.25), (529, 0.14285714285714285), (528, 0.125)],
 'Alessandro A': [(538, 0.1),
  (529, 0.07692307692307693),
  (528, 0.07142857142857142)],
 'ANSHUL J': [(538, 0.25), (529, 0.14285714285714285), (528, 0.125)],
 'Jesse E': [(482, 0.027777777777777776),
  (507, 0.018867924528301886),
  (516, 0.018867924528301886)],
 'Murtadha A': [(538, 0.16666666666666666),
  (529, 0.1111111111111111),
  (528, 0.1)]}

In [73]:
bipartite_graph.nodes

NodeView((0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224

In [38]:
def generate_recommendations_Node2Vec(bipartite_graph, top_n):
    # Create node2vec embeddings
    node2vec = Node2Vec(bipartite_graph, dimensions=64, walk_length=30, num_walks=200, workers=4)
    model = node2vec.fit(window=10, min_count=1, batch_words=4)

    # Separate user and course nodes
    user_nodes = {node for node, data in bipartite_graph.nodes(data=True) if data['bipartite'] == 1}
    course_nodes = set(bipartite_graph) - user_nodes

    # Initialize dictionary for storing recommendations
    recommendations = {}

    # Iterate over each user
    for user in user_nodes:
        # Get courses rated by user
        rated_courses = [node for node in bipartite_graph.neighbors(user)]

        # Get embeddings for rated courses, but only include those that have valid embeddings
        rated_course_embeddings = [model.wv[course] for course in rated_courses if course in model.wv]

        # If there are no valid embeddings, skip this user
        if not rated_course_embeddings:
            continue

        # Compute mean of rated course embeddings
        mean_embedding = np.mean(rated_course_embeddings, axis=0)

        # Compute cosine similarity of mean_embedding to all course nodes
        similarities = []
        for course in course_nodes:
            if course in model.wv:  # Ensure the course has a valid embedding
                sim = cosine_similarity([mean_embedding], [model.wv[course]])
                similarities.append((course, sim[0][0]))  # sim is a 2D array

        # Sort courses by similarity and get top_n
        top_courses = sorted(similarities, key=lambda x: x[1], reverse=True)[:top_n]

        # Add recommendations for user to dictionary
        recommendations[user] = top_courses

    return recommendations


In [39]:
generate_recommendations_Node2Vec(bipartite_graph, 3)

Computing transition probabilities:   0%|          | 0/1527 [00:00<?, ?it/s]

{'David O': [(251, 0.7007881), (294, 0.69849503), (270, 0.68720007)],
 'Hanumant K': [(309, 0.86428475), (335, 0.8609277), (323, 0.84472317)],
 'Remya B': [(309, 0.86428475), (335, 0.8609277), (323, 0.84472317)],
 'Sagar D': [(289, 0.6187321), (0, 0.6130148), (243, 0.6068911)],
 'Niranjan O': [(309, 0.86428475), (335, 0.8609277), (323, 0.84472317)],
 'Akhila Z': [(403, 0.7661848), (309, 0.7043947), (494, 0.6832459)],
 'Atul D': [(145, 0.6966288), (243, 0.688935), (269, 0.68673664)],
 'Maoti G': [(309, 0.86428475), (335, 0.8609277), (323, 0.84472317)],
 'Brian M': [(466, 0.6292896), (449, 0.62740767), (452, 0.6272808)],
 'Rohit K': [(309, 0.65047646), (335, 0.6385255), (323, 0.634089)],
 'Aseem J': [(309, 0.86428475), (335, 0.8609277), (323, 0.84472317)],
 'Joshua Z P L': [(309, 0.86428475), (335, 0.8609277), (323, 0.84472317)],
 'IEVGENIIA K': [(309, 0.86428475), (335, 0.8609277), (323, 0.84472317)],
 'Ritwik K P': [(309, 0.86428475), (335, 0.8609277), (323, 0.84472317)],
 'David M': [

In [5]:
def split_train_test(bipartite_graph, test_size=0.2):
    training_graph = bipartite_graph.copy()
    test_edges = []

    user_nodes = {node for node, data in bipartite_graph.nodes(data=True) if data['bipartite'] == 1}
    course_nodes = set(bipartite_graph) - user_nodes

    for user in user_nodes:
        neighbors = [node for node in bipartite_graph.neighbors(user) if node in course_nodes]
        if neighbors:  # Check if the user has rated any course
            num_test = max(1, int(test_size * len(neighbors)))  # Ensure at least 1 test example per user

            test_neighbors = np.random.choice(neighbors, size=num_test, replace=False)
            test_edges.extend([(user, course) for course in test_neighbors])

            for neighbor in test_neighbors:
                if training_graph.has_edge(user, neighbor):  # Check if edge exists before removing
                    training_graph.remove_edge(user, neighbor)

    return training_graph, test_edges




In [4]:
def evaluate_model(test_edges, training_graph, recommendations, top_n):
    hits = 0
    precision_scores = []
    recall_scores = []
    f1_scores = []
    dcg_scores = []

    for user, courses in recommendations.items():
        user_test_courses = [course for user_test, course in test_edges if user_test == user]

        if not user_test_courses:
            continue

        user_training_courses = [course for course in training_graph.neighbors(user) if training_graph.nodes[course]['bipartite'] == 0]
        rec_courses = [course for course, score in courses[:top_n] if course not in user_training_courses]
        hits += len(set(user_test_courses) & set(rec_courses))

        # Calculate Precision, Recall and F1 score
        tp = len(set(user_test_courses) & set(rec_courses))
        fp = len(set(rec_courses) - set(user_test_courses))
        fn = len(set(user_test_courses) - set(rec_courses))

        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1 = 2 * ((precision * recall) / (precision + recall)) if (precision + recall) > 0 else 0

        precision_scores.append(precision)
        recall_scores.append(recall)
        f1_scores.append(f1)

        # Calculate Normalized Discounted Cumulative Gain (NDCG)
        dcg = sum([int(rec_courses[i] in user_test_courses) / np.log2(i + 2) for i in range(min(top_n, len(rec_courses)))])
        idcg = sum([1 / np.log2(i + 2) for i in range(min(top_n, len(user_test_courses)))])
        ndcg = dcg / idcg if idcg > 0 else 0
        dcg_scores.append(ndcg)

    hit_rate = hits / len(test_edges)
    mean_precision = np.mean(precision_scores)
    mean_recall = np.mean(recall_scores)
    mean_f1 = np.mean(f1_scores)
    mean_ndcg = np.mean(dcg_scores)

    return hit_rate, mean_precision, mean_recall, mean_f1, mean_ndcg


In [147]:
training_graph, test_edges = split_train_test(bipartite_graph)
recommendations = generate_recommendations_Node2Vec(training_graph, 5)
evaluate_model(test_edges, recommendations, 5)

Computing transition probabilities:   0%|          | 0/727 [00:00<?, ?it/s]

(0.007792207792207792,
 0.0031578947368421056,
 0.002521929824561403,
 0.002627781575149996,
 0.002377919991457658)

In [71]:
test_edges

[('Sean G', 403),
 ('William J', 124),
 ('William J', 153),
 ('Samuel D', 335),
 ('Samuel D', 232),
 ('Jonathan L', 273),
 ('Jonathan L', 95),
 ('Jonathan L', 395),
 ('Jonathan L', 31),
 ('Jonathan L', 535),
 ('Daniel F', 403),
 ('Daniel F', 114),
 ('Daniel F', 427),
 ('Daniel F', 371),
 ('Daniel F', 65),
 ('Robert S', 385),
 ('Robert S', 35),
 ('Robert S', 411),
 ('Robert S', 145),
 ('Robert S', 337),
 ('Robert S', 80),
 ('Robert S', 1),
 ('Robert S', 383),
 ('Ryan H', 454),
 ('Ryan H', 33),
 ('Ryan H', 273),
 ('Jacob D', 475),
 ('Logan D', 63),
 ('Geoff T', 11)]

In [72]:
recommendations

{'Scot A W': [(520, 0.9925473),
  (521, 0.9916561),
  (519, 0.9915676),
  (524, 0.98977554),
  (522, 0.987105)],
 'Dale B': [(520, 0.99304205),
  (519, 0.9914625),
  (522, 0.99069667),
  (524, 0.9889353),
  (521, 0.9883477)],
 'Sean G': [(136, 0.7646039),
  (162, 0.75667197),
  (195, 0.7423419),
  (135, 0.7364006),
  (176, 0.7267091)],
 'William J': [(522, 0.91055095),
  (521, 0.90905213),
  (520, 0.906904),
  (519, 0.9057654),
  (524, 0.90161276)],
 'Samuel D': [(287, 0.71510315),
  (309, 0.70540947),
  (295, 0.6848629),
  (378, 0.6821819),
  (195, 0.6818017)],
 'Jonathan L': [(400, 0.6107042),
  (227, 0.6024594),
  (224, 0.59901357),
  (213, 0.59793365),
  (206, 0.5967985)],
 'Daniel F': [(195, 0.5894977),
  (168, 0.5849072),
  (136, 0.5820477),
  (162, 0.58054614),
  (362, 0.57871807)],
 'Robert S': [(428, 0.59157705),
  (406, 0.589565),
  (404, 0.5865266),
  (400, 0.58315516),
  (418, 0.58143854)],
 'Gabriel E R': [(520, 0.9930135),
  (519, 0.99187887),
  (521, 0.99125725),
  (522,

## Personalised PageRank

In [6]:
def remove_same_type_edges(bipartite_graph):
    user_item_graph = bipartite_graph.copy()

    # Create a list to store the edges to remove
    edges_to_remove = []

    # Iterate over the edges
    for edge in user_item_graph.edges():
        # Check if the nodes belong to the same bipartite set
        if user_item_graph.nodes[edge[0]]['bipartite'] == user_item_graph.nodes[edge[1]]['bipartite']:
            edges_to_remove.append(edge)

    # Remove the edges
    user_item_graph.remove_edges_from(edges_to_remove)

    return user_item_graph

In [None]:
def calculate_ppr(bipartite_graph, alpha=0.85, max_iter=100):
    ppr_dict = {}

    # Extract user nodes from the bipartite graph
    user_nodes = {node for node, attr in bipartite_graph.nodes(data=True) if attr['bipartite'] == 1}

    for user_node in user_nodes:
        # Create a personalization dict where only the current user_node has a non-zero value
        personalization = {node: 0 for node in bipartite_graph.nodes()}
        personalization[user_node] = 1

        # Compute personalized PageRank
        ppr = nx.pagerank(bipartite_graph, alpha=alpha, personalization=personalization, max_iter=max_iter, weight='weight')

        # Filter out user nodes and retain only course nodes
        ppr_dict[user_node] = {node: rank for node, rank in ppr.items() if bipartite_graph.nodes[node]['bipartite'] == 0}

    return ppr_dict

def recommend_courses(ppr_dict, bipartite_graph, user_node):
    recommendations = {}

    # Select courses the user has already taken
    taken_courses = {course for course in bipartite_graph.neighbors(user_node) if bipartite_graph.nodes[course]['bipartite'] == 0}

    # Look at each similar user and promote courses that are highly ranked by the similar user
    for similar_user_node, edge_data in bipartite_graph[user_node].items():
        if bipartite_graph.nodes[similar_user_node]['bipartite'] == 1:  # Ensure the node is a user node
            for course_node, ppr_score in ppr_dict[similar_user_node].items():
                # Increase the recommendation score of the course based on the PPR score and the user similarity
                # if course_node not in taken_courses:
                recommendations[course_node] = (recommendations.get(course_node, 0) + ppr_score) * edge_data['weight']

    # Look at each course the user has rated and promote similar courses
    for course_node, edge_data in bipartite_graph[user_node].items():
        if bipartite_graph.nodes[course_node]['bipartite'] == 0:  # Ensure the node is a course node
            sentiment = edge_data['positive_sentiment'] - edge_data['negative_sentiment']
            for similar_course_node in bipartite_graph[course_node]:
                # Increase the recommendation score of the similar course based on the PPR score, course similarity, and sentiments
                if similar_course_node not in recommendations:
                    recommendations[similar_course_node] = edge_data['rating'] * sentiment * ppr_dict[user_node].get(similar_course_node, 0)

    return recommendations


# ppr_dict = calculate_ppr(bipartite_graph)
# recommendations = {user_node: recommend_courses(ppr_dict, bipartite_graph, user_node) for user_node in ppr_dict}
# Step 1: Split data into training set and test set
training_graph, test_edges = split_train_test(bipartite_graph, test_size=0.2)

# Step 2: Generate recommendations based on training data
ppr_dict = calculate_ppr(remove_same_type_edges(training_graph))
recommendations = {user_node: sorted(recommend_courses(ppr_dict, training_graph, user_node).items(), key=lambda x: x[1], reverse=True) for user_node in ppr_dict}

# Step 3: Evaluate the model on the test set
hit_rate, mean_precision, mean_recall, mean_f1, mean_ndcg = evaluate_model(test_edges, training_graph, recommendations, top_n=3)

print("Hit Rate: ", hit_rate)
print("Mean Precision: ", mean_precision)
print("Mean Recall: ", mean_recall)
print("Mean F1-Score: ", mean_f1)
print("Mean NDCG: ", mean_ndcg)


In [195]:
def calculate_user_coverage_at_k(recommendations, test_edges, k):
    coverage_scores = {}

    for user, recs in recommendations.items():
        # Get the top-k recommended items for this user
        top_k_recs = set(course for course, _ in recs[:k])
        # Get the test items for this user
        user_test_items = set(item for test_user, item in test_edges if test_user == user)

        # Calculate the coverage for this user and store it in the dictionary
        if user_test_items:  # Avoid division by zero
            coverage_scores[user] = len(top_k_recs & user_test_items) / len(user_test_items)

    return coverage_scores

calculate_user_coverage_at_k(recommendations, test_edges, 3)

{'Remya B': 1.0,
 'Sonal M A': 1.0,
 'Brian M': 0.25,
 'Rohit K': 0.125,
 'Mike Y W': 1.0,
 'Ru C C': 1.0,
 'Ritwik K P': 1.0,
 'David M': 0.0,
 'Marcos P L S': 1.0,
 'Connor Q': 1.0,
 'Lena P': 1.0,
 'Hoang D': 0.0,
 'Stephen': 0.0,
 'Bandhanwar A D': 1.0,
 'Prerna J': 1.0,
 'Ryan H': 0.0,
 'ELY M R A': 1.0,
 'veenit k s': 1.0,
 'Adrian J C': 1.0,
 'Pengchong L': 0.0,
 'Harish J': 1.0,
 'Ismayil J': 0.0,
 'Aiman N': 1.0,
 'John J P G': 0.0,
 'BISWADIP S': 1.0,
 'nuno t': 1.0,
 'David A G': 1.0,
 'Sanam P': 1.0,
 'Sara G': 0.0,
 'Tosin O': 1.0,
 'Tural K': 1.0,
 'Spencer M': 0.0,
 'Ege B': 1.0,
 'James M': 0.125,
 'Daniel': 0.0,
 'Sebastián R R': 1.0,
 'Gaurav C': 0.0,
 'James R': 0.0,
 'Aniket G': 0.0,
 'Abhishek V': 0.3333333333333333,
 'Stefano E': 0.0,
 'VS S P': 1.0,
 'Geoff T': 0.0,
 'Vivek V': 0.0,
 'Raj K': 0.0,
 'Prajakta': 1.0,
 'Rapster J A': 1.0,
 'William J': 0.0,
 'Onyinanya D': 1.0,
 'Mark B': 0.0,
 'PALAK J': 1.0,
 'Logan D': 0.0,
 'David S': 0.07692307692307693,
 'Pedr

In [40]:
hit_rate, mean_precision, mean_recall, mean_f1, mean_ndcg = evaluate_model(test_edges, training_graph, recommendations, top_n=5)

print("Hit Rate: ", hit_rate)
print("Mean Precision: ", mean_precision)
print("Mean Recall: ", mean_recall)
print("Mean F1-Score: ", mean_f1)
print("Mean NDCG: ", mean_ndcg)

Hit Rate:  0.2909090909090909
Mean Precision:  0.1401754385964912
Mean Recall:  0.530874060150376
Mean F1-Score:  0.1952069203405361
Mean NDCG:  0.5149540413883856


In [171]:
test_edges


[('Remya B', 309),
 ('Sonal M A', 309),
 ('Brian M', 220),
 ('Brian M', 68),
 ('Brian M', 176),
 ('Brian M', 260),
 ('Brian M', 403),
 ('Brian M', 348),
 ('Brian M', 514),
 ('Rohit K', 343),
 ('Rohit K', 179),
 ('Rohit K', 481),
 ('Rohit K', 383),
 ('Rohit K', 223),
 ('Rohit K', 342),
 ('Rohit K', 309),
 ('Rohit K', 321),
 ('Rohit K', 264),
 ('Rohit K', 63),
 ('Rohit K', 243),
 ('Rohit K', 273),
 ('Rohit K', 429),
 ('Mike Y W', 309),
 ('Mike Y W', 468),
 ('Ru C C', 309),
 ('Ritwik K P', 309),
 ('David M', 320),
 ('David M', 472),
 ('David M', 382),
 ('David M', 343),
 ('David M', 426),
 ('David M', 475),
 ('David M', 492),
 ('David M', 0),
 ('David M', 188),
 ('David M', 236),
 ('David M', 314),
 ('David M', 92),
 ('David M', 403),
 ('David M', 22),
 ('David M', 114),
 ('David M', 454),
 ('David M', 325),
 ('David M', 43),
 ('David M', 20),
 ('David M', 179),
 ('David M', 535),
 ('David M', 273),
 ('David M', 524),
 ('David M', 25),
 ('Marcos P L S', 309),
 ('Connor Q', 309),
 ('Lena P

In [177]:
result = next((t for t in recommendations["Brian M"] if t[0] == 514), None)

In [178]:
result

(514, 0.0016997822759459982)

In [172]:
recommendations["Brian M"]

[(145, 0.02311713857912701),
 (371, 0.021990456738540068),
 (92, 0.021980472688368054),
 (80, 0.021689088755098028),
 (179, 0.021169372636204956),
 (342, 0.007694099409248211),
 (176, 0.0076890413061067135),
 (153, 0.007606536564933404),
 (499, 0.007088449579755483),
 (335, 0.007044900894718287),
 (40, 0.007025712468401517),
 (447, 0.006873288465698713),
 (501, 0.0065911836231690515),
 (475, 0.006175763643926134),
 (45, 0.006118436613242299),
 (463, 0.00611369979954431),
 (264, 0.006025793008367564),
 (359, 0.005894616736943343),
 (384, 0.005653920307364385),
 (117, 0.004471644718894945),
 (63, 0.004215339946098361),
 (375, 0.004193838960694649),
 (403, 0.004066493185895158),
 (405, 0.004060819306610683),
 (149, 0.003941904893222542),
 (229, 0.0038950566800450104),
 (78, 0.003768745772300557),
 (330, 0.0037439125802712233),
 (285, 0.0037158504977025503),
 (413, 0.00371013972572155),
 (24, 0.0036892306255913564),
 (99, 0.00357019862831532),
 (273, 0.003510031165593253),
 (82, 0.00344426

## Collaborative Filtering User-User and Item-Item

In [57]:
def recommend_courses_user_based(bipartite_graph, user_node):
    recommendations = defaultdict(int)

    # Get the courses that the user has already taken
    taken_courses = {course for course in bipartite_graph.neighbors(user_node) if bipartite_graph.nodes[course]['bipartite'] == 0}

    # For each similar user...
    for similar_user in bipartite_graph[user_node]:
        # Ensure the node is a user node
        if bipartite_graph.nodes[similar_user]['bipartite'] == 1:
            similarity = bipartite_graph[user_node][similar_user]['weight']  # similarity between users
            # ...get the courses that they have taken...
            other_courses = {course for course in bipartite_graph.neighbors(similar_user) if bipartite_graph.nodes[course]['bipartite'] == 0}
            # ...and for each course that the similar user has taken and the user hasn't...
            for course in other_courses - taken_courses:
                # ...increment the recommendation score for that course by the similarity of the users
                recommendations[course] += similarity

    return recommendations


def recommend_courses_item_based(bipartite_graph, user_node):
    recommendations = defaultdict(int)

    # Get the courses that the user has already taken
    taken_courses = {course for course in bipartite_graph.neighbors(user_node) if bipartite_graph.nodes[course]['bipartite'] == 0}

    # For each course that the user has taken...
    for course in taken_courses:
        # ...get similar courses...
        similar_courses = {similar_course for similar_course in bipartite_graph[course] if bipartite_graph.nodes[similar_course]['bipartite'] == 0} # Reverted back to 0 as it seems course nodes are represented as 0.
        # ...and for each similar course that the user hasn't taken...
        for similar_course in similar_courses - taken_courses:
            similarity = bipartite_graph[course][similar_course]['weight']  # similarity between courses
            # ...increment the recommendation score for that course by the similarity of the courses
            recommendations[similar_course] += similarity

    return recommendations





In [59]:
# Step 1: Split data into training set and test set
training_graph, test_edges = split_train_test(bipartite_graph, test_size=0.2)

# Step 2: Generate recommendations based on training data
recommendations_user_cf = {user_node: sorted(recommend_courses_user_based(training_graph, user_node).items(), key=lambda x: x[1], reverse=True) for user_node in training_graph.nodes() if training_graph.nodes[user_node]['bipartite'] == 1}


# Step 3: Evaluate the model on the test set
hit_rate, mean_precision, mean_recall, mean_f1, mean_ndcg = evaluate_model(test_edges, training_graph, recommendations_user_cf, top_n=3)
print("User Based CF")
print("Hit Rate: ", hit_rate)
print("Mean Precision: ", mean_precision)
print("Mean Recall: ", mean_recall)
print("Mean F1-Score: ", mean_f1)
print("Mean NDCG: ", mean_ndcg)

recommendation_item_cf = {user_node: sorted(recommend_courses_item_based(training_graph, user_node).items(), key=lambda x: x[1], reverse=True) for user_node in training_graph.nodes() if training_graph.nodes[user_node]['bipartite'] == 1}

hit_rate, mean_precision, mean_recall, mean_f1, mean_ndcg = evaluate_model(test_edges, training_graph, recommendation_item_cf, top_n=3)
print("Item Based CF")
print("Hit Rate: ", hit_rate)
print("Mean Precision: ", mean_precision)
print("Mean Recall: ", mean_recall)
print("Mean F1-Score: ", mean_f1)
print("Mean NDCG: ", mean_ndcg)

User Based CF
Hit Rate:  0.3220779220779221
Mean Precision:  0.2175438596491228
Mean Recall:  0.5473917967996915
Mean F1-Score:  0.29060743084429647
Mean NDCG:  0.5666994552380996
Item Based CF
Hit Rate:  0.012987012987012988
Mean Precision:  0.008771929824561403
Mean Recall:  0.013891700404858298
Mean F1-Score:  0.008580332409972299
Mean NDCG:  0.014448058149622326


## Cold Start check
We are going to compare if a global PageRank would perform better than just recommending the most popular elements(approach that CF has to take when the user has no history)

In [None]:
def recommend_courses_global_ppr(bipartite_graph):
    # Compute global PageRank
    global_pr = nx.pagerank(bipartite_graph, alpha=0.85, max_iter=1000000, weight='weight')

    # Filter out user nodes and retain only course nodes
    global_pr = {node: rank for node, rank in global_pr.items() if bipartite_graph.nodes[node]['bipartite'] == 0}

    # Return the courses sorted by their PageRank score (high to low)
    return sorted(global_pr.items(), key=lambda x: x[1], reverse=True)

def recommend_most_popular_courses(bipartite_graph):
    # Count the number of users who have taken each course
    course_popularity = {course: len(list(bipartite_graph.neighbors(course))) for course in bipartite_graph.nodes() if bipartite_graph.nodes[course]['bipartite'] == 0}

    # Return the courses sorted by their popularity (high to low)
    return sorted(course_popularity.items(), key=lambda x: x[1], reverse=True)
