In [5]:
import pandas as pd
import networkx as nx
import pickle
import random

In [8]:
'''
Import Precomputed Files
These variables are precomputed, no need to run the whole pipeline
G.pkl                    --> G                 : Network graph
community_mapping.pkl    --> node_to_cluster   : Mapping nodes to clusters
centrality_weighted.csv  --> centrality_values : Centrality values of each node
website_dataset.csv      --> dataset           : The cleansed dataset
'''

# Restore the precomputed artefacts so the full pipeline does not need to be re-run.
# NOTE: pickle.load can execute arbitrary code; only load pickles you created yourself.
with open('G.pkl', 'rb') as graph_file:
    G = pickle.load(graph_file)

with open('community_mapping.pkl', 'rb') as mapping_file:
    node_to_cluster = pickle.load(mapping_file)

# Centrality table, re-keyed so that each row is addressed by its node id under the name "item".
centrality_values = (
    pd.read_csv("centrality_weighted.csv")
    .rename(columns={'node_id': 'item'})
    .set_index("item")
)

dataset = pd.read_csv('website_dataset.csv')

In [9]:
def cluster_based_recommendation(data, id, G, ego, node_to_cluster, top_n=10, normalized=False, verbose=True, centrality_df=None):
    """Recommend items from the ego's cluster, ranked by centrality over distance (CEF).

    Every other node that shares the ego's cluster, is reachable from the ego in
    ``G`` and has a row in the centrality table gets the score
    ``CEF = average_centrality / hop_distance``. The ``top_n`` best-scoring nodes
    are joined with ``data`` and returned sorted by CEF, highest first.

    Parameters
    ----------
    data : pandas.DataFrame
        Item catalogue. Must contain the column named by ``id`` plus
        ``'title'``, ``'category'`` and ``'subcategory'``.
    id : str
        Name of the identifier column in ``data`` (the name shadows the builtin
        ``id``; it is kept for backward compatibility with existing callers).
    G : networkx graph
        Graph whose nodes are item ids. Distances are unweighted hop counts.
    ego : int-like
        Identifier of the seed item; converted with ``int()`` for graph and
        cluster lookups.
    node_to_cluster : Mapping
        Maps node id -> cluster label.
    top_n : int, default 10
        Maximum number of candidates kept before joining with ``data``.
    normalized : bool, default False
        If True, read the ``*_normalized`` centrality columns instead.
    verbose : bool, default True
        If True and the ego is present in ``data``, print a short header
        describing the ego item.
    centrality_df : pandas.DataFrame, optional
        Centrality table indexed by node id with the columns
        ``average_centrality``, ``degree``, ``closeness`` and ``betweenness``
        (each with the ``_normalized`` suffix when ``normalized`` is True).
        When omitted, ``"centrality_weighted.csv"`` is read and indexed by its
        ``node_id`` column, as before.

    Returns
    -------
    pandas.DataFrame
        Columns ``[id, 'title', 'category', 'subcategory', 'DC', 'BC', 'CC',
        'links', 'CEF']`` sorted by ``CEF`` descending. Empty (with the same
        columns) when no candidate qualifies.

    Raises
    ------
    ValueError
        If the ego has no entry in ``node_to_cluster``.
    KeyError
        If the centrality table lacks one of the required columns.

    Notes
    -----
    If the ego is not a node of ``G``, networkx is documented to raise
    ``NodeNotFound``; that error is left to propagate. Cluster members that are
    unreachable from the ego (or absent from ``G``) are skipped.
    """
    if centrality_df is None:
        # Fallback for existing callers: load the table from disk on demand.
        centrality_df = pd.read_csv("centrality_weighted.csv").set_index("node_id")

    ego_id = int(ego)
    ego_cluster = node_to_cluster.get(ego_id, None)
    if ego_cluster is None:
        raise ValueError("Ego not found in clustering information.")

    suffix = "_normalized" if normalized else ""
    col_AC = f"average_centrality{suffix}"
    col_DC = f"degree{suffix}"
    col_CC = f"closeness{suffix}"
    col_BC = f"betweenness{suffix}"

    # Fail loudly on a schema problem instead of letting the per-row KeyError
    # handler below silently discard every candidate.
    missing_columns = [col for col in (col_AC, col_DC, col_BC, col_CC)
                       if col not in centrality_df.columns]
    if missing_columns:
        raise KeyError(f"Centrality table is missing column(s): {missing_columns}")

    cluster_members = [node for node, cluster in node_to_cluster.items()
                       if cluster == ego_cluster and node != ego_id]

    # One breadth-first search from the ego yields the hop count to every
    # reachable node, replacing a separate shortest-path search per member.
    # The graph is only consulted when there is at least one candidate.
    distances = dict(nx.single_source_shortest_path_length(G, ego_id)) if cluster_members else {}

    cef_records = []
    for neighbor in cluster_members:
        distance = distances.get(neighbor)
        if not distance:
            # None -> unreachable from the ego; 0 -> the ego itself.
            continue
        try:
            ac = centrality_df.at[neighbor, col_AC]
            dc = centrality_df.at[neighbor, col_DC]
            bc = centrality_df.at[neighbor, col_BC]
            cc = centrality_df.at[neighbor, col_CC]
        except KeyError:
            # Node has no row in the centrality table.
            continue
        cef_records.append({
            id: neighbor,
            "CEF": ac / distance,
            "links": distance,
            "DC": dc,
            "BC": bc,
            "CC": cc,
        })

    output_columns = [id, 'title', 'category', 'subcategory', 'DC', 'BC', 'CC', 'links', 'CEF']
    if cef_records:
        cef_df = pd.DataFrame(cef_records)
        cef_df = cef_df.sort_values(by="CEF", ascending=False).head(top_n)
        result = data.merge(cef_df, how="inner", on=id)
        result = result.sort_values(by="CEF", ascending=False)
        result = result[output_columns]
    else:
        # No reachable, scored candidate: return an empty frame with the usual
        # columns rather than failing while sorting a column-less DataFrame.
        result = pd.DataFrame(columns=output_columns)

    if verbose:
        ego_rows = data[data[id] == ego]
        if not ego_rows.empty:
            ego_row = ego_rows.iloc[0]
            print(f'⭐ \033[1mCourse Title\033[0m\t\t: {ego_row["title"]}')
            print(f'🆔 Course ID\t\t: {int(ego)}')
            print(f'📌 Category\t\t: {ego_row["category"]}')
            print(f'➡️ Subcategory\t\t: {ego_row["subcategory"]}\n')

    return result



# Draw the ego from a plain list: random.choice indexes its argument with seq[i], which on a
# pandas Series is a label lookup and breaks when the index is not the default 0..n-1.
cluster_based_recommendation(dataset, id='id', G=G, ego=random.choice(dataset['id'].tolist()), node_to_cluster=node_to_cluster, top_n=10, normalized=False, verbose=True)

⭐ [1mCourse Title[0m		: Autocad for everyone Step by Step
🆔 Course ID		: 2230504
📌 Category		: IT & Software
➡️ Subcategory		: Other IT & Software



Unnamed: 0,id,title,category,subcategory,DC,BC,CC,links,CEF
5,2386136,Editing Videos From Start To Finish using Adob...,IT & Software,Other IT & Software,2e-05,0.000182,0.17279,1,0.057664
6,2458592,Oracle Absence Management and SSHR Training Bu...,IT & Software,Other IT & Software,1.5e-05,0.000243,0.176296,2,0.029426
3,2328224,Complete Cryptography Master Class Part-1,IT & Software,Other IT & Software,2e-05,0.000121,0.169132,2,0.028212
9,2710000,Oracle BI Publisher Training,IT & Software,Other IT & Software,2.6e-05,0.000516,0.179105,3,0.019961
2,2317914,Oracle Fusion Cloud Accounts Payable Training,IT & Software,Other IT & Software,3e-06,0.0,0.17465,3,0.019406
8,2708330,An Introduction to Cryptographic Techniques,IT & Software,Other IT & Software,1.4e-05,3e-05,0.165847,3,0.018432
0,2113498,How to make Cutscenes With Playmaker in Unity,IT & Software,Other IT & Software,1.4e-05,3e-05,0.165651,3,0.018411
4,2384264,SAP HANA Vendor and Customer as BUSINESS PARTN...,IT & Software,Other IT & Software,1.6e-05,0.000547,0.18312,4,0.015307
1,2136922,End-to-End Real-World Application Development ...,IT & Software,Other IT & Software,3e-05,0.000121,0.175588,4,0.014645
7,2566370,Proxmox VE 6: Learn Virtualization With Proxmo...,IT & Software,Other IT & Software,2.2e-05,6.1e-05,0.175643,4,0.014644
