In [None]:
from collections import Counter
from itertools import product

import igraph as ig
import leidenalg as la
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
from networkx.algorithms.community import louvain_communities

## Calculate bidirectional similarity between users 

In [None]:
# Load the user-channel table; the following cells read its `userid`,
# `channelid` and `score` columns.
user_channel_path = "./data/final/user-channel-0.5-0.5.pkl"
userchannel = pd.read_pickle(user_channel_path)

In [None]:
# Preview the first three rows to sanity-check the loaded table.
userchannel.iloc[:3]

In [None]:
# Directed graph whose nodes are every distinct user id followed by every
# distinct channel id found in the table (users are inserted first).
di_graph = nx.DiGraph()
for id_column in ("userid", "channelid"):
    di_graph.add_nodes_from(userchannel[id_column].unique())

In [None]:
# One directed user -> channel edge per table row; the row's score is stored
# in the edge's "weight" attribute.
di_graph.add_weighted_edges_from(
    userchannel[["userid", "channelid", "score"]].itertuples(index=False, name=None)
)

In [None]:
# Number of directed edges in the user-channel graph.
di_graph.number_of_edges()

In [None]:
# Density as reported by networkx for the directed user-channel graph.
directed_density = nx.density(di_graph)
print(f"Density of directed graph: {directed_density}")

In [None]:
# `communities` was not defined by any earlier cell, so this cell raised a
# NameError on "Restart Kernel -> Run All". It is computed here with the Louvain
# implementation that the import cell already pulls in.
# NOTE(review): the graph (di_graph), the use of edge weights and the seed are
# assumptions - confirm they match the analysis that originally produced
# `communities`.
communities = louvain_communities(di_graph, weight="weight", seed=42)

# (community size, number of communities of that size) pairs, sorted by size;
# keep the ten largest sizes.
community_size_counts = Counter(len(community) for community in communities)
sorted(community_size_counts.items())[-10:]

In [None]:
# The original cell read `graph`, which is only created further down the
# notebook, and called nx.connected_components, which networkx does not
# implement for directed graphs. Weakly connected components of `di_graph`
# are the connected components of its undirected view.
# NOTE(review): assumes the user-channel graph is the one meant here - confirm.
component_sizes = sorted(
    (len(component) for component in nx.weakly_connected_components(di_graph)),
    reverse=True,
)
print(f"The 10 largest connected components are: {component_sizes[:10]}")

In [None]:
# Every nx.k_core call recomputes the core number of each node unless it is
# handed in, so compute it once and share it across all six extractions.
core_numbers = nx.core_number(di_graph)

# k-core subgraphs for increasingly strict minimum-degree thresholds.
core2 = nx.k_core(di_graph, k=2, core_number=core_numbers)
core4 = nx.k_core(di_graph, k=4, core_number=core_numbers)
core7 = nx.k_core(di_graph, k=7, core_number=core_numbers)
core10 = nx.k_core(di_graph, k=10, core_number=core_numbers)
core30 = nx.k_core(di_graph, k=30, core_number=core_numbers)

# With k omitted networkx returns the main core (largest core number present).
core_main = nx.k_core(di_graph, core_number=core_numbers)

## Cluster graphs

In [None]:
# Load the pickled acquaintances table for inspection in the next cell.
acquaintances_path = "./data/final/acquaintances_cf.pkl"
test = pd.read_pickle(acquaintances_path)

In [None]:
# Show the first five entries of the loaded object.
test.head(n=5)

In [None]:
# Load the user-user table for the 0.5 / 0.5 / 0.5 configuration; later cells
# read its `userid`, `neighbourid` and `score` columns.
user_user_path = "./data/final/user-user-0.5-0.5-0.5.pkl"
useruser = pd.read_pickle(user_user_path)

In [None]:
 # Spot check: is there a row whose userid is the first id and whose
 # neighbourid is the second id?
 bool(
     (
         (useruser.userid == "cfcd0ae6e1db87e50ff513de7b840a6a")
         & (useruser.neighbourid == "50f294244cd7b76fbeb44959175a96a5")
     ).any()
 )

In [None]:
# Nested lookup built from the user-user table:
#     neighbors[userid][neighbourid] -> score
# itertuples() is used (as in the other cells) instead of iterrows(), which
# builds a Series per row, and the per-row progress print was removed because
# it wrote one line of output for every row of the table.
neighbors = {}

for row in useruser.itertuples(index=False):
    # setdefault creates the inner dict the first time a userid is seen.
    neighbors.setdefault(row.userid, {})[row.neighbourid] = row.score

In [None]:
# Directed user -> neighbour graph for the currently loaded `useruser` table;
# each edge stores the pair's score in its "weight" attribute.
graph = nx.DiGraph()
graph.add_nodes_from(useruser.userid.unique())

for row in useruser.itertuples():
    graph.add_edge(row.userid, row.neighbourid, weight=row.score)

# Convert to igraph, which is the graph type leidenalg operates on.
ig_graph = ig.Graph.from_networkx(graph)

# Leiden community detection optimising modularity.
# - weights="weight": the previous call passed no weights, so the scores stored
#   on the edges were ignored by the clustering.
#   NOTE(review): confirm weighted clustering is the intended behaviour.
# - seed: Leiden is randomised; a fixed seed makes the partition reproducible.
partitions = la.find_partition(
    ig_graph,
    la.ModularityVertexPartition,
    weights="weight",
    seed=42,
)

In [None]:
# Print the members of every detected community, one community per line.
for community_members in partitions:
    print(community_members)

In [None]:
# Cluster the user-user graph of every (j, k, l) configuration in the
# 3 x 3 x 3 grid and persist the resulting clusters next to the input files.
weight_grid = [0, 0.5, 1]

# product() iterates in the same order as the former nested loops:
# j outermost, l innermost.
for j, k, l in product(weight_grid, repeat=3):
    useruser = pd.read_pickle(f"./data/final/user-user-{l}-{j}-{k}.pkl")

    # Directed user -> neighbour graph with the score as edge weight.
    graph = nx.DiGraph()
    graph.add_nodes_from(useruser.userid.unique())
    graph.add_nodes_from(useruser.neighbourid.unique())

    for row in useruser.itertuples():
        graph.add_edge(row.userid, row.neighbourid, weight=row.score)

    ig_graph = ig.Graph.from_networkx(graph)

    # from_networkx() stores the original node id in the "_nx_name" vertex
    # attribute; enumerate() pairs it with the igraph vertex index.
    ig_to_mattermost_dict = dict(enumerate(ig_graph.vs["_nx_name"]))

    # Leiden with modularity objective. The edge scores are now passed as
    # weights (they were previously ignored) and the seed is fixed so that
    # re-running the notebook yields the same clusters.
    # NOTE(review): confirm weighted clustering is the intended behaviour.
    clusters = la.find_partition(
        ig_graph,
        la.ModularityVertexPartition,
        weights="weight",
        seed=42,
    )

    # One line per configuration; the former end="\r" made each line overwrite
    # the previous one, so only the last configuration stayed visible.
    print(f"Configuration with j: {j}, k: {k}, l: {l} has {len(clusters)} clusters.")

    # Translate igraph vertex indices back to the original ids before saving.
    clusters_for_pkl = [
        {"cluster": c, "nodes": [ig_to_mattermost_dict[node] for node in cluster]}
        for c, cluster in enumerate(clusters)
    ]

    pd.DataFrame(clusters_for_pkl).to_pickle(f"./data/final/clusters-{l}-{j}-{k}.pkl")

### Calculate modularity for each graph

In [None]:
# Score the saved clustering of every (j, k, l) configuration by its modularity
# and collect the results in `modularities` (one dict per configuration).
modularities = []
weight_grid = [0, 0.5, 1]

# product() iterates in the same order as the former nested loops:
# j outermost, l innermost.
for j, k, l in product(weight_grid, repeat=3):
    useruser = pd.read_pickle(f"./data/final/user-user-{l}-{j}-{k}.pkl")

    # Rebuild the graph as a DiGraph, i.e. the same kind of graph the clusters
    # were computed on. The previous undirected nx.Graph merged the edges
    # u -> v and v -> u into one, keeping only the last score written.
    # NOTE(review): confirm directed modularity is the intended metric.
    graph = nx.DiGraph()
    graph.add_nodes_from(useruser.userid.unique())

    for row in useruser.itertuples():
        graph.add_edge(row.userid, row.neighbourid, weight=row.score)

    # Each saved row holds the list of node ids of one cluster.
    saved_clusters = pd.read_pickle(f"./data/final/clusters-{l}-{j}-{k}.pkl")
    clusters = [set(cluster) for cluster in saved_clusters["nodes"].to_list()]

    # networkx uses the "weight" edge attribute by default.
    modularity = nx.algorithms.community.modularity(graph, clusters)

    # Progress indicator only (overwritten in place); the values are kept in
    # `modularities`.
    print(f"Modularity for j={j}, k={k} and l={l} is {modularity}", end="\r")
    modularities.append({
        "j": j,
        "k": k,
        "l": l,
        "modularity": modularity
    })

In [None]:
# Tabulate the collected scores, ordered from lowest to highest modularity.
modularity_table = pd.DataFrame(modularities)
modularity_table.sort_values(by="modularity").reset_index(drop=True)