### IMPORTS

In [None]:
!pip install --user cdlib

In [None]:
!pip install --user leidenalg 

In [None]:
import pandas as pd
import numpy as np
from sklearn import preprocessing

import networkx as nx
from cdlib import algorithms
from networkx.algorithms import community
from community import community_louvain
from networkx.algorithms import bipartite
from tqdm import tqdm
import time
from operator import itemgetter


### DF OVERVIEW (not real data)

In [7]:
# Preview the first ten rows of the three columns shown in this overview.
# NOTE(review): no visible cell creates `df` — it must be loaded before this cell can run.
df[['Client ID','Page ID','Data']].head(10)

Unnamed: 0,Client ID,Page ID,Data
0,4,Page 0,20220623
1,9,Page 1,20220408
2,2,Page 2,20220609
3,7,Page 3,20220406
4,0,Page 4,20220430
5,5,Page 5,20220505
6,3,Page 6,20220126
7,6,Page 7,20220429
8,8,Page 8,20220302
9,1,Page 9,20220114


- Cleaning and preprocessing

In [None]:
# Replace the client identifiers with integer codes produced by a LabelEncoder.
# The values are cast to strings first so the encoder is fitted on a single type.
le = preprocessing.LabelEncoder()
df['Client ID'] = le.fit_transform(df['Client ID'].astype(str))

def month_extr(inpt):
    """Return the 5th and 6th characters of ``str(inpt)``.

    For a date stamp written as YYYYMMDD (e.g. 20220623) this is the
    two-digit month ("06"). Inputs shorter than five characters give "".
    """
    stamp = str(inpt)
    return stamp[4:6]
# Derive the two-character month code of every row from its 'Data' stamp.
df['month'] = df['Data'].map(month_extr)

##  Bipartite Graph

In [None]:
# Keep only the rows of months 01-08 and the three columns the graph needs.
# Selecting by label already yields exactly these column names, in this order.
months_kept = ['01','02','03','04','05','06','07','08']
graph_columns = ['Client ID','Page ID','month']
df = df.loc[df['month'].isin(months_kept), graph_columns]

- Both the Client IDs and the Page IDs are nodes of the graph; an edge links a client to a page, weighted by how many rows contain that pair.

#### EDGES CREATION

In [None]:
# Count how many rows contain each (client, page) pair; the count is later
# used as the edge weight. Both endpoints are turned into lower-case strings
# so they can be used directly as node labels.
client_labels = df["Client ID"].astype(str).str.lower()
page_labels = df["Page ID"].astype(str).str.lower()

edges = {}
# total= lets tqdm show a real progress bar instead of a bare counter.
for pair in tqdm(zip(client_labels, page_labels), total=len(df)):
    edges[pair] = edges.get(pair, 0) + 1

#### GRAPH CREATION WITH NETWORKX

In [None]:
# Undirected weighted graph built from the (source, target) -> count mapping.
DG = nx.Graph()

# Endpoints of every edge, in edge order: list_0 holds the sources and
# list_1 the targets (a label is repeated once per edge it appears in).
list_0 = [pair[0] for pair in edges]
list_1 = [pair[1] for pair in edges]

DG.add_weighted_edges_from(
    (pair[0], pair[1], count) for pair, count in edges.items()
)

- Assign each node to its side of the bipartite graph and add the weighted links between them

In [None]:
G = nx.DiGraph()  # TODO: decide whether the graph should be directed or not

# Tag the two node sets: edge sources get bipartite=0, edge targets bipartite=1.
G.add_nodes_from(list_0, bipartite=0)
G.add_nodes_from(list_1, bipartite=1)

# One edge per (source, target) pair, carrying its count as the 'weight' attribute.
G.add_weighted_edges_from(
    (pair[0], pair[1], count) for pair, count in edges.items()
)

In [None]:
# Replace the directed graph with its undirected version; the cells below work on this G.
G = G.to_undirected()

In [None]:
# nx.info() was deprecated in NetworkX 2.7 and removed in 3.0, so the summary
# (graph type, number of nodes, number of edges) is built explicitly instead.
f"{type(G).__name__} with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges"

## Community detection

 CDlib node-clustering algorithms reference: https://cdlib.readthedocs.io/en/latest/reference/cd_algorithms/node_clustering.html 


#### Louvain

In [None]:
# Louvain is a randomised algorithm: fix the seed so that re-running the
# notebook gives the same partition.
LOUVAIN_SEED = 42
partition = community_louvain.best_partition(G, random_state=LOUVAIN_SEED)

# Optional: get the set of community ids
#communities_louvain = set(partition.values())

# Optional: dictionary mapping each community id to the nodes within that community
#communities_louvain_dict = {c: [k for k, v in partition.items() if v == c] for c in communities_louvain}

# Store each node's community id as the 'community_louvain' node attribute
nx.set_node_attributes(G, partition, 'community_louvain')

#### CORE
- Apply Core Expansion to get a cleaner view of the clusters.
- Unlike Louvain, which forces every node into a cluster, the nodes left without a cluster are given the label "-1" (a generic catch-all cluster). Moreover, only the nodes (e.g. pages) that are unique to a single cluster are kept for that cluster, so that clusters do not overlap.

In [None]:
# Run CDlib's core_expansion on G; the following cell reads the result through `coms.communities`.
# NOTE(review): the next cell strips a 'coreValue' node attribute — presumably added by this call; confirm.
coms = algorithms.core_expansion(G)

In [None]:
def remove_common(a, b):
    """Return a new list with the items of ``a`` that do not occur in ``b``.

    The relative order of the surviving items (and any duplicates among
    them) is kept; neither input is modified.
    """
    kept = []
    for item in a:
        if item in b:
            continue
        kept.append(item)
    return kept


# Drop the 'coreValue' attribute from every node that carries one.
for _, node_attrs in G.nodes(data=True):
    node_attrs.pop('coreValue', None)

# Number of communities each node belongs to (a node counts once per community).
# One pass over all memberships replaces the pairwise community comparison,
# which was quadratic in both the number and the size of the communities.
membership_count = {}
for members in coms.communities:
    for node in set(members):
        membership_count[node] = membership_count.get(node, 0) + 1

# For every community keep only the nodes that appear in no other community,
# preserving their original order.
core_community = [
    [node for node in members if membership_count[node] == 1]
    for members in coms.communities
]

# community index -> nodes exclusive to that community
com_dict = dict(enumerate(core_community))

# node -> index of the community it exclusively belongs to
part = {node: int(idx) for idx, members in com_dict.items() for node in members}

# Every graph node that has no exclusive community gets the label -1.
main_list = list(set(G.nodes) - set(part))
for k in main_list:
    part[k] = -1
    


In [None]:
# Store each node's label from `part` (community index, or -1) as the 'core' node attribute.
nx.set_node_attributes(G, part, 'core')

#### TO EXPORT

In [None]:
# Write the graph to a GEXF file.
# "FILENAME.gexf" looks like a placeholder — replace it with the real output path.
nx.write_gexf(G, "FILENAME.gexf")