Frequently Bought Toghether Items

Inizializzazione, parametri e altre cose che tutti devono sapere

In [5]:
#import numpy as np
#import matplotlib.pyplot as plt
import networkx as nx
import node2vec
from gensim.models import Word2Vec

#Grafo completo FBT
full_graph = nx.Graph()

#Grafo cluster selezionato
clustered_graph = nx.Graph()

#File di lettura
#file_name = "Amazon0302.txt"
file_name = "grafo_esempio.txt"

#Grafo directed se True o uniderected se False
directed = True

#embedding hyperparameters
#default = 1, 1, 10, 80
p=1
q=1
num_walks=10
walk_length=80


In [6]:
### LOAD GRAPH ###
print("loading graph " + file_name)

if directed:
    ##genera il grafo directed utilizzando nx.DiGraph
    full_graph = nx.read_edgelist(file_name, nodetype=int, create_using=nx.DiGraph)
		
else:
    ##genera il grafo undirected utilizzando nx.Graph
    full_graph = nx.read_edgelist(file_name)
    
nx.set_edge_attributes(full_graph, 1, name='weight')
print(full_graph)

print("finished")


loading graph grafo_esempio.txt
DiGraph with 13 nodes and 42 edges
finished


Embedding using stanford's node2vec

In [7]:
G = node2vec.Graph(full_graph, directed, p, q)
G.preprocess_transition_probs()
walks = G.simulate_walks(num_walks, walk_length)


Walk iteration:
1/10
2/10
3/10
4/10
5/10
6/10
7/10
8/10
9/10
10/10


In [8]:
walks = [list(map(str, walk)) for walk in walks]
model = Word2Vec(walks, window=10, min_count=0, sg=1, workers=8)
#model.save_word2vec_format("out.emb")

Clustering

In [9]:

print(model)
#k_means_cluster


Word2Vec(vocab=13, vector_size=100, alpha=0.025)


In [10]:
#Clustering global cc
average_cluster_coef = nx.average_clustering(full_graph)

print(f"Coefficiente di clustering medio: {average_cluster_coef}")



#Clustering per ogni nodo (restituisce un dizionario)
node_cluster_coefs = nx.clustering(full_graph)

for node, cluster_coef in node_cluster_coefs.items():
    print(f"Nodo {node}: Coefficiente di clustering = {cluster_coef}")


Coefficiente di clustering medio: 0.441025641025641
Nodo 1: Coefficiente di clustering = 0
Nodo 2: Coefficiente di clustering = 0.3333333333333333
Nodo 3: Coefficiente di clustering = 0
Nodo 4: Coefficiente di clustering = 0.3333333333333333
Nodo 5: Coefficiente di clustering = 0.2
Nodo 7: Coefficiente di clustering = 0
Nodo 6: Coefficiente di clustering = 0.6666666666666666
Nodo 8: Coefficiente di clustering = 0.2
Nodo 9: Coefficiente di clustering = 1.0
Nodo 11: Coefficiente di clustering = 0.5
Nodo 10: Coefficiente di clustering = 0.5
Nodo 12: Coefficiente di clustering = 1.0
Nodo 13: Coefficiente di clustering = 1.0


Valutazione nodi (usare clustered_graph)

In [None]:
#Valutazione nodi con CC o altre metriche

In [11]:
#Ricerca cliques

#lettura file di esempio, costruzione nodes e edges
import os
import networkx as nx

current_directory = os.path.dirname(os.path.realpath("__file__"))
filename = "grafo_esempio.txt"    

graph = nx.Graph()
graph = nx.read_edgelist(filename, nodetype=int, create_using=nx.DiGraph)
nx.set_edge_attributes(graph, 1, name='weight')
print(graph)

print("Edges:", graph.edges)
print("Unique Node IDs:", graph.nodes)

DiGraph with 13 nodes and 42 edges
Edges: [(1, 2), (1, 3), (2, 1), (2, 4), (2, 5), (3, 1), (3, 5), (4, 2), (4, 5), (4, 7), (5, 2), (5, 3), (5, 4), (5, 6), (5, 8), (7, 4), (7, 8), (7, 11), (6, 5), (6, 8), (6, 9), (8, 5), (8, 6), (8, 7), (8, 9), (8, 10), (9, 6), (9, 8), (11, 7), (11, 10), (11, 12), (11, 13), (10, 8), (10, 11), (10, 12), (10, 13), (12, 10), (12, 11), (12, 13), (13, 10), (13, 11), (13, 12)]
Unique Node IDs: [1, 2, 3, 4, 5, 7, 6, 8, 9, 11, 10, 12, 13]


In [12]:
#funzione per il neighbor
def n(v, edges):
    neighbors = set()
    for edge in edges:
        if v in edge:
            neighbors.update(edge)

    neighbors.discard(v)

    return list(neighbors)

#Bron-Kerbosch algorithm with pivot
def BronKerbosch(R, P, X, edges, cliques):
    if not P and not X:
        # P and X are both empty, report R as a maximal clique
        cliques.append(R)
        return

    # Choose a pivot vertex u in P ⋃ X
    pivot = (set(P) | set(X)).pop()

    for v in set(P) - set(n(pivot, edges)):
        # Recursively explore the neighborhood of v
        BronKerbosch(R + [v], list(set(P) & set(n(v, edges))), list(set(X) & set(n(v, edges))), edges, cliques)

        # Remove v from P and add it to X
        P.remove(v)
        X.append(v)

#funzione per la clique massima
def maxClique(cliques):
    return max(cliques, key=len, default=[])

#Ricerca e stampa le cliques e la clique massima
all_cliques = []
node_cliques = []
selected_node = 10
BronKerbosch([], list(graph.nodes), [], graph.edges, all_cliques)
print("Cliques:")
for clique in all_cliques:
    if selected_node in clique:
        node_cliques.append(clique)
        
print(node_cliques)

print("Maximum Clique:", maxClique(node_cliques))

Cliques:
[[8, 10], [10, 11, 12, 13]]
Maximum Clique: [10, 11, 12, 13]


In [13]:
#Clustering global cc
average_cluster_coef = nx.average_clustering(graph)

print(f"Coefficiente di clustering medio: {average_cluster_coef}")



#Clustering per ogni nodo (restituisce un dizionario)
node_cluster_coefs = nx.clustering(graph)

for node, cluster_coef in node_cluster_coefs.items():
    print(f"Nodo {node}: Coefficiente di clustering = {cluster_coef}")


Coefficiente di clustering medio: 0.441025641025641
Nodo 1: Coefficiente di clustering = 0
Nodo 2: Coefficiente di clustering = 0.3333333333333333
Nodo 3: Coefficiente di clustering = 0
Nodo 4: Coefficiente di clustering = 0.3333333333333333
Nodo 5: Coefficiente di clustering = 0.2
Nodo 7: Coefficiente di clustering = 0
Nodo 6: Coefficiente di clustering = 0.6666666666666666
Nodo 8: Coefficiente di clustering = 0.2
Nodo 9: Coefficiente di clustering = 1.0
Nodo 11: Coefficiente di clustering = 0.5
Nodo 10: Coefficiente di clustering = 0.5
Nodo 12: Coefficiente di clustering = 1.0
Nodo 13: Coefficiente di clustering = 1.0


In [14]:
#node clique with max cc
max_cc = 0
best_clique = []
for clique in node_cliques:
    avg_cc = 0;
    for node in clique:
        avg_cc += node_cluster_coefs[node]
        
    if avg_cc/len(clique) > max_cc:
        best_clique = clique
        
print(best_clique)

[10, 11, 12, 13]


In [16]:
#Tentativi con rimozioni casuali
import random

# Numero di nodi da rimuovere ad ogni iterazione
n_nodes_to_remove = 1

# Numero totale di iterazioni
n_iterations = 10

# Copia del grafo iniziale
graph_to_modify = full_graph.copy()

for iteration in range(n_iterations):
    # Rimuovi n nodi casuali dal grafo
    nodes_to_remove = random.sample(list(graph_to_modify.nodes()), n_nodes_to_remove)
    updated_graph = graph_to_modify.copy()
    updated_graph.remove_nodes_from(nodes_to_remove)

    # Stampa o elabora i risultati desiderati
    print(f"Iteration {iteration + 1}: Removed nodes {nodes_to_remove}")
    
    # Aggiorna il grafo originale per la prossima iterazione
    original_graph = updated_graph.copy()


Iteration 1: Removed nodes [6]
Iteration 2: Removed nodes [13]
Iteration 3: Removed nodes [10]
Iteration 4: Removed nodes [4]
Iteration 5: Removed nodes [10]
Iteration 6: Removed nodes [3]
Iteration 7: Removed nodes [1]
Iteration 8: Removed nodes [2]
Iteration 9: Removed nodes [4]
Iteration 10: Removed nodes [10]
