In [None]:
%pip install torch-geometric community python-louvain

In [None]:
from torch_geometric.datasets import Twitch
from torch_geometric.utils import to_networkx
import collections
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

In [None]:
# Disabled leftover: would load a local CSV file into `df`. None of the cells
# visible in this notebook use `df`, so the line stays commented out.
# df = pd.read_csv('./datasetV2.csv')

In [None]:
# Disabled leftover: would display the first row of `df`, which is only
# defined by the (also commented-out) CSV load in the previous cell.
# df.iloc[0]

In [None]:
# Load every Twitch language split and keep element 0 of each one.
dataset = [
    Twitch(root='./data/Twitch', name=lang)[0]
    for lang in ('DE', 'FR', 'ES', 'PT', 'RU', 'EN')
]

# The cells below work with the first split in the list ('DE').
data = dataset[0]

# Quick look at the contents of the selected graph object.
pd.DataFrame(data).head()

In [None]:
# Work on a NumPy copy/view of the connectivity tensor.
edge_index = data.edge_index.numpy()

# Keep only the columns whose first-row entry is node 30
# (a boolean mask selects the same columns as the index lookup).
edge_example = edge_index[:, edge_index[0] == 30]
edge_example

#### Edge and Node Extraction for Node 30

In [None]:
# Draw the small subgraph formed by the edges selected in `edge_example`.
#
# The subgraph gets its own name (`G_example`) instead of `G`: the next cells
# bind `G` to the full-dataset graph, and sharing the name means re-running
# this cell out of order would silently swap the graph they operate on.
node_example = np.unique(edge_example.flatten())

G_example = nx.Graph()
G_example.add_nodes_from(node_example)
G_example.add_edges_from(zip(edge_example[0], edge_example[1]))

# draw_networkx falls back to an unseeded spring layout when no positions are
# given; computing it explicitly with a fixed seed makes the figure
# reproducible (same seed as the centrality plot further down).
pos_example = nx.spring_layout(G_example, seed=42)

plt.figure(figsize=(10, 6))
nx.draw_networkx(G_example, pos=pos_example, with_labels=True, node_color='skyblue',
                 edge_color='gray', node_size=700, font_size=12)
plt.title("Neighborhood of node 30")
plt.show()

In [None]:
# Build the undirected NetworkX view of the selected graph.
G = to_networkx(data, to_undirected=True)

# One degree value per node, in the graph's iteration order.
degrees = [deg for _, deg in G.degree()]

# Summary statistics laid out as a single-row table, rounded to 2 decimals.
degree_desc = pd.Series(degrees).describe().to_frame().T.round(2)
print(degree_desc)

# Histogram of node degrees; the y axis is logarithmic.
plt.figure(figsize=(10, 6))
plt.hist(degrees, bins=50, color='skyblue', edgecolor='black')
plt.title("Degree Distribution")
plt.xlabel("Node Degree")
plt.ylabel("Frequency")
plt.yscale('log')
plt.show()

#### Degree Centrality

In [None]:
# Plot the full graph `G`, emphasising its TOP_K most central nodes.
TOP_K = 10  # how many highest-centrality nodes to highlight

pos = nx.spring_layout(G, seed=42)  # fixed seed -> reproducible layout
cent = nx.degree_centrality(G)
cent_array = np.array(list(cent.values()))

# Marker size proportional to degree centrality.
node_size = (cent_array * 500).tolist()

# Threshold = centrality of the TOP_K-th most central node, i.e. index
# TOP_K - 1 of the descending sort (index TOP_K would be the (TOP_K+1)-th
# node). The index is clamped so graphs with fewer than TOP_K nodes do not
# raise IndexError. Ties at the threshold are all highlighted.
kth = min(TOP_K, cent_array.size) - 1
threshold = np.sort(cent_array)[::-1][kth]
print("Centrality threshold", threshold)

# 1 for highlighted nodes, 0.1 for the rest; reused as both colour value and
# per-node alpha below.
cent_bin = np.where(cent_array >= threshold, 1, 0.1)

plt.figure(figsize=(12, 12))
# nodelist follows cent's key order so node_size / cent_bin line up per node.
nodes = nx.draw_networkx_nodes(G, pos, node_size=node_size, cmap=plt.cm.plasma,
                               node_color=cent_bin, nodelist=list(cent.keys()), alpha=cent_bin)
edges = nx.draw_networkx_edges(G, pos, width=0.25, alpha=0.3)
plt.title("Network Visualization with Degree Centrality")
plt.show()

In [None]:
# Report how many features each node carries.
print(f'Number of features: {data.num_node_features}')

# Show the first 20 feature values of node 0.
data.x[0, :20]

In [None]:
# `community` here is the module shipped by the `python-louvain` package.
import community as community_louvain

# Louvain and the spring layout are both randomised. Seeding them keeps the
# detected communities, their colours and the node positions identical across
# runs; seed 42 matches the layout used for the centrality plot.
partition = community_louvain.best_partition(G, random_state=42)
pos = nx.spring_layout(G, seed=42)

# One discrete colour per community id (ids start at 0, hence max + 1).
cmap = plt.get_cmap('viridis', max(partition.values()) + 1)

plt.figure(figsize=(12, 12))
# nodelist and node_color both follow the partition dict's order, so each
# node is coloured with its own community id.
nx.draw_networkx_nodes(G, pos, nodelist=list(partition.keys()), node_size=40,
                       cmap=cmap, node_color=list(partition.values()))
nx.draw_networkx_edges(G, pos, alpha=0.5)
plt.title("Community Detection using Louvain Method")
plt.show()

In [None]:
# Bar chart of how many nodes fall in each target class.
# Label order defines the class ids: position 0 -> class 0, position 1 -> class 1.
class_labels = ["Non-explicit", "Explicit"]

# .tolist() yields plain Python ints, so the printed dict has clean keys
# instead of NumPy scalar reprs.
counter = dict(collections.Counter(data.y.tolist()))
print(counter)

# Look up each expected class explicitly (0 when absent) so the number of bars
# always matches the number of labels, even if a class is missing from `data.y`.
count = [counter.get(class_id, 0) for class_id in range(len(class_labels))]

plt.figure(figsize=(10, 6))
plt.bar(range(len(class_labels)), count, color=['skyblue', 'salmon'])
plt.xlabel("Class", size=20)
plt.ylabel("Count", size=20)
plt.title("Class Distribution")
plt.xticks(ticks=range(len(class_labels)), labels=class_labels)
plt.show()