In [None]:
%pip install torch-geometric community python-louvain

In [None]:
from torch_geometric.datasets import Twitch
from torch_geometric.utils import to_networkx
import collections
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import json
import community as community_louvain


(Lien avec les systèmes de recommandation)
-> Détection de communautés et proposition d'amis proches

Prédiction d'attribut de node (jeux joués) :

- récupération d'attributs des nodes
- attribut 1 en binaire :
  - si joué ou non
  - prédire si le jeu pourrait l'intéresser

- amis d'amis
- mêmes jeux joués

In [None]:
# Node features: a JSON object loaded as a dict (later cells index it by str(node id)).
with open("data/Twitch/FR/musae_FR_features.json") as features_file:
    data_raw = json.load(features_file)

# Edge list and per-node targets; the `mature` column is cast to int.
edge_data = pd.read_csv('data/Twitch/FR/musae_FR_edges.csv')
target_data = pd.read_csv('data/Twitch/FR/musae_FR_target.csv').assign(
    mature=lambda frame: frame['mature'].astype(int)
)

edge_data.head()

In [None]:
# Preview the first rows of the target table loaded above.
target_data.head()

In [None]:
# Undirected graph built from the `from` / `to` columns of the edge list.
G = nx.Graph()

# Add every edge in a single call. The previous `iterrows()` loop materialised a
# Series per row and called `add_edges_from` with one edge at a time, which is
# needlessly slow on a large edge list.
G.add_edges_from(zip(edge_data['from'], edge_data['to']))

nx.draw(G, with_labels=False, node_size=50, node_color='skyblue', edge_color='gray')

In [None]:
# Louvain and the spring layout are both randomised. Fixing their seeds keeps the
# detected communities and the node positions stable across "Restart & Run All".
partition = community_louvain.best_partition(G, random_state=42)
pos = nx.spring_layout(G, seed=42)
# One discrete colour per community id (the `+ 1` assumes ids start at 0).
cmap = plt.get_cmap('viridis', max(partition.values()) + 1)

print(set(partition.values()))

In [None]:
# Count community sizes in a single pass; the previous version rescanned the whole
# partition once per community.
community_sizes = collections.Counter(partition.values())
legend_labels = [
    f"Community {community}: {size} nodes"
    for community, size in sorted(community_sizes.items())
]

print(legend_labels)
plt.figure(figsize=(10, 10))
# Look each colour up in the graph's own node order, so the colours cannot drift
# out of step with the nodes `nx.draw` actually draws.
nx.draw(G, pos, node_size=40, cmap=cmap, node_color=[partition[node] for node in G.nodes()])
plt.title("Community Detection using Louvain Method")
# plt.legend(legend_labels, loc='best')
plt.axis('off')
plt.show()

Total communities

In [None]:
# Number of nodes assigned to each community of `partition`.
partitions = [community for community in partition.values()]
partition_counts = collections.Counter(partitions)
df_partitions = pd.DataFrame(
    {'Count': list(partition_counts.values())},
    index=list(partition_counts.keys()),
)

# Bar chart of the community sizes, labelled through the returned Axes.
ax = df_partitions.plot(kind='bar', legend=False)
ax.set_xlabel('Partition')
ax.set_ylabel('Count')
ax.set_title('Partition Distribution')
plt.show()

In [None]:
# node_id = 0

# def get_neighbors(G, node, depth=1):
#     if (depth == 0):
#         return node
    
#     neighbors = list(G.neighbors(node))
#     if depth == 1:
#         return neighbors
#     else:
#         k_neighbors = []
#         for neighbor in neighbors:
#             k_neighbors += get_neighbors(G, neighbor, depth-1)
#         return list(set(k_neighbors))

# def get_nb_neighbors(neighbors):
#     return len(neighbors)

# def get_similar_neighbors(node_neighbors, k_neighbors):
#     return list(set(node_neighbors).intersection(set(k_neighbors)))

# node_neighbors = get_neighbors(G, 0, 1)
# k_neighbors = get_neighbors(G, 0, 2)

# print(f'Number of neighbors for node {node_id}: {node_neighbors}')
# print(f'Neighbors of node {node_id}: {get_nb_neighbors(node_neighbors)}')
# print()
# print(f'Number of second neighbors of node {node_id}: {k_neighbors}')
# print(f'Second neighbors of node {node_id}: {get_nb_neighbors(k_neighbors)}')
# print()
# print(f'Similar neighbors of node {node_id} and second neighbors: {get_similar_neighbors(node_neighbors, k_neighbors)}')

In [None]:
# Reference node used as the running example by the cells below.
node_id = 0

In [None]:
def similar_neighbors(G, node, threshold=5):
    """Return the nodes that share at least `threshold` neighbours with `node`.

    `node` itself and its direct neighbours are never returned: only nodes that
    are not yet connected to `node` are considered.

    Args:
        G: graph exposing `nodes()` and `neighbors(n)` (e.g. a networkx Graph).
        node: the reference node; must be present in `G`.
        threshold: minimum number of common neighbours required.

    Returns:
        A list of matching nodes, in the iteration order of `G.nodes()`.
    """
    # Built once: the neighbourhood of `node` does not change during the scan,
    # and a set gives constant-time membership tests and intersections.
    node_neighbors = set(G.neighbors(node))

    # Graph nodes are unique, so no de-duplication is needed; keeping the graph's
    # own order makes the result deterministic.
    return [
        candidate
        for candidate in G.nodes()
        if candidate != node
        and candidate not in node_neighbors
        and len(node_neighbors.intersection(G.neighbors(candidate))) >= threshold
    ]

# Run the search for the reference node, requiring 7 shared neighbours.
simil_neighbors = similar_neighbors(G, node=node_id, threshold=7)

print(f'Similar neighbors of node {node_id} with threshold 7: {simil_neighbors}')

In [None]:
# Set lookup: avoids scanning the `simil_neighbors` list for every node of the
# graph (previously done twice, once for colours and once for sizes).
highlighted = set(simil_neighbors)

# Colour and size per role: reference node, similar node, everything else.
node_colors = []
node_sizes = []
for node in G.nodes:
    if node == node_id:
        color, size = 'red', 100
    elif node in highlighted:
        color, size = 'green', 60
    else:
        color, size = 'blue', 0.2
    node_colors.append(color)
    node_sizes.append(size)

# The spring layout is randomised; seed it so the figure is the same on every run.
pos = nx.spring_layout(G, seed=42)
nx.draw_networkx_nodes(G, pos, node_color=node_colors, node_size=node_sizes)

Graphe de répartition des jeux pour chaque utilisateur

In [None]:
# Graph linking every streamer to each of its features.
# NOTE: this rebinds `G`, replacing the graph built from the edge list earlier.
G = nx.Graph()
for streamer, target in data_raw.items():
    # Added explicitly so a streamer with an empty feature list still appears.
    G.add_node(streamer)
    # `streamer` is a JSON key, i.e. a string: `zip(streamer, target)` paired the
    # individual characters of the id with the first few features. Link the whole
    # id to every one of its features instead.
    G.add_edges_from((streamer, feature) for feature in target)

nx.draw(G, with_labels=False, node_size=50, node_color='skyblue', edge_color='gray')

In [None]:
# Louvain and the spring layout are both randomised; fixed seeds make the
# communities and the figure reproducible from one run to the next.
partition = community_louvain.best_partition(G, random_state=42)
pos = nx.spring_layout(G, seed=42)
# One discrete colour per community id (the `+ 1` assumes ids start at 0).
cmap = plt.get_cmap('viridis', max(partition.values()) + 1)

# Draw nodes in an explicit order and look colours up in that same order.
nodelist = list(partition)
plt.figure(figsize=(10, 10))
nx.draw_networkx_nodes(
    G,
    pos,
    nodelist,
    node_size=40,
    cmap=cmap,
    node_color=[partition[node] for node in nodelist],
)
nx.draw_networkx_edges(G, pos, alpha=0.5)
plt.title("Community Detection using Louvain Method")
plt.show()

In [None]:
def features_predictions(data, node, threshold=5, neighbors=None):
    """Suggest features for `node` based on nodes with similar features.

    A candidate is considered similar when it shares at least `threshold`
    features with `node`. Every feature that a similar candidate has and that
    `node` does not have yet is returned as a prediction.

    Args:
        data: mapping from node id (as a string) to an iterable of features.
        node: id of the node to predict for; it is looked up as `str(node)`.
        threshold: minimum number of shared features for a candidate to count.
        neighbors: optional iterable of candidate ids (each looked up as
            `str(id)`). When None, every other entry of `data` is a candidate.

    Returns:
        A list of the predicted features, without duplicates and in no
        particular order.

    Raises:
        KeyError: if `node` or one of `neighbors` is missing from `data`.
    """
    node_key = str(node)
    node_features = set(data[node_key])

    if neighbors is not None:
        candidates = (data[str(neighbor)] for neighbor in neighbors)
    else:
        # Keys are strings, so compare against the string form of `node` to
        # really skip the node itself.
        candidates = (features for key, features in data.items() if key != node_key)

    predicted = set()
    for features in candidates:
        candidate_features = set(features)
        if len(node_features & candidate_features) >= threshold:
            # Predict what the similar candidate has and `node` lacks; the reverse
            # difference would only return features `node` already has.
            predicted |= candidate_features - node_features

    return list(predicted)

# Predict features for the reference node from its similar neighbours (threshold 7).
feature_predict = features_predictions(data_raw, node_id, threshold=7, neighbors=simil_neighbors)

print(f'Feature prediction for node {node_id}: {feature_predict}')