In [1]:
import pandas as pd
import pandas as pd
import networkx as nx
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import os
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Read the two GEXF contact graphs (one file per school day)
G_day1 = nx.read_gexf('data/sp_data_school_day_1_g.gexf')
G_day2 = nx.read_gexf('data/sp_data_school_day_2_g.gexf')

# Print the number of nodes and edges of each graph
print("Jour 1 : {} noeuds, {} arêtes".format(G_day1.number_of_nodes(), G_day1.number_of_edges()))
print("Jour 2 : {} noeuds, {} arêtes".format(G_day2.number_of_nodes(), G_day2.number_of_edges()))

In [None]:
import networkx as nx
import pandas as pd

# ----------------------------
# Load the graphs
# ----------------------------
# NOTE(review): these read_gexf calls load the same two files into the same
# names as the read_gexf calls of the preceding cell.
G_day1 = nx.read_gexf('data/sp_data_school_day_1_g.gexf')
G_day2 = nx.read_gexf('data/sp_data_school_day_2_g.gexf')

# ----------------------------
# Fonction pour calculer les stats par classe
# ----------------------------
def compute_class_stats(G, day_label):
    """Aggregate contact duration and number of contacts per class for one day.

    Every edge of ``G`` contributes one row per endpoint, so a contact is
    credited to the class of each of the two individuals (and twice to the
    same class when both endpoints belong to it).

    The class of a node is read from its ``classname`` attribute and the
    duration of an edge from its ``duration`` attribute, which are the keys
    used by the other class-level cells of this notebook. When those keys are
    absent the function falls back to ``class`` and ``weight`` respectively,
    and finally to ``"Unknown"`` / ``0``.

    Parameters
    ----------
    G : graph-like
        Object exposing ``edges(data=True)`` and ``nodes[n]`` attribute dicts.
    day_label : str
        Value written to the ``day`` column of every output row.

    Returns
    -------
    pandas.DataFrame
        One row per (day, class) with columns ``day``, ``class``,
        ``total_duration_hours`` and ``number_of_contacts``.
    """
    def class_of(node):
        # Prefer the notebook-wide "classname" key, then the legacy "class" key.
        attrs = G.nodes[node]
        return attrs.get("classname", attrs.get("class", "Unknown"))

    data = []
    for u, v, d in G.edges(data=True):
        # Contact duration: "duration" first, legacy "weight" second.
        duration = d.get("duration", d.get("weight", 0))

        # Two rows per edge: one for each individual involved in the contact.
        data.append([day_label, class_of(u), duration])
        data.append([day_label, class_of(v), duration])

    df = pd.DataFrame(data, columns=["day", "class", "duration_sec"])

    # Hours = value / 3600 (assumes the stored duration is in seconds — TODO confirm).
    df["duration_hours"] = df["duration_sec"] / 3600.0

    # Per-class totals: summed hours and number of edge endpoints.
    summary = (
        df.groupby(["day", "class"])
          .agg(
              total_duration_hours=("duration_hours", "sum"),
              number_of_contacts=("duration_sec", "count")
          )
          .reset_index()
    )

    return summary

# ----------------------------
# Compute the statistics for both days
# ----------------------------
stats_day1 = compute_class_stats(G_day1, "Day 1")
stats_day2 = compute_class_stats(G_day2, "Day 2")

# Stack the two per-day summaries into one table with a fresh 0..n-1 index
stats = pd.concat([stats_day1, stats_day2], ignore_index=True)

# Display
print(stats)

# CSV export (index column omitted)
stats.to_csv("class_contact_stats.csv", index=False)
print("\nFichier exporté : class_contact_stats.csv")


In [None]:
import networkx as nx
import pandas as pd

# ----------------------------
# Load the two graphs
# ----------------------------
G1 = nx.read_gexf("data/sp_data_school_day_1_g.gexf")
G2 = nx.read_gexf("data/sp_data_school_day_2_g.gexf")  # ⚠ adjust to match your actual file

# ----------------------------
# Fonction d’agrégation par classe
# ----------------------------
def compute_class_stats(G, day_label):
    """Summarise contact time and contact events per class for one day.

    Each edge yields two records, one for the class of each endpoint, carrying
    the edge's ``duration`` and ``count`` attributes (0 when missing). Nodes
    without a ``classname`` attribute are grouped under ``"Unknown"``.

    Parameters
    ----------
    G : graph-like
        Object exposing ``edges(data=True)`` and ``nodes[n]`` attribute dicts.
    day_label : str
        Value written to the ``day`` column of every output row.

    Returns
    -------
    pandas.DataFrame
        One row per (day, class) with columns ``day``, ``class``,
        ``total_duration_hours`` (summed ``duration`` / 3600) and
        ``total_contact_events`` (summed ``count``).
    """
    # One record per edge endpoint, in edge order (first endpoint, then second).
    records = [
        [
            day_label,
            G.nodes[endpoint].get("classname", "Unknown"),
            attrs.get("duration", 0),
            attrs.get("count", 0),
        ]
        for first, second, attrs in G.edges(data=True)
        for endpoint in (first, second)
    ]

    per_endpoint = pd.DataFrame(
        records, columns=["day", "class", "duration_sec", "contact_count"]
    )

    # Durations are divided by 3600 to express them in hours.
    per_endpoint = per_endpoint.assign(
        duration_hours=per_endpoint["duration_sec"] / 3600
    )

    # Totals per (day, class) pair.
    grouped = per_endpoint.groupby(["day", "class"])
    totals = grouped.agg(
        total_duration_hours=("duration_hours", "sum"),
        total_contact_events=("contact_count", "sum"),
    )
    return totals.reset_index()

# ----------------------------
# Compute the statistics
# ----------------------------
stats_day1 = compute_class_stats(G1, "Day 1")
stats_day2 = compute_class_stats(G2, "Day 2")

# Stack the two per-day summaries into one table with a fresh 0..n-1 index
stats = pd.concat([stats_day1, stats_day2], ignore_index=True)

print(stats)

# CSV export (index column omitted)
stats.to_csv("class_contact_stats.csv", index=False)
print("\nStats exportées vers class_contact_stats.csv")


In [None]:
# Class labels present in the stats table, in sorted order.
classes = sorted(stats['class'].unique())

# Per-day tables indexed by class; a class missing on a given day is filled with 0.
day1 = stats[stats['day'] == "Day 1"].set_index('class').reindex(classes, fill_value=0)
day2 = stats[stats['day'] == "Day 2"].set_index('class').reindex(classes, fill_value=0)

# Bar geometry shared by both charts.
x = np.arange(len(classes))
width = 0.35


def _plot_day_comparison(column, ylabel, title):
    """Draw paired Day 1 / Day 2 bars of `column` for every class."""
    plt.figure(figsize=(8,6))
    plt.bar(x - width/2, day1[column], width, label='Day 1', color='green')
    plt.bar(x + width/2, day2[column], width, label='Day 2', color='brown')

    plt.xlabel('Classe')
    plt.ylabel(ylabel)
    plt.title(title)
    plt.xticks(x, classes, rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.show()


# --- Chart 1: number of contacts ---
_plot_day_comparison(
    'total_contact_events',
    'Nombre total de contacts',
    'Comparaison du nombre de contacts par classe',
)

# --- Chart 2: total contact duration ---
_plot_day_comparison(
    'total_duration_hours',
    'Durée totale des contacts (heures)',
    'Comparaison de la durée totale des contacts par classe',
)

In [None]:
import pandas as pd
import numpy as np
import networkx as nx
import seaborn as sns
import matplotlib.pyplot as plt

def build_contact_matrix(G, value='count'):
    """Build a symmetric class-by-class contact matrix from a graph.

    Every edge adds its ``value`` attribute (0 when missing) to the cell
    (class of u, class of v) and to the mirrored cell (class of v, class of
    u). For an edge whose two endpoints share a class both additions land on
    the same diagonal cell, so that contact is counted once per individual.

    Totals are accumulated in a float array and the DataFrame is created once
    at the end. This avoids writing fractional hours into an integer-typed
    DataFrame cell by cell, which pandas deprecates as an incompatible-dtype
    assignment.

    Parameters
    ----------
    G : graph-like
        Object exposing ``nodes(data=True)``, ``nodes[n]`` and
        ``edges(data=True)``. Node classes are read from ``classname``
        (``"Unknown"`` when absent).
    value : str
        Edge attribute to accumulate: ``'count'`` for the number of contacts
        or ``'duration'`` for the total duration. Durations are divided by
        3600 to express them in hours.

    Returns
    -------
    pandas.DataFrame
        Float matrix indexed (rows and columns) by the sorted class names.
    """
    # Sorted list of classes and their row/column position.
    classes = sorted({data.get('classname', 'Unknown') for _, data in G.nodes(data=True)})
    position = {name: idx for idx, name in enumerate(classes)}

    totals = np.zeros((len(classes), len(classes)), dtype=float)

    for u, v, d in G.edges(data=True):
        row = position[G.nodes[u].get('classname', 'Unknown')]
        col = position[G.nodes[v].get('classname', 'Unknown')]

        val = d.get(value, 0)
        if value == 'duration':  # convert to hours
            val = val / 3600

        totals[row, col] += val
        totals[col, row] += val  # keep the matrix symmetric

    return pd.DataFrame(totals, index=classes, columns=classes)

# Count-based class-to-class matrices for Day 1 and Day 2
matrix_day1 = build_contact_matrix(G1, value='count')
matrix_day2 = build_contact_matrix(G2, value='count')


def _draw_contact_heatmap(matrix, ax, title):
    """Render one class-by-class contact matrix as an annotated heatmap on `ax`."""
    sns.heatmap(matrix, annot=True, fmt='g', cmap='YlOrRd', ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Classe')
    ax.set_ylabel('Classe')


# --- Side-by-side view: Day 1 on the left, Day 2 on the right ---
fig, axes = plt.subplots(1, 2, figsize=(18, 8))

_draw_contact_heatmap(matrix_day1, axes[0], 'Matrice des contacts entre classes - Day 1')
_draw_contact_heatmap(matrix_day2, axes[1], 'Matrice des contacts entre classes - Day 2')

plt.tight_layout()
plt.show()


In [None]:
import networkx as nx
import matplotlib.pyplot as plt

# Work on a copy so the Day 1 graph itself is left untouched
G_strong = G1.copy()

# Collect, then drop, every edge whose 'duration' attribute is below 120
# (a missing attribute counts as 0)
edges_to_remove = [
    (u, v)
    for u, v, duration in G_strong.edges(data='duration', default=0)
    if duration < 120
]
G_strong.remove_edges_from(edges_to_remove)

print(f"Nombre de nœuds : {G_strong.number_of_nodes()}")
print(f"Nombre d'arêtes après filtrage : {G_strong.number_of_edges()}")


In [None]:
plt.figure(figsize=(12, 12))

# Spring layout with a fixed seed
pos = nx.spring_layout(G_strong, seed=42)

# Nodes: size grows with the node's degree in the filtered graph
node_sizes = [100 + 10 * G_strong.degree(n) for n in G_strong.nodes()]
nx.draw_networkx_nodes(G_strong, pos, node_size=node_sizes, node_color='skyblue')

# Edges: line width is the edge's 'duration' attribute divided by 60
# (minutes, assuming the attribute is stored in seconds — TODO confirm).
# Drawn in gray: white edges are invisible on Matplotlib's default white
# figure background.
edge_weights = [d.get('duration', 0)/60 for u, v, d in G_strong.edges(data=True)]
nx.draw_networkx_edges(G_strong, pos, width=edge_weights, edge_color='gray')

# Node labels
nx.draw_networkx_labels(G_strong, pos, font_size=8)

plt.title("Réseau de contacts Day 1 (seulement contacts ≥ 2 minutes)")
plt.axis('off')
plt.show()


In [None]:


# Node ordering taken from day 1 (assumes both days contain the same nodes)
nodes = list(G_day1)
n_nodes = len(nodes)
node_index = dict(zip(nodes, range(n_nodes)))

# ----------------------------
# 2. Mesures de centralité
# ----------------------------

def compute_centrality_measures(G):
    """Return the (degree, strength, betweenness) measures of graph ``G``.

    - degree: ``dict(G.degree())``
    - strength: degree weighted by the ``'weight'`` edge attribute
    - betweenness: ``nx.betweenness_centrality`` with ``weight='weight'``

    NOTE(review): NetworkX interprets the weight given to
    ``betweenness_centrality`` as a path length — confirm this is the intended
    reading for contact weights.
    """
    return (
        dict(G.degree()),
        dict(G.degree(weight='weight')),
        nx.betweenness_centrality(G, weight='weight'),
    )

# (degree, strength, betweenness) for each of the two daily graphs
deg1, str1, betw1 = compute_centrality_measures(G_day1)
deg2, str2, betw2 = compute_centrality_measures(G_day2)

# ----------------------------
# 3. Détection de communautés
# ----------------------------

from networkx.algorithms import community

def detect_communities(G):
    """Return the greedy-modularity communities of ``G`` as a list.

    Edges are weighted by their ``'weight'`` attribute.
    """
    found = community.greedy_modularity_communities(G, weight='weight')
    return [*found]

# Community structure of each daily graph
coms_day1 = detect_communities(G_day1)
coms_day2 = detect_communities(G_day2)

# Print every community as a plain list of its members
print("\nCommunautés jour 1 :", list(map(list, coms_day1)))
print("Communautés jour 2 :", list(map(list, coms_day2)))

# ----------------------------
# 4. Cosine similarity des voisinages
# ----------------------------
def adjacency_matrix(G, nodes=None):
    """Build the dense, symmetric weighted adjacency matrix of ``G``.

    Parameters
    ----------
    G : graph-like
        Object exposing ``edges(data=True)``. An edge's weight is its
        ``'weight'`` attribute, or 1 when the attribute is missing.
    nodes : iterable, optional
        Node ordering that defines the rows and columns. When omitted, the
        module-level ``node_index`` / ``n_nodes`` are used. Pass the graph's
        own node list (or a union of several graphs' nodes) to avoid a
        ``KeyError`` on nodes missing from that shared index.

    Returns
    -------
    numpy.ndarray
        Square float array with ``A[i, j] == A[j, i]`` set to the weight of
        the edge between the i-th and j-th node, 0 elsewhere.

    Raises
    ------
    KeyError
        If an edge endpoint is not part of the node ordering in use.
    """
    if nodes is None:
        index, size = node_index, n_nodes
    else:
        # Drop duplicates while keeping first-seen order so positions stay dense.
        ordered = list(dict.fromkeys(nodes))
        index = {node: idx for idx, node in enumerate(ordered)}
        size = len(ordered)

    A = np.zeros((size, size))
    for u, v, data in G.edges(data=True):
        i, j = index[u], index[v]
        # Undirected graph: mirror the weight across the diagonal.
        A[i, j] = A[j, i] = data.get('weight', 1)
    return A

# ----------------------------
# 5. Optionnel : visualisation rapide
# ----------------------------

import matplotlib.pyplot as plt

# Quick look at the day-1 graph; each node's size is 100 times its degree
plt.figure(figsize=(8,6))
pos = nx.spring_layout(G_day1, seed=42)
nx.draw(
    G_day1,
    pos,
    with_labels=True,
    node_size=[100 * deg1[n] for n in G_day1],
    node_color='skyblue',
    edge_color='gray',
)
plt.title("Graphe des contacts - Jour 1")
plt.show()
