In [None]:
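# Optional: %pip install python-louvain  (not required here: the Louvain cell below uses networkx's built-in louvain_communities)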

In [None]:
import networkx as nx
import matplotlib.pyplot as plt
from networkx.algorithms import community
import numpy as np
from networkx.algorithms import cuts
import pandas as pd

In [None]:
# Load the karate club graph bundled with networkx; the community-detection
# cells below all read this notebook-level `G`.
G = nx.karate_club_graph()

In [None]:
# Girvan-Newman: the generator yields one partition per level of the
# edge-removal hierarchy. Scan all of them and keep the partition whose
# modularity on G is highest.
girvan_newman_generator = community.girvan_newman(G)
girvan_newman_partition = max(
    girvan_newman_generator,
    key=lambda candidate: community.modularity(G, candidate),
)

In [None]:
# Label Propagation
# Materialise the result as a list, like the Louvain partition in this
# notebook: the plotting and metric cells call len() on the partition and
# iterate over it several times, which a lazy/one-shot iterable cannot support.
label_propagation_partition = list(community.label_propagation_communities(G))

In [None]:
# Louvain, using the implementation that ships with networkx.
# The algorithm is randomised, so pin the seed: without it the partition (and
# every figure and metric derived from it) can change between runs.
louvain_partition = list(community.louvain_communities(G, seed=42))

In [None]:
# Add function to calculate metrics
def calculate_metrics(G, communities):
    """Summarise a partition of ``G`` with four quality scores.

    Parameters
    ----------
    G : networkx graph
        Graph the partition belongs to.
    communities : iterable of node collections
        One collection of nodes per community. Any iterable is accepted; it is
        copied into a list before use.

    Returns
    -------
    dict
        ``num_communities`` (int), ``modularity`` (float), and the mean over
        communities of ``conductance`` and ``normalized_cut``.

    Notes
    -----
    NOTE(review): conductance and normalized cut are ratios over community
    volume, so a community with zero volume (or one covering the whole graph)
    is expected to make networkx raise ZeroDivisionError - confirm before
    feeding degenerate partitions.
    """
    # Materialise once: the partition is measured with len() and iterated
    # three times below, which a generator would not survive.
    communities = list(communities)
    return {
        'num_communities': len(communities),
        'modularity': community.modularity(G, communities),
        'conductance': np.mean([cuts.conductance(G, c) for c in communities]),
        'normalized_cut': np.mean([cuts.normalized_cut_size(G, c) for c in communities]),
    }

In [None]:
def create_community_node_colors(graph, communities):
    """Return one colour per node of ``graph``, in node-iteration order.

    Parameters
    ----------
    graph : iterable of nodes
        Typically a networkx graph (iterating it yields its nodes).
    communities : iterable of node collections
        One collection per community. If a node appears in several
        communities, the first one wins.

    Returns
    -------
    list
        Exactly one colour per node. Community ``idx`` is coloured with
        ``plt.cm.jet(idx / number_of_communities)``. A node that belongs to no
        community gets a neutral grey, so the list always stays aligned with
        the node order expected by ``nx.draw``.
    """
    communities = list(communities)

    # Build the node -> community index lookup once instead of rescanning every
    # community for every node. setdefault keeps the first match.
    community_of = {}
    for idx, comm in enumerate(communities):
        for node in comm:
            community_of.setdefault(node, idx)

    unassigned_color = (0.5, 0.5, 0.5, 1.0)  # RGBA grey for nodes outside every community
    node_colors = []
    for node in graph:
        idx = community_of.get(node)
        if idx is None:
            node_colors.append(unassigned_color)
        else:
            node_colors.append(plt.cm.jet(idx / len(communities)))
    return node_colors

def visualize_communities(graph, communities, title, ax):
    """Draw ``graph`` on ``ax`` with every node coloured by its community.

    The subplot title shows ``title``, the number of communities and the
    partition's modularity (three decimals). Node positions come from a
    seeded spring layout, so repeated calls on the same graph place the nodes
    identically.
    """
    colors_by_node = create_community_node_colors(graph, communities)
    layout = nx.spring_layout(graph, k=0.3, iterations=50, seed=2)
    score = community.modularity(graph, communities)
    n_found = len(communities)

    ax.set_title(f"{title}\n{n_found} communities (Modularity: {score:.3f})")
    nx.draw(
        graph,
        pos=layout,
        node_size=500,
        node_color=colors_by_node,
        with_labels=True,
        font_size=10,
        ax=ax,
    )

In [None]:
# Draw the three partitions side by side, one subplot per algorithm.
fig, axes = plt.subplots(1, 3, figsize=(20, 6))

panels = [
    (louvain_partition, "Louvain"),
    (label_propagation_partition, "Label Propagation"),
    (girvan_newman_partition, "Girvan-Newman"),
]
for panel_ax, (panel_partition, panel_title) in zip(axes, panels):
    visualize_communities(G, panel_partition, panel_title, panel_ax)

In [None]:
# Create DataFrame for metrics comparison: one row per algorithm.
metrics_data = []
algorithms = ["Louvain", "Label Propagation", "Girvan-Newman"]

for partition, algorithm in zip([louvain_partition, label_propagation_partition, girvan_newman_partition], algorithms):
    metrics = calculate_metrics(G, partition)
    metrics_data.append({
        'Algorithm': algorithm,
        'Num Communities': metrics['num_communities'],
        'Modularity': metrics['modularity'],
        'Conductance': metrics['conductance'],
        'Normalized Cut': metrics['normalized_cut']
    })

metrics_df = pd.DataFrame(metrics_data)

# Plot the comparison of metrics
fig, axes = plt.subplots(1, 2, figsize=(18, 7))

# Bar chart for comparing 'Num Communities' between algorithms
axes[0].bar(metrics_df['Algorithm'], metrics_df['Num Communities'], color="#8E44AD")
axes[0].set_title("Num Communities Comparison")
axes[0].set_xlabel("Algorithm")
axes[0].set_ylabel("Num Communities")

# Stacked bar chart of Modularity, Conductance and Normalized Cut per algorithm
bar_width = 0.6  # Width of each stacked bar
x = np.arange(len(metrics_df['Algorithm']))  # X positions for each algorithm

# The three segments are stacked, so each bar must be centred on its own tick
# position `x`. Shifting by half a bar width (as for grouped bars) would push
# every bar to the left of its algorithm label.
axes[1].bar(x, metrics_df['Modularity'], width=bar_width, label='Modularity', color="#F2D140")
axes[1].bar(x, metrics_df['Conductance'], width=bar_width, label='Conductance', color="#FF6347", bottom=metrics_df['Modularity'])
axes[1].bar(x, metrics_df['Normalized Cut'], width=bar_width, label='Normalized Cut', color="#48C9B0", bottom=metrics_df['Modularity'] + metrics_df['Conductance'])

axes[1].set_title("Comparison of Metrics")
axes[1].set_xlabel("Algorithm")
axes[1].set_ylabel("Value")
axes[1].set_xticks(x)
axes[1].set_xticklabels(metrics_df['Algorithm'])
axes[1].legend()

plt.tight_layout()
plt.show()

# Yêu cầu:

1. Viết lại bài Lab03.02 thành class, xây dựng pipeline áp dụng cho đồ thị mạng xã hội bất kỳ.
2. Giới thiệu tổng quan về thuật toán Louvain.
3. Tóm tắt tổng quan các chỉ số đánh giá việc phát hiện cộng đồng (công thức, phạm vi giá trị, như thế nào là tốt, ...).
4. Thực hiện phân tích (nhận xét) việc phát hiện cộng đồng cho mạng xã hội tự chọn.

### Viết lại bài Lab03.02 thành class, xây dựng pipeline áp dụng cho đồ thị mạng xã hội bất kỳ.

In [None]:
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

class CommunityAnalyzer:
    """Girvan-Newman community analysis of a single graph.

    On construction the complete Girvan-Newman hierarchy of ``G`` is computed
    and stored in ``self.communities``; entry ``i`` is the partition produced
    at step ``i`` (0-based) of the algorithm.
    """

    def __init__(self, G):
        """Store ``G`` and eagerly compute every Girvan-Newman partition."""
        self.G = G
        # girvan_newman is a generator; list() runs it to exhaustion.
        self.communities = list(nx.community.girvan_newman(G))

    def modularity_ratio(self, G, community):
        """Return internal edges of ``community`` divided by an expected count.

        The numerator is the number of edges with both endpoints inside the
        community. The denominator is ``volume**2 / (2 * m)``, where ``volume``
        is the sum of member degrees and ``m = G.size()``.

        Returns 0 when the graph has no edges or the community has zero volume.

        NOTE(review): the configuration-model expectation used by modularity
        is ``volume**2 / (4 * m)``; the ``2 * m`` denominator is kept from the
        original lab definition - confirm which one is intended.
        """
        nodes = list(community)
        members = set(nodes)  # O(1) membership tests instead of scanning a list
        total_edges = G.size()
        if total_edges == 0:
            # No edges at all: the expected count would divide by zero.
            return 0
        # Every internal edge is seen from both endpoints, hence the halving.
        internal_endpoints = sum(
            1
            for node in nodes
            for neighbor in set(G.neighbors(node))
            if neighbor in members
        )
        internal_edges = internal_endpoints // 2
        expected_edges = sum(G.degree(node) for node in nodes) ** 2 / (2 * total_edges)
        return internal_edges / expected_edges if expected_edges != 0 else 0

    def volume(self, G, community):
        """Return the sum of the degrees of the nodes in ``community``."""
        return sum(G.degree(node) for node in community)

    def edges_cut(self, G, community):
        """Return the number of edges leaving ``community``.

        Counts every (member, neighbour) pair whose neighbour lies outside
        the community.
        """
        nodes = list(community)
        members = set(nodes)
        return sum(
            1
            for node in nodes
            for neighbor in G.neighbors(node)
            if neighbor not in members
        )

    def analyze_communities(self):
        """Compute per-community metrics for the first half of the hierarchy.

        Returns
        -------
        pandas.DataFrame
            One row per community with columns ``k`` (1-based Girvan-Newman
            step), ``modularity`` (of the whole partition at that step),
            ``modularity_ratio``, ``volume`` and ``edges_cut``. The columns
            are present even when there are no rows.
        """
        columns = ['k', 'modularity', 'modularity_ratio', 'volume', 'edges_cut']
        metrics_data = []
        for k in range(len(self.communities) // 2):
            partition = self.communities[k]
            # Modularity is a property of the partition, not of one community:
            # compute it once per step rather than once per row.
            partition_modularity = nx.community.modularity(self.G, partition)
            for community in partition:
                metrics_data.append({
                    'k': k + 1,
                    'modularity': partition_modularity,
                    'modularity_ratio': self.modularity_ratio(self.G, community),
                    'volume': self.volume(self.G, community),
                    'edges_cut': self.edges_cut(self.G, community)
                })
        return pd.DataFrame(metrics_data, columns=columns)

    def plot_metrics(self, metrics_df):
        """Print ``metrics_df`` and draw two figures from it.

        Figure 1 shows the graph coloured by community at hierarchy indices 0
        and 3 (a snapshot is left blank when the hierarchy is too short to
        contain it) and the modularity trend over steps. Figure 2 shows the
        per-step mean of modularity ratio, volume and edges cut.

        Returns
        -------
        tuple
            ``(fig1, fig2)``.
        """
        print(metrics_df)

        # First figure: Community visualizations and modularity
        fig1, axes1 = plt.subplots(3, 1, figsize=(15, 20))
        fig1.suptitle('Community Structure Analysis', fontsize=16)

        colors = ["#D4FCB1", "#CDC5FC", "#FFC2C4", "#F2D140", "#BCC6C8"]
        # The layout is seeded and depends only on the graph, so compute it once.
        pos = nx.spring_layout(self.G, k=0.3, iterations=50, seed=2)

        # Plot communities
        for idx, k in enumerate([0, 3]):
            if k >= len(self.communities):
                # Small graphs have fewer partitions than the requested snapshot.
                axes1[idx].set_visible(False)
                continue
            communities = self.communities[k]
            modularity = round(nx.community.modularity(self.G, communities), 6)

            community_of = {
                node: i for i, members in enumerate(communities) for node in members
            }
            # The palette has five colours; wrap around for larger partitions.
            node_colors = [colors[community_of[node] % len(colors)]
                           for node in self.G.nodes()]

            axes1[idx].set_title(f"{len(communities)} Communities (Modularity: {modularity})")
            nx.draw(self.G, pos=pos, node_color=node_colors, node_size=1000,
                    with_labels=True, font_size=10, font_color="black", ax=axes1[idx])

        # Plot modularity trend. 'k' is the Girvan-Newman step, not the number
        # of communities, so label the axis accordingly.
        metrics_df.groupby('k')['modularity'].first().plot(
            ax=axes1[2], color="#F2D140", marker='o',
            title="Modularity Trend")
        axes1[2].set_xlabel("Girvan-Newman step (k)")
        axes1[2].set_ylabel("Modularity")
        axes1[2].grid(True, alpha=0.3)
        # plt.tight_layout() below only reaches the most recent figure.
        fig1.tight_layout()

        # Second figure: Other metrics
        fig2, axes2 = plt.subplots(3, 1, figsize=(15, 20))
        fig2.suptitle('Community Metrics Analysis', fontsize=16)

        metrics = {
            'modularity_ratio': 'Modularity Ratio',
            'volume': 'Volume',
            'edges_cut': 'Edges Cut'
        }

        for idx, (metric, title) in enumerate(metrics.items()):
            metrics_df.groupby('k')[metric].mean().plot(
                ax=axes2[idx], color="#F2D140", marker='o',
                title=f"{title} Trend")
            axes2[idx].set_xlabel("Girvan-Newman step (k)")
            axes2[idx].set_ylabel(title)
            axes2[idx].grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

        return fig1, fig2

def main():
    """Run the Girvan-Newman analysis on the karate club graph end to end.

    Prints the graph size, prints a per-step summary table (first modularity,
    mean of the other metrics, rounded to three decimals) and then draws the
    analyzer's figures.
    """
    karate = nx.karate_club_graph()
    print(f"Network Info: {karate.number_of_nodes()} nodes, {karate.number_of_edges()} edges")

    community_analyzer = CommunityAnalyzer(karate)
    per_community = community_analyzer.analyze_communities()

    # How each metric is collapsed to one value per step 'k'.
    aggregations = {
        'modularity': 'first',
        'modularity_ratio': 'mean',
        'volume': 'mean',
        'edges_cut': 'mean',
    }
    print("\nCommunity Detection Metrics Summary:")
    per_step = per_community.groupby('k').agg(aggregations).round(3)
    print(per_step)

    community_analyzer.plot_metrics(per_community)


if __name__ == "__main__":
    main()

In [None]:
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
from networkx.algorithms.community import girvan_newman

class SocialNetworkCommunityAnalyzer:
    """Girvan-Newman community-detection pipeline for an arbitrary graph.

    Typical use is ``SocialNetworkCommunityAnalyzer(G).run_pipeline()``. The
    individual stages (``detect_communities``, ``analyze_communities``,
    ``plot_metrics``, ``visualize_communities``) can also be called one by one.
    """

    # Column order of the DataFrame built by analyze_communities.
    _METRIC_COLUMNS = ['k', 'modularity', 'volume', 'edges_cut', 'community_size']

    def __init__(self, G):
        """Initialise the analyzer with the input graph ``G``."""
        self.G = G
        self.communities = []   # one partition per Girvan-Newman step
        self.metrics_df = None  # filled by analyze_communities()

    def detect_communities(self):
        """Detect communities with the Girvan-Newman algorithm.

        Stores and returns the list of all partitions produced by the
        algorithm, one per step.
        """
        self.communities = list(girvan_newman(self.G))
        print(f"Đã phát hiện {len(self.communities)} phân vùng cộng đồng.")
        return self.communities

    @staticmethod
    def calculate_modularity(G, communities):
        """Return the modularity of the partition ``communities`` on ``G``."""
        return nx.community.modularity(G, communities)

    @staticmethod
    def calculate_volume(G, community):
        """Return the volume of a community (sum of its nodes' degrees)."""
        return sum(G.degree(node) for node in community)

    @staticmethod
    def calculate_edges_cut(G, community):
        """Return the number of cut edges of a community.

        Counts every (member, neighbour) pair whose neighbour lies outside the
        community.
        """
        nodes = list(community)
        members = set(nodes)  # constant-time membership even when given a list
        return sum(
            1
            for node in nodes
            for neighbor in G.neighbors(node)
            if neighbor not in members
        )

    def analyze_communities(self):
        """Compute per-community metrics for every detected partition.

        Returns
        -------
        pandas.DataFrame
            One row per community with columns ``k`` (1-based step index, the
            same ``k`` accepted by ``visualize_communities``), ``modularity``
            (of the whole partition), ``volume``, ``edges_cut`` and
            ``community_size``. Also stored in ``self.metrics_df``. The
            columns are present even when nothing has been detected yet.
        """
        metrics_data = []
        for k, communities in enumerate(self.communities):
            modularity = self.calculate_modularity(self.G, communities)
            for community in communities:
                metrics_data.append({
                    'k': k + 1,
                    'modularity': modularity,
                    'volume': self.calculate_volume(self.G, community),
                    'edges_cut': self.calculate_edges_cut(self.G, community),
                    'community_size': len(community)
                })
        self.metrics_df = pd.DataFrame(metrics_data, columns=self._METRIC_COLUMNS)
        return self.metrics_df

    def plot_metrics(self):
        """Plot the trend of each metric across Girvan-Newman steps.

        Prints a hint and returns without plotting when no metrics are
        available (``analyze_communities`` not run yet, or run before any
        community was detected).
        """
        if self.metrics_df is None or self.metrics_df.empty:
            print("Hãy phân tích cộng đồng trước khi trực quan hóa!")
            return

        fig, axes = plt.subplots(2, 2, figsize=(15, 10))
        fig.suptitle("Community Analysis Metrics", fontsize=16)

        # (axis, column, aggregation over the communities of a step, colour, title, y label)
        panels = [
            (axes[0, 0], 'modularity', 'first', 'blue', 'Modularity Trend', "Modularity"),
            (axes[0, 1], 'volume', 'mean', 'green', 'Average Volume per Community', "Volume"),
            (axes[1, 0], 'edges_cut', 'mean', 'red', 'Average Edges Cut per Community', "Edges Cut"),
            (axes[1, 1], 'community_size', 'mean', 'purple', 'Average Community Size', "Community Size"),
        ]
        per_step = self.metrics_df.groupby('k')
        for ax, column, how, color, title, ylabel in panels:
            per_step[column].agg(how).plot(ax=ax, color=color, marker='o', title=title)
            # 'k' is the step index, not the number of communities.
            ax.set_xlabel("Girvan-Newman step (k)")
            ax.set_ylabel(ylabel)

        plt.tight_layout()
        plt.show()

    def visualize_communities(self, k):
        """Draw the graph coloured by the partition at step ``k`` (1-based).

        Prints a message and returns without drawing when ``k`` is outside
        ``1 .. len(self.communities)``.
        """
        if k > len(self.communities) or k < 1:
            print(f"Giá trị k không hợp lệ! Vui lòng chọn từ 1 đến {len(self.communities)}.")
            return

        communities = self.communities[k - 1]
        pos = nx.spring_layout(self.G, seed=42)

        # Use a qualitative colormap: evenly spaced hex integers always start
        # at pure black (hiding the node labels) and give near-identical
        # shades for small partitions.
        palette = plt.cm.tab20
        community_of = {
            node: idx for idx, community in enumerate(communities) for node in community
        }
        # Wrap around the palette when there are more communities than colours.
        node_colors = [palette(community_of[node] % palette.N) for node in self.G.nodes()]

        plt.figure(figsize=(10, 10))
        nx.draw(self.G, pos, node_color=node_colors, with_labels=True, node_size=500)
        plt.title(f"Community Visualization at k={k}")
        plt.show()

    def run_pipeline(self, k=2):
        """Run the whole pipeline: detect, analyse and visualise.

        Parameters
        ----------
        k : int, default 2
            Step whose partition is drawn at the end.
        """
        print("Bắt đầu phát hiện cộng đồng...")
        self.detect_communities()

        print("Phân tích cộng đồng...")
        self.analyze_communities()

        print("Trực quan hóa xu hướng chỉ số...")
        self.plot_metrics()

        print("Trực quan hóa đồ thị...")
        self.visualize_communities(k=k)

# Use the class with any graph.
# Note: this cell rebinds the notebook-level names `G` and `analyzer`.
if __name__ == "__main__":
    # Example: the karate club graph
    G = nx.karate_club_graph()
    print(f"Đồ thị có {G.number_of_nodes()} nút và {G.number_of_edges()} cạnh.")

    # run_pipeline performs detection, metric analysis and both visualisations.
    analyzer = SocialNetworkCommunityAnalyzer(G)
    analyzer.run_pipeline()
