# 5. Network Analysis

This notebook performs a more in-depth analysis of the social network. We will calculate various network centrality measures to identify the most influential students and use community detection algorithms to identify the key communities in the student body.

In [None]:
import sys
import os
sys.path.append(os.path.abspath('..'))
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import community as community_louvain

# Cap DataFrame previews at 10 rows so cell outputs stay short.
pd.set_option("display.max_rows", 10)

## Load Datasets

In [None]:
# Locations of the input CSVs, relative to the notebook's working directory.
STUDENTS_PATH = "../data/students.csv"
EDGES_PATH = "../data/edges.csv"

# Load both tables. Every later cell needs `students_df` and `edges_df`, so a
# missing file must stop execution here: print the actionable hint, then
# re-raise instead of continuing and failing later with an unrelated NameError.
try:
    students_df = pd.read_csv(STUDENTS_PATH)
    edges_df = pd.read_csv(EDGES_PATH)
except FileNotFoundError as e:
    print(f"Error: {e}. Make sure you have run the data generation script first.")
    raise
else:
    print("Successfully loaded datasets.")

## Construct the Graph

In [None]:
# Build the social network as a simple undirected graph.
G = nx.Graph()

# One node per row of the students table, keyed by the 'id' column; the full
# row (including 'id') is stored as that node's attribute dict.
G.add_nodes_from(
    (student['id'], student.to_dict())
    for _, student in students_df.iterrows()
)

# One edge per row of the edges table, carrying its 'layer' and 'weight'.
# If the same source/target pair appears more than once, the later row's
# attributes overwrite the earlier ones (nx.Graph holds a single edge per pair).
G.add_edges_from(
    (tie['source'], tie['target'], {'layer': tie['layer'], 'weight': tie['weight']})
    for _, tie in edges_df.iterrows()
)

## Centrality Measures

In [None]:
# Compute three centrality scores for every node; each call returns a
# {node: score} dict.
degree_centrality = nx.degree_centrality(G)
betweenness_centrality = nx.betweenness_centrality(G)
# Eigenvector centrality uses power iteration and raises
# PowerIterationFailedConvergence if it does not converge within `max_iter`
# (default 100). A higher cap only matters when the default would have failed;
# converged results are unchanged because the tolerance is the same.
eigenvector_centrality = nx.eigenvector_centrality(G, max_iter=1000)

# Store the scores on the graph so later cells can read them as node attributes.
nx.set_node_attributes(G, degree_centrality, 'degree_centrality')
nx.set_node_attributes(G, betweenness_centrality, 'betweenness_centrality')
nx.set_node_attributes(G, eigenvector_centrality, 'eigenvector_centrality')

# Get the top 10 most influential students by each centrality measure
# (lists of (node, score) pairs, highest score first).
top_10_degree = sorted(degree_centrality.items(), key=lambda x: x[1], reverse=True)[:10]
top_10_betweenness = sorted(betweenness_centrality.items(), key=lambda x: x[1], reverse=True)[:10]
top_10_eigenvector = sorted(eigenvector_centrality.items(), key=lambda x: x[1], reverse=True)[:10]

# The leading "\n" puts a blank line between sections; it must be an escape
# sequence, since a raw line break inside a quoted string is a syntax error.
print("Top 10 most influential students by degree centrality:")
print(top_10_degree)
print("\nTop 10 most influential students by betweenness centrality:")
print(top_10_betweenness)
print("\nTop 10 most influential students by eigenvector centrality:")
print(top_10_eigenvector)

## Community Detection

In [None]:
# Louvain community detection is randomized, so an unseeded run can return a
# different partition (and community count) on every execution. Fix the seed
# so Restart & Run All reproduces the same result.
LOUVAIN_SEED = 42

# `partition` maps each node to an integer community label.
partition = community_louvain.best_partition(G, random_state=LOUVAIN_SEED)
nx.set_node_attributes(G, partition, 'community')

# Get the number of communities
num_communities = len(set(partition.values()))
print(f"Number of communities: {num_communities}")

## Network Visualization

In [None]:
# Seed for the force-directed layout so the figure is identical across runs.
LAYOUT_SEED = 42
# Degree centrality lies in [0, 1]; scale it up to a visible marker area.
NODE_SIZE_SCALE = 10000

plt.figure(figsize=(12, 12))
pos = nx.spring_layout(G, iterations=100, seed=LAYOUT_SEED)

# Build sizes and colors from one explicit node order and pass that same order
# as `nodelist`, so each node gets its own size and community color without
# depending on the insertion order of two independently built dicts.
nodelist = list(G.nodes())
node_sizes = [degree_centrality[node] * NODE_SIZE_SCALE for node in nodelist]
node_colors = [partition[node] for node in nodelist]

# Color nodes by community
cmap = plt.get_cmap('viridis', num_communities)
nx.draw_networkx_nodes(
    G,
    pos,
    nodelist=nodelist,
    node_size=node_sizes,
    cmap=cmap,
    node_color=node_colors,
)

# Draw edges (faint, so dense regions do not hide the nodes)
nx.draw_networkx_edges(G, pos, alpha=0.1)

plt.title('Social Network Graph')
plt.show()