# 1. Network Construction & Initial Validation

This notebook covers the first steps of the simulation plan: loading the synthetic data, constructing the social network graph using `networkx`, and performing a basic structural validation to ensure the graph is well-formed.

In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np

# Cap DataFrame previews at 10 rows so displayed frames stay compact.
pd.set_option("display.max_rows", 10)

## Load Datasets

In [None]:
# Locations of the CSV inputs, relative to this notebook.
STUDENTS_PATH = "../data/students.csv"
EDGES_PATH = "../data/edges.csv"

# Read both CSVs. If either file is missing, print a hint and re-raise so the
# notebook stops here instead of failing later with a NameError on
# `students_df` / `edges_df`.
try:
    students_df = pd.read_csv(STUDENTS_PATH)
    edges_df = pd.read_csv(EDGES_PATH)
except FileNotFoundError as e:
    print(f"Error: {e}. Make sure you have run the data generation script first.")
    raise
else:
    print("Successfully loaded datasets.")

In [None]:
# Preview the first five rows of the students table.
students_df.head(n=5)

In [None]:
# Preview the first five rows of the edge list.
edges_df.head(n=5)

## Construct the Graph

In [None]:
G = nx.Graph()

# Add nodes with attributes from the students dataframe.
# `to_dict(orient='records')` keeps each column's own dtype, whereas
# `iterrows()` builds one Series per row and can upcast values (for example an
# integer `id` can become a float when the remaining columns are floats).
G.add_nodes_from(
    (record['id'], record) for record in students_df.to_dict(orient='records')
)

# Add edges with their `layer` and `weight` attributes from the edges dataframe.
edge_columns = ['source', 'target', 'layer', 'weight']
G.add_edges_from(
    (source, target, {'layer': layer, 'weight': weight})
    for source, target, layer, weight
    in edges_df[edge_columns].itertuples(index=False, name=None)
)

# nx.Graph stores a single edge per node pair, so a pair that appears in
# several rows (e.g. once per layer) keeps only the attributes of the last row.
# Surface that instead of letting later per-layer statistics drift silently.
collapsed_rows = len(edges_df) - G.number_of_edges()
if collapsed_rows > 0:
    print(
        f"Warning: {collapsed_rows} edge rows repeat a node pair already present; "
        "only the last row's attributes were kept for each such pair."
    )

## Structural Validation

In [None]:
# Report the size of the constructed graph.
num_nodes = G.number_of_nodes()
num_edges = G.number_of_edges()

# Plain string literal: the message has no placeholders, so no f-string is needed.
print("Graph constructed successfully.")
print(f"Number of nodes: {num_nodes}")
print(f"Number of edges: {num_edges}")

### Degree Distribution

In [None]:
# Degree of every node, in the graph's node-iteration order.
degrees = [degree for _node, degree in G.degree()]

# Histogram of the degrees, drawn on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(degrees, bins=50, alpha=0.75)
ax.set_title("Node Degree Distribution")
ax.set_xlabel("Degree")
ax.set_ylabel("Frequency")
ax.grid(True)
plt.show()

### Clustering Coefficient

In [None]:
# Average clustering coefficient of the graph.
# This can be slow for large graphs. We can sample if needed.
avg_clustering = nx.average_clustering(G)

print("Average Clustering Coefficient: {:.4f}".format(avg_clustering))

### Connected Components

In [None]:
# Materialise the connected components once (each one is a set of nodes) and
# reuse the list for both the count and the largest-component lookup.
components = list(nx.connected_components(G))
num_components = len(components)
print(f"Number of connected components: {num_components}")

# Only report the largest component when the graph is actually fragmented.
if num_components > 1:
    largest_component = max(components, key=len)
    print(f"Size of largest component: {len(largest_component)} nodes")

### Multi-Layer Network Analysis

In [None]:
# Number of edge-list rows per layer, most frequent layer first.
layer_counts = edges_df['layer'].value_counts()
print("Edge Counts per Layer:", layer_counts, sep="\n")

In [None]:
# Bar chart of how many edges fall in each layer.
fig, ax = plt.subplots(figsize=(10, 6))
layer_counts.plot.bar(ax=ax, color=['skyblue', 'coral', 'lightgreen', 'gold'])
ax.set_title('Distribution of Edges Across Layers')
ax.set_xlabel('Layer')
ax.set_ylabel('Number of Edges')
# Tilt the layer names so longer labels stay readable.
plt.setp(ax.get_xticklabels(), rotation=45)
ax.grid(axis='y', linestyle='--')
plt.show()

### Average Degree per Layer

In [None]:
# Group the graph's edges by layer in a single pass, rather than rescanning
# every edge of G once per layer.
edges_by_layer = {}
for u, v, d in G.edges(data=True):
    edges_by_layer.setdefault(d['layer'], []).append((u, v))

# Report the mean degree inside each layer's own subgraph, in the order the
# layers first appear in the edge list.
for layer in edges_df['layer'].unique():
    subgraph = nx.Graph(edges_by_layer.get(layer, []))
    # A layer can end up with no edges in G; skip it rather than divide by zero.
    if subgraph.number_of_nodes() > 0:
        total_degree = sum(degree for _node, degree in subgraph.degree())
        avg_degree = total_degree / subgraph.number_of_nodes()
        print(f"Average degree for layer '{layer}': {avg_degree:.2f}")