# Lesson 2: Graph Representations - Hands-On Notebook

In this notebook, we'll work with different graph representations and learn how to convert between them.

## Learning Objectives
1. Implement different graph representations
2. Convert between representation formats
3. Work with PyTorch Geometric Data objects
4. Understand sparse matrices
5. Add node and edge features
6. Normalize adjacency matrices
7. Batch multiple graphs together

In [None]:
# Import required libraries
import warnings

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import torch
from scipy.sparse import coo_matrix, csr_matrix
from torch_geometric.data import Batch, Data
from torch_geometric.utils import to_networkx, from_networkx
# Silence all Python warnings so library notices do not clutter the lesson output.
# NOTE(review): this blanket filter presumably also hides NumPy RuntimeWarnings
# (e.g. divide-by-zero in the normalization cells) - consider narrowing it.
warnings.filterwarnings('ignore')

# Seed the global NumPy and PyTorch RNGs so the random weights and features
# generated in later cells are the same on every Restart & Run All.
np.random.seed(42)
torch.manual_seed(42)

# Default figure size used by plots that do not pass their own figsize.
plt.rcParams['figure.figsize'] = (12, 8)
%matplotlib inline

## Part 1: Adjacency Matrix Representation

In [None]:
# Create a simple undirected graph: a triangle (0, 1, 2) with a tail 2-3-4
G = nx.Graph()
G.add_edges_from([(0, 1), (0, 2), (1, 2), (2, 3), (3, 4)])

# Convert to a dense adjacency matrix
A = nx.adjacency_matrix(G).todense()
# Derive the size from A instead of hard-coding 5, so the plot below keeps
# working if the edge list above is changed.
matrix_size = A.shape[0]

print("Adjacency Matrix:")
print(A)
print(f"\nShape: {A.shape}")
print(f"Number of nodes: {G.number_of_nodes()}")
print(f"Number of edges: {G.number_of_edges()}")

# Visualize the graph and its adjacency matrix side by side
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# Graph visualization (fixed seed keeps the layout stable between runs)
pos = nx.spring_layout(G, seed=42)
nx.draw(G, pos, ax=axes[0], with_labels=True, node_color='lightblue',
        node_size=1000, font_size=16, font_weight='bold')
axes[0].set_title('Graph', fontsize=14)
axes[0].axis('off')

# Adjacency matrix visualization
im = axes[1].imshow(A, cmap='Blues', aspect='auto')
axes[1].set_xticks(range(matrix_size))
axes[1].set_yticks(range(matrix_size))
axes[1].set_xlabel('Node', fontsize=12)
axes[1].set_ylabel('Node', fontsize=12)
axes[1].set_title('Adjacency Matrix', fontsize=14)
plt.colorbar(im, ax=axes[1])

# Write each entry into its cell; text() takes (x, y), i.e. (column j, row i)
for i in range(matrix_size):
    for j in range(matrix_size):
        axes[1].text(j, i, int(A[i, j]),
                     ha="center", va="center", color="black", fontsize=12)

plt.tight_layout()
plt.show()

In [None]:
# Verify adjacency matrix properties

# Work on a plain ndarray view: depending on the installed networkx/SciPy
# versions, `.todense()` yields either an np.matrix or an np.ndarray, and
# matrix-only attributes such as `.A1` do not exist on ndarrays.
adj = np.asarray(A)

# For undirected graph: A should be symmetric
is_symmetric = np.allclose(adj, adj.T)
print(f"Is adjacency matrix symmetric? {is_symmetric}")

# Diagonal should be zero (no self-loops)
has_self_loops = np.any(np.diag(adj) != 0)
print(f"Has self-loops? {has_self_loops}")

# Sum of row i = degree of node i
print("\nDegree of each node (from adjacency matrix):")
degrees = adj.sum(axis=1)  # 1-D array with one degree per node
for i, deg in enumerate(degrees):
    print(f"  Node {i}: {int(deg)}")

## Part 2: Edge List Representation

In [None]:
# Materialize the graph's edges as a list of (u, v) tuples
edge_list = [*G.edges()]
print("Edge List:")
for pair in edge_list:
    print(f"  {pair}")

# The same edges as a 2-D NumPy array (one row per edge)
edge_array = np.asarray(edge_list)
edge_count = len(edge_list)
print(f"\nEdge array shape: {edge_array.shape}")
print(f"Number of edges: {edge_count}")

In [None]:
# Pair every edge with a random weight, drawing one value per edge in list order
weighted_edges = []
for u, v in edge_list:
    weighted_edges.append((u, v, np.random.rand()))

print("Weighted Edge List:")
for src, dst, weight in weighted_edges:
    print(f"  ({src}, {dst}): weight = {weight:.3f}")

# Build a graph whose edges carry those weights in the 'weight' attribute
G_weighted = nx.Graph()
G_weighted.add_weighted_edges_from(weighted_edges)

# Draw the weighted graph
plt.figure(figsize=(10, 8))
pos = nx.spring_layout(G_weighted, seed=42)
nx.draw(
    G_weighted,
    pos,
    with_labels=True,
    node_color='lightcoral',
    node_size=1000,
    font_size=16,
    font_weight='bold',
)

# Label each edge with its weight, rounded to two decimals
edge_labels = {}
for src, dst, attrs in G_weighted.edges(data=True):
    edge_labels[(src, dst)] = f"{attrs['weight']:.2f}"
nx.draw_networkx_edge_labels(G_weighted, pos, edge_labels=edge_labels, font_size=10)

plt.title('Weighted Graph from Edge List', fontsize=14)
plt.axis('off')
plt.show()

## Part 3: Adjacency List Representation

In [None]:
# Build the adjacency list: one entry per node holding that node's neighbors
adj_list = {}
for node in G.nodes():
    adj_list[node] = list(G.adj[node])

print("Adjacency List:")
for node, neighbors in adj_list.items():
    print(f"  Node {node}: {neighbors}")

# Each undirected edge is listed under both endpoints, so entries == 2 * edges
total_entries = sum(map(len, adj_list.values()))
edge_count = G.number_of_edges()
print(f"\nTotal adjacency list entries: {total_entries}")
print(f"Number of edges: {edge_count}")
print(f"Entries = 2 × edges? {total_entries == 2 * edge_count}")

In [None]:
# Weighted adjacency list: node -> list of (neighbor, edge weight) pairs
weighted_adj_list = {
    node: [(nbr, attrs['weight']) for nbr, attrs in G_weighted[node].items()]
    for node in G_weighted.nodes()
}

print("Weighted Adjacency List:")
for node, neighbors in weighted_adj_list.items():
    print(f"  Node {node}:")
    for neighbor, weight in neighbors:
        print(f"    → {neighbor} (weight: {weight:.3f})")

## Part 4: Converting Between Representations

In [None]:
def edge_list_to_adj_matrix(edge_list, num_nodes):
    """
    Build a dense, symmetric 0/1 adjacency matrix from (u, v) pairs.

    Args:
        edge_list: iterable of (u, v) node-index pairs
        num_nodes: number of rows and columns of the result

    Returns:
        (num_nodes, num_nodes) float NumPy array with a 1 at [u, v] and
        [v, u] for every edge and 0 everywhere else
    """
    adjacency = np.zeros((num_nodes, num_nodes))
    for src, dst in edge_list:
        # Undirected graph: record the edge in both directions
        adjacency[src, dst] = adjacency[dst, src] = 1
    return adjacency

def adj_matrix_to_edge_list(A):
    """
    Convert an undirected adjacency matrix to an edge list.

    Args:
        A: square adjacency matrix (np.ndarray or np.matrix); any entry > 0
           counts as an edge

    Returns:
        list of (u, v) tuples of plain Python ints with u < v, in row-major
        order. Each undirected edge is reported once; diagonal entries
        (self-loops) are not reported because of the u < v filter.
    """
    # Normalize to a plain ndarray so np.matrix input behaves the same way
    A = np.asarray(A)
    rows, cols = np.nonzero(A > 0)
    # Keep only the upper triangle to avoid listing (u, v) and (v, u) twice,
    # and cast NumPy integers to int so the tuples print and compare cleanly.
    return [(int(i), int(j)) for i, j in zip(rows, cols) if i < j]

def adj_matrix_to_adj_list(A):
    """
    Convert an adjacency matrix to an adjacency list.

    Args:
        A: square adjacency matrix (np.ndarray or np.matrix); any entry > 0
           counts as an edge

    Returns:
        dict mapping each row index i to the list of column indices j
        (plain Python ints) for which A[i, j] > 0
    """
    # np.matrix rows stay 2-D when indexed, which would make np.where(...)[0]
    # return row indices (all zeros) instead of neighbors; use an ndarray.
    A = np.asarray(A)
    adj_list = {}
    for i, row in enumerate(A):
        adj_list[i] = np.where(row > 0)[0].tolist()
    return adj_list

# Round-trip a small triangle through each converter
test_edges = [(0, 1), (0, 2), (1, 2)]
print("Original edge list:", test_edges)

# Edge list -> adjacency matrix
A_converted = edge_list_to_adj_matrix(test_edges, num_nodes=3)
print("\nConverted to adjacency matrix:", A_converted, sep="\n")

# Adjacency matrix -> edge list
edges_back = adj_matrix_to_edge_list(A_converted)
print("\nConverted back to edge list:", edges_back)

# Adjacency matrix -> adjacency list
adj_list_converted = adj_matrix_to_adj_list(A_converted)
print("\nConverted to adjacency list:", adj_list_converted)

## Part 5: Sparse Matrix Representations

In [None]:
# Create a larger sparse graph
n_nodes = 100
G_large = nx.erdos_renyi_graph(n_nodes, 0.05, seed=42)

print(f"Large graph: {n_nodes} nodes, {G_large.number_of_edges()} edges")

# nx.adjacency_matrix() takes no `format` argument; build the SciPy sparse
# adjacency once and convert it explicitly to each storage format below.
A_sparse = nx.adjacency_matrix(G_large)

# Dense adjacency matrix: stores every one of the n_nodes^2 entries
A_dense = A_sparse.todense()
dense_size = A_dense.nbytes
print(f"\nDense matrix memory: {dense_size:,} bytes ({dense_size/1024:.2f} KB)")

# Sparse adjacency matrix (COO format): values + row indices + column indices
A_sparse_coo = A_sparse.tocoo()
sparse_coo_size = (A_sparse_coo.data.nbytes +
                   A_sparse_coo.row.nbytes +
                   A_sparse_coo.col.nbytes)
print(f"Sparse COO memory: {sparse_coo_size:,} bytes ({sparse_coo_size/1024:.2f} KB)")

# Sparse adjacency matrix (CSR format): values + column indices + row pointers
A_sparse_csr = A_sparse.tocsr()
sparse_csr_size = (A_sparse_csr.data.nbytes +
                   A_sparse_csr.indices.nbytes +
                   A_sparse_csr.indptr.nbytes)
print(f"Sparse CSR memory: {sparse_csr_size:,} bytes ({sparse_csr_size/1024:.2f} KB)")

# Compression ratio
print(f"\nCompression ratio (dense/sparse): {dense_size/sparse_coo_size:.2f}x")

# Sparsity: fraction of matrix entries that are zero
sparsity = 1 - (A_sparse_coo.nnz / (n_nodes ** 2))
print(f"Sparsity: {sparsity:.4f} ({sparsity*100:.2f}% zeros)")

In [None]:
# Plot one marker per stored (non-zero) entry of the sparse adjacency matrix
plt.figure(figsize=(10, 10))
plt.spy(A_sparse_coo, markersize=2, color='blue')
plt.title('Sparse Adjacency Matrix Structure', fontsize=14)
plt.xlabel('Node Index', fontsize=12)
plt.ylabel('Node Index', fontsize=12)
plt.show()

# Report how few of the entries are actually stored
nonzero_count = A_sparse_coo.nnz
print(f"Non-zero elements: {nonzero_count}")
print(f"Total elements: {n_nodes * n_nodes}")
print(f"Percentage non-zero: {nonzero_count / (n_nodes**2) * 100:.2f}%")

## Part 6: PyTorch Geometric Format

In [None]:
# Build edge_index in COO layout (PyG style): row 0 = sources, row 1 = targets
edge_pairs = list(G.edges())
edge_index = torch.tensor(edge_pairs, dtype=torch.long).t().contiguous()

print("Edge Index (COO format):")
print(edge_index)
print(f"\nShape: {edge_index.shape}")
print(f"Format: [2, num_edges]")

# An undirected graph needs each edge in both directions: append the
# source/target-swapped copy of every column
reversed_edges = edge_index.flip(0)
edge_index_undirected = torch.cat((edge_index, reversed_edges), dim=1)
print(f"\nUndirected edge_index shape: {edge_index_undirected.shape}")
print(edge_index_undirected)

In [None]:
# Node feature matrix: one row of random features per node
num_nodes = G.number_of_nodes()
num_features = 16

# Features are drawn from a standard normal distribution
x = torch.randn((num_nodes, num_features))

print(f"Node feature matrix shape: {x.shape}")
print(f"First node features: {x[0, :5]}...")  # Show first 5 features

In [None]:
# Bundle node features and connectivity into a single PyG Data object
data = Data(x=x, edge_index=edge_index_undirected)

print("PyTorch Geometric Data object:")
print(data)

# Summary statistics exposed by the Data object, printed as "label: value"
data_summary = [
    ("\nNumber of nodes", data.num_nodes),
    ("Number of edges", data.num_edges),
    ("Number of features", data.num_node_features),
    ("Has isolated nodes", data.has_isolated_nodes()),
    ("Has self-loops", data.has_self_loops()),
    ("Is undirected", data.is_undirected()),
]
for label, value in data_summary:
    print(f"{label}: {value}")

In [None]:
# Edge features: one random 4-dimensional vector per column of edge_index_undirected
num_edges = edge_index_undirected.size(1)
edge_attr = torch.randn((num_edges, 4))  # 4 features per edge

# Data object carrying node features, connectivity and edge features
data_complete = Data(x=x, edge_index=edge_index_undirected, edge_attr=edge_attr)

print("Complete Data object with edge features:")
print(data_complete)
print(f"\nEdge features shape: {data_complete.edge_attr.shape}")

## Part 7: Converting NetworkX to PyTorch Geometric

In [None]:
# Store an 8-dimensional random feature vector under 'x' on every node of G
# (this writes into G's node attribute dicts in place)
for _, node_attrs in G.nodes(data=True):
    node_attrs['x'] = torch.randn(8)  # 8-dim features

# Convert the attributed NetworkX graph into a PyG Data object
data_from_nx = from_networkx(G)

print("Data converted from NetworkX:")
print(data_from_nx)
print(f"\nNode features shape: {data_from_nx.x.shape}")
print(f"Edge index shape: {data_from_nx.edge_index.shape}")

In [None]:
# Convert the PyG Data object back into an undirected NetworkX graph
G_from_pyg = to_networkx(data_from_nx, to_undirected=True)

node_count = G_from_pyg.number_of_nodes()
edge_count = G_from_pyg.number_of_edges()
print(f"Graph from PyG: {node_count} nodes, {edge_count} edges")

# Draw the round-tripped graph
plt.figure(figsize=(10, 8))
pos = nx.spring_layout(G_from_pyg, seed=42)
nx.draw(
    G_from_pyg,
    pos,
    with_labels=True,
    node_color='lightgreen',
    node_size=1000,
    font_size=16,
    font_weight='bold',
)
plt.title('Graph Converted from PyG Data', fontsize=14)
plt.axis('off')
plt.show()

## Part 8: Degree Matrix and Normalization

In [None]:
# Get adjacency matrix
A = nx.adjacency_matrix(G).todense()

# Degree of node i = sum of row i of A; D holds the degrees on its diagonal
degrees = np.array(A.sum(axis=1)).flatten()
D = np.diag(degrees)

print("Degree Matrix D:")
print(D)

# Inverse degree matrix. A node with degree 0 (isolated node) would make
# 1 / degree infinite, so invert only the non-zero degrees and keep 0 for
# the rest. For this graph every degree is positive, so the result is the
# plain reciprocal.
inv_degrees = np.zeros(degrees.shape, dtype=float)
np.divide(1.0, degrees, out=inv_degrees, where=degrees != 0)
D_inv = np.diag(inv_degrees)
print("\nInverse Degree Matrix D^(-1):")
print(D_inv)

In [None]:
# Row normalization: D^(-1) * A divides every row of A by that node's degree
A_row_norm = D_inv @ A

print("Row-normalized adjacency matrix:")
print(A_row_norm)

# Sanity check: after dividing by the degree, each row should add up to 1
row_sums = np.asarray(A_row_norm.sum(axis=1)).ravel()
print(f"\nRow sums (should be 1.0): {row_sums}")

In [None]:
# Symmetric normalization: D^(-1/2) * A * D^(-1/2)
# Guard against isolated nodes: a degree of 0 would give 1 / sqrt(0) = inf,
# so compute the inverse square root only where the degree is non-zero and
# leave 0 elsewhere.
inv_sqrt_degrees = np.zeros(degrees.shape, dtype=float)
np.divide(1.0, np.sqrt(degrees), out=inv_sqrt_degrees, where=degrees != 0)
D_sqrt_inv = np.diag(inv_sqrt_degrees)
A_sym_norm = D_sqrt_inv @ A @ D_sqrt_inv

print("Symmetrically normalized adjacency matrix:")
print(A_sym_norm)

# Verify symmetry: scaling rows and columns by the same factors keeps A symmetric
is_symmetric = np.allclose(A_sym_norm, A_sym_norm.T)
print(f"\nIs symmetric? {is_symmetric}")

In [None]:
# Add self-loops by putting a 1 on every diagonal entry: A + I
I = np.identity(A.shape[0])
A_with_self_loops = A + I

print("Adjacency matrix with self-loops:")
print(A_with_self_loops)

# Show the matrix before and after adding self-loops, side by side
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

panels = [
    (axes[0], A, 'Original A'),
    (axes[1], A_with_self_loops, 'A with Self-loops'),
]
images = []
for panel_ax, matrix, panel_title in panels:
    image = panel_ax.imshow(matrix, cmap='Blues', aspect='auto')
    panel_ax.set_title(panel_title, fontsize=14)
    plt.colorbar(image, ax=panel_ax)
    images.append(image)
im1, im2 = images

plt.tight_layout()
plt.show()

## Part 9: Batching Multiple Graphs

In [None]:
# Create multiple small random graphs to batch together.
# Loop-local names carry a `graph_` prefix so this cell does not overwrite
# `x`, `edge_index`, `num_nodes`, `num_edges` or `data` from Part 6;
# re-running those earlier cells would otherwise pick up the wrong values.
# (`Batch` is imported with the other imports at the top of the notebook.)
data_list = []

for graph_id in range(3):
    # Random size: 3 to 6 nodes, and between n and 2n - 1 edges
    graph_num_nodes = np.random.randint(3, 7)
    graph_num_edges = np.random.randint(graph_num_nodes, graph_num_nodes * 2)

    # Random edges: both endpoints are drawn independently, so duplicate
    # edges and self-loops can occur
    graph_edge_index = torch.randint(0, graph_num_nodes, (2, graph_num_edges))

    # Random 8-dimensional node features
    graph_x = torch.randn(graph_num_nodes, 8)

    # Graph-level label: simply the index of the graph
    graph_y = torch.tensor([graph_id])

    data_list.append(Data(x=graph_x, edge_index=graph_edge_index, y=graph_y))

# Display individual graphs
for graph_id, graph in enumerate(data_list):
    print(f"Graph {graph_id}: {graph}")

In [None]:
# Merge the individual graphs into one Batch object
batch = Batch.from_data_list(data_list)

print("Batched graphs:")
print(batch)

# Totals across all graphs in the batch, printed as "label: value"
batch_summary = [
    ("\nTotal nodes", batch.num_nodes),
    ("Total edges", batch.num_edges),
    ("Number of graphs", batch.num_graphs),
    ("\nBatch vector", batch.batch),
]
for label, value in batch_summary:
    print(f"{label}: {value}")
print("(indicates which graph each node belongs to)")

## Exercises

In [None]:
# Exercise 1: Write a function to convert adjacency list to edge list
def adj_list_to_edge_list(adj_list):
    """
    Exercise stub: convert an adjacency list to an edge list.

    This function is intentionally left unimplemented; until the `pass`
    below is replaced it returns None.

    Args:
        adj_list: dict mapping nodes to list of neighbors,
            e.g. {0: [1, 2], 1: [0, 2], 2: [0, 1, 3], 3: [2]}

    Returns:
        list of tuples (u, v) representing edges

    Hint: in an undirected adjacency list every edge appears twice (once
    under each endpoint), so decide whether the edge list should keep both
    directions or only one, e.g. by keeping only pairs with u < v.
    """
    # YOUR CODE HERE
    pass

# Test your function
# test_adj = {0: [1, 2], 1: [0, 2], 2: [0, 1, 3], 3: [2]}
# edges = adj_list_to_edge_list(test_adj)
# print(edges)

In [None]:
# Exercise 2: Compare memory usage of dense vs sparse for different graph sizes
# Create graphs with n=10, 50, 100, 500, 1000 nodes and p=0.05
# Plot memory usage vs number of nodes

# YOUR CODE HERE


In [None]:
# Exercise 3: Implement normalized adjacency matrix with self-loops
# A_norm = D^(-1/2) * (A + I) * D^(-1/2)
# NOTE(review): D is presumably the degree matrix of (A + I), not of A, as in
# the usual GCN formulation - confirm against the lesson text.

def normalize_adjacency_with_self_loops(A):
    """
    Exercise stub: compute the normalized adjacency matrix with self-loops.

    This function is intentionally left unimplemented; until the `pass`
    below is replaced it returns None.

    Args:
        A: adjacency matrix

    Returns:
        Normalized adjacency matrix

    Hint: if the degrees are computed from A + I, every node has degree of
    at least 1, so the inverse square root is always finite.
    """
    # YOUR CODE HERE
    pass

# Test your function
# A_test = nx.adjacency_matrix(G).todense()
# A_normalized = normalize_adjacency_with_self_loops(A_test)
# print(A_normalized)

In [None]:
# Exercise 4: Create a PyG Data object from scratch for a molecular graph
# Represent caffeine molecule (or simple molecule of your choice)
# Include atom types as node features and bond types as edge features

# YOUR CODE HERE


In [None]:
# Exercise 5: Visualize how normalization affects feature aggregation
# Create a star graph (one center node connected to many peripheral nodes)
# Show how aggregation differs with/without normalization

# YOUR CODE HERE


## Summary

In this notebook, you learned:

1. Different graph representations (adjacency matrix, edge list, adjacency list)
2. How to convert between representations
3. Sparse matrix formats and their memory efficiency
4. PyTorch Geometric's COO format and Data objects
5. Adding node and edge features
6. Normalizing adjacency matrices
7. Batching multiple graphs together

**Next Steps**: In Lesson 3, we'll learn about message passing - the core operation that makes GNNs work!

---

**Congratulations on completing Lesson 2!**