# Measure 4: Network Centrality ("The Hub")

## Goal
Create a social network graph to visualize character interactions.

## The Metrics
* **Degree Centrality:** Measures the number of direct connections a character has (the "popularity" score).
* **Betweenness Centrality:** Measures how often a character lies on the shortest path between two other characters, i.e. acts as a bridge between groups (the "broker" score).

## The Visualization
* **Nodes (Dots):** Characters. Size = degree centrality.
* **Edges (Lines):** Co-occurrence in the same sentence. Width = how often the pair co-occurs.
* **Target Outcome:** Anna should be central and large; Levin should be peripheral/isolated.

In [None]:
# INSTALL REQUIRED LIBRARIES
# We need 'networkx' for graph calculations, 'pandas' for the CSV export,
# 'matplotlib' for plotting and 'nltk' for sentence/word tokenization
%pip install networkx pandas matplotlib nltk

In [None]:
import os
import itertools
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize

# --- SETUP ---
# NLTK tokenizer data required by sent_tokenize / word_tokenize
nltk.download('punkt')
nltk.download('punkt_tab')

# PATHS
DATA_DIR = '../data'
RESULTS_DIR = '../results'

# exist_ok=True makes this safe to re-run and avoids the check-then-create race
os.makedirs(RESULTS_DIR, exist_ok=True)

# CONFIGURATION
# We strictly map the characters mentioned in the assignment prompt.
# To make the graph "Hub" effect visible, we include key connecting characters (like Stiva/Dolly).
# NOTE(review): names are matched as single lowercased tokens, so each entry must
# be spelled exactly as it appears in the text file being analysed -- confirm the
# spellings below against the edition in DATA_DIR (accents, anglicized forms).
BOOKS_CONFIG = {
    "Anna Karenina": {
        "filename": "The Project Gutenberg eBook of Anna Karenina, by Leo Tolstoy.txt",
        # Added Stiva/Dolly/Betsy to ensure the network has enough nodes to show structure
        "characters": ["Anna", "Vronsky", "Levin", "Kitty", "Karenin", "Stiva", "Dolly", "Betsy"]
    },
    "War and Peace": {
        "filename": "The Project Gutenberg eBook of War and Peace, by Leo Tolstoy.txt",
        "characters": ["Pierre", "Natasha", "Andrei", "Rostov", "Mary", "Helene", "Anatole", "Kutuzov"]
    }
}

## 2. Network Building Functions
These functions scan the text sentence-by-sentence. If two characters appear in the same sentence, a link (edge) is created between them.

In [None]:
def load_text(filename):
    """
    Read a book's text file from DATA_DIR.

    Args:
        filename: Name of the file inside DATA_DIR.

    Returns:
        The file contents as a string, or "" when the file does not exist
        (an error message is printed in that case).
    """
    filepath = os.path.join(DATA_DIR, filename)
    try:
        # 'utf-8-sig' decodes plain UTF-8 exactly like 'utf-8' but also drops a
        # leading byte-order mark, so it never ends up glued to the first word.
        with open(filepath, 'r', encoding='utf-8-sig') as f:
            return f.read()
    except FileNotFoundError:
        print(f"Error: {filepath} not found.")
        return ""

def build_interaction_network(text, char_list):
    """
    Build a weighted co-occurrence graph of characters.

    Every name in char_list becomes a node. Each sentence of text is
    tokenized in lowercase; whenever two or more names appear among its
    tokens, every pair of them gets an edge whose 'weight' counts the
    sentences they share.

    Returns the resulting NetworkX Graph.
    """
    graph = nx.Graph()

    # Register every character up front so unconnected ones still show up
    graph.add_nodes_from(char_list)

    # Lowercased token -> name as written in char_list
    lowered_to_name = {name.lower(): name for name in char_list}

    sentences = sent_tokenize(text)
    print(f"Scanning {len(sentences)} sentences for interactions...")

    for sentence in sentences:
        # A set gives cheap membership tests and ignores repeated mentions
        words = set(word_tokenize(sentence.lower()))

        mentioned = [
            name for key, name in lowered_to_name.items() if key in words
        ]

        # A lone character (or none) produces no interaction
        if len(mentioned) < 2:
            continue

        for first, second in itertools.combinations(mentioned, 2):
            if not graph.has_edge(first, second):
                # First shared sentence for this pair
                graph.add_edge(first, second, weight=1)
            else:
                # Seen before: strengthen the existing tie
                graph[first][second]['weight'] += 1

    return graph

## 3. Visualization & Analysis
This step calculates the centrality scores, saves them to a CSV file, and draws the network. The **node size** scales with the character's degree centrality.

In [None]:
def analyze_and_visualize(G, book_title):
    """
    Compute centrality metrics for G, export them to CSV, and draw the network.

    Args:
        G: NetworkX graph whose edges carry a 'weight' attribute.
        book_title: Title shown in the plot heading; its first word,
            lowercased, prefixes the output file names.

    Side effects:
        Writes '<prefix>_network_metrics.csv' and '<prefix>_network_graph.png'
        into RESULTS_DIR, prints the top rows of the metrics table, shows the
        figure and then closes it.
    """
    # Smallest marker size, so zero-degree characters stay visible as dots
    min_node_size = 100

    # 1. Calculate Centrality Metrics
    degree_centrality = nx.degree_centrality(G)
    betweenness = nx.betweenness_centrality(G)

    # Look every metric up by node so the columns are aligned by construction
    # rather than by the iteration order of two separate dicts.
    nodes = list(G.nodes())

    # 2. Export Data to CSV
    df = pd.DataFrame({
        'Character': nodes,
        'Degree_Centrality': [degree_centrality[n] for n in nodes],
        'Betweenness_Centrality': [betweenness[n] for n in nodes],
    }).sort_values(by='Degree_Centrality', ascending=False)

    safe_title = book_title.split()[0].lower()
    csv_path = f"{RESULTS_DIR}/{safe_title}_network_metrics.csv"
    df.to_csv(csv_path, index=False)
    print(f"Saved metrics to {csv_path}")
    print(df.head(10))

    # 3. Draw the Graph
    fig = plt.figure(figsize=(12, 10))

    # Layout: Spring layout uses force-directed algorithms
    # (Central nodes are pulled to center, isolated ones pushed out)
    pos = nx.spring_layout(G, k=0.8, seed=42)

    # Node size follows degree centrality; the factor 5000 makes dots visible
    node_sizes = [
        max(degree_centrality[n] * 5000, min_node_size) for n in nodes
    ]

    # Determine Edge Width based on weight (frequency of interaction)
    weights = [G[u][v]['weight'] for u, v in G.edges()]
    # Normalize edge widths so they aren't too thick
    max_weight = max(weights) if weights else 1
    edge_widths = [(w / max_weight) * 5 for w in weights]

    # Draw Nodes
    nx.draw_networkx_nodes(G, pos, node_size=node_sizes, node_color="skyblue", alpha=0.9)

    # Draw Edges
    nx.draw_networkx_edges(G, pos, width=edge_widths, alpha=0.5, edge_color="gray")

    # Draw Labels
    nx.draw_networkx_labels(G, pos, font_size=12, font_weight="bold")

    plt.title(f"Network Centrality: {book_title}\n(Node Size = Centrality)", fontsize=15)
    plt.axis('off')  # Turn off axis numbers

    # Save Graph
    img_path = f"{RESULTS_DIR}/{safe_title}_network_graph.png"
    plt.savefig(img_path)
    print(f"Graph saved to {img_path}")
    plt.show()

    # Release the figure; this function runs once per book and open figures
    # would otherwise pile up.
    plt.close(fig)

## 4. Main Execution

In [None]:
def run_network_analysis():
    """
    Run the full pipeline for every book in BOOKS_CONFIG.

    For each book: load the text, build the co-occurrence graph and, when the
    graph has at least one edge, compute metrics and draw it. Books whose text
    could not be loaded, or that yield no edges, are skipped.
    """
    print("Starting Network Analysis...")

    for title in BOOKS_CONFIG:
        settings = BOOKS_CONFIG[title]
        print(f"\n--- Processing {title} ---")

        raw_text = load_text(settings['filename'])
        if not raw_text:
            # Nothing to analyse for this book
            continue

        # Build Graph
        network = build_interaction_network(raw_text, settings['characters'])

        if network.number_of_edges() > 0:
            # Visualize
            analyze_and_visualize(network, title)
        else:
            print("No interactions found! Check character names or file content.")

run_network_analysis()