# Measure 4: Network Centrality (Two Separate Graphs)

## Goal
Generate distinct social network graphs for:
1.  **Anna Karenina:** Focusing on the split between Anna's circle and Levin's circle.
2.  **War and Peace:** Focusing on the connections between the major families (Rostovs, Bolkonskys, Bezukhovs).

## Output
Two separate PNG images (one per novel) are saved to the `../results` folder and displayed below.

In [None]:
# Install necessary libraries if not already installed
%pip install networkx pandas matplotlib nltk

In [None]:
import os
import itertools
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize

# --- 1. SETUP & DEBUGGING ---
# Download the NLTK 'punkt' and 'punkt_tab' resources (presumably the
# tokenizer data needed by sent_tokenize / word_tokenize imported above).
nltk.download('punkt')
nltk.download('punkt_tab')

# Input texts are read from DATA_DIR; generated figures are written to RESULTS_DIR.
# Both are relative paths, so they resolve against the current working directory.
DATA_DIR = '../data'
RESULTS_DIR = '../results'

# exist_ok=True creates the folder only when missing, without a separate
# (race-prone) existence check. If RESULTS_DIR exists but is not a directory,
# this raises here instead of failing later when a figure is saved.
os.makedirs(RESULTS_DIR, exist_ok=True)

# DEBUG: Print all files found in the data folder to fix 'File Not Found' errors.
# The messages are built from DATA_DIR so they stay correct if the path changes.
print(f"Checking files in '{DATA_DIR}' folder:")
try:
    files = os.listdir(DATA_DIR)
except FileNotFoundError:
    print(f"ERROR: '{DATA_DIR}' folder not found. Are you running this from the 'notebooks' folder?")
else:
    for f in files:
        print(f" - {f}")

# --- 2. CONFIGURATION ---
# One entry per novel, keyed by the display title. Each entry holds:
#   filename   - name of the text file inside DATA_DIR
#   characters - names that become graph nodes
# If loading fails, copy the EXACT filename from the list printed above.
# NOTE(review): names are matched as exact lowercase tokens, so each spelling
# must match the edition in DATA_DIR (some editions may print accented or
# anglicised forms of these names) - TODO confirm against the actual files.
BOOKS_CONFIG = {
    "Anna Karenina": dict(
        filename="The Project Gutenberg eBook of Anna Karenina, by Leo Tolstoy.txt",
        characters=[
            "Anna",
            "Vronsky",
            "Levin",
            "Kitty",
            "Karenin",
            "Stiva",
            "Dolly",
            "Betsy",
        ],
    ),
    "War and Peace": dict(
        filename="The Project Gutenberg eBook of War and Peace, by Leo Tolstoy.txt",
        characters=[
            "Pierre",
            "Natasha",
            "Andrei",
            "Rostov",
            "Mary",
            "Helene",
            "Anatole",
            "Kutuzov",
        ],
    ),
}

## 3. Network Processing Functions

In [None]:
def load_text(filename):
    """Return the contents of a file in DATA_DIR, decoded as UTF-8.

    Args:
        filename: Name of the file, relative to DATA_DIR.

    Returns:
        The full file contents as a string, or "" when the file does not
        exist (an error line is printed in that case).
    """
    source = os.path.join(DATA_DIR, filename)
    try:
        handle = open(source, 'r', encoding='utf-8')
    except FileNotFoundError:
        # A missing file is reported and signalled with an empty string
        # rather than an exception.
        print(f"ERROR: Could not find file: {filename}")
        return ""
    with handle:
        return handle.read()

def build_graph(text, characters):
    """Build a sentence-level co-occurrence graph of the given characters.

    Every character becomes a node. For each sentence in which two or more
    characters are mentioned, the 'weight' of the edge between each pair of
    mentioned characters is increased by one.

    Args:
        text: The full text to scan.
        characters: Character names; matching is case-insensitive and done
            against individual tokens of each sentence.

    Returns:
        An undirected networkx Graph with a 'weight' attribute on each edge.
    """
    graph = nx.Graph()
    graph.add_nodes_from(characters)

    # Lowercased name -> name as given, so matches map back to node labels.
    lookup = {name.lower(): name for name in characters}

    sentence_list = sent_tokenize(text)
    print(f"Scanning {len(sentence_list)} sentences...")

    for sentence in sentence_list:
        # A set gives constant-time membership tests and ignores repeated mentions.
        words = set(word_tokenize(sentence.lower()))
        present = [label for key, label in lookup.items() if key in words]

        # A sentence with fewer than two characters contributes no edges.
        if len(present) < 2:
            continue

        for first, second in itertools.combinations(present, 2):
            if not graph.has_edge(first, second):
                graph.add_edge(first, second, weight=1)
            else:
                graph[first][second]['weight'] += 1

    return graph

def draw_network(G, title, filename_prefix):
    """Draw the character graph, save it as a PNG in RESULTS_DIR, and show it.

    Node size is proportional to each node's degree centrality. All edges
    are drawn with the same width.

    Args:
        G: The networkx graph to draw.
        title: Human-readable name used in the figure title.
        filename_prefix: Prefix of the output file; the image is saved as
            "<filename_prefix>_network.png" inside RESULTS_DIR.
    """
    # Keep a handle on the figure so it can be released explicitly at the end.
    fig = plt.figure(figsize=(10, 8))

    # Calculate Centrality for node size.
    # NOTE: a node with no edges has centrality 0 and is drawn with size 0.
    centrality = nx.degree_centrality(G)
    node_sizes = [v * 6000 for v in centrality.values()]

    # Layout (fixed seed so repeated runs give the same positions)
    pos = nx.spring_layout(G, k=0.9, seed=42)

    # Draw
    # NOTE(review): the edge 'weight' attribute is not used for the drawn width.
    nx.draw_networkx_nodes(G, pos, node_size=node_sizes, node_color="skyblue", alpha=0.9)
    nx.draw_networkx_edges(G, pos, width=2, alpha=0.4, edge_color="gray")
    nx.draw_networkx_labels(G, pos, font_size=11, font_weight="bold")

    plt.title(f"Network Graph: {title}", fontsize=15)
    plt.axis('off')

    # Build the path with os.path.join, as load_text does for input files.
    save_path = os.path.join(RESULTS_DIR, f"{filename_prefix}_network.png")
    # Save before show(): some backends discard the figure once it is shown.
    plt.savefig(save_path)
    print(f"Saved graph to: {save_path}")
    plt.show()

    # pyplot keeps figures alive until closed; release this one so repeated
    # calls (one per book) do not accumulate open figures.
    plt.close(fig)

## 4. Main Execution (Loops through both books)

In [None]:
def run_analysis():
    """Load, analyse, and draw the character network for each configured book.

    Iterates over BOOKS_CONFIG. A book whose text cannot be loaded (empty
    string from load_text) is skipped. A graph without any edges is reported
    instead of drawn.
    """
    banner = "========================================"

    for title, settings in BOOKS_CONFIG.items():
        print(f"\n\n{banner}")
        print(f"PROCESSING: {title}")
        print(banner)

        # 1. Load - an empty result means the file was missing (or empty).
        corpus = load_text(settings['filename'])
        if not corpus:
            continue

        # 2. Build
        network = build_graph(corpus, settings['characters'])

        # 3. Check & Draw
        if network.number_of_edges() > 0:
            # File-system friendly prefix, e.g. "War and Peace" -> "war_and_peace".
            file_prefix = title.lower().replace(" ", "_")
            draw_network(network, title, file_prefix)
        else:
            print("No interactions found. Check character names.")

run_analysis()