# Measure 4: Social Network Analysis (Clean Graph + Separate Data Table)

## 1. Introduction
**Objective:** Visualize the social structure of *Anna Karenina* and quantify character interactions.

## 2. Methodology
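* **Visual:** A network graph in which node size and node color both encode degree centrality (larger and yellower = higher), and edge thickness reflects how often two characters are named in the same sentence.
* **Data:** A separate table listing each character's degree, i.e. the number of other tracked characters they co-occur with at least once.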

In [None]:
%pip install networkx pandas matplotlib nltk

In [None]:
import os
import itertools
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from IPython.display import display

# --- CONFIGURATION ---
# Punkt tokenizer data used by NLTK's sentence/word tokenizers below.
nltk.download('punkt')
nltk.download('punkt_tab')

# Relative paths; they resolve against the current working directory.
DATA_DIR = '../data'
RESULTS_DIR = '../results'

# exist_ok=True makes directory creation idempotent and avoids the
# check-then-create race of testing os.path.exists() first.
os.makedirs(RESULTS_DIR, exist_ok=True)

# "filename" is looked up inside DATA_DIR; "characters" are the names that
# become graph nodes and are searched for in each sentence.
CONFIG = {
    "filename": "The Project Gutenberg eBook of Anna Karenina, by Leo Tolstoy.txt",
    "characters": ["Anna", "Vronsky", "Levin", "Kitty", "Karenin", "Stiva", "Dolly", "Betsy"]
}

## 3. Processing Functions
Load the novel, split it into sentences, and count how often each pair of tracked characters is named in the same sentence.

In [None]:
def load_text(filename):
    """Return the UTF-8 contents of ``filename`` located in ``DATA_DIR``.

    If the file does not exist, an error message is printed and an empty
    string is returned instead of raising.
    """
    filepath = os.path.join(DATA_DIR, filename)
    try:
        with open(filepath, mode='r', encoding='utf-8') as handle:
            contents = handle.read()
    except FileNotFoundError:
        print(f"ERROR: File not found at {filepath}")
        contents = ""
    return contents

def build_graph(text, characters):
    """Build an undirected, weighted sentence co-occurrence graph.

    Every name in ``characters`` becomes a node, whether or not it is ever
    mentioned. ``text`` is split into sentences; each sentence is
    lower-cased and word-tokenized, and a character counts as present when
    its lower-cased name is one of the tokens. For every pair of characters
    present in the same sentence, the ``weight`` of the edge between them is
    increased by one (the edge is created with weight 1 on first sight).

    Returns:
        The resulting ``networkx.Graph``.
    """
    graph = nx.Graph()
    graph.add_nodes_from(characters)

    # Lower-cased lookup key -> name as spelled in ``characters``.
    char_map = {name.lower(): name for name in characters}

    sentences = sent_tokenize(text)
    print(f"Scanning {len(sentences)} sentences...")

    for sentence in sentences:
        # A set gives one hit per character per sentence and fast membership.
        tokens = set(word_tokenize(sentence.lower()))
        present = [name for key, name in char_map.items() if key in tokens]
        if len(present) < 2:
            continue  # no pair to record in this sentence

        for first, second in itertools.combinations(present, 2):
            if not graph.has_edge(first, second):
                graph.add_edge(first, second, weight=1)
            else:
                graph[first][second]['weight'] += 1

    return graph

## 4. Visualization & Data Table
This section generates:
1.  **The Graph:** A clean visual with no obstructing text boxes.
2.  **The Table:** A pandas DataFrame displayed below the graph and also saved as a CSV file.

In [None]:
def analyze_and_draw(G):
    """Plot the character network and tabulate each character's degree.

    The graph is drawn with a seeded spring layout, saved as a PNG under
    ``RESULTS_DIR`` and shown. Afterwards a DataFrame of node degrees,
    sorted in descending order, is displayed and written as CSV under
    ``RESULTS_DIR``.

    Args:
        G: networkx graph whose edges carry a numeric ``weight`` attribute.
    """
    import matplotlib.patheffects as path_effects

    # --- 1. DRAW THE GRAPH (CLEAN) ---
    plt.figure(figsize=(14, 10), facecolor='white')
    ax = plt.gca()

    # Layout & Metrics (fixed seed keeps the layout reproducible)
    pos = nx.spring_layout(G, k=1.5, iterations=50, seed=42)
    centrality = nx.degree_centrality(G)
    centrality_values = list(centrality.values())
    node_sizes = [score * 8000 + 500 for score in centrality_values]
    # Heaviest edge is the reference for width scaling; 1 if there are no edges.
    max_weight = max((G[a][b]['weight'] for a, b in G.edges()), default=1)

    # Draw Edges: one call per edge so that each gets its own width
    # (0.5 for the lightest possible edge up to 4.5 for the heaviest).
    # NOTE(review): arrows=True with arrowstyle "-" is presumably there so the
    # curved connectionstyle is honoured without drawing arrowheads - confirm.
    for a, b, attrs in G.edges(data=True):
        edge_width = (attrs['weight'] / max_weight) * 4 + 0.5
        nx.draw_networkx_edges(
            G, pos,
            edgelist=[(a, b)],
            width=edge_width,
            alpha=0.3,
            edge_color="#555555",
            connectionstyle="arc3,rad=0.1",
            arrows=True,
            arrowstyle="-",
            ax=ax,
        )

    # Draw Nodes: size and color are both driven by degree centrality.
    nx.draw_networkx_nodes(
        G, pos,
        node_size=node_sizes,
        node_color=centrality_values,
        cmap=plt.cm.plasma,
        alpha=0.9,
        edgecolors='white',
        linewidths=2,
        ax=ax,
    )

    # Draw Labels, with a white outline so they stay readable over nodes/edges.
    text_items = nx.draw_networkx_labels(G, pos, font_size=12, font_weight="bold", ax=ax)
    outline = [path_effects.withStroke(linewidth=3, foreground='white')]
    for text_item in text_items.values():
        text_item.set_path_effects(outline)

    ax.set_title("Character Interaction Network: Anna Karenina", fontsize=18, fontweight='bold', pad=20)
    ax.axis('off')

    # Save Graph (before show(), while the figure is still current)
    save_path = f"{RESULTS_DIR}/anna_karenina_network_clean.png"
    plt.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.show()
    print(f"Graph saved to: {save_path}")

    # --- 2. DISPLAY THE TABLE (SEPARATE) ---
    rule = "=" * 40
    print("\n" + rule)
    print("NETWORK DATA TABLE (DEGREE CENTRALITY)")
    print(rule)

    # One row per node: (name, number of distinct neighbours).
    degree_rows = list(G.degree())
    df = pd.DataFrame(degree_rows, columns=['Character', 'Connections (Degree)'])
    df = df.sort_values(by='Connections (Degree)', ascending=False)
    df = df.reset_index(drop=True)

    # Display as a nice dataframe
    display(df)

    # Save CSV for the group mate
    csv_path = f"{RESULTS_DIR}/anna_karenina_network_table.csv"
    df.to_csv(csv_path, index=False)
    print(f"\nTable saved to: {csv_path}")

## 5. Main Execution

In [None]:
def run_analysis():
    """Run the pipeline: load the text, build the graph, draw and tabulate.

    Stops early with a printed message when no text could be loaded or when
    the graph contains no edges.
    """
    print("Loading text data...")
    text = load_text(CONFIG['filename'])
    if not text:
        print("File not found. Please check DATA_DIR path.")
        return

    graph = build_graph(text, CONFIG['characters'])
    if graph.number_of_edges() == 0:
        print("No interactions found among the specified characters.")
        return

    print("Generating network visualization and data table...")
    analyze_and_draw(graph)

# Execute the full pipeline when this cell runs.
run_analysis()