# Intellectual Network Map - Exploratory Analysis

This notebook explores the network structure of intellectual problems and reading patterns.

**Author:** Joseph Z. Stafura  
**Date:** January 2026

In [None]:
# Imports
import json
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Plot defaults shared by every figure in this notebook:
# seaborn's whitegrid theme and a 12x8 inch default canvas.
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12, 8)

# Confirmation that the import/setup cell ran to completion
print("✓ Imports successful")

## 1. Load Data

In [None]:
# Directory holding the processed JSON inputs, relative to this notebook
DATA_DIR = Path('../data/processed')

# The encoding is passed explicitly: without it, open() falls back to the
# platform's locale encoding, which can mis-decode any non-ASCII text in the
# JSON files (e.g. on Windows).

# Load problem categories
with open(DATA_DIR / 'problem_categories.json', 'r', encoding='utf-8') as f:
    categories = json.load(f)

# Load book metadata
with open(DATA_DIR / 'book_metadata.json', 'r', encoding='utf-8') as f:
    metadata = json.load(f)

# Quick sanity summary of what was loaded
print(f"Total books: {metadata['metadata']['total_books']}")
print(f"Problems defined: {categories['metadata']['problems_defined']}")
print(f"Bridge authors: {len(categories['key_bridge_authors'])}")

## 2. Reading Statistics

In [None]:
# Reading statistics: status counts (left panel) and rating histogram (right panel)
status_data = metadata['reading_statistics']['by_status']
# Each (key, value) pair of by_status becomes one (Status, Count) row
status_df = pd.DataFrame(list(status_data.items()), columns=['Status', 'Count'])

fig, ax = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: number of books in each reading status
ax[0].bar(status_df['Status'], status_df['Count'], color=['#4ECDC4', '#FF6B6B', '#95E1D3'])
ax[0].set_title('Books by Status', fontsize=14, fontweight='bold')
ax[0].set_ylabel('Count')
ax[0].tick_params(axis='x', rotation=45)

# Right panel: rating histogram, highest rating first.
# Tick labels and JSON keys live in one table so they cannot drift out of order.
ratings = metadata['reading_statistics']['rating_distribution']
rating_keys = [
    ('5★', '5_stars'),
    ('4★', '4_stars'),
    ('3★', '3_stars'),
    ('2★', '2_stars'),
    ('1★', '1_stars'),
    ('Unrated', 'unrated'),
]
rating_labels = [label for label, _ in rating_keys]
rating_values = [ratings[key] for _, key in rating_keys]

ax[1].bar(rating_labels, rating_values, color=['#FFD700', '#C0C0C0', '#CD7F32', '#A9A9A9', '#696969', '#D3D3D3'])
ax[1].set_title('Rating Distribution', fontsize=14, fontweight='bold')
ax[1].set_ylabel('Count')

plt.tight_layout()
plt.show()

print(f"\nAverage rating: {metadata['reading_statistics']['average_rating']}⭐")

## 3. Problem Coverage

In [None]:
# One record per problem: its name, estimated book count, and average rating
problems_data = [
    {
        'Problem': problem,
        'Books': data['estimated_books'],
        'Avg Rating': data['avg_rating'],
    }
    for problem, data in categories['problems'].items()
]

# Largest reading areas first; both panels below share this row order
problems_df = pd.DataFrame(problems_data).sort_values('Books', ascending=False)

fig, ax = plt.subplots(1, 2, figsize=(15, 6))

# Left panel: books per problem
colors = ['#FF6B6B', '#4ECDC4', '#FF8C42', '#95E1D3', '#F38181', '#AA96DA', '#FCBAD3', '#A8D8EA']
ax[0].barh(problems_df['Problem'], problems_df['Books'], color=colors[:len(problems_df)])
ax[0].set_xlabel('Number of Books')
ax[0].set_title('Reading Coverage by Problem', fontsize=14, fontweight='bold')
ax[0].invert_yaxis()  # barh draws the first row at the bottom; flip so it is on top

# Right panel: average rating per problem
ax[1].barh(problems_df['Problem'], problems_df['Avg Rating'], color=colors[:len(problems_df)])
ax[1].set_xlabel('Average Rating')
ax[1].set_title('Average Rating by Problem', fontsize=14, fontweight='bold')
# NOTE(review): the axis is zoomed to 3.5-5.0, so a problem whose average is
# below 3.5 would show no bar - confirm that all averages fall in this range.
ax[1].set_xlim(3.5, 5.0)
ax[1].invert_yaxis()

plt.tight_layout()
plt.show()

print("\n📊 Coverage Summary:")
print(problems_df.to_string(index=False))

## 4. Network Analysis

In [None]:
# Build an undirected graph whose nodes are problems and bridge authors
G = nx.Graph()

# Problem nodes carry their estimated book count (used later for node sizing)
for problem, data in categories['problems'].items():
    G.add_node(problem, node_type='problem', books=data['estimated_books'])

# Problem-to-problem connections, weighted by their stated strength
for conn in categories['connections']:
    G.add_edge(conn['from'], conn['to'], weight=conn['strength'])

# Bridge authors: a weight-3 edge to the primary problem and a weight-1 edge
# to every other problem the author bridges to.
for author, data in categories['key_bridge_authors'].items():
    G.add_node(author, node_type='author', significance=data['significance'])
    primary = data['primary_problem']
    G.add_edge(author, primary, weight=3)
    for bridge in data['bridges']:
        # add_edge() updates the attributes of an existing edge, so re-adding
        # the primary problem here would silently lower its weight from 3 to 1.
        if bridge != primary:
            G.add_edge(author, bridge, weight=1)

node_count = G.number_of_nodes()
# Guard the division so an empty input yields 0.00 instead of ZeroDivisionError
avg_degree = sum(degree for _, degree in G.degree()) / node_count if node_count else 0.0

print("Graph Statistics:")
print(f"  Nodes: {node_count}")
print(f"  Edges: {G.number_of_edges()}")
print(f"  Density: {nx.density(G):.3f}")
print(f"  Average degree: {avg_degree:.2f}")

In [None]:
# Centrality is computed over the whole graph (authors included), but only
# the problem nodes are reported in the table below.
problem_nodes = [n for n in G.nodes() if G.nodes[n].get('node_type') == 'problem']
degree_centrality = nx.degree_centrality(G)
betweenness_centrality = nx.betweenness_centrality(G)

# One record per problem with both centrality scores
centrality_data = [
    {
        'Problem': node,
        'Degree': degree_centrality[node],
        'Betweenness': betweenness_centrality[node],
    }
    for node in problem_nodes
]

# Most connected problems first
centrality_df = pd.DataFrame(centrality_data).sort_values('Degree', ascending=False)

print("\n🎯 Most Central Problems:")
print(centrality_df.to_string(index=False))

In [None]:
# Draw the problem/author network on an explicit figure and axes
net_fig, net_ax = plt.subplots(figsize=(14, 10))

# Fixed seed so the spring layout is identical on every run
pos = nx.spring_layout(G, k=2, iterations=50, seed=42)

# Edges first, so the nodes are painted on top of them
nx.draw_networkx_edges(G, pos, alpha=0.3, width=1, ax=net_ax)

# Problem nodes: area scales with the number of books on that problem
problem_nodes = [n for n in G.nodes() if G.nodes[n].get('node_type') == 'problem']
node_sizes = [100 * G.nodes[n]['books'] for n in problem_nodes]
nx.draw_networkx_nodes(
    G,
    pos,
    nodelist=problem_nodes,
    node_size=node_sizes,
    node_color='#FF6B6B',
    alpha=0.8,
    label='Problems',
    ax=net_ax,
)

# Author nodes: area scales with the author's significance score
author_nodes = [n for n in G.nodes() if G.nodes[n].get('node_type') == 'author']
author_sizes = [80 * G.nodes[n]['significance'] for n in author_nodes]
nx.draw_networkx_nodes(
    G,
    pos,
    nodelist=author_nodes,
    node_size=author_sizes,
    node_color='#4ECDC4',
    alpha=0.6,
    label='Bridge Authors',
    ax=net_ax,
)

# Name labels for every node in the graph
nx.draw_networkx_labels(G, pos, font_size=9, font_weight='bold', ax=net_ax)

net_ax.set_title('Intellectual Network: Problems and Bridge Authors',
                 fontsize=16, fontweight='bold', pad=20)
net_ax.legend(scatterpoints=1, frameon=True, loc='upper right')
net_ax.axis('off')
net_fig.tight_layout()
plt.show()

## 5. Bridge Authors Analysis

In [None]:
# One record per bridge author: primary problem, how many problems they
# bridge to, their significance score, and how many works are listed.
bridge_data = [
    {
        'Author': author,
        'Primary Problem': data['primary_problem'],
        'Bridges To': len(data['bridges']),
        'Significance': data['significance'],
        'Works': len(data['works']),
    }
    for author, data in categories['key_bridge_authors'].items()
]

# Most significant authors first
bridge_df = pd.DataFrame(bridge_data).sort_values('Significance', ascending=False)

# Grouped bar chart: two bars per author, centred on the integer tick position
fig, ax = plt.subplots(figsize=(12, 6))

x = range(len(bridge_df))
width = 0.35

# Left bar of each pair: number of problems bridged
ax.bar([i - width/2 for i in x], bridge_df['Bridges To'], width,
       label='Problems Bridged', color='#4ECDC4', alpha=0.8)
# Right bar of each pair: significance score
ax.bar([i + width/2 for i in x], bridge_df['Significance'], width,
       label='Significance', color='#FF6B6B', alpha=0.8)

ax.set_xlabel('Author')
ax.set_ylabel('Count / Score')
ax.set_title('Bridge Authors: Connections and Significance', fontsize=14, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(bridge_df['Author'], rotation=45, ha='right')
ax.legend()
ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

print("\n🌉 Bridge Authors Summary:")
print(bridge_df.to_string(index=False))

## 6. Reading Trajectory

In [None]:
# Reading bursts: each record is read for its 'period', 'books' and 'focus' fields
bursts = metadata['temporal_patterns']['reading_bursts']

burst_df = pd.DataFrame(bursts)

# Bar chart of books read per period
fig, ax = plt.subplots(figsize=(12, 6))

periods = burst_df['period'].tolist()
books = burst_df['books'].tolist()

colors_timeline = ['#FF6B6B', '#4ECDC4', '#AA96DA']
ax.bar(periods, books, color=colors_timeline, alpha=0.8)

ax.set_xlabel('Period', fontsize=12)
ax.set_ylabel('Books Read', fontsize=12)
ax.set_title('Reading Intensity Over Time', fontsize=14, fontweight='bold')
ax.grid(axis='y', alpha=0.3)

# Annotate each bar with its focus, 3 units above the bar top.
# Bars on a categorical axis sit at integer positions 0, 1, 2, ...
for i, (count, focus) in enumerate(zip(books, burst_df['focus'])):
    ax.text(i, count + 3, focus, ha='center', va='bottom', fontsize=9, style='italic')

plt.tight_layout()
plt.show()

print("\n📅 Reading Bursts:")
for period, count, focus in zip(periods, books, burst_df['focus']):
    print(f"  {period}: {count} books - {focus}")

## 7. Top Authors

In [None]:
# Most-read authors: each record is read for 'author', 'books' and 'avg_rating'
top_authors = metadata['author_frequency']['most_read_authors']
authors_df = pd.DataFrame(top_authors)

fig, ax = plt.subplots(figsize=(12, 8))

# Colour each bar by average rating: red at the lowest rating in the data,
# green at the 5.0 ceiling.
norm = plt.Normalize(vmin=authors_df['avg_rating'].min(), vmax=5.0)
colors_map = plt.cm.RdYlGn(norm(authors_df['avg_rating']))

bars = ax.barh(authors_df['author'], authors_df['books'], color=colors_map)
ax.set_xlabel('Number of Books Read', fontsize=12)
ax.set_title('Most-Read Authors', fontsize=14, fontweight='bold')
ax.invert_yaxis()  # keep the first author in the data at the top

# Print the average rating just past the end of each bar
for i, (book_count, rating) in enumerate(zip(authors_df['books'],
                                             authors_df['avg_rating'])):
    ax.text(book_count + 0.2, i, f"{rating:.1f}⭐", va='center', fontsize=9)

plt.tight_layout()
plt.show()

# NOTE(review): the heading says "Top 10" but the whole frame is printed -
# confirm the JSON list is already capped at ten authors.
print("\n📚 Top 10 Authors:")
print(authors_df.to_string(index=False))

## 8. Key Insights

### Coverage Patterns:
- **Strongest area**: Social Structure (35 books, avg 4.3★)
- **Weakest area**: Temporality (8 books, avg 4.1★)
- **Highest rated**: Place/Space (4.5★ avg)

### Network Structure:
- **Most central problem**: Social Structure (highest degree centrality)
- **Bridge authors**: 8 key figures connecting multiple domains
- **Strongest connections**: Violence ↔ Social Structure, Consciousness ↔ Self/Subject

### Reading Trajectory:
- **Early phase (2007-08)**: Foundations in philosophy and cognitive science
- **Middle phase (2014-15)**: Maximalist fiction and continental philosophy
- **Current phase (2024-25)**: Contemporary theory and micro-sociology

### Author Patterns:
- **Perfect 5★ authors**: Joan Didion, Peter Sloterdijk
- **Most prolific**: William T. Vollmann (10 books)
- **Highest impact**: Thomas Pynchon (7 books, 4.7★ avg)

## 9. Next Steps

Based on this analysis, priority areas for expansion:

1. **Fill temporal gap**: Add Husserl, Koselleck, Hartog
2. **Strengthen connections**: Read more work bridging Language ↔ Violence
3. **Complete series**: Finish Knausgård trilogy, stuck books (Nádas, Gass)
4. **Methodological depth**: Add ethnography texts, Bayesian stats, network analysis
5. **Diversify perspectives**: Critical race theory, feminist theory, disability studies