# Brookes' Δ Calculator - Interactive Examples

**Author:** Ayman Eldakroury  
**Affiliation:** The American University in Cairo  
**GitHub:** https://github.com/YOUR_USERNAME/brookes-delta  
**arXiv Paper:** *Brookes' Categorical Dispersion Measure: A Bibliometric Compass*  

---

## What is Brookes' Δ?

Brookes' Measure of Categorical Dispersion (Δ) is a statistical metric that quantifies **thematic concentration vs. dispersion** in research output:

- **Δ → 1.0**: High concentration (specialized research)
- **Δ → 0.0**: High dispersion (interdisciplinary research)

This notebook provides interactive examples from the accompanying arXiv paper.

## Setup & Installation

In [None]:
# Install required packages (run once)
!pip install numpy pandas matplotlib -q

In [None]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Import our Δ calculator
import sys
sys.path.append('.')
from delta_calculator import BrookesDeltaCalculator

print(" Libraries imported successfully!")

# Use the first line of the calculator's docstring as the version banner.
# It is computed outside the f-string because a backslash inside an f-string
# expression is a SyntaxError before Python 3.12, and the docstring is stripped
# first so one that opens with a newline does not produce an empty banner.
_calculator_doc = (BrookesDeltaCalculator.__doc__ or "").strip()
_calculator_version = _calculator_doc.splitlines()[0] if _calculator_doc else '1.0.0'
print(f"Δ calculator version: {_calculator_version}")

## 📚 Example 1: Computational Linguistics (From Paper)

In [None]:
# Example from the arXiv paper: publication counts per sub-field of
# computational linguistics, listed from the largest count to the smallest.
cl_categories = ["NLP Applications", "Semantics", "Resources",
                 "Syntax", "Discourse", "Morphology", "Phonology"]
cl_frequencies = [280, 220, 180, 120, 100, 70, 30]

# Create the calculator and compute Δ; the result is indexed like a dict below.
cl_calc = BrookesDeltaCalculator(cl_categories, cl_frequencies)
cl_results = cl_calc.calculate_delta()

print("=" * 60)
print("COMPUTATIONAL LINGUISTICS ANALYSIS")
print("=" * 60)
print(f"Total publications: {cl_results['total_publications']}")
print(f"Number of categories (T): {cl_results['T']}")
print(f"Mean frequency-rank (m): {cl_results['m']:.3f}")
print(f"\n Brookes' Δ = {cl_results['delta']:.3f}")
print(f" Interpretation: {cl_results['interpretation']}")

# Show the calculation table, restricted to the columns relevant to the ranking.
print("\n" + "-" * 60)
print("CALCULATION TABLE")
print("-" * 60)
display(cl_results['table'][['Category', 'Frequency', 'Percentage', 'Rank', 'Desc_Rank']])

In [None]:
# Visualize the distribution. The string argument is handed to visualize();
# presumably it is the output image path -- confirm against delta_calculator.
fig = cl_calc.visualize("cl_delta.png")
plt.suptitle(f"Computational Linguistics: Δ = {cl_results['delta']:.3f}", fontsize=14)
plt.show()

## Example 2: Field Comparison

In [None]:
# Compare two fields that use the same categories but spread their
# publications across them differently.
field1_cats = ["Theory", "Methods", "Applications", "Case Studies"]
field1_freq = [50, 30, 15, 5]  # Concentrated

field2_cats = ["Theory", "Methods", "Applications", "Case Studies"]
field2_freq = [30, 28, 25, 17]  # More dispersed

# Calculate Δ for each field
calc1 = BrookesDeltaCalculator(field1_cats, field1_freq)
calc2 = BrookesDeltaCalculator(field2_cats, field2_freq)

res1 = calc1.calculate_delta()
res2 = calc2.calculate_delta()

# Comparison visualization: one bar chart per field, side by side
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# A single loop formats both panels so they cannot drift apart.
panels = [
    (ax1, 'Field 1', field1_cats, field1_freq, res1, 'skyblue'),
    (ax2, 'Field 2', field2_cats, field2_freq, res2, 'lightcoral'),
]
for ax, label, cats, freqs, res, color in panels:
    # Keep only the text before the first "(" and strip the whitespace the
    # split leaves behind, so the title does not end in " )".
    headline = res["interpretation"].split("(")[0].strip()
    ax.bar(cats, freqs, color=color, alpha=0.7)
    ax.set_title(f'{label}: Δ = {res["delta"]:.3f}\n({headline})')
    ax.set_ylabel('Frequency')
    ax.tick_params(axis='x', rotation=45)

plt.suptitle('Field Comparison: Concentration vs. Dispersion', fontsize=14)
plt.tight_layout()
plt.show()

# Statistical comparison of the two calculators
comparison = calc1.compare_fields(calc2)
print("\n STATISTICAL COMPARISON:")
print(f"   Field 1 Δ: {comparison['field1_delta']:.3f}")
print(f"   Field 2 Δ: {comparison['field2_delta']:.3f}")
print(f"   Difference: {comparison['difference']:.3f}")
print(f"   {comparison['comparison']}")
print(f"   Effect size: {comparison['effect_size']:.3f}")

## 📁 Example 3: Load Data from CSV

In [None]:
# Load sample data from CSV, falling back to a small built-in example.
# Only the file read sits inside the try block, so a FileNotFoundError caught
# here can only come from reading the CSV itself.
try:
    sample_df = pd.read_csv('data/sample_data.csv')
    print("📂 Sample Data Loaded:")
except FileNotFoundError:
    print("  CSV file not found. Using example data instead.")
    # Create example data with the same two columns the analysis below reads
    data = {
        'Category': ['Theory', 'Methods', 'Applications', 'Review'],
        'Frequency': [40, 35, 20, 5]
    }
    sample_df = pd.DataFrame(data)

display(sample_df)

# Calculate Δ for whichever dataset was loaded, so the fallback data is
# analysed too and csv_calc / csv_results are always defined after this cell.
csv_calc = BrookesDeltaCalculator(
    sample_df['Category'].tolist(),
    sample_df['Frequency'].tolist()
)
csv_results = csv_calc.calculate_delta()

print("\n CSV Analysis Results:")
print(f"   Δ = {csv_results['delta']:.3f}")
print(f"   {csv_results['interpretation']}")

## Example 4: Your Research Data

In [None]:
# CELL 1: Load your research data
# One name for the file so the read and the save below cannot drift apart.
YOUR_DATA_PATH = 'my_data.csv'

try:
    your_df = pd.read_csv(YOUR_DATA_PATH)
    print("✅ Your research data loaded:")
    display(your_df)

except FileNotFoundError:
    print("📝 Enter your research data manually:")
    # Example structure - EDIT THIS WITH YOUR DATA
    your_data = {
        'Category': ['Arabic Literature & AI', 'Digital Humanities',
                     'Computational Linguistics', 'Bibliometrics', 'Other'],
        'Frequency': [30, 25, 20, 15, 10]  # EDIT THESE NUMBERS!
    }
    your_df = pd.DataFrame(your_data)
    display(your_df)

    # Save for future use. Once this file exists, later runs take the try
    # branch and load it instead of the dictionary above, so edit or delete
    # the CSV if the numbers here were only placeholders.
    your_df.to_csv(YOUR_DATA_PATH, index=False)
    print("💾 Data saved to 'my_data.csv'")

In [None]:
# CELL 2: Analyze your research
your_calc = BrookesDeltaCalculator(
    your_df['Category'].tolist(),
    your_df['Frequency'].tolist()
)
your_results = your_calc.calculate_delta()

print("=" * 60)
print("YOUR RESEARCH ANALYSIS")
print("=" * 60)
print(f"Total publications analyzed: {your_results['total_publications']}")
print(f"Number of categories: {your_results['T']}")
print(f"\n🎯 YOUR Brookes' Δ = {your_results['delta']:.3f}")
print(f"💡 {your_results['interpretation']}")

# Interpretation sentence for the paper. Everything from the first "(" onward
# is dropped, whatever the parenthetical says, rather than removing one
# hard-coded phrase; the remainder is lower-cased to fit mid-sentence.
interpretation_phrase = your_results['interpretation'].split('(')[0].strip().lower()
print("\n📝 FOR YOUR ARXIV PAPER:")
print(f"The analysis reveals a Δ value of {your_results['delta']:.3f}, indicating {interpretation_phrase}.")

# Visualize. The string argument is handed to visualize(); presumably it is
# the output image path -- confirm against delta_calculator.
fig = your_calc.visualize("your_research_delta.png")
plt.suptitle(f"Your Research: Δ = {your_results['delta']:.3f}", fontsize=14)
plt.show()

## 🧪 Advanced: Sensitivity Analysis

In [None]:
# How Δ changes with data distribution
def analyze_sensitivity(base_freq, variations):
    """Compute Brookes' Δ for uniformly scaled copies of a frequency list.

    Parameters
    ----------
    base_freq : sequence of numbers
        Baseline frequency for each category. Categories are labelled
        "Cat 1", "Cat 2", ... in the order given.
    variations : iterable of numbers
        Scaling factors. Each factor multiplies every entry of ``base_freq``.

    Returns
    -------
    pandas.DataFrame
        One row per scaling factor with the columns 'Variation' (the factor),
        'Delta' (the 'delta' entry returned by the calculator) and
        'Frequencies' (the scaled integer frequencies that were analysed).
    """
    # NOTE(review): uniform scaling leaves every category's share unchanged, so
    # any change in Δ here presumably comes from integer rounding only --
    # confirm against the calculator's definition of Δ.
    results = []

    for variation in variations:
        # Round to the nearest integer instead of truncating: float products
        # such as 100 * 1.15 evaluate to 114.999..., which int() alone would
        # turn into 114.
        varied_freq = [int(round(f * variation)) for f in base_freq]

        # Calculate Δ for the scaled distribution
        calc = BrookesDeltaCalculator([f"Cat {j+1}" for j in range(len(base_freq))], varied_freq)
        delta = calc.calculate_delta()['delta']

        results.append({
            'Variation': variation,
            'Delta': delta,
            'Frequencies': varied_freq
        })

    return pd.DataFrame(results)

# Test: scale a fixed five-category distribution by several factors
base = [100, 80, 60, 40, 20]
variations = [0.5, 0.75, 1.0, 1.25, 1.5]  # 50% to 150% of original

sensitivity_df = analyze_sensitivity(base, variations)
print("Sensitivity Analysis: How Δ changes with frequency scaling")
display(sensitivity_df[['Variation', 'Delta']])

# Plot Δ against the scaling factor on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(sensitivity_df['Variation'], sensitivity_df['Delta'], 'o-', linewidth=2, markersize=8)
ax.set_xlabel('Frequency Scaling Factor')
ax.set_ylabel('Brookes Δ')
ax.set_title('Sensitivity Analysis: Δ vs Frequency Scaling')
ax.grid(True, alpha=0.3)
plt.show()

## 🎓 Conclusion & Next Steps

In [None]:
print("=" * 60)
print("SUMMARY OF YOUR ANALYSIS")
print("=" * 60)

# Collect all results: one row per dataset analysed earlier in the notebook.
# The interpretation is cut at the first "(" so only the headline text remains.
summary_data = [
    {"Dataset": dataset_label, "Δ": dataset_results['delta'],
     "Interpretation": dataset_results['interpretation'].split('(')[0].strip()}
    for dataset_label, dataset_results in [
        ("Computational Linguistics", cl_results),
        ("Your Research", your_results),
    ]
]

summary_df = pd.DataFrame(summary_data)
display(summary_df)

print("\n FOR YOUR ARXIV PAPER:")
print("Include these Δ values in your Results section:")
for _, row in summary_df.iterrows():
    print(f"  • {row['Dataset']}: Δ = {row['Δ']:.3f} ({row['Interpretation']})")

print("\n NEXT STEPS:")
print("1. Upload this code to GitHub")
print("2. Include GitHub link in your arXiv paper")
print("3. Add Δ values to your Results section")
print("4. Reference the GitHub repository in your Data Availability statement")

In [None]:
# Export results for your paper: one row per dataset, with the Δ value, the
# full interpretation text, and the two size figures reported by the calculator.
export_df = pd.DataFrame({
    'dataset': ['computational_linguistics', 'your_research'],
    'delta': [cl_results['delta'], your_results['delta']],
    'interpretation': [cl_results['interpretation'], your_results['interpretation']],
    'total_publications': [cl_results['total_publications'], your_results['total_publications']],
    'n_categories': [cl_results['T'], your_results['T']]
})

# index=False keeps the DataFrame's row index out of the CSV file.
export_df.to_csv('delta_results.csv', index=False)
print("✅ Results exported to 'delta_results.csv' (include in paper supplement)")