In [2]:
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
import random
import pandas as pd
from tqdm import tqdm

def barabasi_albert_model(initial_nodes, final_nodes, edges_per_step):
    """
    Implement the Barabási-Albert model for generating scale-free networks.

    The network starts as a complete graph on ``initial_nodes`` nodes
    (labelled ``0 .. initial_nodes - 1``). Nodes ``initial_nodes ..
    final_nodes - 1`` are then added one at a time, and each new node is
    linked to ``edges_per_step`` distinct existing nodes drawn with
    probability proportional to their current degree. Sampling goes through
    NumPy's global random state (``np.random.choice``).

    Parameters:
    -----------
    initial_nodes : int
        Number of nodes in the initial (complete) network
    final_nodes : int
        Total number of nodes in the final network
    edges_per_step : int
        Number of edges to add with each new node

    Returns:
    --------
    G : networkx.Graph
        The generated scale-free network

    Raises:
    -------
    ValueError
        If nodes have to be added and ``edges_per_step`` is negative or
        larger than ``initial_nodes`` (a new node cannot be linked to more
        distinct nodes than exist at the first growth step).
    """
    # Create initial network (complete graph)
    G = nx.complete_graph(initial_nodes)

    # Nothing to grow: the seed graph already has the requested size
    if final_nodes <= initial_nodes:
        return G

    # Fail early with a clear message instead of deep inside np.random.choice
    if not 0 <= edges_per_step <= initial_nodes:
        raise ValueError(
            f"edges_per_step must be between 0 and initial_nodes "
            f"({initial_nodes}), got {edges_per_step}"
        )

    # Add nodes according to preferential attachment
    for new_node in range(initial_nodes, final_nodes):
        # Snapshot the existing nodes and their degrees before attaching
        nodes = list(G.nodes())
        degrees = np.array([G.degree(node) for node in nodes], dtype=float)
        total_degree = degrees.sum()

        # A seed graph with a single node has no edges, so every degree is 0;
        # fall back to uniform sampling (p=None) instead of dividing by zero.
        probabilities = degrees / total_degree if total_degree > 0 else None

        # Choose edges_per_step distinct targets based on preferential attachment
        targets = np.random.choice(
            nodes, size=edges_per_step, replace=False, p=probabilities
        )

        # Add new node and connect to targets; int() keeps node labels as
        # plain Python ints rather than NumPy scalars
        G.add_node(new_node)
        G.add_edges_from((new_node, int(target)) for target in targets)

    return G

def analyze_network(G):
    """
    Compute summary topology measures for a network.

    Parameters:
    -----------
    G : networkx.Graph
        The network to analyze

    Returns:
    --------
    dict
        Always contains:
        - 'avg_clustering': average clustering coefficient of G
        - 'char_path_length': average shortest path length (of G when it is
          connected, otherwise of its largest connected component)
        - 'degree_distribution': Counter mapping degree -> number of nodes
        Only when G is disconnected, additionally:
        - 'largest_component_size': node count of the largest component
        - 'total_nodes': node count of G
    """
    # Clustering is averaged over all nodes of the full graph
    summary = {'avg_clustering': nx.average_clustering(G)}

    if nx.is_connected(G):
        summary['char_path_length'] = nx.average_shortest_path_length(G)
    else:
        # Path length is undefined across components, so measure it on the
        # largest one and record how much of the graph that component covers
        giant = max(nx.connected_components(G), key=len)
        summary['char_path_length'] = nx.average_shortest_path_length(G.subgraph(giant))
        summary['largest_component_size'] = len(giant)
        summary['total_nodes'] = G.number_of_nodes()

    # Tally how many nodes have each degree value
    summary['degree_distribution'] = Counter(deg for _, deg in G.degree())

    return summary

def plot_degree_distribution(degree_counts, filename='degree_distribution.png'):
    """
    Plot the degree distribution on a log-log scale and save it to disk.

    Parameters:
    -----------
    degree_counts : Counter
        Counts of each degree value
    filename : str
        Path of the image file to write. Defaults to
        'degree_distribution.png'; pass distinct names to keep several
        plots instead of overwriting the same file.
    """
    degrees = sorted(degree_counts)
    counts = [degree_counts[d] for d in degrees]

    plt.figure(figsize=(10, 6))
    plt.loglog(degrees, counts, 'bo-', markersize=8, alpha=0.7)
    plt.xlabel('Degree (log scale)')
    plt.ylabel('Frequency (log scale)')
    plt.title('Degree Distribution (log-log scale)')
    plt.grid(True, alpha=0.3)
    plt.savefig(filename)
    # Close the figure so repeated calls do not accumulate open figures
    plt.close()


def run_multiple_experiments(experiments=100):
    """
    Run multiple experiments with randomly chosen parameters.

    For each experiment a parameter combination is drawn with
    ``random.choice``, a network is generated with
    ``barabasi_albert_model`` and summarised with ``analyze_network``.
    The degree distributions of the first five experiments are saved as
    'degree_distribution_<index>.png'.

    Parameters:
    -----------
    experiments : int
        Number of experiments to run

    Returns:
    --------
    pandas.DataFrame
        One row per experiment with the parameters used and the measured
        network properties. The column 'largest_component_ratio' is only
        filled for experiments whose network was disconnected.
    """
    results = []

    # Parameter ranges to explore
    initial_nodes_range = [5, 10, 15, 20]
    final_nodes_range = [100, 200, 300, 500]
    edges_per_step_range = [1, 2, 3, 5]

    for run_index in tqdm(range(experiments)):
        # Randomly select parameters (draw order kept fixed for reproducibility)
        initial_nodes = random.choice(initial_nodes_range)
        final_nodes = random.choice(final_nodes_range)
        edges_per_step = random.choice(edges_per_step_range)

        # Ensure edges_per_step is not greater than initial_nodes
        edges_per_step = min(edges_per_step, initial_nodes)

        # Generate network
        G = barabasi_albert_model(initial_nodes, final_nodes, edges_per_step)

        # Analyze network
        analysis = analyze_network(G)

        # Store results
        total_edges = G.number_of_edges()
        result = {
            'initial_nodes': initial_nodes,
            'final_nodes': final_nodes,
            'edges_per_step': edges_per_step,
            'avg_clustering': analysis['avg_clustering'],
            'char_path_length': analysis['char_path_length'],
            'total_edges': total_edges,
            'avg_degree': 2 * total_edges / G.number_of_nodes(),
        }

        # Add info about connected components if the graph was disconnected
        if 'largest_component_size' in analysis:
            result['largest_component_ratio'] = (
                analysis['largest_component_size'] / analysis['total_nodes']
            )

        results.append(result)

        # For the first few experiments, save the degree distribution plots.
        # Each run gets its own file so later runs do not overwrite earlier ones.
        if run_index < 5:
            plot_degree_distribution(
                analysis['degree_distribution'],
                filename=f'degree_distribution_{run_index}.png',
            )

    return pd.DataFrame(results)

# Run experiments (both RNGs are seeded: the parameter draws use `random`,
# the network generation uses `np.random`)
np.random.seed(42)
random.seed(42)
results_df = run_multiple_experiments(experiments=100)

# Display summary statistics
print("Summary Statistics:")
print(results_df.describe())

# Plot relationships between parameters and network properties
plt.figure(figsize=(15, 10))

# Plot 1: Initial nodes vs. Avg Clustering (one series per edges_per_step value)
plt.subplot(2, 2, 1)
for edges in sorted(results_df['edges_per_step'].unique()):
    subset = results_df[results_df['edges_per_step'] == edges]
    plt.scatter(subset['initial_nodes'], subset['avg_clustering'],
                label=f'm={edges}', alpha=0.7)
plt.xlabel('Initial Nodes')
plt.ylabel('Avg. Clustering Coefficient')
plt.title('Initial Network Size vs. Clustering')
plt.legend()
plt.grid(True, alpha=0.3)

# Plot 2: Edges per step vs. Char Path Length (one series per final size)
plt.subplot(2, 2, 2)
for nodes in sorted(results_df['final_nodes'].unique()):
    subset = results_df[results_df['final_nodes'] == nodes]
    plt.scatter(subset['edges_per_step'], subset['char_path_length'],
                label=f'N={nodes}', alpha=0.7)
plt.xlabel('Edges Per Step (m)')
plt.ylabel('Characteristic Path Length')
plt.title('Edges Per Step vs. Path Length')
plt.legend()
plt.grid(True, alpha=0.3)

# Plot 3: Final nodes vs. Avg Clustering (colour encodes edges_per_step)
plt.subplot(2, 2, 3)
plt.scatter(results_df['final_nodes'], results_df['avg_clustering'],
            c=results_df['edges_per_step'], cmap='viridis', alpha=0.7)
plt.colorbar(label='Edges Per Step')
plt.xlabel('Final Nodes')
plt.ylabel('Avg. Clustering Coefficient')
plt.title('Network Size vs. Clustering')
plt.grid(True, alpha=0.3)

# Plot 4: Avg Degree vs. Char Path Length (colour encodes final network size)
plt.subplot(2, 2, 4)
plt.scatter(results_df['avg_degree'], results_df['char_path_length'],
            c=results_df['final_nodes'], cmap='plasma', alpha=0.7)
plt.colorbar(label='Network Size')
plt.xlabel('Average Degree')
plt.ylabel('Characteristic Path Length')
plt.title('Avg. Degree vs. Path Length')
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('network_analysis.png')
plt.close()

# Write the CSV before announcing it, so the message is only printed once
# the file has actually been saved
results_df.to_csv('barabasi_albert_results.csv', index=False)
print("Analysis complete. Results saved to CSV and plots generated.")

# Build one example network and report its headline statistics
G = barabasi_albert_model(initial_nodes=10, final_nodes=100, edges_per_step=2)
analysis = analyze_network(G)
edge_count = G.number_of_edges()

report_lines = [
    "\nSample Network Analysis:",
    "Nodes: 100, Initial Size: 10, Edges per step: 2",
    f"Average Clustering Coefficient: {analysis['avg_clustering']:.4f}",
    f"Characteristic Path Length: {analysis['char_path_length']:.4f}",
    f"Number of edges: {edge_count}",
    f"Average degree: {2 * edge_count / G.number_of_nodes():.2f}",
]
print("\n".join(report_lines))

# Draw the example network: node size and colour both scale with degree
plt.figure(figsize=(10, 8))
pos = nx.spring_layout(G, seed=42)  # fixed seed keeps the layout repeatable
degrees = dict(G.degree())
# degrees preserves the node order of G, so its values line up with G.nodes()
node_sizes = [50 + 10 * deg for deg in degrees.values()]
nodes = nx.draw_networkx_nodes(
    G,
    pos,
    node_size=node_sizes,
    node_color=list(degrees.values()),
    cmap='viridis',
    alpha=0.7,
)
nx.draw_networkx_edges(G, pos, edge_color='gray', width=0.5)
plt.colorbar(nodes, label='Node Degree')
plt.title('Barabási-Albert Scale-Free Network (N=100, m=2)')
plt.axis('off')
plt.savefig('sample_network.png')
plt.close()

# Plot degree distribution with power-law fit for the sample network
from scipy.optimize import curve_fit


def power_law(x, a, gamma):
    """Power-law model P(k) = a * k^(-gamma) used as the fit function."""
    return a * (x ** -gamma)


# Empirical degree distribution: fraction of nodes having each degree k
degrees = [G.degree(n) for n in G.nodes()]
degree_counts = Counter(degrees)
k_values = sorted(degree_counts.keys())
pk_values = [degree_counts[k]/G.number_of_nodes() for k in k_values]

plt.figure(figsize=(8, 6))
plt.loglog(k_values, pk_values, 'bo', markersize=8, alpha=0.7, label='Observed')

# Simple power law fit (P(k) ~ k^-gamma)
# Keep only strictly positive points: zeros cannot be shown on log axes and
# k = 0 is undefined for a negative exponent
fit_points = [(k, pk) for k, pk in zip(k_values, pk_values) if pk > 0 and k > 0]
x_fit = [k for k, _pk in fit_points]
y_fit = [pk for _k, pk in fit_points]

if len(x_fit) > 2:  # Need at least 3 points for a decent fit
    try:
        params, _ = curve_fit(power_law, x_fit, y_fit)
    except RuntimeError as err:
        # curve_fit raises RuntimeError when the least-squares minimisation
        # fails; skip the fitted curve but still save the observed data below
        print(f"Power law fit failed: {err}")
    else:
        x_range = np.logspace(np.log10(min(x_fit)), np.log10(max(x_fit)), 100)
        plt.loglog(x_range, power_law(x_range, *params), 'r-',
                   label=f'Power law fit γ={params[1]:.2f}')
        plt.legend()

plt.xlabel('Degree k (log scale)')
plt.ylabel('P(k) (log scale)')
plt.title('Degree Distribution with Power Law Fit')
plt.grid(True, alpha=0.3)
plt.savefig('degree_distribution_fit.png')
plt.close()

100%|██████████| 100/100 [00:09<00:00, 10.02it/s]


Summary Statistics:
       initial_nodes  final_nodes  edges_per_step  avg_clustering  \
count     100.000000   100.000000      100.000000      100.000000   
mean       12.100000   260.000000        2.460000        0.147877   
std         5.603931   145.643816        1.410065        0.131004   
min         5.000000   100.000000        1.000000        0.000179   
25%         8.750000   100.000000        1.000000        0.050497   
50%        10.000000   200.000000        2.000000        0.127463   
75%        16.250000   300.000000        3.000000        0.214449   
max        20.000000   500.000000        5.000000        0.642722   

       char_path_length  total_edges  avg_degree  
count        100.000000   100.000000  100.000000  
mean           3.119323   700.900000    5.519967  
std            0.719038   559.146346    2.703159  
min            2.148687   105.000000    2.020000  
25%            2.641889   315.000000    3.775000  
50%            2.963828   490.000000    5.200000  
7