SNA Project Round - 1 

Team: TekloTekloTingTiiiing

Members: 

1) 21ucs204 - Sindhi Krish Kamal
2) 21ucs158 - Prashant Singh Shekhawat
3) 21ucs237 - Yashodhan Sonune
4) 21ucs183 - Sarvagya Acharya

Datasets chosen:
- Air Traffic Control ( http://konect.cc/networks/maayan-faa/ )
- Yeast network ( http://konect.cc/networks/moreno_propro/ )


In [None]:
#importing all the important libraries
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import snap
import random


In [None]:
#Function to load the undirected datasets
def load_Undirected_Graph(file_path, comments="%"):
    """
    Load an undirected graph from a whitespace-separated edge-list file.

    KONECT dataset files begin with header lines prefixed by ``%``.
    ``nx.read_edgelist`` only skips ``#`` comments by default, so those
    header lines would be parsed as edges and the load would fail. The
    comment marker is therefore passed explicitly.

    Args:
        file_path: Location of the edge-list file, forwarded unchanged to
            ``nx.read_edgelist``.
        comments: Marker that starts a comment in the file. Defaults to
            ``"%"`` (KONECT convention); pass ``"#"`` for files that use
            the networkx default.

    Returns:
        The ``networkx.Graph`` built from the edges in the file.
    """
    return nx.read_edgelist(file_path, comments=comments)

In [None]:

#Function to load the directed datasets
def load_Directed_graph(file_path, comments="%"):
    """
    Load a directed graph from a whitespace-separated edge-list file.

    Each line ``u v`` becomes the directed edge ``u -> v``. KONECT dataset
    files begin with header lines prefixed by ``%``, which
    ``nx.read_edgelist`` does not skip by default (it only recognises
    ``#``), so the comment marker is passed explicitly.

    Args:
        file_path: Location of the edge-list file, forwarded unchanged to
            ``nx.read_edgelist``.
        comments: Marker that starts a comment in the file. Defaults to
            ``"%"`` (KONECT convention); pass ``"#"`` for files that use
            the networkx default.

    Returns:
        The ``networkx.DiGraph`` built from the edges in the file.
    """
    return nx.read_edgelist(
        file_path, comments=comments, create_using=nx.DiGraph()
    )

In [None]:
# Function to summarize network statistics
def summarize_network_stats(G):
    """
    Print degree statistics for a graph and show a KDE plot of its degrees.

    Reports the maximum, minimum, mean and standard deviation of the node
    degrees. The standard deviation comes from ``pandas.Series.std``, i.e.
    the sample standard deviation.

    Args:
        G: Graph whose ``degree()`` yields ``(node, degree)`` pairs, e.g. a
            networkx graph.

    Returns:
        None. Results are printed and the plot is displayed via matplotlib.
    """
    degree_values = [degree for _, degree in G.degree()]

    # max()/min() and the mean are undefined without nodes, so stop early
    # instead of crashing on an empty graph.
    if not degree_values:
        print("Graph has no nodes; degree statistics are undefined.")
        return

    max_degree = max(degree_values)
    min_degree = min(degree_values)
    avg_degree = sum(degree_values) / len(degree_values)
    std_degree = pd.Series(degree_values).std()

    # Kernel density estimate of the degree distribution
    sns.kdeplot(degree_values, color='skyblue', fill=True)
    plt.xlabel('Degree')
    plt.ylabel('Density')
    plt.title('Node Degree Distribution (KDE plot)')
    plt.show()

    # Output statistics
    print("Max Degree:", max_degree)
    print("Min Degree:", min_degree)
    print("Average Degree:", avg_degree)
    print("Standard Deviation of Degree Distribution:", std_degree)


In [None]:
# Helper and entry point for computing, printing and plotting centrality measures
def _plot_measure_histogram(values, measure_name):
    """Show a 20-bin histogram of per-node ``values`` labelled with ``measure_name``."""
    # Materialize dict views so matplotlib always receives a plain sequence.
    plt.hist(list(values), bins=20, color='skyblue')
    plt.title(f"{measure_name} Distribution")
    plt.xlabel(measure_name)
    plt.ylabel("Frequency")
    plt.show()


def calculate_centrality_measures(G):
    """
    Compute, print and plot a set of centrality/cohesion measures for ``G``.

    Per-node measures (degree, eigenvector, Katz, PageRank, local clustering,
    betweenness, closeness) are printed and shown as histograms. Graph-level
    measures (global clustering, reciprocity, transitivity) are printed only.

    Args:
        G: A networkx graph.

    Returns:
        dict mapping each measure name to its result: a ``{node: value}``
        dict for per-node measures and a single number for graph-level
        ones. ``"Reciprocity"`` maps to ``None`` for undirected graphs.
    """
    measures = {}

    def report_per_node(name, values, header_prefix="\n"):
        # Print the raw values, plot their distribution and record them.
        print(f"{header_prefix}{name}:")
        print(values)
        _plot_measure_histogram(values.values(), name)
        measures[name] = values

    report_per_node("Degree Centrality", nx.degree_centrality(G), header_prefix="")
    report_per_node(
        "Eigenvector Centrality", nx.eigenvector_centrality(G, max_iter=1000)
    )
    report_per_node("Katz Centrality", nx.katz_centrality(G))
    report_per_node("PageRank Centrality", nx.pagerank(G))

    # Clustering: the global value is printed between the local values and
    # their histogram, so this measure is reported by hand.
    local_clustering = nx.clustering(G)
    global_clustering = nx.average_clustering(G)
    print("\nLocal Clustering Coefficients:")
    print(local_clustering)
    print("\nGlobal Clustering Coefficient:")
    print(global_clustering)
    _plot_measure_histogram(
        local_clustering.values(), "Local Clustering Coefficients"
    )
    measures["Local Clustering Coefficients"] = local_clustering
    measures["Global Clustering Coefficient"] = global_clustering

    report_per_node("Betweenness Centrality", nx.betweenness_centrality(G))
    report_per_node("Closeness Centrality", nx.closeness_centrality(G))

    # networkx refuses to compute reciprocity on undirected graphs, so only
    # call it when edge direction exists; otherwise report that it is undefined.
    print("\nReciprocity:")
    if G.is_directed():
        reciprocity = nx.reciprocity(G)
        print(reciprocity)
    else:
        reciprocity = None
        print("Not defined for undirected graphs")
    measures["Reciprocity"] = reciprocity

    transitivity = nx.transitivity(G)
    print("\nTransitivity:")
    print(transitivity)
    measures["Transitivity"] = transitivity

    return measures

In [None]:
# Function to visualize centrality measures
def visualize_centrality_measures_snap(G, centrality_measure):
    """
    Compute one centrality measure with SNAP and draw the graph to ``graph.png``.

    The drawing labels each node with its id and uses ``centrality_measure``
    as the plot description. The computed values are not mapped onto the
    drawing; they are returned so the caller can use them.

    Args:
        G: A SNAP graph (e.g. one returned by ``snap.LoadEdgeList``).
        centrality_measure: One of "Degree Centrality",
            "Eigenvector Centrality", "Katz Centrality",
            "PageRank Centrality", "Betweenness Centrality",
            "Closeness Centrality", "Reciprocity" or "Transitivity".

    Returns:
        The per-node values (a ``snap.TIntFltH`` or a dict keyed by node id),
        or ``None`` when ``centrality_measure`` is not recognised.
    """
    if centrality_measure == "Degree Centrality":
        centrality_values = snap.TIntFltH()
        for node in G.Nodes():
            centrality_values[node.GetId()] = G.GetDegreeCentr(node.GetId())
    elif centrality_measure == "Eigenvector Centrality":
        centrality_values = snap.TIntFltH()
        snap.GetEigenVectorCentr(G, centrality_values)
    elif centrality_measure == "Katz Centrality":
        # NOTE(review): could not confirm that snap exposes GetKatzCentr;
        # verify against the installed snap version before relying on this.
        centrality_values = snap.TIntFltH()
        snap.GetKatzCentr(G, centrality_values)
    elif centrality_measure == "PageRank Centrality":
        centrality_values = snap.TIntFltH()
        snap.GetPageRank(G, centrality_values)
    elif centrality_measure == "Betweenness Centrality":
        # SNAP fills two output tables: node scores and edge scores.
        # Only the node scores are kept here.
        centrality_values = snap.TIntFltH()
        edge_betweenness = snap.TIntPrFltH()
        snap.GetBetweennessCentr(G, centrality_values, edge_betweenness, 1.0)
    elif centrality_measure == "Closeness Centrality":
        # SNAP computes closeness for one node id at a time.
        centrality_values = snap.TIntFltH()
        for node in G.Nodes():
            centrality_values[node.GetId()] = snap.GetClosenessCentr(
                G, node.GetId()
            )
    elif centrality_measure == "Reciprocity":
        # NOTE(review): could not confirm that snap exposes GetReciprocity;
        # verify against the installed snap version before relying on this.
        reciprocity = snap.GetReciprocity(G)
        # Graph-level value, broadcast to every node id.
        centrality_values = {node.GetId(): reciprocity for node in G.Nodes()}
    elif centrality_measure == "Transitivity":
        transitivity = snap.GetClustCf(G)
        # Graph-level value, broadcast to every node id.
        centrality_values = {node.GetId(): transitivity for node in G.Nodes()}
    else:
        print("Invalid centrality measure:", centrality_measure)
        return None

    # Draw the graph with node ids as labels
    labels = snap.TIntStrH()
    for node in G.Nodes():
        labels[node.GetId()] = str(node.GetId())
    snap.DrawGViz(G, snap.gvlDot, "graph.png", centrality_measure, labels)

    return centrality_values


In [None]:
# Function to visualize centrality measures
def visualize_centrality_measures(G, centrality_measures, num_nodes_to_keep=100):
    """
    Draw a random node-induced subgraph once per requested measure.

    Nodes are coloured and sized by the measure's value. Graph-level
    measures (global clustering, reciprocity, transitivity) yield one
    number, which is applied uniformly to every node.

    Args:
        G: A networkx graph.
        centrality_measures: Iterable of measure names. Unknown names are
            reported and skipped.
        num_nodes_to_keep: Upper bound on the number of randomly sampled
            nodes; capped at the number of nodes in ``G``.

    Returns:
        None. Each plot is displayed via matplotlib.
    """
    # random.sample needs a real sequence (a NodeView is rejected on recent
    # Python versions) and a sample size no larger than the population.
    all_nodes = list(G.nodes())
    sample_size = min(num_nodes_to_keep, len(all_nodes))
    if sample_size <= 0:
        print("No nodes to visualize.")
        return
    subgraph = G.subgraph(random.sample(all_nodes, sample_size))

    # Measures that return a {node: value} mapping
    per_node_measures = {
        "Degree Centrality": nx.degree_centrality,
        "Eigenvector Centrality":
            lambda graph: nx.eigenvector_centrality(graph, max_iter=1000),
        "Katz Centrality": nx.katz_centrality,
        "PageRank Centrality": nx.pagerank,
        "Local Clustering Coefficients": nx.clustering,
        "Betweenness Centrality": nx.betweenness_centrality,
        "Closeness Centrality": nx.closeness_centrality,
    }
    # Measures that return a single number for the whole graph
    graph_level_measures = {
        "Global Clustering Coefficient": nx.average_clustering,
        "Reciprocity": nx.reciprocity,
        "Transitivity": nx.transitivity,
    }

    for centrality_measure in centrality_measures:
        if centrality_measure in per_node_measures:
            centrality_values = per_node_measures[centrality_measure](subgraph)
        elif centrality_measure in graph_level_measures:
            # networkx refuses to compute reciprocity on undirected graphs.
            if centrality_measure == "Reciprocity" and not subgraph.is_directed():
                print("Reciprocity is not defined for undirected graphs; skipping.")
                continue
            graph_value = graph_level_measures[centrality_measure](subgraph)
            # Broadcast the scalar so the per-node lookups below work.
            centrality_values = dict.fromkeys(subgraph.nodes(), graph_value)
        else:
            print("Invalid centrality measure:", centrality_measure)
            continue

        # Define node colors and sizes based on centrality values
        node_colors = [centrality_values[node] for node in subgraph.nodes()]
        node_sizes = [1000 * centrality_values[node] for node in subgraph.nodes()]

        # Draw the graph with node attributes
        pos = nx.spring_layout(subgraph)
        nx.draw(
            subgraph,
            pos,
            node_color=node_colors,
            node_size=node_sizes,
            with_labels=True,
        )
        plt.title(centrality_measure)
        plt.show()


In [None]:
# Main function
if __name__ == "__main__":
    # Load datasets.
    # Raw strings keep the Windows backslashes literal; in a normal string
    # sequences such as "\S", "\d" and "\o" are invalid escapes that newer
    # Python versions warn about.
    # NOTE(review): the notebook header lists the Air Traffic Control network
    # as a chosen dataset, but the first path points at the opsahl-powergrid
    # files. Confirm which dataset is intended.
    dataset1_path = r"D:\SNA_project\SNA_project\download.tsv.opsahl-powergrid\opsahl-powergrid\out.opsahl-powergrid"
    dataset2_path = r"D:\SNA_project\SNA_project\download.tsv.moreno_propro\moreno_propro\out.moreno_propro_propro"
    G1 = load_Undirected_Graph(dataset1_path)
    G2 = load_Undirected_Graph(dataset2_path)

    # SNAP versions of the same graphs (source and destination ids are read
    # from columns 0 and 1)
    G1_snap = snap.LoadEdgeList(snap.PUNGraph, dataset1_path, 0, 1)
    G2_snap = snap.LoadEdgeList(snap.PUNGraph, dataset2_path, 0, 1)


    # Summarize network statistics
    print("Dataset 1 Statistics:")
    summarize_network_stats(G1)
    print("\nDataset 2 Statistics:")
    summarize_network_stats(G2)

    # Calculate centrality measures
    print("Dataset 1 centrality measures:")
    centrality_measures_1 = calculate_centrality_measures(G1)
    print("\nDataset 2 centrality measures:")
    centrality_measures_2 = calculate_centrality_measures(G2)


    # Optional visualization step, currently disabled:
    # centrality_measures = [
    #     "Degree Centrality", "Eigenvector Centrality", "Katz Centrality",
    #     "PageRank Centrality", "Betweenness Centrality", "Closeness Centrality",
    #     "Reciprocity", "Transitivity"
    # ]
    # #print(f"Visualizing {centrality_measure} for Dataset 1")
    # visualize_centrality_measures(G1, centrality_measures)
    # #print(f"Visualizing {centrality_measure} for Dataset 2")
    # visualize_centrality_measures(G2, centrality_measures)
