SNA Project Round - 1 

Team: TekloTekloTingTiiiing

Members: 

1) 21ucs204 - Sindhi Krish Kamal
2) 21ucs158 - Prashant Singh Shekhawat
3) 21ucs237 - Yashodhan Sonune
4) 21ucs183 - Sarvagya Acharya

Datasets chosen:
- US Power Grid ( http://konect.cc/networks/opsahl-powergrid/ )
- Yeast network ( http://konect.cc/networks/moreno_propro/ )


In [None]:
#importing all the important libraries
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import random


In [None]:
#Function to load the undirected datasets
def load_Undirected_Graph(file_path):
    """
    Load an undirected graph from a whitespace-separated edge-list file.

    KONECT downloads (the ``out.*`` files used here) begin with header lines
    that start with ``%``. networkx only recognises ``#`` as a comment marker
    by default, so those header lines would otherwise be parsed as if they
    were edges. They are dropped here before the edge list is parsed.

    Args:
        file_path: Path to a plain-text, UTF-8 encoded edge list with one
            ``source target`` pair per line.

    Returns:
        networkx.Graph: Undirected graph; node labels are kept as strings.
    """
    with open(file_path, encoding="utf-8") as edge_file:
        # Skip KONECT '%' header/comment lines; everything else is passed on.
        edge_lines = (
            line for line in edge_file if not line.lstrip().startswith("%")
        )
        # parse_edgelist keeps the usual '#' comment handling and consumes
        # the generator eagerly, so the file is still open while it is read.
        return nx.parse_edgelist(edge_lines)

In [None]:
# Function to summarize network statistics
def summarize_network_stats(G):
    """
    Report degree statistics for a graph

    Shows a histogram and a KDE plot of the node degrees, then prints the
    maximum, minimum, mean and standard deviation of the degrees together
    with the node and edge counts of G.
    """
    # One degree value per node, in the graph's own node order
    node_degrees = [deg for _, deg in G.degree()]

    highest = max(node_degrees)
    lowest = min(node_degrees)
    mean_degree = sum(node_degrees) / len(G)
    spread = pd.Series(node_degrees).std()

    # Histogram of the raw degree values
    plt.hist(node_degrees, bins=20, color='skyblue')
    plt.title("Degree Distribution")
    plt.xlabel("Degree")
    plt.ylabel("Frequency")
    plt.show()

    # Smoothed (kernel density estimate) view of the same values
    sns.kdeplot(node_degrees, color='skyblue', fill=True)
    plt.title('Node Degree Distribution (KDE plot)')
    plt.xlabel('Degree')
    plt.ylabel('Density')
    plt.show()

    # Text summary, printed in a fixed order
    summary_rows = [
        ("Max Degree:", highest),
        ("Min Degree:", lowest),
        ("Average Degree:", mean_degree),
        ("Standard Deviation of Degree Distribution:", spread),
        ("Total number of Nodes in the graph:", G.number_of_nodes()),
        ("Total number of Edges in the graph:", G.number_of_edges()),
    ]
    for label, value in summary_rows:
        print(label, value)



In [None]:
# Function to calculate centrality measures
def calculate_centrality_measures(G):
    """
    Calculate, print and plot centrality measures for a graph.

    For degree, eigenvector, Katz, PageRank, betweenness and closeness
    centrality, and for the local clustering coefficients, the per-node
    values are printed and shown as a histogram and as a node-ID scatter
    plot. The average clustering coefficient, reciprocity and transitivity
    are printed as single values.

    Args:
        G: networkx graph to analyse.

    Returns:
        pandas.DataFrame: One row per node and one column for each of the
        six centrality measures (clustering coefficients are not included).
    """
    # Degree centrality
    degree_centrality = nx.degree_centrality(G)
    print("Degree Centrality:")
    print(degree_centrality)
    _plot_node_scores(degree_centrality, "Degree Centrality")

    # Eigenvector centrality (iteration cap raised to 1000)
    eigenvector_centrality = nx.eigenvector_centrality(G, max_iter=1000)
    print("\nEigenvector Centrality:")
    print(eigenvector_centrality)
    _plot_node_scores(eigenvector_centrality, "Eigenvector Centrality")

    # Katz centrality
    katz_centrality = nx.katz_centrality(G)
    print("\nKatz Centrality:")
    print(katz_centrality)
    _plot_node_scores(katz_centrality, "Katz Centrality")

    # PageRank centrality
    pagerank_centrality = nx.pagerank(G)
    print("\nPageRank Centrality:")
    print(pagerank_centrality)
    _plot_node_scores(pagerank_centrality, "PageRank Centrality")

    # Clustering coefficients
    local_clustering = nx.clustering(G)
    global_clustering = nx.average_clustering(G)
    print("\nLocal Clustering Coefficients:")
    print(local_clustering)
    print("\nGlobal Clustering Coefficient:")
    print(global_clustering)
    _plot_node_scores(
        local_clustering,
        "Local Clustering Centrality",
        hist_label="Local Clustering Coefficients",
    )

    # Betweenness centrality
    betweenness_centrality = nx.betweenness_centrality(G)
    print("\nBetweenness Centrality:")
    print(betweenness_centrality)
    _plot_node_scores(betweenness_centrality, "Betweenness Centrality")

    # Closeness centrality
    closeness_centrality = nx.closeness_centrality(G)
    print("\nCloseness Centrality:")
    print(closeness_centrality)
    _plot_node_scores(closeness_centrality, "Closeness Centrality")

    # Reciprocity: networkx does not implement it for undirected graphs or
    # multigraphs and raises instead, which previously aborted the whole
    # function before the DataFrame could be returned.
    print("\nReciprocity:")
    try:
        print(nx.reciprocity(G))
    except nx.NetworkXNotImplemented:
        print("Not defined for undirected graphs or multigraphs")

    # Transitivity
    transitivity = nx.transitivity(G)
    print("\nTransitivity:")
    print(transitivity)

    return pd.DataFrame({
        "Degree Centrality": degree_centrality,
        "Eigenvector Centrality": eigenvector_centrality,
        "Katz Centrality": katz_centrality,
        "PageRank Centrality": pagerank_centrality,
        "Betweenness Centrality": betweenness_centrality,
        "Closeness Centrality": closeness_centrality,
    })


def _plot_node_scores(scores, label, hist_label=None):
    """
    Show a histogram and a node-ID scatter plot for one per-node measure.

    Args:
        scores: Mapping of node ID to the measure's value.
        label: Name used for the scatter plot title and its y axis.
        hist_label: Name used for the histogram title and its x axis;
            defaults to ``label``.
    """
    if hist_label is None:
        hist_label = label
    node_ids = list(scores.keys())
    values = list(scores.values())

    # Distribution of the values
    plt.hist(values, bins=20, color='skyblue')
    plt.title(f"{hist_label} Distribution")
    plt.xlabel(hist_label)
    plt.ylabel("Frequency")
    plt.show()

    # Value of each individual node
    plt.scatter(node_ids, values, color='black', alpha=0.5)
    plt.title(f"Node ID vs {label}")
    plt.xlabel("Node ID")
    plt.ylabel(label)
    plt.show()

In [None]:
# Main function
if __name__ == "__main__":
    # Load datasets
    # Raw strings keep the Windows backslashes literal. Without the r-prefix,
    # sequences such as "\S", "\d", "\o" and "\m" are invalid escape
    # sequences and trigger a SyntaxWarning on current Python versions.
    dataset1_path = r"D:\SNA_project\SNA_project\download.tsv.opsahl-powergrid\opsahl-powergrid\out.opsahl-powergrid"
    dataset2_path = r"D:\SNA_project\SNA_project\download.tsv.moreno_propro\moreno_propro\out.moreno_propro_propro"
    G1 = load_Undirected_Graph(dataset1_path)
    G2 = load_Undirected_Graph(dataset2_path)

    # Optional: draw the edges of each network and save the picture
    # nx.draw_networkx_edges(G1, pos=nx.spring_layout(G1))
    # plt.savefig('powergrid.png')

    # nx.draw_networkx_edges(G2, pos=nx.spring_layout(G2))
    # plt.savefig('yeast.png')

    # Summarize network statistics
    print("Dataset 1 Statistics:")
    summarize_network_stats(G1)
    print("\nDataset 2 Statistics:")
    summarize_network_stats(G2)

    # Calculate centrality measures for the first network
    print("Dataset 1 centrality measures:")
    centrality_measures_1 = calculate_centrality_measures(G1)
    # describe() gives count/mean/std/min/quartiles/max for every column
    centrality_stats1 = centrality_measures_1.describe()
    # Plot heatmap of the descriptive statistics
    plt.figure(figsize=(10, 6))
    sns.heatmap(centrality_stats1, annot=True, fmt=".2f", cmap="YlGnBu")
    plt.title("Descriptive Statistics of Centrality Measures for Network-1")
    plt.show()

    # Same analysis for the second network
    print("\nDataset 2 centrality measures:")
    centrality_measures_2 = calculate_centrality_measures(G2)
    centrality_stats2 = centrality_measures_2.describe()
    # Plot heatmap of the descriptive statistics
    plt.figure(figsize=(10, 6))
    sns.heatmap(centrality_stats2, annot=True, fmt=".2f", cmap="YlGnBu")
    plt.title("Descriptive Statistics of Centrality Measures for Network-2")
    plt.show()