<a href="https://colab.research.google.com/github/CO17502/Graph-Analysis/blob/main/Paper_implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Greedy set covering on the NIPS dataset**

In [None]:
import pandas as pd
import numpy as np

# Read the two CSV exports into pandas DataFrames.
papers_df = pd.read_csv('Papers.csv')
authors_df = pd.read_csv('Authors.csv')

# Group author IDs by paper ID: {paper_id: {author_id, ...}}.
# NOTE(review): paper_id and author_id are both taken from the same 'Id'
# column of Authors.csv, so every key maps to a set holding only itself.
# Presumably one of them should come from a paper/author link column --
# confirm against the dataset schema.
paper_author_dict = {}
for index, row in authors_df.iterrows():
    paper_id = row['Id']
    author_id = row['Id']
    paper_author_dict.setdefault(paper_id, set()).add(author_id)

# Candidate pool for the greedy search: every distinct value of 'Id'.
all_authors = set(authors_df['Id'])

def get_coverage_cost(node_set):
    """Return how many papers are not fully covered by ``node_set``.

    A paper counts as covered only when every author recorded for it in the
    module-level ``paper_author_dict`` is contained in ``node_set``.

    Args:
        node_set: Collection of author IDs to evaluate.

    Returns:
        The number of papers that have at least one author outside
        ``node_set``.
    """
    # Keys of the dict are unique, so counting entries equals counting papers.
    return sum(
        1
        for paper_authors in paper_author_dict.values()
        if not paper_authors.issubset(node_set)
    )

def greedy_algorithm(k):
    """Greedily select up to ``k`` authors that minimise the uncovered papers.

    Starting from an empty seed set, each round tries every author not yet
    selected, scores the enlarged set with ``get_coverage_cost`` and keeps the
    author giving the lowest cost. Ties are resolved in favour of the first
    candidate encountered while iterating the candidate set.

    Args:
        k: Number of authors to select.

    Returns:
        A set of selected author IDs. It holds ``k`` elements unless fewer
        than ``k`` authors exist in ``all_authors``, in which case every
        author is returned.
    """
    seed_set = set()
    while len(seed_set) < k:
        candidates = all_authors - seed_set
        if not candidates:
            # All authors are already selected. Without this guard the loop
            # would add None to the result and then spin forever, because the
            # seed set could never reach size k.
            break

        best_node = None
        best_cost = np.inf
        for node in candidates:
            cost = get_coverage_cost(seed_set | {node})
            if cost < best_cost:
                best_node = node
                best_cost = cost
                # Progress trace: emitted each time a better candidate is found.
                print(best_cost)
        seed_set.add(best_node)
    return seed_set

# Example usage: ask the greedy algorithm for k = 10 authors and print the
# resulting seed set.
k = 10
seed_set = greedy_algorithm(k)
print("Seed set with highest coverage:", seed_set)


Example 2

**Enhanced degree centrality on Facebook dataset**

In [None]:
import networkx as nx
import pandas as pd

def enhanced_degree_centrality(G):
    """Compute an enhanced degree centrality (EDC) score for every node.

    For each node the score is its own degree centrality plus the summed
    degree centrality of the nodes yielded by ``G.neighbors(node)``, divided
    by ``2 * n - 1`` where ``n`` is the number of nodes in ``G``.

    Args:
        G: A networkx graph.

    Returns:
        A dict mapping each node of ``G`` to its EDC score.
    """
    degree = nx.degree_centrality(G)
    denominator = 2 * len(G) - 1

    scores = {}
    for v in G:
        neighbour_total = sum(degree[u] for u in G.neighbors(v))
        scores[v] = (degree[v] + neighbour_total) / denominator
    return scores


def main(path="/content/ca-AstroPh.txt", header_lines=4, top_n=10):
    """Load an edge list, score its nodes with EDC and print the top ones.

    The file is expected to start with ``header_lines`` lines that are
    skipped, followed by one edge per line written as two
    whitespace-separated node labels. Node labels are kept as strings.

    Args:
        path: Location of the edge-list text file.
        header_lines: Number of leading lines to discard before parsing edges.
        top_n: How many of the highest-scoring nodes to print.

    Raises:
        ValueError: If a non-blank edge line does not contain exactly two
            whitespace-separated fields.
    """
    G = nx.DiGraph()

    # The with-block closes the file on exit, so no explicit close() is needed.
    with open(path) as f:
        for _ in range(header_lines):
            f.readline()

        # Stream the remaining lines instead of loading them all into memory.
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at end of file),
                # which would otherwise break the two-value unpacking below.
                continue
            source, target = fields
            G.add_edge(source, target)

    # Calculate EDC scores for all nodes in the graph.
    edc_scores = enhanced_degree_centrality(G)

    # Print the most influential nodes, highest EDC score first.
    top_nodes = sorted(edc_scores.items(), key=lambda x: x[1], reverse=True)[:top_n]
    for node, score in top_nodes:
        print("Node {}: EDC score = {}".format(node, score))

# Run the EDC analysis when this cell/script is executed.
main()
