<a href="https://colab.research.google.com/github/AUT-Student/CN-HW1/blob/main/ComplexNetwork_HW1_Q4_Q5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Reference: https://hautahi.com/im_greedycelf

# Graph

In [1]:
from dataclasses import dataclass

@dataclass(frozen=True, eq=True)
class Edge:
  """An immutable, hashable link that points from one node to another.

  Attributes:
    source: Id of the node the edge starts at.
    destination: Id of the node the edge ends at.

  Note: Equality and hashing take both fields in order, so ``Edge(a, b)`` and
  ``Edge(b, a)`` are two different edges. An undirected link therefore has to
  be represented by both of them.
  """
  source: int
  destination: int

In [2]:
class Graph:
  """Directed graph held as a node set, an edge set and per-node successor sets.

  Attributes are plain containers that callers read directly:
    nodes: every node id registered so far.
    edges: every stored edge object.
    neighbour_set: maps a node to the set of destinations of its outgoing edges.
  """

  def __init__(self):
    self.nodes, self.edges = set(), set()
    self.neighbour_set = {}

  def add_node(self, node):
    """Register ``node``; a node that is already known keeps its neighbours."""
    self.nodes.add(node)
    self.neighbour_set.setdefault(node, set())

  def add_edge(self, edge:Edge):
    """Store ``edge`` in the source -> destination direction only.

    Both endpoints are registered as nodes first.
    """
    start, end = edge.source, edge.destination
    for endpoint in (start, end):
      self.add_node(endpoint)

    self.neighbour_set[start].add(end)
    self.edges.add(edge)

  def remove_edge(self, edge:Edge):
    """Delete ``edge``; raises ``KeyError`` if it is not stored.

    The endpoints stay registered as nodes.
    """
    successors = self.neighbour_set[edge.source]
    successors.remove(edge.destination)
    self.edges.remove(edge)

  def is_exist(self, edge:Edge):
    """Tell whether exactly this (source, destination) edge is stored."""
    stored = self.edges
    return edge in stored

  def get_number_nodes(self):
    """Return how many distinct nodes are registered."""
    node_count = len(self.nodes)
    return node_count

  def get_number_edges(self):
    """Return how many distinct edges are stored."""
    edge_count = len(self.edges)
    return edge_count

In [3]:
# Refrence: https://snap.stanford.edu/data/ca-AstroPh.html

!gdown https://snap.stanford.edu/data/ca-AstroPh.txt.gz
!gzip -d ./ca-AstroPh.txt.gz

with open("/content/ca-AstroPh.txt", "r") as f:
  graph = Graph()

  lines = f.readlines()

  for line in lines[4:]:
    nodes = line.split("\t")
    graph.add_edge(Edge(source=int(nodes[0]), destination=int(nodes[1])))

Downloading...
From: https://snap.stanford.edu/data/ca-AstroPh.txt.gz
To: /content/ca-AstroPh.txt.gz
100% 1.45M/1.45M [00:00<00:00, 1.48MB/s]


In [4]:
# Sanity check: number of distinct (source, destination) edges stored after loading.
graph.get_number_edges()

396160

In [5]:
# Sanity check: number of distinct node ids registered in the graph.
graph.get_number_nodes()

18772

# Hill Climbing

In [6]:
import random
import numpy as np
def influence(graph:Graph, initial_nodes,
              probability=0.001, number_realization=1):
  """Estimate how many nodes a seed set reaches under a randomised spread.

  Every realization starts from ``initial_nodes`` and advances in rounds: all
  nodes of the current frontier are marked as reached, then each of their
  not-yet-reached neighbours joins the next frontier with chance
  ``probability`` (one draw per edge). A realization stops as soon as a round
  activates nobody.

  Args:
    graph: Graph whose ``neighbour_set`` maps a node to its successors.
    initial_nodes: Seed nodes; each must be a key of ``graph.neighbour_set``.
    probability: Chance that a single edge activates its destination.
    number_realization: Number of independent simulations to average over.

  Returns:
    Mean number of reached nodes (seeds included) over all realizations.
  """
  reached_counts = []

  for _ in range(number_realization):
    reached = set()
    frontier = initial_nodes

    while len(frontier) != 0:
      # Mark the whole frontier first so nodes inside it cannot re-activate each other.
      reached.update(frontier)

      activated = {
        neighbour
        for spreader in frontier
        for neighbour in graph.neighbour_set[spreader]
        if neighbour not in reached and random.random() < probability
      }
      frontier = list(activated)

    reached_counts.append(len(reached))

  return np.average(reached_counts)

In [13]:
def outbreak(graph:Graph, initial_nodes,
             probability=0.001, number_realization=1,
             number_outbreaks=10):
  """Score a sensor placement by how many nodes stay unaffected by random outbreaks.

  In every realization ``number_outbreaks`` distinct start nodes are drawn at
  random. Each one spreads in rounds (every edge activates its destination
  with chance ``probability``) until nothing new is activated or until a
  sensor node gets activated. Affected nodes are pooled over all outbreaks of
  one realization.

  Args:
    graph: Graph exposing ``nodes`` and ``neighbour_set`` (node -> successors).
    initial_nodes: The sensor nodes.
    probability: Chance that a single edge activates its destination.
    number_realization: Number of independent realizations to average over.
    number_outbreaks: Outbreak start nodes drawn per realization; capped at
      the number of nodes in the graph.

  Returns:
    0.25 times the average (over realizations) number of unaffected nodes.

  Raises:
    ValueError: If ``number_realization`` is smaller than 1.
  """
  if number_realization < 1:
    raise ValueError("number_realization must be at least 1")

  # A set keeps the per-activation sensor check O(1).
  sensor_nodes = set(initial_nodes)

  # random.sample() rejects sets on Python >= 3.11, so draw from a list, and
  # never ask for more start nodes than the graph holds.
  candidates = list(graph.nodes)
  sample_size = min(number_outbreaks, len(candidates))

  # One entry per realization. This must live outside the loop, otherwise only
  # the last realization would be averaged.
  effected_node_numbers = []

  for _ in range(number_realization):
    effected_nodes = set()
    initial_outbreak_nodes = random.sample(candidates, sample_size)

    for outbreak_node in initial_outbreak_nodes:
      influencers = [outbreak_node]
      outbreak_detected = False

      # NOTE(review): a sensor only fires when a neighbour activates it; an
      # outbreak that starts on a sensor is not detected at once - confirm.
      while influencers and not outbreak_detected:
        effected_nodes.update(influencers)
        new_influencers = set()

        for influencer in influencers:
          for neighbour in graph.neighbour_set[influencer]:
            if neighbour not in effected_nodes and random.random() < probability:
              new_influencers.add(neighbour)

              if neighbour in sensor_nodes:
                outbreak_detected = True

        influencers = list(new_influencers)

    effected_node_numbers.append(len(effected_nodes))

  not_effected_node_numbers = len(graph.nodes) - np.average(effected_node_numbers)

  # NOTE(review): 0.25 looks like a fixed reward per unaffected node - confirm.
  return not_effected_node_numbers * 0.25

In [8]:
# Expected spread from six fixed seed nodes, averaged over 10000 simulations.
influence(
  graph=graph,
  initial_nodes=[3, 65540, 65541, 4, 32776, 32777],
  probability=0.01,
  number_realization=10000,
)

8.7486

In [12]:
# Outbreak score when six fixed nodes act as sensors, over 10 realizations.
outbreak(
  graph=graph,
  initial_nodes=[3, 65540, 65541, 4, 32776, 32777],
  probability=0.1,
  number_realization=10,
)

12684.0

In [15]:
# Uniform selection cost: every node of the graph costs exactly 1.
node_costs = dict.fromkeys(graph.nodes, 1)

In [14]:
from copy import deepcopy
import math
def hill_climbing(graph:Graph,
                  budget:float,
                  benefit_function,
                  cost_normal:bool,
                  node_costs:dict,
                  probability=0.001,
                  number_realization=1):
  """Greedily select nodes that maximise ``benefit_function`` within a budget.

  In each round every node that is still affordable is tried as an addition
  to the current selection, and the best-scoring one is kept. The loop ends
  when the budget is used up or no affordable node is left. The selection is
  printed after every round.

  Args:
    graph(Graph): The graph; only ``graph.nodes`` is read here.
    budget(float): The maximum threshold for the summed cost of selected nodes.
    benefit_function(function): The benefit function (f). It is called with the
      keywords ``graph``, ``initial_nodes``, ``probability`` and
      ``number_realization``.
    cost_normal(bool): If true, candidates are ranked by marginal gain per unit
      cost, ``(f(S + [v]) - f(S)) / cost(v)``. Otherwise they are ranked by
      ``f(S + [v])``, which orders candidates exactly like the marginal gain.
    node_costs(dict): The selection cost of each node.
    probability(float): Forwarded to ``benefit_function``.
    number_realization(int): Forwarded to ``benefit_function``.

  Returns:
    list: The selected nodes in the order they were picked.

  Raises:
    KeyError: If a node of the graph has no entry in ``node_costs``.
    ZeroDivisionError: If ``cost_normal`` is true and a candidate costs 0.
  """

  def evaluate(nodes):
    return benefit_function(graph=graph,
                            initial_nodes=nodes,
                            probability=probability,
                            number_realization=number_realization)

  top_nodes = list()
  selected = set()  # mirrors top_nodes for O(1) membership tests
  remain_budget = budget

  while(True):
    best_marginal_benefit = -math.inf
    best_new_node = None
    # f(S) of the current selection. Evaluated lazily, and only when scores
    # are cost-normalised, so the plain mode makes no extra benefit calls.
    base_benefit = None

    for node in graph.nodes:
      if node in selected or node_costs[node] > remain_budget: continue

      # Concatenation builds a fresh list, so top_nodes is never shared.
      marginal_benefit = evaluate(top_nodes + [node])

      if cost_normal:
        if base_benefit is None:
          base_benefit = evaluate(list(top_nodes))
        # Normalise the gain over the current selection, not the total
        # benefit; dividing the total would mostly reward cheap nodes.
        marginal_benefit = (marginal_benefit - base_benefit) / node_costs[node]

      if marginal_benefit > best_marginal_benefit:
        best_marginal_benefit = marginal_benefit
        best_new_node = node

    if best_new_node is not None:
      top_nodes.append(best_new_node)
      selected.add(best_new_node)
      remain_budget -= node_costs[best_new_node]
      print(f"top_nodes = {top_nodes}")

    if remain_budget <= 0 or best_new_node is None:
      break

  return top_nodes

In [None]:
# Greedy seed selection with the influence spread as the objective and a
# budget of 10; gains are not divided by node cost.
hill_climbing(
  graph=graph,
  benefit_function=influence,
  budget=10,
  node_costs=node_costs,
  cost_normal=False,
  probability=0.01,
  number_realization=5,
)

top_nodes = [47968]
top_nodes = [47968, 24650]
top_nodes = [47968, 24650, 77858]
top_nodes = [47968, 24650, 77858, 111161]
top_nodes = [47968, 24650, 77858, 111161, 93504]
top_nodes = [47968, 24650, 77858, 111161, 93504, 99499]


In [None]:
# NOTE(review): bare list literal with no producing code in view - presumably a
# seed set copied from an earlier hill_climbing run (the same list is passed to
# influence() in a later cell); confirm and record the parameters used.
[44322, 34023, 40773, 56914, 3169, 70866, 56669, 15615, 95784, 92163]


In [None]:
# NOTE(review): bare list literal with no producing code in view - presumably a
# seed set copied from an earlier run (its node degrees are printed in a later
# cell); confirm and record the parameters used.
[124529, 53213, 113172, 84641, 86566, 52136, 75415, 5187, 118403, 11856]


In [None]:
# NOTE(review): bare list literal with no producing code in view - presumably a
# seed set from a run with number_realization=5 (the list is re-used in later
# cells); confirm.
[111161, 126106, 48436, 76406, 50911, 32009, 41554, 64974, 63401, 39542]
# Binds a module-level name only; it is not passed to any call in this cell.
number_realization=5

In [None]:
# Out-degree (number of stored successors) of each node in this seed list.
for node in (124529, 53213, 113172, 84641, 86566, 52136, 75415, 5187, 118403, 11856):
  successors = graph.neighbour_set[node]
  print(len(successors))

224
504
34
2
29
4
6
15
9
9


In [None]:
# Out-degree (number of stored successors) of each node in this seed list.
for node in (111161, 126106, 48436, 76406, 50911, 32009, 41554, 64974, 63401, 39542):
  successors = graph.neighbour_set[node]
  print(len(successors))

350
1
2
7
41
6
153
9
7
8


In [None]:
# Average spread (5 simulations) reached from this ten-node seed list.
influence(
  graph=graph,
  initial_nodes=[111161, 126106, 48436, 76406, 50911, 32009, 41554, 64974, 63401, 39542],
  probability=0.01,
  number_realization=5,
)

44.0

In [None]:
# Average spread (5 simulations) reached from this ten-node seed list.
influence(
  graph=graph,
  initial_nodes=[44322, 34023, 40773, 56914, 3169, 70866, 56669, 15615, 95784, 92163],
  probability=0.01,
  number_realization=5,
)

51.0

In [None]:
# Baseline: average spread from ten uniformly drawn seed nodes.
# random.sample() needs a sequence (sets raise TypeError on Python >= 3.11).
influence(graph=graph, initial_nodes=random.sample(list(graph.nodes), 10), probability=0.01, number_realization=5)

12.8