In [2]:
import networkx as nx
from networkx.algorithms import approximation
import numpy as np
import pickle
import math
import random

In [3]:
# Graph generation distributions used for random datasets

# Some of the Erdos-Renyi distributions are named according to their edge
# probability falling above or below a threshold function for a certain property.
# More precisely, an edge probability of p = ln(n)/n is a sharp threshold function
# for connectedness, and for any k>=3, an edge probability of p = n^(-1) is a
# threshold function for containing a k-cycle.

def gen_connected(n, m):
  """
  Generate m Erdos-Renyi graphs with n nodes whose edge probability is
  2*ln(n)/n, i.e. twice the ln(n)/n connectedness threshold.
  Returns a list of NetworkX graphs.
  """
  edge_prob = 2 * math.log(n) / n
  return erdos_renyi_gen(n, edge_prob, m)

def gen_disconnected(n, m):
  """
  Generate m Erdos-Renyi graphs with n nodes whose edge probability is
  0.6*ln(n)/n, i.e. below the ln(n)/n connectedness threshold.
  Returns a list of NetworkX graphs.
  """
  edge_prob = 0.6 * math.log(n) / n
  return erdos_renyi_gen(n, edge_prob, m)

def gen_constant_prob_25(n, m):
  """
  Generate m Erdos-Renyi graphs with n nodes and a fixed edge probability
  of 0.25, independent of n.
  Returns a list of NetworkX graphs.
  """
  edge_prob = 0.25
  return erdos_renyi_gen(n, edge_prob, m)

def gen_constant_prob_50(n, m):
  """
  Generate m Erdos-Renyi graphs with n nodes and a fixed edge probability
  of 0.50, independent of n.
  Returns a list of NetworkX graphs.
  """
  edge_prob = 0.50
  return erdos_renyi_gen(n, edge_prob, m)

def gen_contain_cycle(n, m):
  """
  Generate m Erdos-Renyi graphs with n nodes whose edge probability is
  n^(-0.7), i.e. above the n^(-1) threshold for containing a k-cycle.
  Returns a list of NetworkX graphs.
  """
  edge_prob = n ** (-0.7)
  return erdos_renyi_gen(n, edge_prob, m)

def gen_not_contain_cycle(n, m):
  """
  Generate m Erdos-Renyi graphs with n nodes whose edge probability is
  n^(-1.1), i.e. below the n^(-1) threshold for containing a k-cycle.
  Returns a list of NetworkX graphs.
  """
  edge_prob = n ** (-1.1)
  return erdos_renyi_gen(n, edge_prob, m)

def gen_ws_small_rewire_prob(n, m):
  """
  Generate m Watts-Strogatz graphs with n nodes, int(ln(n)^2) joined
  neighbors in the initial configuration, and rewiring probability 0.1.
  Returns a list of NetworkX graphs.
  """
  neighbors = int(math.log(n) ** 2)
  rewire_prob = 0.1
  return watts_strogatz_gen(n, neighbors, rewire_prob, m)

def gen_ws_large_rewire_prob(n, m):
  """
  Generate m Watts-Strogatz graphs with n nodes, int(ln(n)^2) joined
  neighbors in the initial configuration, and rewiring probability 0.4.
  Returns a list of NetworkX graphs.
  """
  neighbors = int(math.log(n) ** 2)
  rewire_prob = 0.4
  return watts_strogatz_gen(n, neighbors, rewire_prob, m)

def gen_ba_small(n, m):
  """
  Generate m Barabasi-Albert graphs with n nodes, attaching int(ln(n))
  edges at each stage.
  Returns a list of NetworkX graphs.

  NOTE(review): int(ln(n)) is 0 for n < 3; confirm the underlying
  generator accepts that before calling with very small n.
  """
  edges_per_stage = int(math.log(n))
  return barabasi_albert_gen(n, edges_per_stage, m)

def gen_ba_large(n, m):
  """
  Generate m Barabasi-Albert graphs with n nodes, attaching
  int(1.5 * ln(n)) edges at each stage.
  Returns a list of NetworkX graphs.
  """
  edges_per_stage = int(1.5 * math.log(n))
  return barabasi_albert_gen(n, edges_per_stage, m)

# Distributions sampled by graph_gen, grouped by model family.
graph_distributions = [
    # Erdos-Renyi
    gen_connected,
    gen_disconnected,
    gen_constant_prob_25,
    gen_constant_prob_50,
    gen_contain_cycle,
    gen_not_contain_cycle,
    # Watts-Strogatz
    gen_ws_small_rewire_prob,
    gen_ws_large_rewire_prob,
    # Barabasi-Albert
    gen_ba_small,
    gen_ba_large,
]

# Dense graph generation
def gen_constant_prob_75(n, m):
  """
  Generate m Erdos-Renyi graphs with n nodes and a fixed edge probability
  of 0.75, independent of n.
  Returns a list of NetworkX graphs.
  """
  edge_prob = 0.75
  return erdos_renyi_gen(n, edge_prob, m)

def gen_constant_prob_90(n, m):
  """
  Generate m Erdos-Renyi graphs with n nodes and a fixed edge probability
  of 0.90, independent of n.
  Returns a list of NetworkX graphs.
  """
  edge_prob = 0.90
  return erdos_renyi_gen(n, edge_prob, m)

In [4]:
# Helpful functions for generating graphs, storing/loading graphs, and checking
# for duplicates

def erdos_renyi_gen(n, p, m):
  """
  Build a list of m Erdos-Renyi graphs, each with n nodes and edge
  probability p.

  Graphs are sampled one after another with nx.erdos_renyi_graph; no seed is
  passed, so each call draws fresh randomness.
  Returns a list of NetworkX graphs (empty when m <= 0).
  """
  return [nx.erdos_renyi_graph(n, p) for _ in range(m)]

def watts_strogatz_gen(n, k, p, m):
  """
  Build a list of m Watts-Strogatz graphs, each with n nodes, k joined
  neighbors in the initial configuration, and rewiring probability p.

  Graphs are sampled one after another with nx.watts_strogatz_graph; no seed
  is passed, so each call draws fresh randomness.
  Returns a list of NetworkX graphs (empty when m <= 0).
  """
  return [nx.watts_strogatz_graph(n, k, p) for _ in range(m)]

def barabasi_albert_gen(n, k, m):
  """
  Build a list of m Barabasi-Albert graphs, each with n nodes and k edges
  attached at each stage.

  Graphs are sampled one after another with nx.barabasi_albert_graph; no seed
  is passed, so each call draws fresh randomness.
  Returns a list of NetworkX graphs (empty when m <= 0).
  """
  return [nx.barabasi_albert_graph(n, k) for _ in range(m)]

def graph_gen(n, M):
  """
  Generates M random graphs with n nodes, sampled as evenly as possible
  between the distributions in graph_distributions.

  Every distribution contributes M // len(graph_distributions) graphs. When M
  is not a multiple of the number of distributions, the first
  M % len(graph_distributions) distributions each contribute one extra graph,
  so that exactly M graphs are produced instead of silently dropping the
  remainder. When M is a multiple, the output is the same as an even split.

  Returns a list of NetworkX graphs, ordered by distribution.
  """
  # int() keeps accepting float M, as the previous int(M / ...) did.
  per_distr, extra = divmod(int(M), len(graph_distributions))
  graphs = []
  for idx, distr in enumerate(graph_distributions):
    # The first `extra` distributions absorb the remainder.
    count = per_distr + (1 if idx < extra else 0)
    graphs.extend(distr(n, count))
  return graphs

def store_graphs(filename, graphs):
  """
  Stores a list of NetworkX graphs in a pickle file.

  Each graph is first converted with nx.to_dict_of_lists into a dictionary
  of adjacency lists; the resulting list of dictionaries is then pickled to
  `filename`, overwriting any existing file.
  """
  adjacency_dicts = [nx.to_dict_of_lists(g) for g in graphs]
  with open(filename, 'wb') as out_file:
    pickle.dump(adjacency_dicts, out_file)

def load_graphs(filename):
  """
  Loads graphs from a pickle file written by store_graphs, i.e. a pickled
  list of adjacency-list dictionaries.

  Only load files from a trusted source: unpickling can execute arbitrary
  code.
  Returns a list of NetworkX graphs.
  """
  with open(filename, 'rb') as in_file:
    adjacency_dicts = pickle.load(in_file)
  graphs = []
  for adjacency in adjacency_dicts:
    graphs.append(nx.Graph(adjacency))
  return graphs

In [8]:
# EXAMPLE
# Build a random dataset of 520 graphs: 40 graphs for each order from 8 to 20

# Fix both RNG seeds up front so that the dataset can be reproduced
random.seed(3396)
np.random.seed(1307)

dataset = []
for n in range(8, 21):
  dataset.extend(graph_gen(n, 40))

dataset_filename = "Random_Dataset_N8-20_Size_520a"
store_graphs(dataset_filename, dataset)