In [1]:
#run these commands to install graph-tool for colab
!pip install -q condacolab
import condacolab
condacolab.install()
!mamba install -q graph-tool

✨🍰✨ Everything looks OK!


In [2]:
#other imports
import graph_tool.all as gt
import matplotlib.pyplot as plt
import argparse
import os
import datetime
import numpy as np
from scipy import stats

In [3]:
#load a graphml file and split into four subgraphs based on game type
graphml_file = 'converted_13wtl.graphml'
gX = gt.load_graph(graphml_file)
gB = gt.GraphView(gX, efilt=gX.edge_properties['game type'].t(lambda a : a=='B',value_type="bool"))
gB = gt.GraphView(gB, vfilt=gB.degree_property_map('total').t(lambda a : a>0,value_type="bool"))
gC = gt.GraphView(gX, efilt=gX.edge_properties['game type'].t(lambda a : a=='C',value_type="bool"))
gC = gt.GraphView(gC, vfilt=gC.degree_property_map('total').t(lambda a : a>0,value_type="bool"))
gN = gt.GraphView(gX, efilt=gX.edge_properties['game type'].t(lambda a : a=='N',value_type="bool"))
gN = gt.GraphView(gN, vfilt=gN.degree_property_map('total').t(lambda a : a>0,value_type="bool"))
gZ = gt.GraphView(gX, efilt=gX.edge_properties['game type'].t(lambda a : a=='Z',value_type="bool"))
gZ = gt.GraphView(gZ, vfilt=gZ.degree_property_map('total').t(lambda a : a>0,value_type="bool"))
display(gX,gB,gC,gN,gZ)
for g,name in [(gX,''),(gB,' - B'),(gC,' - C'),(gN,' - N'),(gZ,' - Z')]:
  #display minimum and maximum degree for each graph - min should be positive (every subgraph had the isoalted vertices removed)
  display(min(g.degree_property_map("total")),max(g.degree_property_map("total")))

<Graph object, directed, with 4922 vertices and 121332 edges, 1 internal vertex property, 3 internal edge properties, at 0x7af1636a21d0>

<GraphView object, directed, with 1858 vertices and 32986 edges, 1 internal vertex property, 3 internal edge properties, edges filtered by <EdgePropertyMap object with value type 'bool', for Graph 0x7af163643c90, at 0x7af1636c3d10>, vertices filtered by <VertexPropertyMap object with value type 'bool', for Graph 0x7af163643c90, at 0x7af1636c3c10>, at 0x7af163643c90>

<GraphView object, directed, with 3526 vertices and 41796 edges, 1 internal vertex property, 3 internal edge properties, edges filtered by <EdgePropertyMap object with value type 'bool', for Graph 0x7af1636c3e50, at 0x7af1636c2650>, vertices filtered by <VertexPropertyMap object with value type 'bool', for Graph 0x7af1636c3e50, at 0x7af1636b00d0>, at 0x7af1636c3e50>

<GraphView object, directed, with 113 vertices and 266 edges, 1 internal vertex property, 3 internal edge properties, edges filtered by <EdgePropertyMap object with value type 'bool', for Graph 0x7af1666ae810, at 0x7af1636b06d0>, vertices filtered by <VertexPropertyMap object with value type 'bool', for Graph 0x7af1666ae810, at 0x7af1636b0750>, at 0x7af1666ae810>

<GraphView object, directed, with 3314 vertices and 46284 edges, 1 internal vertex property, 3 internal edge properties, edges filtered by <EdgePropertyMap object with value type 'bool', for Graph 0x7af1667534d0, at 0x7af1636b0cd0>, vertices filtered by <VertexPropertyMap object with value type 'bool', for Graph 0x7af1667534d0, at 0x7af1636b0290>, at 0x7af1667534d0>

1

1729

1

1728

1

805

1

86

1

1582

In [4]:
#run for the original graph and every subgraph
for g,gametitle in [(gX,'all games'),(gB,'bullet games'),(gC,'classical games'),(gN,'correspondence games'),(gZ,'blitz games')]:

  #set the target directory here
  output_directory = 'result_13wtl_'+gametitle

  if not os.path.exists(output_directory):
      os.makedirs(output_directory)

  degrees = g.degree_property_map("total")
  min_degree = min(degrees)
  max_degree = max(degrees)

  indegree = g.degree_property_map("in")
  min_indegree = min(indegree)
  max_indegree = max(indegree)

  outdegree = g.degree_property_map("out")
  min_outdegree = min(outdegree)
  max_outdegree = max(outdegree)

  mean_degree = 2 * g.num_edges() / g.num_vertices()
  mean_indegree = g.num_edges() / g.num_vertices()
  density = mean_degree / (g.num_vertices() - 1)

  global_clustering_coefficient = gt.global_clustering(g)
  reciprocity = gt.edge_reciprocity(g)

  with open(f'{output_directory}/log.txt', 'w') as f:
      print(f'Graph located at: {graphml_file}',file=f)
      print(f'starting time: {datetime.time()}',file=f)
      print('==========================',file=f)
      print(f'Number of vertices: {g.num_vertices()}', file=f)
      print(f'Number of edges: {g.num_edges()}', file=f)
      print(f'Min degree: {min_degree}', file=f)
      print(f'Max degree: {max_degree}', file=f)
      print(f'Mean degree: {mean_degree}', file=f)
      print(f'Min indegree: {min_indegree}', file=f)
      print(f'Max indegree: {max_indegree}', file=f)
      print(f'Min outdegree: {min_outdegree}', file=f)
      print(f'Max outdegree: {max_outdegree}', file=f)
      print(f'Mean indegree = Mean outdegree: {mean_indegree}', file=f)
      print(f'Density: {density}', file=f)
      print(f'Global clustering coefficient: {global_clustering_coefficient}', file=f)
      print(f'Reciprocity: {reciprocity}', file=f)
      print('==========================', file=f)
      print(f'ending time: {datetime.time()}',file=f)

      print('Statistics log created successfully.')
      print(f'Output file: {output_directory}.txt')

  edge_chance = g.num_edges()/(g.num_vertices()*(g.num_vertices()-1))

  # The following code is for plotting the degree distribution of the graph
  degree_distribution = g.degree_property_map("total")
  fig = plt.figure(figsize=(10, 6))
  plt.hist(degree_distribution.a, bins=50, color='blue', alpha=0.7,density=True)
  x = np.array(range(min(degree_distribution.a),max(degree_distribution.a)+1))
  plt.plot(x, stats.binom(2*(g.num_vertices()-1),edge_chance).pmf(x), 'r-', label='Expected degree density for Erdős–Rényi random graph')
  plt.title(f'Degree Distribution ({gametitle})')
  plt.xlabel('Degree')
  plt.ylabel('Frequency')
  plt.grid()
  #plt.yscale('log')
  fig.legend()
  fig.savefig(f'{output_directory}/degree_distribution {gametitle}.png')
  plt.close()
  display("DEGREES TEST")
  display(stats.kstest(degree_distribution.a, "binom", args=(2*(g.num_vertices()-1),edge_chance)))
  # The following code is for plotting the indegree distribution of the graph
  indegree_distribution = g.degree_property_map("in")
  fig = plt.figure(figsize=(10, 6))
  plt.hist(indegree_distribution.a, bins=50, color='green', alpha=0.7,density=True)
  x = np.array(range(min(indegree_distribution.a),max(indegree_distribution.a)+1))
  plt.plot(x, stats.binom(g.num_vertices()-1,edge_chance).pmf(x), 'r-', label='Expected indegree density for Erdős–Rényi random graph')
  plt.title(f'Indegree Distribution ({gametitle})')
  plt.xlabel('Indegree')
  plt.ylabel('Frequency')
  plt.grid()
  #plt.yscale('log')
  fig.legend()
  plt.savefig(f'{output_directory}/indegree_distribution {gametitle}.png')
  plt.close()
  display("INDEGREES TEST")
  display(stats.kstest(indegree_distribution.a, "binom", args=(g.num_vertices()-1,edge_chance)))
  # The following code is for plotting the outdegree distribution of the graph
  outdegree_distribution = g.degree_property_map("out")
  fig = plt.figure(figsize=(10, 6))
  plt.hist(outdegree_distribution.a, bins=50, color='red', alpha=0.7,density=True)
  x = np.array(range(min(outdegree_distribution.a),max(outdegree_distribution.a)+1))
  plt.plot(x, stats.binom(g.num_vertices()-1,edge_chance).pmf(x), 'r-', label='Expected outdegree density for Erdős–Rényi random graph')
  plt.title(f'Outdegree Distribution ({gametitle})')
  plt.xlabel('Outdegree')
  plt.ylabel('Frequency')
  plt.grid()
  #plt.yscale('log')
  fig.legend()
  plt.savefig(f'{output_directory}/outdegree_distribution {gametitle}.png')
  plt.close()
  display("OUTDEGREES TEST")
  display(stats.kstest(outdegree_distribution.a, "binom", args=(g.num_vertices()-1,edge_chance)))


  #reenable the rest of statistics if we know the expected distribution in a random graph

  continue



  # The following code is for plotting the clustering coefficient distribution of the graph
  clustering_coefficient_distribution = gt.local_clustering(g)
  plt.figure(figsize=(10, 6))
  plt.hist(clustering_coefficient_distribution.a, bins=50, color='purple', alpha=0.7)
  plt.title('Clustering Coefficient Distribution')
  plt.xlabel('Clustering Coefficient')
  plt.ylabel('Frequency')
  plt.grid()
  plt.yscale('log')
  plt.savefig(f'{output_directory}/clustering_coefficient_distribution.png')
  plt.close()
  # # The following code is for plotting the reciprocity distribution of the graph
  # reciprocity_distribution =
  # plt.figure(figsize=(10, 6))
  # plt.hist(reciprocity_distribution.a, bins=50, color='orange', alpha=0.7)
  # plt.title('Reciprocity Distribution')
  # plt.xlabel('Reciprocity')
  # plt.ylabel('Frequency')
  # plt.grid()
  # plt.savefig(f'{output_directory}/reciprocity_distribution.png')
  # plt.close()
  # The following code is for plotting the betweenness centrality distribution of the graph
  betweenness_centrality = gt.betweenness(g)
  vertex_betweenness = betweenness_centrality[0]
  edge_betweenness = betweenness_centrality[1]
  plt.figure(figsize=(10, 6))
  plt.hist(vertex_betweenness.a, bins=50, color='cyan', alpha=0.7)
  plt.title('Vertex Betweenness Centrality Distribution')
  plt.xlabel('Betweenness Centrality')
  plt.ylabel('Frequency')
  plt.grid()
  plt.yscale('log')
  plt.savefig(f'{output_directory}/vertex_betweenness_centrality_distribution.png')
  plt.close()

  plt.figure(figsize=(10, 6))
  plt.hist(edge_betweenness.a, bins=50, color='cyan', alpha=0.7)
  plt.title('Edge Betweenness Centrality Distribution')
  plt.xlabel('Betweenness Centrality')
  plt.ylabel('Frequency')
  plt.grid()
  plt.yscale('log')
  plt.savefig(f'{output_directory}/edge_betweenness_centrality_distribution.png')
  plt.close()
  # The following code is for plotting the closeness centrality distribution of the graph
  closeness_centrality = gt.closeness(g)
  plt.figure(figsize=(10, 6))
  plt.hist(closeness_centrality.a, bins=50, color='magenta', alpha=0.7)
  plt.title('Closeness Centrality Distribution')
  plt.xlabel('Closeness Centrality')
  plt.ylabel('Frequency')
  plt.grid()
  plt.savefig(f'{output_directory}/closeness_centrality_distribution.png')
  plt.close()
  # The following code is for plotting the eigenvector centrality distribution of the graph
  eigenvector_centrality = gt.eigenvector(g)[1]
  plt.figure(figsize=(10, 6))
  plt.hist(eigenvector_centrality.a, bins=50, color='brown', alpha=0.7)
  plt.title('Eigenvector Centrality Distribution')
  plt.xlabel('Eigenvector Centrality')
  plt.ylabel('Frequency')
  plt.grid()
  plt.yscale('log')
  plt.savefig(f'{output_directory}/eigenvector_centrality_distribution.png')
  plt.close()
  # The following code is for plotting the PageRank centrality distribution of the graph
  pagerank_centrality = gt.pagerank(g)
  plt.figure(figsize=(10, 6))
  plt.hist(pagerank_centrality.a, bins=50, color='pink', alpha=0.7)
  plt.title('PageRank Centrality Distribution')
  plt.xlabel('PageRank Centrality')
  plt.ylabel('Frequency')
  plt.grid()
  plt.yscale('log')
  plt.savefig(f'{output_directory}/pagerank_centrality_distribution.png')
  plt.close()



Statistics log created successfully.
Output file: result_13wtl_all games.txt


'DEGREES TEST'

KstestResult(statistic=np.float64(0.7123707219688968), pvalue=np.float64(0.0), statistic_location=np.int32(33), statistic_sign=np.int8(1))

'INDEGREES TEST'

KstestResult(statistic=np.float64(0.6828214992298641), pvalue=np.float64(0.0), statistic_location=np.int32(14), statistic_sign=np.int8(1))

'OUTDEGREES TEST'

KstestResult(statistic=np.float64(0.6909482769624931), pvalue=np.float64(0.0), statistic_location=np.int32(14), statistic_sign=np.int8(1))

Statistics log created successfully.
Output file: result_13wtl_bullet games.txt


'DEGREES TEST'

KstestResult(statistic=np.float64(0.9142309719002929), pvalue=np.float64(0.0), statistic_location=np.int32(20), statistic_sign=np.int8(1))

'INDEGREES TEST'

KstestResult(statistic=np.float64(0.8949506453036866), pvalue=np.float64(0.0), statistic_location=np.int32(8), statistic_sign=np.int8(1))

'OUTDEGREES TEST'

KstestResult(statistic=np.float64(0.9075471507892616), pvalue=np.float64(0.0), statistic_location=np.int32(8), statistic_sign=np.int8(1))

Statistics log created successfully.
Output file: result_13wtl_classical games.txt


'DEGREES TEST'

KstestResult(statistic=np.float64(0.7575382013705558), pvalue=np.float64(0.0), statistic_location=np.int32(13), statistic_sign=np.int8(1))

'INDEGREES TEST'

KstestResult(statistic=np.float64(0.7161902761780463), pvalue=np.float64(0.0), statistic_location=np.int32(5), statistic_sign=np.int8(1))

'OUTDEGREES TEST'

KstestResult(statistic=np.float64(0.7310216455400943), pvalue=np.float64(0.0), statistic_location=np.int32(5), statistic_sign=np.int8(1))

Statistics log created successfully.
Output file: result_13wtl_correspondence games.txt


'DEGREES TEST'

KstestResult(statistic=np.float64(0.9684603330861864), pvalue=np.float64(0.0), statistic_location=np.int32(0), statistic_sign=np.int8(1))

'INDEGREES TEST'

KstestResult(statistic=np.float64(0.8931416460553507), pvalue=np.float64(0.0), statistic_location=np.int32(0), statistic_sign=np.int8(1))

'OUTDEGREES TEST'

KstestResult(statistic=np.float64(0.8900941044056149), pvalue=np.float64(0.0), statistic_location=np.int32(0), statistic_sign=np.int8(1))

Statistics log created successfully.
Output file: result_13wtl_blitz games.txt


'DEGREES TEST'

KstestResult(statistic=np.float64(0.8029429001450639), pvalue=np.float64(0.0), statistic_location=np.int32(16), statistic_sign=np.int8(1))

'INDEGREES TEST'

KstestResult(statistic=np.float64(0.7611216858746019), pvalue=np.float64(0.0), statistic_location=np.int32(6), statistic_sign=np.int8(1))

'OUTDEGREES TEST'

KstestResult(statistic=np.float64(0.77879742744307), pvalue=np.float64(0.0), statistic_location=np.int32(6), statistic_sign=np.int8(1))

In [5]:
def power_law_linreg(vals,counts):
  #gets an array of points, filters empty bins out
  #then returns a powerlaw curve to approximate the distribution
  fvals = vals[counts>0]
  fcounts = counts[counts>0]
  lvals = np.log(fvals)
  lcounts = np.log(fcounts)
  #weighted by count so that the curve mostly follows the taller bins, which represent more samples (and also tend to follow our desired curve more closely)
  a,b = np.polyfit(lvals, lcounts, deg=1, w=fcounts)
  #the resulting curve follows the function y = np.exp(b) * x^a
  return (np.exp(b),a)

def do_plot(values,color='blue'):
  #because we display loglog, we ignore any non-positive samples
  filtered_vals = values[values > 0]
  #evenly spaced out bins on logarithmic scale
  bin_locs = np.logspace(np.log10(min(filtered_vals)),np.log10(max(filtered_vals)))
  #the histogram returns the counts in every bin
  hist_result = plt.hist(values, bins=bin_locs, color=color, alpha=0.7)
  counts = hist_result[0]
  #draw the power law curve
  coef = power_law_linreg(bin_locs[:-1],counts)
  plt.plot(bin_locs,coef[0]*bin_locs**(coef[1]),'r')

#run for the original graph and every subgraph
for g,gametitle in [(gX,'all games'),(gB,'bullet games'),(gC,'classical games'),(gN,'correspondence games'),(gZ,'blitz games')]:

  #set the target directory here
  output_directory = 'result_13wtl_'+gametitle

  if not os.path.exists(output_directory):
      os.makedirs(output_directory)

  # The following code is for plotting the degree distribution of the graph
  degree_distribution = g.degree_property_map("total")
  plt.figure(figsize=(10, 6))
  do_plot(degree_distribution.a,color='blue')
  plt.title(f'Degree Distribution ({gametitle})')
  plt.xlabel('Degree')
  plt.ylabel('Frequency')
  plt.grid()
  plt.xscale('log')
  plt.yscale('log')
  plt.savefig(f'{output_directory}/degree_distribution_loglog {gametitle}.png')
  plt.close()
  # The following code is for plotting the indegree distribution of the graph
  indegree_distribution = g.degree_property_map("in")
  plt.figure(figsize=(10, 6))
  do_plot(indegree_distribution.a,color='green')
  plt.title(f'Indegree Distribution ({gametitle})')
  plt.xlabel('Indegree')
  plt.ylabel('Frequency')
  plt.grid()
  plt.xscale('log')
  plt.yscale('log')
  plt.savefig(f'{output_directory}/indegree_distribution_loglog {gametitle}.png')
  plt.close()
  # The following code is for plotting the outdegree distribution of the graph
  outdegree_distribution = g.degree_property_map("out")
  plt.figure(figsize=(10, 6))
  do_plot(outdegree_distribution.a,color='red')
  plt.title(f'Outdegree Distribution ({gametitle})')
  plt.xlabel('Outdegree')
  plt.ylabel('Frequency')
  plt.grid()
  plt.xscale('log')
  plt.yscale('log')
  plt.savefig(f'{output_directory}/outdegree_distribution_loglog {gametitle}.png')
  plt.close()
  # The following code is for plotting the PageRank centrality distribution of the graph
  pagerank_centrality = gt.pagerank(g)
  plt.figure(figsize=(10, 6))
  do_plot(pagerank_centrality.a,color='pink')
  plt.title('PageRank Centrality Distribution')
  plt.xlabel('PageRank Centrality')
  plt.ylabel('Frequency')
  plt.grid()
  plt.xscale('log')
  plt.yscale('log')
  plt.savefig(f'{output_directory}/pagerank_centrality_distribution_loglog {gametitle}.png')
  plt.close()


  #the following properties do not seem to follow power law, so rendering the curve is meaningless
  continue

  # The following code is for plotting the clustering coefficient distribution of the graph
  clustering_coefficient_distribution = gt.local_clustering(g)
  plt.figure(figsize=(10, 6))
  do_plot(clustering_coefficient_distribution.a,color='purple')
  plt.title('Clustering Coefficient Distribution')
  plt.xlabel('Clustering Coefficient')
  plt.ylabel('Frequency')
  plt.grid()
  plt.xscale('log')
  plt.yscale('log')
  plt.savefig(f'{output_directory}/clustering_coefficient_distribution_loglog {gametitle}.png')
  plt.close()
  # # The following code is for plotting the reciprocity distribution of the graph
  # reciprocity_distribution =
  # plt.figure(figsize=(10, 6))
  # plt.hist(reciprocity_distribution.a, bins=50, color='orange', alpha=0.7)
  # plt.title('Reciprocity Distribution')
  # plt.xlabel('Reciprocity')
  # plt.ylabel('Frequency')
  # plt.grid()
  # plt.savefig(f'{output_directory}/reciprocity_distribution.png')
  # plt.close()
  # The following code is for plotting the betweenness centrality distribution of the graph
  betweenness_centrality = gt.betweenness(g)
  vertex_betweenness = betweenness_centrality[0]
  edge_betweenness = betweenness_centrality[1]
  plt.figure(figsize=(10, 6))
  do_plot(vertex_betweenness.a,color='cyan')
  plt.title('Vertex Betweenness Centrality Distribution')
  plt.xlabel('Betweenness Centrality')
  plt.ylabel('Frequency')
  plt.grid()
  plt.xscale('log')
  plt.yscale('log')
  plt.savefig(f'{output_directory}/vertex_betweenness_centrality_distribution_loglog {gametitle}.png')
  plt.close()

  plt.figure(figsize=(10, 6))
  do_plot(edge_betweenness.a,color='cyan')
  plt.title('Edge Betweenness Centrality Distribution')
  plt.xlabel('Betweenness Centrality')
  plt.ylabel('Frequency')
  plt.grid()
  plt.xscale('log')
  plt.yscale('log')
  plt.savefig(f'{output_directory}/edge_betweenness_centrality_distribution_loglog {gametitle}.png')
  plt.close()
  # The following code is for plotting the closeness centrality distribution of the graph
  closeness_centrality = gt.closeness(g)
  plt.figure(figsize=(10, 6))
  do_plot(closeness_centrality.a,color='magenta')
  plt.title('Closeness Centrality Distribution')
  plt.xlabel('Closeness Centrality')
  plt.ylabel('Frequency')
  plt.grid()
  plt.xscale('log')
  plt.yscale('log')
  plt.savefig(f'{output_directory}/closeness_centrality_distribution_loglog {gametitle}.png')
  plt.close()
  # The following code is for plotting the eigenvector centrality distribution of the graph
  eigenvector_centrality = gt.eigenvector(g)[1]
  plt.figure(figsize=(10, 6))
  do_plot(eigenvector_centrality.a,color='brown')
  plt.title('Eigenvector Centrality Distribution')
  plt.xlabel('Eigenvector Centrality')
  plt.ylabel('Frequency')
  plt.grid()
  plt.xscale('log')
  plt.yscale('log')
  plt.savefig(f'{output_directory}/eigenvector_centrality_distribution_loglog {gametitle}.png')
  plt.close()

