In [1]:
#run these commands to install graph-tool for colab
!pip install -q condacolab
import condacolab
condacolab.install()
!mamba install -q graph-tool

⏬ Downloading https://github.com/jaimergp/miniforge/releases/download/24.11.2-1_colab/Miniforge3-colab-24.11.2-1_colab-Linux-x86_64.sh...
📦 Installing...
📌 Adjusting configuration...
🩹 Patching environment...
⏲ Done in 0:00:18
🔁 Restarting kernel...
Preparing transaction: ...working... done
Verifying transaction: ...working... done
Executing transaction: ...working... 


done


In [None]:
#other imports
import graph_tool.all as gt
import matplotlib.pyplot as plt
import argparse
import os
import datetime
import numpy as np
from scipy import stats

In [9]:
#load a graphml file and split into four subgraphs based on game type
graphml_file = 'converted_13wtl.graphml'
gX = gt.load_graph(graphml_file)
gB = gt.GraphView(gX, efilt=gX.edge_properties['game type'].t(lambda a : a=='B',value_type="bool"))
gB = gt.GraphView(gB, vfilt=gB.degree_property_map('total').t(lambda a : a>0,value_type="bool"))
gC = gt.GraphView(gX, efilt=gX.edge_properties['game type'].t(lambda a : a=='C',value_type="bool"))
gC = gt.GraphView(gC, vfilt=gC.degree_property_map('total').t(lambda a : a>0,value_type="bool"))
gN = gt.GraphView(gX, efilt=gX.edge_properties['game type'].t(lambda a : a=='N',value_type="bool"))
gN = gt.GraphView(gN, vfilt=gN.degree_property_map('total').t(lambda a : a>0,value_type="bool"))
gZ = gt.GraphView(gX, efilt=gX.edge_properties['game type'].t(lambda a : a=='Z',value_type="bool"))
gZ = gt.GraphView(gZ, vfilt=gZ.degree_property_map('total').t(lambda a : a>0,value_type="bool"))
display(gX,gB,gC,gN,gZ)
for g,name in [(gX,''),(gB,' - B'),(gC,' - C'),(gN,' - N'),(gZ,' - Z')]:
  #display minimum and maximum degree for each graph - min should be positive (every subgraph had the isoalted vertices removed)
  display(min(g.degree_property_map("total")),max(g.degree_property_map("total")))

<Graph object, directed, with 92506 vertices and 4770357 edges, 1 internal vertex property, 3 internal edge properties, at 0x7d0c2c792e50>

<GraphView object, directed, with 31341 vertices and 1622629 edges, 1 internal vertex property, 3 internal edge properties, edges filtered by <EdgePropertyMap object with value type 'bool', for Graph 0x7d0c2669e190, at 0x7d0c28035550>, vertices filtered by <VertexPropertyMap object with value type 'bool', for Graph 0x7d0c2669e190, at 0x7d0c2c4c0410>, at 0x7d0c2669e190>

<GraphView object, directed, with 61700 vertices and 1230129 edges, 1 internal vertex property, 3 internal edge properties, edges filtered by <EdgePropertyMap object with value type 'bool', for Graph 0x7d0c2661d850, at 0x7d0c26797690>, vertices filtered by <VertexPropertyMap object with value type 'bool', for Graph 0x7d0c2661d850, at 0x7d0c2fea0a50>, at 0x7d0c2661d850>

<GraphView object, directed, with 8413 vertices and 20924 edges, 1 internal vertex property, 3 internal edge properties, edges filtered by <EdgePropertyMap object with value type 'bool', for Graph 0x7d0c2710dbd0, at 0x7d0c26626290>, vertices filtered by <VertexPropertyMap object with value type 'bool', for Graph 0x7d0c2710dbd0, at 0x7d0c31f2d3d0>, at 0x7d0c2710dbd0>

<GraphView object, directed, with 56523 vertices and 1896675 edges, 1 internal vertex property, 3 internal edge properties, edges filtered by <EdgePropertyMap object with value type 'bool', for Graph 0x7d0c2c4cba10, at 0x7d0c28419990>, vertices filtered by <VertexPropertyMap object with value type 'bool', for Graph 0x7d0c2c4cba10, at 0x7d0c27143950>, at 0x7d0c2c4cba10>

1

8243

1

8232

1

1530

1

302

1

3099

In [10]:
#run for the original graph and every subgraph
for g,gametitle in [(gX,'all games'),(gB,'bullet games'),(gC,'classical games'),(gN,'correspondence games'),(gZ,'blitz games')]:

  #set the target directory here
  output_directory = 'result_13wtl_'+gametitle

  if not os.path.exists(output_directory):
      os.makedirs(output_directory)

  degrees = g.degree_property_map("total")
  min_degree = min(degrees)
  max_degree = max(degrees)

  indegree = g.degree_property_map("in")
  min_indegree = min(indegree)
  max_indegree = max(indegree)

  outdegree = g.degree_property_map("out")
  min_outdegree = min(outdegree)
  max_outdegree = max(outdegree)

  mean_degree = 2 * g.num_edges() / g.num_vertices()
  mean_indegree = g.num_edges() / g.num_vertices()
  density = mean_degree / (g.num_vertices() - 1)

  global_clustering_coefficient = gt.global_clustering(g)
  reciprocity = gt.edge_reciprocity(g)

  with open(f'{output_directory}/log.txt', 'w') as f:
      print(f'Graph located at: {graphml_file}',file=f)
      print(f'starting time: {datetime.time()}',file=f)
      print('==========================',file=f)
      print(f'Number of vertices: {g.num_vertices()}', file=f)
      print(f'Number of edges: {g.num_edges()}', file=f)
      print(f'Min degree: {min_degree}', file=f)
      print(f'Max degree: {max_degree}', file=f)
      print(f'Mean degree: {mean_degree}', file=f)
      print(f'Min indegree: {min_indegree}', file=f)
      print(f'Max indegree: {max_indegree}', file=f)
      print(f'Min outdegree: {min_outdegree}', file=f)
      print(f'Max outdegree: {max_outdegree}', file=f)
      print(f'Mean indegree = Mean outdegree: {mean_indegree}', file=f)
      print(f'Density: {density}', file=f)
      print(f'Global clustering coefficient: {global_clustering_coefficient}', file=f)
      print(f'Reciprocity: {reciprocity}', file=f)
      print('==========================', file=f)
      print(f'ending time: {datetime.time()}',file=f)

      print('Statistics log created successfully.')
      print(f'Output file: {output_directory}.txt')

  edge_chance = g.num_edges()/(g.num_vertices()*(g.num_vertices()-1))

  # The following code is for plotting the degree distribution of the graph
  degree_distribution = g.degree_property_map("total")
  fig = plt.figure(figsize=(10, 6))
  plt.hist(degree_distribution.a, bins=50, color='blue', alpha=0.7,density=True)
  x = np.array(range(min(degree_distribution.a),max(degree_distribution.a)+1))
  plt.plot(x, stats.binom(2*(g.num_vertices()-1),edge_chance).pmf(x), 'r-', label='Expected degree density for Erdős–Rényi random graph')
  plt.title(f'Degree Distribution ({gametitle})')
  plt.xlabel('Degree')
  plt.ylabel('Frequency')
  plt.grid()
  #plt.yscale('log')
  fig.legend()
  fig.savefig(f'{output_directory}/degree_distribution {gametitle}.png')
  plt.close()
  display("DEGREES TEST")
  display(stats.kstest(degree_distribution.a, "binom", args=(2*(g.num_vertices()-1),edge_chance)))
  # The following code is for plotting the indegree distribution of the graph
  indegree_distribution = g.degree_property_map("in")
  fig = plt.figure(figsize=(10, 6))
  plt.hist(indegree_distribution.a, bins=50, color='green', alpha=0.7,density=True)
  x = np.array(range(min(indegree_distribution.a),max(indegree_distribution.a)+1))
  plt.plot(x, stats.binom(g.num_vertices()-1,edge_chance).pmf(x), 'r-', label='Expected indegree density for Erdős–Rényi random graph')
  plt.title(f'Indegree Distribution ({gametitle})')
  plt.xlabel('Indegree')
  plt.ylabel('Frequency')
  plt.grid()
  #plt.yscale('log')
  fig.legend()
  plt.savefig(f'{output_directory}/indegree_distribution {gametitle}.png')
  plt.close()
  display("INDEGREES TEST")
  display(stats.kstest(indegree_distribution.a, "binom", args=(g.num_vertices()-1,edge_chance)))
  # The following code is for plotting the outdegree distribution of the graph
  outdegree_distribution = g.degree_property_map("out")
  fig = plt.figure(figsize=(10, 6))
  plt.hist(outdegree_distribution.a, bins=50, color='red', alpha=0.7,density=True)
  x = np.array(range(min(outdegree_distribution.a),max(outdegree_distribution.a)+1))
  plt.plot(x, stats.binom(g.num_vertices()-1,edge_chance).pmf(x), 'r-', label='Expected outdegree density for Erdős–Rényi random graph')
  plt.title(f'Outdegree Distribution ({gametitle})')
  plt.xlabel('Outdegree')
  plt.ylabel('Frequency')
  plt.grid()
  #plt.yscale('log')
  fig.legend()
  plt.savefig(f'{output_directory}/outdegree_distribution {gametitle}.png')
  plt.close()
  display("OUTDEGREES TEST")
  display(stats.kstest(outdegree_distribution.a, "binom", args=(g.num_vertices()-1,edge_chance)))


  #reenable the rest of statistics if we know the expected distribution in a random graph

  continue



  # The following code is for plotting the clustering coefficient distribution of the graph
  clustering_coefficient_distribution = gt.local_clustering(g)
  plt.figure(figsize=(10, 6))
  plt.hist(clustering_coefficient_distribution.a, bins=50, color='purple', alpha=0.7)
  plt.title('Clustering Coefficient Distribution')
  plt.xlabel('Clustering Coefficient')
  plt.ylabel('Frequency')
  plt.grid()
  plt.yscale('log')
  plt.savefig(f'{output_directory}/clustering_coefficient_distribution.png')
  plt.close()
  # # The following code is for plotting the reciprocity distribution of the graph
  # reciprocity_distribution =
  # plt.figure(figsize=(10, 6))
  # plt.hist(reciprocity_distribution.a, bins=50, color='orange', alpha=0.7)
  # plt.title('Reciprocity Distribution')
  # plt.xlabel('Reciprocity')
  # plt.ylabel('Frequency')
  # plt.grid()
  # plt.savefig(f'{output_directory}/reciprocity_distribution.png')
  # plt.close()
  # The following code is for plotting the betweenness centrality distribution of the graph
  betweenness_centrality = gt.betweenness(g)
  vertex_betweenness = betweenness_centrality[0]
  edge_betweenness = betweenness_centrality[1]
  plt.figure(figsize=(10, 6))
  plt.hist(vertex_betweenness.a, bins=50, color='cyan', alpha=0.7)
  plt.title('Vertex Betweenness Centrality Distribution')
  plt.xlabel('Betweenness Centrality')
  plt.ylabel('Frequency')
  plt.grid()
  plt.yscale('log')
  plt.savefig(f'{output_directory}/vertex_betweenness_centrality_distribution.png')
  plt.close()

  plt.figure(figsize=(10, 6))
  plt.hist(edge_betweenness.a, bins=50, color='cyan', alpha=0.7)
  plt.title('Edge Betweenness Centrality Distribution')
  plt.xlabel('Betweenness Centrality')
  plt.ylabel('Frequency')
  plt.grid()
  plt.yscale('log')
  plt.savefig(f'{output_directory}/edge_betweenness_centrality_distribution.png')
  plt.close()
  # The following code is for plotting the closeness centrality distribution of the graph
  closeness_centrality = gt.closeness(g)
  plt.figure(figsize=(10, 6))
  plt.hist(closeness_centrality.a, bins=50, color='magenta', alpha=0.7)
  plt.title('Closeness Centrality Distribution')
  plt.xlabel('Closeness Centrality')
  plt.ylabel('Frequency')
  plt.grid()
  plt.savefig(f'{output_directory}/closeness_centrality_distribution.png')
  plt.close()
  # The following code is for plotting the eigenvector centrality distribution of the graph
  eigenvector_centrality = gt.eigenvector(g)[1]
  plt.figure(figsize=(10, 6))
  plt.hist(eigenvector_centrality.a, bins=50, color='brown', alpha=0.7)
  plt.title('Eigenvector Centrality Distribution')
  plt.xlabel('Eigenvector Centrality')
  plt.ylabel('Frequency')
  plt.grid()
  plt.yscale('log')
  plt.savefig(f'{output_directory}/eigenvector_centrality_distribution.png')
  plt.close()
  # The following code is for plotting the PageRank centrality distribution of the graph
  pagerank_centrality = gt.pagerank(g)
  plt.figure(figsize=(10, 6))
  plt.hist(pagerank_centrality.a, bins=50, color='pink', alpha=0.7)
  plt.title('PageRank Centrality Distribution')
  plt.xlabel('PageRank Centrality')
  plt.ylabel('Frequency')
  plt.grid()
  plt.yscale('log')
  plt.savefig(f'{output_directory}/pagerank_centrality_distribution.png')
  plt.close()



Statistics log created successfully.
Output file: result_16wtlall games.txt


'DEGREES TEST'

KstestResult(statistic=np.float64(0.6877633878879404), pvalue=np.float64(0.0), statistic_location=np.int32(79), statistic_sign=np.int8(1))

'INDEGREES TEST'

KstestResult(statistic=np.float64(0.669070347415132), pvalue=np.float64(0.0), statistic_location=np.int32(35), statistic_sign=np.int8(1))

'OUTDEGREES TEST'

KstestResult(statistic=np.float64(0.6752753503338617), pvalue=np.float64(0.0), statistic_location=np.int32(35), statistic_sign=np.int8(1))

Statistics log created successfully.
Output file: result_16wtlbullet games.txt


'DEGREES TEST'

KstestResult(statistic=np.float64(0.9221767065033839), pvalue=np.float64(0.0), statistic_location=np.int32(73), statistic_sign=np.int8(1))

'INDEGREES TEST'

KstestResult(statistic=np.float64(0.9148784722155733), pvalue=np.float64(0.0), statistic_location=np.int32(32), statistic_sign=np.int8(1))

'OUTDEGREES TEST'

KstestResult(statistic=np.float64(0.9201429955978403), pvalue=np.float64(0.0), statistic_location=np.int32(32), statistic_sign=np.int8(1))

Statistics log created successfully.
Output file: result_16wtlclassical games.txt


'DEGREES TEST'

KstestResult(statistic=np.float64(0.7705676929104615), pvalue=np.float64(0.0), statistic_location=np.int32(25), statistic_sign=np.int8(1))

'INDEGREES TEST'

KstestResult(statistic=np.float64(0.7492356700501964), pvalue=np.float64(0.0), statistic_location=np.int32(11), statistic_sign=np.int8(1))

'OUTDEGREES TEST'

KstestResult(statistic=np.float64(0.7492103379582447), pvalue=np.float64(0.0), statistic_location=np.int32(10), statistic_sign=np.int8(1))

Statistics log created successfully.
Output file: result_16wtlcorrespondence games.txt


'DEGREES TEST'

KstestResult(statistic=np.float64(0.9068043794760153), pvalue=np.float64(0.0), statistic_location=np.int32(1), statistic_sign=np.int8(1))

'INDEGREES TEST'

KstestResult(statistic=np.float64(0.845284735702729), pvalue=np.float64(0.0), statistic_location=np.int32(0), statistic_sign=np.int8(1))

'OUTDEGREES TEST'

KstestResult(statistic=np.float64(0.8642240477473531), pvalue=np.float64(0.0), statistic_location=np.int32(0), statistic_sign=np.int8(1))

Statistics log created successfully.
Output file: result_16wtlblitz games.txt


'DEGREES TEST'

KstestResult(statistic=np.float64(0.8086818058688553), pvalue=np.float64(0.0), statistic_location=np.int32(46), statistic_sign=np.int8(1))

'INDEGREES TEST'

KstestResult(statistic=np.float64(0.7927867111818994), pvalue=np.float64(0.0), statistic_location=np.int32(20), statistic_sign=np.int8(1))

'OUTDEGREES TEST'

KstestResult(statistic=np.float64(0.7967324011911961), pvalue=np.float64(0.0), statistic_location=np.int32(20), statistic_sign=np.int8(1))