In [43]:
import os

class FileProcessing:
  """Helper for locating the network data files used by this notebook."""

  def __init__(self, files):
    """Keep the given collection of files on the instance.

    Args:
      files: stored unchanged in ``self.FILES``; nothing else in this class
        reads it.
    """
    self.FILES = files

  @classmethod
  def get_file_path(cls, file_name):
    """Build the relative path of a data file.

    Args:
      file_name: base name of the file, without the ``.txt`` extension.

    Returns:
      The path ``../../data/<file_name>.txt`` joined with the platform's
      path separator.
    """
    # The first parameter of a classmethod is the class itself, hence ``cls``.
    return os.path.join('..', '..', 'data', f"{file_name}.txt")

# Requirement III

Reproduce the table below for each of the chosen networks.

|                     -                    | Qtd. Vértices| Qtd. Arestas | Degree assortativity coefficient | Qtd Comp. Conectados | Tamanho do Comp. Gigante (GCC)  | Coef. de Clustering avg_clustering()|
|:-----------------------------------------|:------------:|:------------:|:-------------------------------:|:--------------------:|:-------------------------------:|:-----------------------------------:|
| email-EuAll-processed                    | 265214       | 365570       | -0.178188                       | 15836                | 224832                          | 0.067085                            |
| roadNet-PA-processed                     | 1088092      | 1541898      | 0.122749                        | 206                  | 1087562                         | 0.046477                            |
| ca-HepTh-processed                       | 9877         | 25998        | 0.267495                        | 429                  | 8638                            | 0.471439                            |
| p2p-Gnutella09-processed                 | 8114         | 26013        | 0.033224                        | 6                    | 8104                            | 0.009526                            |
| higgs-activity_time                      | 304691       | 457555       | -0.063312                       | 13408                | 272861                          | 0.103407                            |


In [44]:
import networkx as nx

class BipartiteGraphGenerator:
  """Build an undirected networkx graph from an edge-list file and record its summary metrics.

  Each non-blank row of the file is split on whitespace; the first two tokens
  are parsed as integer node ids and added as an edge. After
  ``generate_graph()`` the metric attributes (``nodes``, ``edges``,
  ``degree_assortativity_coefficient``, ``number_connected_components``,
  ``gcc``, ``clustering_coefficient``) are populated; before that they are
  ``None``.
  """

  def __init__(self, file):
    """Read all rows of the data file and prepare an empty graph.

    Args:
      file: base name of the data file (no extension); resolved through
        ``FileProcessing.get_file_path``.
    """
    # Read everything inside a context manager so the handle is released even
    # if generate_graph() is never called or parsing fails later.
    with open(FileProcessing.get_file_path(file), 'r') as handle:
      self.rows = handle.readlines()
    # Attribute kept for backward compatibility; the handle is already closed.
    self.file = handle

    self.G = nx.Graph()
    self.H = None

    # Metrics are filled in by update_graph_information().
    self.nodes = None
    self.edges = None
    self.degree_assortativity_coefficient = None
    self.number_connected_components = None
    self.gcc = None
    self.clustering_coefficient = None

  def generate_graph(self):
    """Populate the graph from the loaded rows and compute its metrics.

    Returns:
      The undirected graph ``self.G``.
    """
    self._add_edges()

    # Directed graph constructed from the undirected one.
    self.H = nx.DiGraph(self.G)
    # No-op when the handle was already closed in __init__.
    self.file.close()

    self.update_graph_information()

    return self.G

  def update_graph_information(self):
    """Recompute every summary metric from the current state of ``self.G``."""
    self.nodes = len(self.G.nodes) # Number of vertices
    self.edges = len(self.G.edges) # Number of edges
    self.degree_assortativity_coefficient = nx.degree_assortativity_coefficient(self.G) # Degree assortativity coefficient
    self.number_connected_components = nx.number_connected_components(self.G) # Number of connected components
    self.gcc = len(max(nx.connected_components(self.G), key=len)) # Size of the giant connected component (GCC)
    self.clustering_coefficient = nx.average_clustering(self.G) # Average clustering coefficient

  def _add_edges(self):
    """Add one edge to ``self.G`` per non-blank row in ``self.rows``."""
    for row in self.rows:
      nodes = row.split()
      if not nodes:
        # Blank or whitespace-only line: nothing to add.
        continue
      self.G.add_edge(int(nodes[0]), int(nodes[1]))

In [45]:
import pandas as pd

class DataFrameGenerator:
  """Assemble a summary table with one row of graph metrics per network file."""

  def __init__(self, files):
    """Store the file names and define the table's column labels.

    Args:
      files: iterable of data-file base names, each passed to
        ``BipartiteGraphGenerator``.
    """
    self.files = files
    # Order must match the row layout built in generate_dataFrame():
    # name, vertices, edges, assortativity, connected components, GCC size,
    # average clustering.
    self.columns = [
      "Rede",
      "Qtd. Vértices",
      "Qtd. Arestas",
      "Degree assortativity coefficient",
      "Qtd Comp. Conectados",
      "Tamanho do Comp. Gigante (GCC)",
      "Coef. de Clustering avg_clustering()"
    ]

    # Empty until generate_dataFrame() is called.
    self.dataFrame = pd.DataFrame(columns=self.columns)

  def generate_dataFrame(self):
    """Build the graph for every file and collect its metrics into a DataFrame.

    The result is also stored in ``self.dataFrame``.

    Returns:
      A DataFrame with the columns listed in ``self.columns`` and one row per
      entry of ``self.files``.
    """
    data = []
    for file in self.files:
      graph = BipartiteGraphGenerator(file)
      graph.generate_graph()
      data.append([
        file,
        graph.nodes,
        graph.edges,
        graph.degree_assortativity_coefficient,
        graph.number_connected_components,
        graph.gcc,
        graph.clustering_coefficient
      ])
    self.dataFrame = pd.DataFrame(data, columns=self.columns)
    return self.dataFrame

In [46]:
# Base names (no extension) of the edge-list files to summarise.
network_files = [
  "email-EuAll-processed",
  "roadNet-PA-processed",
  "ca-HepTh-processed",
  "p2p-Gnutella09-processed",
  "higgs-activity_time",
]

# Note: despite its name, `dataFrame` holds the generator object; the actual
# table is what generate_dataFrame() returns.
dataFrame = DataFrameGenerator(network_files)

# Left as the cell's final bare expression so the notebook renders the table.
dataFrame.generate_dataFrame()

Unnamed: 0,Rede,Qtd. Vértices,Qtd. Arestas,Degree assortativity coefficient,Tamanho do Comp. Gigante (GCC),Tamanho do componente gigante (GCC),Coef. de Clustering avg_clustering()
0,email-EuAll-processed,265214,365570,-0.178188,15836,224832,0.067085
1,roadNet-PA-processed,1088092,1541898,0.122749,206,1087562,0.046477
2,ca-HepTh-processed,9877,25998,0.267495,429,8638,0.471439
3,p2p-Gnutella09-processed,8114,26013,0.033224,6,8104,0.009526
4,higgs-activity_time,304691,457555,-0.063312,13408,272861,0.103407
