In [None]:
# Execute the project scripts inside this kernel's namespace. No import statements
# appear in the cells below, so the names they rely on (sns, pl, nx, nx_comm, os,
# co_citation_graphs, gini, LATEX_TABLE_PATH) are presumably defined by these two
# scripts -- TODO confirm which script provides which name.
%run './model/multi_corpus.py'
%run './constants.py'

# Seaborn rc override: default figure size (15, 8) for plots drawn after this call.
sns.set(rc = {'figure.figsize':(15,8)})

In [None]:
# Build every field's corpus once, then expose the graph ('G') and the
# dataframe ('Df') of each corpus as separate lookups keyed by field name.
corpora = co_citation_graphs()
Gs = {name: entry['G'] for name, entry in corpora.items()}
Dfs = {name: entry['Df'] for name, entry in corpora.items()}

In [None]:
# Per-field descriptive statistics of the co-citation graphs, exported as a LaTeX table.
# NOTE(review): the following cell writes the same 'co_citation_desc_stats.tex' file,
# so on a top-to-bottom run the table produced here is overwritten.

# Louvain is randomized; a fixed seed keeps the reported community count reproducible.
LOUVAIN_SEED = 42

d = {}

for field_name, corpus in corpora.items():

    print(field_name)

    G = corpus['G']
    n_nodes = nx.number_of_nodes(G)

    # Degree sequence in node order, materialized once for sum / max / len and gini().
    degree = [deg for _, deg in G.degree()]

    # Enumerate the components once: needed both for the count and the largest one.
    components = list(nx.connected_components(G))
    G_large_component = G.subgraph(max(components, key=len))

    # Costly on large graphs and reported under two column names, so compute it once.
    avg_clustering = nx.average_clustering(G)

    # The former 'ShortestPath' entry stored the lazy all-pairs iterator returned by
    # nx.shortest_path_length(G); it is not a scalar and cannot be a table cell, so it
    # is no longer part of the row.
    d[field_name] = {
        'Nodes': n_nodes,
        'Edges': nx.number_of_edges(G),
        'Density': nx.density(G),
        'AvgDegree': sum(degree) / len(degree),
        'AvgClustering': avg_clustering,
        # Diameter and mean geodesic require a connected graph, hence the
        # restriction to the largest connected component.
        'DiameterLargestComponent': nx.diameter(G_large_component),
        'AvgShortestPath': nx.average_shortest_path_length(G_large_component),
        # nx.triangles counts each triangle once per corner, so the sum is an exact
        # multiple of 3 and integer division yields the triangle count.
        'Triangles': sum(nx.triangles(G).values()) // 3,
        'Gini': gini(degree),
        'Louvain': len(nx_comm.louvain_communities(G, seed=LOUVAIN_SEED)),
        'Components': len(components),
        'Clustering': avg_clustering,
        'Transitivity': nx.transitivity(G),
        # Summed gap between the maximum degree and every node's degree, over (n - 1)^2.
        'Centralization': float(n_nodes * max(degree) - sum(degree)) / (n_nodes - 1) ** 2,
        'Isolates': nx.number_of_isolates(G),
        'Loops': nx.number_of_selfloops(G),
    }

# One row per field, with the field name as the first column.
df = pl.DataFrame([{'Field': field_name} | desc_d for field_name, desc_d in d.items()])

latex = df.to_pandas().to_latex(
    index=False,
    na_rep=' ',
    bold_rows=True,
    float_format="%.2f",
)
with open(os.path.join(LATEX_TABLE_PATH, 'co_citation_desc_stats.tex'), 'w') as file:
    file.write(latex)


In [None]:
# Per-field descriptive statistics of the co-citation graphs (structure, communities
# and publication date range), exported as a LaTeX table.

# Louvain is randomized; a fixed seed keeps the reported community count reproducible.
LOUVAIN_SEED = 42

d = {}

for field_name, corpus in corpora.items():

    print(field_name)

    corpus_df = corpus['Df']
    G = corpus['G']
    nodes = list(G.nodes)

    # COMPONENTS
    # Enumerated once: used for the count and to extract the largest component.
    components = list(nx.connected_components(G))
    n_connected_components = len(components)
    G_large_component = G.subgraph(max(components, key=len))

    # NODES EDGES
    number_of_nodes = nx.number_of_nodes(G)
    number_of_edges = nx.number_of_edges(G)

    # DENSITY
    density = nx.density(G)

    # TRIANGLES
    # nx.triangles counts each triangle once per corner, so the sum is an exact
    # multiple of 3 and integer division yields the triangle count.
    triangles = sum(nx.triangles(G).values()) // 3

    # DEGREE
    # Degree sequence in node order, materialized once and reused below.
    degrees = [deg for _, deg in G.degree()]
    avg_degree = sum(degrees) / len(degrees)

    # GINI
    # Stored as `gini_index` so the `gini` helper called by the previous cell is not
    # rebound to a float. The edge count is hoisted out of the sum instead of
    # re-evaluating len(G.edges) for every node.
    # NOTE(review): this is one minus the sum of squared degree/edge-count ratios,
    # not the classical Gini coefficient, and degrees sum to twice the edge count --
    # confirm this is the intended definition for the 'Gini' column.
    gini_index = 1 - sum((deg / number_of_edges) ** 2 for deg in degrees)

    # DIAMETER
    # Only defined on a connected graph, hence the largest component.
    diameter = nx.diameter(G_large_component)

    # AVG SHORTEST PATH
    average_shortest_path_length = nx.average_shortest_path_length(G_large_component)

    # CLUSTERING
    clustering = nx.average_clustering(G)
    transitivity = nx.transitivity(G)

    # DATES
    # Earliest and latest 'Date' among the rows whose 'Doi' is a node of the graph.
    start_date, end_date = (
        corpus_df
        .filter(pl.col('Doi').is_in(nodes))
        .select(
            pl.col('Date').min().alias('Min'),
            pl.col('Date').max().alias('Max'),
        )
        .row(0)
    )

    # CENTRALIZATION
    # Summed gap between the maximum degree and every node's degree, over (n - 1)^2.
    centralization = float(number_of_nodes * max(degrees) - sum(degrees)) / (number_of_nodes - 1) ** 2

    # COMMUNITIES
    # The label-propagation count was computed here but never reported; that
    # unused (and expensive) computation has been removed.
    louvain_communities = len(nx_comm.louvain_communities(G, seed=LOUVAIN_SEED))

    # ISOLATES LOOPS
    isolates = nx.number_of_isolates(G)
    loops = nx.number_of_selfloops(G)

    d[field_name] = {
        'Nodes': number_of_nodes,
        'Edges': number_of_edges,
        'Density': round(density, 4),
        'Diameter': diameter,
        'Geodesic': average_shortest_path_length,
        'Triangles': triangles,
        'AvgDegree': round(avg_degree, 4),
        'Gini': round(gini_index, 4),
        'Louvain': louvain_communities,
        'Components': n_connected_components,
        'Clustering': round(clustering, 4),
        'Transitivity': round(transitivity, 4),
        'StartDate': start_date,
        'EndDate': end_date,
        'Centralization': round(centralization, 4),
        'Isolates': isolates,
        'Loops': loops,
    }

# One row per field, with the field name as the first column.
ds = [{'Field': field_name} | desc_d for field_name, desc_d in d.items()]
df = pl.DataFrame(ds)

# NOTE(review): float_format="%.2f" prints two decimals, so the 4-decimal rounding
# applied above is not visible in the exported table -- confirm the intended precision.
latex = df.to_pandas().to_latex(
    index=False,
    na_rep=' ',
    bold_rows=True,
    float_format="%.2f",
)
with open(os.path.join(LATEX_TABLE_PATH, 'co_citation_desc_stats.tex'), 'w') as file:
    file.write(latex)
