In [None]:
# Execute the project scripts inside this notebook's namespace (IPython `%run`),
# so whatever they define at top level becomes available to the cells below.
# NOTE(review): later cells use names that are never imported in this notebook
# (np, pl, nx, nx_comm, sns, os, co_citation_graphs, LATEX_TABLE_PATH) --
# presumably they are provided by these two scripts; confirm.
%run './model/multi_corpus.py'
%run './constants.py'

# Default size for every figure drawn afterwards: 15 x 8 (the 'figure.figsize' rc setting).
sns.set(rc = {'figure.figsize':(15,8)})

In [None]:
# Build every field's corpus once; each entry carries a graph under 'G'
# and a document table under 'Df'.
corpora = co_citation_graphs()

# Per-field lookups so later cells can fetch a graph or a table by field name.
Gs = {name: corpora[name]['G'] for name in corpora}
Dfs = {name: corpora[name]['Df'] for name in corpora}

In [None]:
def gini(x):
    """Gini coefficient of a collection of numbers.

    Defined as half the relative mean absolute difference,
    ``mean(|x_i - x_j|) / (2 * mean(x))`` over all ordered pairs (i, j).
    It is evaluated through the equivalent sorted-rank identity
    ``sum((2*i - n - 1) * x_(i)) / (n * sum(x))`` so that only O(n) memory and
    O(n log n) time are needed instead of an n-by-n difference matrix.

    Args:
        x: Any iterable of numbers (list, tuple, numpy array, generator, or a
            dict view such as ``dict(G.degree()).values()``). Multi-dimensional
            arrays are flattened.

    Returns:
        float: 0.0 for perfectly equal values, approaching 1.0 as the total
        concentrates in one element. ``nan`` when the input is empty or sums
        to zero, because the coefficient is undefined there.
    """
    # NumPy cannot turn dict views or generators into numeric arrays directly
    # (they become 0-d object arrays), so materialize non-array input first.
    if not isinstance(x, np.ndarray):
        x = list(x)
    values = np.sort(np.asarray(x, dtype=float).ravel())

    n = values.size
    total = values.sum()
    if n == 0 or total == 0:
        return float('nan')

    # 1-based rank of each value in ascending order.
    ranks = np.arange(1, n + 1)
    return float(((2 * ranks - n - 1) * values).sum() / (n * total))

In [None]:
# Fixed seed for Louvain community detection, so the community count written to
# the LaTeX table is the same on every Restart & Run All.
LOUVAIN_SEED = 42


def _describe_graph(G):
    """Compute scalar descriptive statistics for one undirected graph.

    Args:
        G: Undirected networkx graph with at least one node.

    Returns:
        dict: Statistic name (used as the table's column header) -> number.
    """
    # A plain list rather than a dict view: it is reused several times below
    # and handed to gini(), which needs something NumPy can turn into an array.
    degrees = [deg for _, deg in G.degree()]
    n_nodes = nx.number_of_nodes(G)

    # Enumerate the components once; needed for both the count and the largest one.
    components = list(nx.connected_components(G))
    G_large_component = G.subgraph(max(components, key=len))

    # Expensive and reported under two column names, so compute it a single time.
    avg_clustering = nx.average_clustering(G)

    # (n * max_degree - sum_of_degrees) / (n - 1)^2; undefined for a single node.
    if n_nodes > 1:
        centralization = float(n_nodes * max(degrees) - sum(degrees)) / (n_nodes - 1) ** 2
    else:
        centralization = float('nan')

    return {
        'Nodes': n_nodes,
        'Edges': nx.number_of_edges(G),
        'Density': nx.density(G),
        'AvgDegree': sum(degrees) / len(degrees),
        'AvgClustering': avg_clustering,
        # Path-based measures are restricted to the largest connected component
        # because they are not defined on a disconnected graph.
        'DiameterLargestComponent': nx.diameter(G_large_component),
        'AvgShortestPath': nx.average_shortest_path_length(G_large_component),
        # No 'ShortestPath' column: nx.shortest_path_length(G) without source/target
        # yields per-node distance maps, not a scalar that can go in a table cell.
        # Every triangle is counted once per corner, hence the division by 3.
        'Triangles': sum(nx.triangles(G).values()) / 3,
        'Gini': gini(degrees),
        'Louvain': len(nx_comm.louvain_communities(G, seed=LOUVAIN_SEED)),
        'Components': len(components),
        'Clustering': avg_clustering,
        'Transitivity': nx.transitivity(G),
        'Centralization': centralization,
        'Isolates': nx.number_of_isolates(G),
        'Loops': nx.number_of_selfloops(G),
    }


# Field name -> statistics dict, one entry per corpus.
d = {}
for field_name, corpus in corpora.items():
    print(field_name)  # progress marker; the path-based statistics can be slow
    d[field_name] = _describe_graph(corpus['G'])

# One row per field, with the field name as the first column.
df = pl.DataFrame([{'Field': field_name} | desc_d for field_name, desc_d in d.items()])

latex = (
    df
    .to_pandas()
    .to_latex(
        index=False,
        na_rep=' ',
        bold_rows=True,
        float_format="%.2f",
    )
)
# Write-only access is enough; explicit encoding keeps non-ASCII field names portable.
with open(os.path.join(LATEX_TABLE_PATH, 'co_occurrence_desc_stats.tex'), 'w', encoding='utf-8') as file:
    file.write(latex)
