In [None]:
%run './model/multi_corpus.py'
%run './constants.py'

from matplotlib import cm

In [None]:
# Load every field's corpus once, then expose the citation graphs and the
# metadata frames as two separate mappings keyed by field name.
corpora = citation_graphs()
Gs = {name: entry['G'] for name, entry in corpora.items()}
Dfs = {name: entry['Df'] for name, entry in corpora.items()}

In [None]:
def gini(x):
    """Return the Gini coefficient of the values in ``x``.

    The coefficient is half the relative mean absolute difference,
    ``sum_i sum_j |x_i - x_j| / (2 * n**2 * mean(x))``. It is evaluated with
    the equivalent sorted-rank identity
    ``sum_i (2*i - n - 1) * x_(i) / (n * sum(x))`` (``i`` is the 1-based rank),
    which needs O(n log n) time and O(n) memory instead of materialising the
    full n-by-n matrix of pairwise differences.

    Parameters
    ----------
    x : array-like of numbers
        Values to summarise; multi-dimensional input is flattened.

    Returns
    -------
    float
        The Gini coefficient. ``nan`` for empty input, ``0.0`` when every
        value is zero (no inequality to measure).
    """
    values = np.sort(np.asarray(x, dtype=float).ravel())
    n = values.size

    if n == 0:
        return float('nan')
    if not values.any():
        # All zeros: the mean is 0, so the ratio would be 0/0; report equality.
        return 0.0

    ranks = np.arange(1, n + 1)
    return float(np.dot(2 * ranks - n - 1, values) / (n * values.sum()))

In [None]:
# Descriptive statistics of each field's citation graph, exported as a LaTeX table.

# Fixed seed so the Louvain community count is reproducible across kernel restarts.
LOUVAIN_SEED = 0

d = {}

for field_name, corpus in corpora.items():

    print(field_name)

    G = corpus['G']
    corpus_df = corpus['Df']

    # Undirected view, needed by metrics that are not defined on directed graphs.
    G_u = G.to_undirected()

    # Earliest / latest 'Date' among the rows whose 'Doi' is a node of the graph.
    start_date, end_date = (
        corpus_df
        .filter(pl.col('Doi').is_in(list(G.nodes)))
        .select(pl.col('Date').min().alias('Min'), pl.col('Date').max().alias('Max'))
        .row(0)
    )

    in_degree = [deg for _, deg in G.in_degree()]
    out_degree = [deg for _, deg in G.out_degree()]
    degree = [deg for _, deg in G.degree()]

    n_nodes = len(G)
    is_dag = nx.is_directed_acyclic_graph(G)

    d[field_name] = {
        'Nodes': nx.number_of_nodes(G),
        'Edges': nx.number_of_edges(G),
        'Density': nx.density(G),
        # Each triangle is reported once per corner, hence the division by 3.
        'Triangles': sum(nx.triangles(G_u).values()) / 3,
        'AvgIndegree': sum(in_degree) / len(in_degree),
        'AvgOutdegree': sum(out_degree) / len(out_degree),
        'AvgDegree': sum(degree) / len(degree),
        'Gini': gini(np.fromiter(degree, int)),
        'IsDAG': is_dag,
        # Number of nodes on the longest path; dag_longest_path raises on a
        # graph with cycles, so the cell is left empty (None) in that case.
        'DAGLongestPath': len(nx.dag_longest_path(G)) if is_dag else None,
        'Louvain': len(nx_comm.louvain_communities(G, seed=LOUVAIN_SEED)),
        'StrongComponents': nx.number_strongly_connected_components(G),
        'WeakComponents': nx.number_weakly_connected_components(G),
        'ClusteringDirected': nx.average_clustering(G),
        # 'ClusteringUndirected': nx.average_clustering(G_u),
        # Nodes with no outgoing edge.
        'Roots': sum(1 for deg in out_degree if deg == 0),
        'Transitivity': nx.transitivity(G),
        'StartDate': start_date,
        'EndDate': end_date,
        # In-degree based centralization: sum over nodes of (max - own in-degree),
        # normalised by (n - 1)^2.
        'Centralization': float(n_nodes * max(in_degree) - sum(in_degree)) / (n_nodes - 1) ** 2,
        'Isolates': nx.number_of_isolates(G),
        'Loops': nx.number_of_selfloops(G),
        # Count lazily; the list of all simple cycles can be very large.
        'Cycles': sum(1 for _ in nx.simple_cycles(G)),
        # 'Avg Shortest Path': nx.average_shortest_path_length(G_u),
        # 'Diameter': nx.diameter(G_u),
        # 'label_propagation_communities': len(nx_comm.label_propagation_communities(G_u)),
        # 'Flow Hierarchy': nx.flow_hierarchy(G),
        # 'Triadic Census': nx.triadic_census(G),
        # 'transitive_closure': nx.transitive_closure(G),
        # 'transitive_closure_dag': nx.transitive_closure_dag(G),
    }

# One row per field, with the field name as the first column.
ds = [{'Field': field_name} | desc_d for field_name, desc_d in d.items()]
df = pl.DataFrame(ds)

latex = df.to_pandas().to_latex(
    index=False,
    na_rep=' ',
    bold_rows=True,
    float_format="%.2f",
)

# NOTE(review): the later "Slides" cell writes to this same file name, so on a
# full run this table is overwritten by the slides variant -- confirm intent.
with open(os.path.join(LATEX_TABLE_PATH, 'citation_desc_stats.tex'), 'w') as file:
    file.write(latex)


# Slides

In [None]:
# Slides variant of the descriptive-statistics table: same metrics as the full
# table, without the 'Density' column.

# Fixed seed so the Louvain community count is reproducible across kernel restarts.
LOUVAIN_SEED = 0

d = {}

for field_name, corpus in corpora.items():

    print(field_name)

    G = corpus['G']
    corpus_df = corpus['Df']

    # Undirected view, needed by metrics that are not defined on directed graphs.
    G_u = G.to_undirected()

    # Earliest / latest 'Date' among the rows whose 'Doi' is a node of the graph.
    start_date, end_date = (
        corpus_df
        .filter(pl.col('Doi').is_in(list(G.nodes)))
        .select(pl.col('Date').min().alias('Min'), pl.col('Date').max().alias('Max'))
        .row(0)
    )

    in_degree = [deg for _, deg in G.in_degree()]
    out_degree = [deg for _, deg in G.out_degree()]
    degree = [deg for _, deg in G.degree()]

    n_nodes = len(G)
    is_dag = nx.is_directed_acyclic_graph(G)

    d[field_name] = {
        'Nodes': nx.number_of_nodes(G),
        'Edges': nx.number_of_edges(G),
        # 'Density': nx.density(G),
        # Each triangle is reported once per corner, hence the division by 3.
        'Triangles': sum(nx.triangles(G_u).values()) / 3,
        'AvgIndegree': sum(in_degree) / len(in_degree),
        'AvgOutdegree': sum(out_degree) / len(out_degree),
        'AvgDegree': sum(degree) / len(degree),
        'Gini': gini(np.fromiter(degree, int)),
        'IsDAG': is_dag,
        # Number of nodes on the longest path; dag_longest_path raises on a
        # graph with cycles, so the cell is left empty (None) in that case.
        'DAGLongestPath': len(nx.dag_longest_path(G)) if is_dag else None,
        'Louvain': len(nx_comm.louvain_communities(G, seed=LOUVAIN_SEED)),
        'StrongComponents': nx.number_strongly_connected_components(G),
        'WeakComponents': nx.number_weakly_connected_components(G),
        'ClusteringDirected': nx.average_clustering(G),
        # 'ClusteringUndirected': nx.average_clustering(G_u),
        # Nodes with no outgoing edge.
        'Roots': sum(1 for deg in out_degree if deg == 0),
        'Transitivity': nx.transitivity(G),
        'StartDate': start_date,
        'EndDate': end_date,
        # In-degree based centralization: sum over nodes of (max - own in-degree),
        # normalised by (n - 1)^2.
        'Centralization': float(n_nodes * max(in_degree) - sum(in_degree)) / (n_nodes - 1) ** 2,
        'Isolates': nx.number_of_isolates(G),
        'Loops': nx.number_of_selfloops(G),
        # Count lazily; the list of all simple cycles can be very large.
        'Cycles': sum(1 for _ in nx.simple_cycles(G)),
        # 'Avg Shortest Path': nx.average_shortest_path_length(G_u),
        # 'Diameter': nx.diameter(G_u),
        # 'label_propagation_communities': len(nx_comm.label_propagation_communities(G_u)),
        # 'Flow Hierarchy': nx.flow_hierarchy(G),
        # 'Triadic Census': nx.triadic_census(G),
        # 'transitive_closure': nx.transitive_closure(G),
        # 'transitive_closure_dag': nx.transitive_closure_dag(G),
    }

# One row per field, with the field name as the first column.
ds = [{'Field': field_name} | desc_d for field_name, desc_d in d.items()]
df = pl.DataFrame(ds)

latex = df.to_pandas().to_latex(
    index=False,
    na_rep=' ',
    bold_rows=True,
    float_format="%.2f",
)

# NOTE(review): this is the same file name the full-table cell above writes to,
# so running the whole notebook replaces that table with this slides variant.
# Consider a dedicated file name for the slides output -- confirm with the
# documents that \input this file before renaming.
with open(os.path.join(LATEX_TABLE_PATH, 'citation_desc_stats.tex'), 'w') as file:
    file.write(latex)