In [None]:
# Execute the project scripts so that the names they define become available
# in this notebook's namespace.
# NOTE(review): the cells below use names that are never defined in this
# notebook (co_citation_graphs, compute_Z, nx_comm, pl, plt, sns, dendrogram,
# os, LATEX_FIGURES_PATH) -- presumably these two scripts provide them; confirm.
%run './model/multi_corpus.py'
%run './constants.py'


In [None]:
# Build the co-citation corpora once, then index their 'G' entries (the graphs
# iterated over below) and their 'Df' entries by field name.
corpora = co_citation_graphs(n_edges=50)

Gs = {
    name: entry['G']
    for name, entry in corpora.items()
}
Dfs = {
    name: entry['Df']
    for name, entry in corpora.items()
}

# Girvan-Newman

In [None]:
sns.reset_defaults()

# Destination folder for the Girvan-Newman dendrograms. It is created up front
# so that savefig does not fail when the folder does not exist yet.
girvan_newman_dir = os.path.join(LATEX_FIGURES_PATH, 'co_citation_girvan_newman_hier_comm')
os.makedirs(girvan_newman_dir, exist_ok=True)

for (field_name, G) in Gs.items():

    print(field_name)

    nodes = list(G.nodes)

    # Successive Girvan-Newman partitions, preceded by the trivial partition
    # that keeps every node in a single community.
    comms = list(nx_comm.girvan_newman(G))
    comms.insert(0, (set(nodes),))

    Z, leaves = compute_Z(comms)

    # Readable label per leaf DOI: "(<first ', '-separated token of the first
    # Authors entry>, <year of Date>)".
    new_leaves = dict(
        pl.read_parquet(f'./output/main_dfs/{field_name}.parquet')
        .select(
            pl.col('Doi'), 
            pl.concat_str([
                pl.lit('('),
                pl.col('Authors').arr.first().str.split(', ').arr.first(),
                pl.lit(', '),
                pl.col('Date').dt.year(),
                pl.lit(')')
            ])
        )
        .filter(pl.col('Doi').is_in(leaves))
        # A missing author or date makes the concatenated label null; drop
        # those rows so the leaf keeps its DOI as label instead of None.
        .drop_nulls()
        .to_numpy()
    )

    # Start from DOI -> DOI (the fallback label, in leaf order) and overwrite
    # the entries for which a readable label is available.
    leaves = dict(zip(leaves, leaves))
    leaves.update(new_leaves)

    fig = plt.figure(figsize=(15, 18))
    # The axes fill the whole canvas, so tick and axis labels fall outside of
    # it; bbox_inches='tight' below enlarges the saved image to include them.
    ax = fig.add_axes([0, 0, 1, 1])

    ax.set_ylabel('Distance')
    
    dendrogram(Z, labels=list(leaves.values()), ax=ax)
    ax.yaxis.grid(False)
    ax.xaxis.grid(False)

    fig.savefig(
        os.path.join(girvan_newman_dir, f'{field_name.capitalize()}.png'), 
        transparent=True, 
        dpi=300,
        bbox_inches='tight'
    )

    # Display the figure next to its printed field name, then release it so
    # that open figures do not pile up across iterations.
    plt.show()
    plt.close(fig)

    

# Louvain

In [None]:


# Same plotting defaults as the Girvan-Newman cell, so this cell renders the
# same way when it is run on its own.
sns.reset_defaults()

# Louvain is a randomised algorithm; a fixed seed makes the hierarchy (and
# therefore the saved figure) reproducible from one run to the next.
LOUVAIN_SEED = 42

# Destination folder for the Louvain dendrograms. It is created up front so
# that savefig does not fail when the folder does not exist yet.
louvain_dir = os.path.join(LATEX_FIGURES_PATH, 'co_citation_louvain_hier_comm')
os.makedirs(louvain_dir, exist_ok=True)

for (field_name, G) in Gs.items():

    print(field_name)

    nodes = list(G.nodes)
    
    # Louvain partitions ordered by increasing number of communities, framed
    # by the two trivial partitions: all nodes in one community first, every
    # node in its own community last.
    comms = sorted(
        nx_comm.louvain_partitions(G, weight=None, seed=LOUVAIN_SEED),
        key=len
    )
    comms.insert(0, (set(nodes),))
    comms.append(tuple({n} for n in nodes))

    Z, leaves = compute_Z(comms)

    # Readable label per leaf DOI: "(<first ', '-separated token of the first
    # Authors entry>, <year of Date>)".
    new_leaves = dict(
        pl.read_parquet(f'./output/main_dfs/{field_name}.parquet')
        .select(
            pl.col('Doi'), 
            pl.concat_str([
                pl.lit('('),
                pl.col('Authors').arr.first().str.split(', ').arr.first(),
                pl.lit(', '),
                pl.col('Date').dt.year(),
                pl.lit(')')
            ])
        )
        .filter(pl.col('Doi').is_in(leaves))
        # A missing author or date makes the concatenated label null; drop
        # those rows so the leaf keeps its DOI as label instead of None.
        .drop_nulls()
        .to_numpy()
    )

    # Start from DOI -> DOI (the fallback label, in leaf order) and overwrite
    # the entries for which a readable label is available.
    leaves = dict(zip(leaves, leaves))
    leaves.update(new_leaves)

    fig = plt.figure(figsize=(15, 18))
    # The axes fill the whole canvas, so tick and axis labels fall outside of
    # it; bbox_inches='tight' below enlarges the saved image to include them.
    ax = fig.add_axes([0, 0, 1, 1])

    ax.set_ylabel('Distance')
    
    dendrogram(Z, labels=list(leaves.values()), ax=ax)
    
    ax.yaxis.grid(False)
    ax.xaxis.grid(False)

    fig.savefig(
        os.path.join(louvain_dir, f'{field_name.capitalize()}.png'), 
        transparent=True, 
        dpi=300,
        bbox_inches='tight'
    )

    # Display the figure next to its printed field name, then release it so
    # that open figures do not pile up across iterations.
    plt.show()
    plt.close(fig)
