In [None]:
%run './model/multi_corpus.py'
%run './constants.py'

import re
from collections import Counter

from matplotlib import cm

In [None]:
# Fetch the per-field corpora once, then split them into two lookups keyed by
# field name: one holding each corpus' 'G' entry, the other its 'Df' entry.
corpora = citation_graphs()
Gs = {field_name: corpora[field_name]['G'] for field_name in corpora}
Dfs = {field_name: corpora[field_name]['Df'] for field_name in corpora}

In [None]:

# For every field, plot the distribution of the gap (in years) between a
# citing paper's date and the date of each paper it references, and save one
# PNG per field under LATEX_FIGURES_PATH/citation_ref_dates.
sns.reset_defaults()

for field_name, df in Dfs.items():

    # One row per (citing paper, reference) pair, joined back onto the same
    # frame to look up the referenced paper's own date ('DateReferences').
    df = (
        df
        .select(pl.col('Doi'), pl.col('Date'), pl.col('References'))
        .explode('References')
        .drop_nulls()
        .join(other=df.select('Doi', 'Date'), left_on='References', right_on='Doi', how='left', suffix='References')
        .with_columns((pl.col('Date').dt.year() - pl.col('DateReferences').dt.year()).alias('DateDiff'))
        # The left join keeps references whose Doi is absent from this frame;
        # their DateReferences, and therefore DateDiff, is null. Drop them so
        # that None is neither used as a bar position nor counted in pk.
        .drop_nulls(subset='DateDiff')
    )

    # Nothing left to count: unpacking zip(*...) below would fail.
    if df.height == 0:
        print(f'{field_name}: no reference pairs with a known date difference, skipping plot')
        continue

    dates = df['DateDiff']
    date, freq = zip(*Counter(dates).items())
    # Relative frequency of each year gap.
    pk = np.array(freq) / np.sum(freq)

    fig = plt.figure(figsize=(3, 3))
    ax = fig.add_axes([0, 0, 1, 1])

    # Left axis: absolute counts per year gap.
    ax.bar(date, freq, edgecolor='k')
    # NOTE(review): without a backslash, mathtext renders the letters "Delta"
    # rather than the Greek symbol; use r'$\Delta Date$' if the symbol was intended.
    ax.set_xlabel('$Delta Date$')
    ax.set_ylabel('$Freq$')

    # Right axis: the same bars expressed as relative frequencies.
    ax1 = ax.twinx()
    ax1.bar(date, pk, edgecolor='k')
    ax1.set_ylabel('$P(Date Diff)$')

    ax.grid(False)

    fig.tight_layout()

    plt.savefig(
        os.path.join(LATEX_FIGURES_PATH, 'citation_ref_dates', f'{field_name.capitalize()}.png'),
        transparent=True,
        dpi=150,
        bbox_inches='tight'
    )

In [None]:
# Build a summary table: one unnamed column of statistic labels, followed by
# one column per field holding the mean / std / min / max of the year gap
# between a citing paper and each paper it references.
d = {'': ['Mean', 'Std', 'Min', 'Max']}

for field_name, df in Dfs.items():
    # Date of every paper in this field, used to look up each reference's date.
    publication_dates = df.select('Doi', 'Date')

    # One row per (citing paper, reference) pair.
    citation_pairs = (
        df
        .select(pl.col('Doi'), pl.col('Date'), pl.col('References'))
        .explode('References')
    )

    # Attach the referenced paper's date as 'DateReferences'.
    dated_pairs = citation_pairs.join(
        other=publication_dates,
        left_on='References',
        right_on='Doi',
        how='left',
        suffix='References',
    )

    year_gap = pl.col('Date').dt.year() - pl.col('DateReferences').dt.year()
    date_diff = pl.col('DateDiff')

    summary = (
        dated_pairs
        .with_columns(year_gap.alias('DateDiff'))
        .select(
            date_diff.mean().alias('Mean'),
            date_diff.std().alias('Std'),
            date_diff.min().alias('Min'),
            date_diff.max().alias('Max'),
        )
    )

    # Single-row result, stored in the same order as the labels above.
    d[field_name] = summary.row(0)

df = pl.DataFrame(d)

# new_cols = []
# for col in df.columns:
#     if '&' in col:
#         # \makecell{Some really \\ longer text}
#         col = ' & '.join(col.split('&'))
#     new_cols.append(col)
# df.columns = new_cols

# Render the summary frame as a LaTeX table, then rewrite its header row so
# that labels containing an ampersand are stacked on two lines.
latex = (
    df
    .to_pandas()
    .to_latex(
        index=False,
        na_rep=' ',
        bold_rows=True,
        float_format="%.2f",
        # The header rewrite below looks for the escaped form `\&`, so ask for
        # escaping explicitly instead of relying on the pandas default.
        escape=True,
    )
)

newline_splits = latex.split('\n')
# Assumes the usual to_latex layout: \begin{tabular}, \toprule, then the header row.
col_labels = newline_splits[2]

# Detach the row terminator `\\` first, so it can never end up inside the
# nested tabular built for the last label.
header_body = col_labels.rstrip()
row_end = ''
if header_body.endswith('\\\\'):
    header_body = header_body[:-2].rstrip()
    row_end = ' \\\\'

new_cols_labels = []
# Split on column separators only (an `&` not preceded by a backslash), which
# does not depend on how much padding pandas puts around each cell.
for col_label in re.split(r'(?<!\\)&', header_body):
    col_label = col_label.strip()
    if '\\&' in col_label:
        parts = [part.strip() for part in col_label.split('\\&')]
        # e.g. \begin{tabular}{cc} Artificial \& \\ Intelligence \end{tabular}
        col_label = '\\begin{tabular}{cc} ' + ' \\& \\\\ '.join(parts) + ' \\end{tabular}'
    new_cols_labels.append(col_label)

new_cols_labels = ' & '.join(new_cols_labels) + row_end
newline_splits[2] = new_cols_labels
latex = ' \n'.join(newline_splits)

# Echo the finished table in the notebook, then write it to the LaTeX tables folder.
print(latex)

table_path = os.path.join(LATEX_TABLE_PATH, 'citation_ref_dates.tex')
with open(table_path, 'w+') as tex_file:
    tex_file.write(latex)
    
# # df