In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
# Use seaborn's "whitegrid" style for every figure drawn after this point.
sns.set_style("whitegrid")

In [None]:
# Shared typography: `fontsize` is the base size used for tick labels and legend
# entries; `extra` is added on top of it for axis labels and the legend title.
fontsize, extra = 30, 4

# Serif text (Times New Roman) with the matching STIX math font.
plt.rcParams.update({
    "font.family": "serif",
    "font.serif": "Times New Roman",
    "mathtext.fontset": "stix",
})

In [None]:
# One reference-set CSV per yearly snapshot, 1998 through 2019 inclusive
# (the upper bound of `range` is exclusive).
files = [
    f"../data/reference_sets_{snapshot_year}.csv"
    for snapshot_year in range(1998, 2020)
]

In [None]:
# Stack all yearly snapshots into one frame, then derive the title number from
# the first two characters of `root`, both as an integer (used for sorting)
# and as a string (used as a categorical plotting axis).
yearly_frames = [pd.read_csv(path, index_col=0) for path in files]
df = pd.concat(yearly_frames).assign(
    root_title=lambda frame: frame["root"].map(lambda root: int(root[:2])),
    root_title_str=lambda frame: frame["root_title"].map(str),
)

In [None]:
# Unit-square colour swatches used as hand-built legend handles: blue, red and
# purple, all drawn at the same opacity as the histogram cells.
b1, b2, b3 = (
    mpatches.Rectangle((0, 0), 1, 1, fc=swatch_colour, alpha=0.75)
    for swatch_colour in ("#3333FF", "#FF0000", "#991A80")
)

In [None]:
# Overlay two 2D histograms (1998 in blue, 2019 in red) of reference-tree size,
# measured in edges, per title. Each year is normalised separately
# (common_norm=False), so the two layers are comparable despite differing
# numbers of observations.
plot_df = df.query("year == 1998 or year == 2019").sort_values("root_title")

# The x variable is a string column, so it is plotted as a categorical axis whose
# positions follow order of first appearance; because of the sort above, position
# i holds the i-th smallest plotted title. Ticks and labels are therefore derived
# from the plotted subset, not from every year in `df`: a title that only occurs
# in some other year would otherwise shift every later label off its column.
plotted_titles = sorted(plot_df["root_title"].unique())
n_titles = len(plotted_titles)

fig, ax = plt.subplots(figsize=(30, 9))
sns.histplot(
    data=plot_df,
    x="root_title_str",
    y="ref_edges",
    hue="year",
    hue_order=[1998, 2019],
    palette=["#3333FF", "#FF0000"],
    stat="probability",
    common_norm=False,
    discrete=(True, False),  # one column per title; edge counts are binned
    binwidth=25,
    alpha=0.75,
    legend=False,  # replaced by the hand-built legend below
    ax=ax,
)

ax.set_xlabel("Title", fontsize=fontsize + extra)
ax.set_xticks(range(n_titles))
ax.set_xticklabels(plotted_titles, fontsize=fontsize)
# Half a column of padding on either side of the first and last title.
ax.set_xlim(-0.5, n_titles - 0.5)

ax.set_ylabel("Number of Edges in Reference Tree", fontsize=fontsize + extra)
ax.set_yticks(range(0, 1501, 250))
ax.set_yticklabels(range(0, 1501, 250), fontsize=fontsize)
ax.set_ylim(0, 1500)

# Hand-built legend: the third swatch labels cells where both years overlap.
ax.legend([b1, b2, b3], [1998, 2019, "Both"], title="Year", labelspacing=0.2,
          fontsize=fontsize, title_fontsize=fontsize + extra, loc="center right")
fig.tight_layout()
fig.savefig("../writing/figures/reference_tree_size_ref_edges_2dhist.pdf", transparent=True)