## US Titles Growth

In this notebook, we mainly create Figure 3 (growth of US Titles from 1994 to 2018, measured in tokens).
We also create the analogous figures for the other growth measures presented in the SI
and explore some additional statistics that are not discussed in the paper.

### Preparations

In [None]:
from collections import Counter
import seaborn as sns
import networkx as nx
import pandas as pd
from matplotlib import pyplot as plt
from quantlaw.utils.networkx import quotient_graph

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply seaborn's 'darkgrid' style to all figures created below.
sns.set_style('darkgrid')

In [None]:
# For every year from 1994 to 2018, read the seqitems cross-reference graph and keep only
# its level-0 nodes as (node_key, attribute_dict) pairs. The graph itself is not retained,
# so despite its name `Gs` maps year -> list of nodes, not year -> graph.
# NOTE(review): level-0 nodes are presumably the Titles (they are used as such below) - confirm.
# read_gpickle unpickles the file, so only load data from a trusted source.
Gs = {}
for year in range(1994, 2019):
    Gs[year] = [
        (node_key, node_attrs)
        for node_key, node_attrs in nx.read_gpickle(f'../../legal-networks-data/us/4_crossreference_graph/seqitems/{year}.gpickle.gz').nodes(data=True)
        if node_attrs['level'] == 0
    ]

In [None]:
# Long-format table with one row per (Title, year): title number, heading, token count, year.
# The rows are collected in a list and converted to a DataFrame once. Appending with
# `df.loc[len(df)] = ...` re-allocates the frame for every row and may leave all columns
# with object dtype, which makes later arithmetic and plotting fragile.
title_records = []
for year, titles in Gs.items():
    for node_key, node_attrs in titles:
        title_records.append({
            # Title number: the part of the node key before the first '_', without its
            # last character.
            # NOTE(review): the meaning of that trailing character is not visible here - confirm.
            'title': int(node_key.split('_')[0][:-1]),
            'name': node_attrs['heading'],
            'tokens_n': node_attrs['tokens_n'],
            'year': year,
        })
# Passing `columns` keeps the expected schema even if no records were collected.
df = pd.DataFrame(title_records, columns=['title', 'name', 'tokens_n', 'year'])

### Name changes in Titles

In [None]:
# Count the rows (one per year) for every (title number, heading) combination.
# A title number that appears with more than one heading changed its name during the period.
df.groupby([df.title, df.name]).count()

### Tokens per Title

In [None]:
# Wide format: one row per year, one column per title number, cells hold the token count
# (NaN where `df` has no row for that title in that year).
growth_df = df.pivot(index='year', columns='title', values='tokens_n')

In [None]:
# Figure: tokens per Title as stacked horizontal bars for every fourth year (1994, 1998, ..., 2018).
# rcParams are changed globally; later cells that do not set them again inherit these values.
plt.rcParams['figure.figsize'] = (32,12)
plt.rcParams['font.size'] = 24
# DataFrame.plot's `legend` accepts a bool or 'reverse', not a location string, so only
# `True` is passed here; placement and layout of the legend are set by plt.legend below.
growth_df.loc[list(range(1994, 2019, 4))].sort_index(ascending=False).plot.barh(stacked=True, legend=True)
plt.xlabel('Number of tokens')
plt.ylabel('Year')
plt.legend(title='Title', fontsize=19, loc='upper right', ncol=5)
plt.tight_layout()
plt.savefig('../graphics/us-tokens-per-title.pdf')

### Absolute and relative token growth from 1994 to 2018

In [None]:
# Token count per title in 1994, the first year of the series.
growth_df.loc[1994]

In [None]:
# Absolute token growth per title between 1994 and 2018, sorted ascending.
# Missing counts are replaced by 0 *before* subtracting: a title without a 1994 entry then
# contributes its full 2018 size (and a title without a 2018 entry its full loss) instead of
# being reported as zero growth, which is what filling the NaN difference afterwards does.
(growth_df.loc[2018].fillna(0) - growth_df.loc[1994].fillna(0)).sort_values()

In [None]:
# Relative token growth per title between 1994 and 2018, sorted ascending.
# Numerator: missing counts are replaced by 0 before subtracting, so a title without a 1994
# entry keeps its 2018 size as growth rather than being zeroed out.
# Denominator: a missing 1994 count is replaced by the placeholder 0.1 to avoid dividing by
# zero; such titles therefore show a very large ratio that only flags them as new, it is not a
# meaningful growth rate.
(
    (growth_df.loc[2018].fillna(0) - growth_df.loc[1994].fillna(0))
    / growth_df.loc[1994].fillna(0.1)
).sort_values()

### Other visualization options for tokens per Title

In [None]:
# Same stacked bar chart as the paper figure, but with one bar for every year.
# Only the figure size is changed here; the font size is inherited from the current rcParams.
plt.rcParams['figure.figsize'] = (24,12.5)
# DataFrame.plot's `legend` accepts a bool or 'reverse', not a location string, so only
# `True` is passed; the legend is then rebuilt with a title by plt.legend below.
growth_df.sort_index(ascending=False).plot.barh(stacked=True, legend=True)
plt.xlabel('Number of tokens')
plt.ylabel('Year')
plt.legend(title='Title')
plt.tight_layout()

In [None]:
# Share of every title in the yearly total: each row of growth_df is divided by that row's
# sum (missing token counts count as 0 in the sum and stay NaN in the result).
reldf = growth_df.div(growth_df.fillna(0).sum(axis=1), axis='rows')
# Show the first rows as a sanity check.
reldf.head()

In [None]:
# Share of each Title in the yearly token total, as stacked horizontal bars for every year.
# DataFrame.plot's `legend` accepts a bool or 'reverse', not a location string, so `True` is
# passed and the legend position is left to matplotlib's default placement.
reldf.sort_index(ascending=False).plot.barh(stacked=True, legend=True)
# The bars sum to 1; the x-axis is extended slightly beyond that.
plt.xlim(0,1.1)
# reldf is derived from the token counts (tokens_n), so the axis shows token shares.
plt.xlabel('Fraction of tokens')
plt.ylabel('Year')
plt.tight_layout()

### Cross-references

In [None]:
def make_ordered_multigraph(G):
    """Return an ``OrderedMultiDiGraph`` holding G's nodes in sorted order and G's edges.

    Only the node keys are copied (node attributes are not carried over); every edge is
    added together with its data dictionary.
    """
    ordered = nx.OrderedMultiDiGraph()
    ordered.add_nodes_from(sorted(G.nodes))
    # Edges are only taken over when both endpoints are nodes of the new graph.
    edges_with_data = (
        (source, target, attrs)
        for source, target, attrs in G.edges(data=True)
        if source in ordered and target in ordered
    )
    ordered.add_edges_from(edges_with_data)
    return ordered

def make_ordered_quotient_graph(G, self_loops=False):
    """Build the title-level reference multigraph of G with nodes sorted by title number.

    G is collapsed with quantlaw's ``quotient_graph`` on the ``'law_name'`` node attribute,
    restricted to edges of type ``'reference'``. The resulting nodes are relabelled from the
    law name to its integer title number and the graph is rebuilt with sorted nodes.
    NOTE(review): ``quotient_graph`` presumably merges all nodes sharing a ``law_name`` into
    one node named after it - confirm against the quantlaw documentation.

    Parameters
    ----------
    G : networkx graph
        Cross-reference graph whose nodes carry a ``'law_name'`` attribute. It is not modified.
    self_loops : bool, default False
        Passed through to ``quotient_graph``.

    Returns
    -------
    networkx.OrderedMultiDiGraph
        Graph from ``make_ordered_multigraph`` whose nodes are integer title numbers.

    Raises
    ------
    ValueError
        If a law name does not yield an integer title number.
    """
    qG = quotient_graph(G, 'law_name', edge_types=['reference'], self_loops=self_loops)
    # No node attributes are set on G or qG: make_ordered_multigraph copies node keys only,
    # so any attribute would be discarded. (Calling nx.set_node_attributes(G, {title: title})
    # without `name` has no effect when the titles are not nodes of G and raises otherwise.)
    # Title number = text before the first '-', then the part after the last space.
    title_numbers = {
        title: int(title.split('-', 1)[0].split(' ')[-1])
        for title in qG.nodes
    }
    qG = nx.relabel_nodes(qG, title_numbers)
    return make_ordered_multigraph(qG)

#### Cross-reference heatmaps for individual years

In [None]:
# Heatmap of the title-level reference counts for a single year (2017), including
# references within a Title (self_loops=True). Rows and columns follow the sorted node order.
G = nx.read_gpickle(f'../../legal-networks-data/us/4_crossreference_graph/seqitems/{2017}.gpickle.gz')
SG = make_ordered_quotient_graph(G, self_loops=True)
sns.heatmap(
    nx.adjacency_matrix(SG).todense(),
    # Fixed colour scale from 0 to 100 instead of seaborn's data-driven limits.
    vmin=0,
    vmax=100,
    square=True,
    xticklabels=SG.nodes,
    yticklabels=SG.nodes,
);

### In- and Out-Degree, Internal References per Title

In [None]:
def make_degree_dfs(self_loops=False, years=range(1994, 2019), title_numbers=range(1, 55)):
    """Collect the yearly in- and out-degree of every Title in the title-level reference graph.

    For each year the seqitems cross-reference graph is read from disk and collapsed with
    ``make_ordered_quotient_graph``; the in- and out-degree of each title node in the resulting
    multigraph is recorded.

    Parameters
    ----------
    self_loops : bool, default False
        Passed through to ``make_ordered_quotient_graph``.
        NOTE(review): presumably decides whether references inside a Title are counted -
        confirm against quantlaw's ``quotient_graph``.
    years : iterable of int, default ``range(1994, 2019)``
        Years to load; one row per year.
    title_numbers : iterable of int, default ``range(1, 55)``
        Title numbers to report. A title missing from a year's graph gets degree 0.

    Returns
    -------
    (indegree_df, outdegree_df) : tuple of pandas.DataFrame
        Integer frames indexed by year with one column per title number. Columns that are 0
        in every year are dropped, independently for each of the two frames.
    """
    years = list(years)
    title_numbers = list(title_numbers)
    indegree_rows = []
    outdegree_rows = []
    for year in years:
        G = nx.read_gpickle(f'../../legal-networks-data/us/4_crossreference_graph/seqitems/{year}.gpickle.gz')
        SG = make_ordered_quotient_graph(G, self_loops=self_loops)
        indegree_rows.append([SG.in_degree(n) if n in SG else 0 for n in title_numbers])
        outdegree_rows.append([SG.out_degree(n) if n in SG else 0 for n in title_numbers])
    # Build each frame once from all rows: this keeps an integer dtype (so `.any` yields a
    # proper boolean mask) and avoids growing the frame row by row.
    indegree_df = pd.DataFrame(indegree_rows, index=years, columns=title_numbers)
    outdegree_df = pd.DataFrame(outdegree_rows, index=years, columns=title_numbers)
    # Keep only the titles that have a non-zero degree in at least one year.
    indegree_df = indegree_df.loc[:, indegree_df.any(axis=0)]
    outdegree_df = outdegree_df.loc[:, outdegree_df.any(axis=0)]
    return indegree_df, outdegree_df

In [None]:
# Title-level in-/out-degrees computed with self_loops=True.
# The call reads every yearly graph (1994-2018) from disk.
indegree_df_with_selfloops, outdegree_df_with_selfloops = make_degree_dfs(self_loops=True)

In [None]:
# Title-level in-/out-degrees computed with self_loops=False.
# The call reads every yearly graph (1994-2018) from disk again.
indegree_df, outdegree_df = make_degree_dfs(self_loops=False)

In [None]:
# References within a Title = in-degree with self-loops minus in-degree without them.
# make_degree_dfs drops all-zero columns separately for each frame, so a title that is only
# referenced from within itself exists in the first frame but not in the second. With
# fill_value=0 such a missing column counts as 0 instead of turning the result into NaN.
internal_ref_df = indegree_df_with_selfloops.sub(indegree_df, fill_value=0)

#### Title self-references

In [None]:
# Figure: references within each Title (in-degree with self-loops minus in-degree without)
# as stacked horizontal bars for every fourth year from 1994 to 2018.
plt.rcParams.update({'figure.figsize': (32,12), 'font.size': 20})
internal_ref_df.loc[list(range(1994, 2019, 4))].sort_index(ascending=False).plot.barh(stacked=True)
plt.xlabel('Internal references')
plt.ylabel('Year')
plt.legend(title='Title', fontsize=16, loc='upper right', ncol=3)
# Set the ticks first and the limits afterwards so that the limits have the final say.
plt.xticks(range(0,70000,5000))
plt.xlim(0,65000)
plt.tight_layout()
plt.savefig('../graphics/us-internal-references-per-title.pdf')

#### Title in-degree

In [None]:
# Figure: in-degree per Title in the title-level graph without self-loops,
# as stacked horizontal bars for every fourth year from 1994 to 2018.
plt.rcParams.update({'figure.figsize': (32,12), 'font.size': 20})
indegree_df.loc[list(range(1994, 2019, 4))].sort_index(ascending=False).plot.barh(stacked=True)
plt.xlabel('References incoming from other titles')
plt.ylabel('Year')
plt.legend(title='Title', fontsize=16, loc='upper right', ncol=3)
# Ticks go up to 24000 but the axis is cut at 22000; the limits must be set after the ticks.
plt.xticks(range(0,25000,1000))
plt.xlim(0,22000)
plt.tight_layout()
plt.savefig('../graphics/us-indegree-per-title.pdf')

#### Title out-degree

In [None]:
# Figure: out-degree per Title in the title-level graph without self-loops,
# as stacked horizontal bars for every fourth year from 1994 to 2018.
plt.rcParams.update({'figure.figsize': (32,12), 'font.size': 20})
outdegree_df.loc[list(range(1994, 2019, 4))].sort_index(ascending=False).plot.barh(stacked=True)
plt.xlabel('References outgoing to other titles')
plt.ylabel('Year')
plt.legend(title='Title', fontsize=16, loc='upper right', ncol=3)
# Ticks go up to 24000 but the axis is cut at 22000; the limits must be set after the ticks.
plt.xticks(range(0,25000,1000))
plt.xlim(0,22000)
plt.tight_layout()
plt.savefig('../graphics/us-outdegree-per-title.pdf')

### Number of structures, including subseqitems

In [None]:
def get_title_number(title):
    """Extract the integer title number from a law name.

    The number is read from the text before the first ``'-'``: of that text, the part
    after the last space is converted with ``int``.

    Raises
    ------
    ValueError
        If that part is not a valid integer literal.
    """
    head, _, _ = title.partition('-')
    return int(head.rpartition(' ')[-1])

In [None]:
# Number of nodes per Title and year in the subseqitems graphs (the node named 'root' in
# `law_name` is excluded). Rows are years, columns are title numbers 1-54.
structure_title_numbers = list(range(1, 55))
structure_years = []
structure_rows = []
for year in range(1994,2019):
    G = nx.read_gpickle(f'../../legal-networks-data/us/4_crossreference_graph/subseqitems/{year}.gpickle.gz')
    # Count nodes per law name first, so the name is parsed once per law, not once per node.
    law_name_counts = Counter(law_name for _, law_name in G.nodes(data='law_name') if law_name != 'root')
    # Sum per title number, so that two law names mapping to the same number are added up
    # instead of one silently overwriting the other.
    substructures = Counter()
    for law_name, node_count in law_name_counts.items():
        substructures[get_title_number(law_name)] += node_count
    structure_years.append(year)
    # Counter returns 0 for title numbers that do not occur in this year.
    structure_rows.append([substructures[n] for n in structure_title_numbers])
# Build the frame once (integer dtype) instead of growing it row by row.
structure_df = pd.DataFrame(structure_rows, index=structure_years, columns=structure_title_numbers)
# Keep only the titles that have at least one node in at least one year.
structure_df = structure_df.loc[:, structure_df.any(axis=0)]

In [None]:
# Figure: number of nodes per Title in the subseqitems graphs,
# as stacked horizontal bars for every fourth year from 1994 to 2018.
plt.rcParams.update({'figure.figsize': (32,12), 'font.size': 20})
structure_df.loc[list(range(1994, 2019, 4))].sort_index(ascending=False).plot.barh(stacked=True)
plt.xlabel('Number of substructures')
plt.ylabel('Year')
plt.legend(title='Title', fontsize=16, loc='upper right', ncol=3)
# Ticks go up to 950000 but the axis is cut at 850000; the limits must be set after the ticks.
plt.xticks(range(0,1000000,50000))
plt.xlim(0,850000)
plt.tight_layout()
plt.savefig('../graphics/us-structures-with-subseqitems-per-title.pdf')

### End