### Goal of this notebook

Visualize case citations based on HUDOC metadata: in this notebook I prepare the Gephi edges/nodes.

IMPORTANT: only JUDGMENT citations are included. Decisions etc. are ignored.

Generation -1: all judgments that have outgoing citations to our annotated jugdments.
Generation  0: our annotated judgments.
Generation  1: all judgments that are cited by generation 0.

In [1]:
save = False

import helpers
import api
import importlib
import pandas as pd

from warnings import simplefilter

simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

def reload():
    importlib.reload(api)
    importlib.reload(helpers)

### Step 1 -- For each annotated judgment, fetch metadata from HUDOC

In [2]:
reload()

# ECLI numbers of all cases we annotated
ecli_nrs_annotations = pd.read_csv('data/annotations/ECLI_case_references.csv', index_col='ECLI number').index.to_list()

# For each case, fetch HUDOC metadata
metadata_annotations = await api.hudoc_judgments_metadata(by='ecli', cases=ecli_nrs_annotations)

>> Start iteration for 190 ECLIs.
>> Start iteration for 100 ECLIs.
>> Done, could not improve further. Identified 90 judgments in 190 ECLIs.


### Step 2 -- Metadata JUDGMENTS generation 0

Each row corresponds to an annotated judgment (i.e. generation 0).

In [5]:
# Construct DataFrame from fetched HUDOC metadata
df_metadata = pd.DataFrame(metadata_annotations).set_index('ecli')

# Parse application numbers (i.e. cited ECHR cases) and convert them to a list
for col in ['extractedappno', 'sclappnos']:
    df_metadata[col] = df_metadata[col].apply(lambda x: x.split(';'))

df_metadata

Unnamed: 0_level_0,sharepointid,Rank,ECHRRanking,languagenumber,itemid,docname,doctype,application,appno,conclusion,...,documentcollectionid2,languageisocode,extractedappno,isplaceholder,doctypebranch,respondent,advopidentifier,advopstatus,appnoparts,sclappnos
ecli,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ECLI:CE:ECHR:2001:0118JUD002487694,324307,358.8876953125,1501,1,001-59156,CASE OF COSTER v. THE UNITED KINGDOM,HEJUD,MS WORD,24876/94,No violation of Art. 8;No violation of P1-1;No...,...,CASELAW;JUDGMENTS;GRANDCHAMBER;ENG,ENG,"[24876/94, 28/77, 57/78, 1/94, 18/94]",False,GRANDCHAMBER,GBR,,,24876;94,"[20348/92, 10843/84, 7525/76, 9063/80, 16969/9..."
ECLI:CE:ECHR:2001:0118JUD002528994,422449,358.8876953125,1501,1,001-59157,CASE OF LEE v. THE UNITED KINGDOM,HEJUD,MS WORD,25289/94,No violation of Art. 8;No violation of P1-1;No...,...,CASELAW;JUDGMENTS;GRANDCHAMBER;ENG,ENG,"[25289/94, 28/77, 57/78, 1/94, 18/94]",False,GRANDCHAMBER,GBR,,,25289;94,"[20348/92, 10843/84, 7525/76, 9063/80, 16969/9..."
ECLI:CE:ECHR:2001:0118JUD002515494,324309,358.8876953125,1501,1,001-59158,CASE OF JANE SMITH v. THE UNITED KINGDOM,HEJUD,MS WORD,25154/94,No violation of Art. 8;No violation of P1-1;No...,...,CASELAW;JUDGMENTS;GRANDCHAMBER;ENG,ENG,"[25154/94, 28/77, 57/78, 1/94, 18/94]",False,GRANDCHAMBER,GBR,,,25154;94,"[19178/91, 20348/92, 10843/84, 7525/76, 9063/8..."
ECLI:CE:ECHR:2001:0118JUD002488294,324306,358.8876953125,1501,1,001-59155,CASE OF BEARD v. THE UNITED KINGDOM,HEJUD,MS WORD,24882/94,No violation of Art. 8;No violation of Art. 14,...,CASELAW;JUDGMENTS;GRANDCHAMBER;ENG,ENG,"[24882/94, 28/77, 57/78, 1/94, 18/94]",False,GRANDCHAMBER,GBR,,,24882;94,"[20348/92, 10843/84, 7525/76, 9063/80, 16969/9..."
ECLI:CE:ECHR:1996:0916JUD002189393,323067,300.103729248047,1501,1,001-58062,CASE OF AKDİVAR AND OTHERS v. TURKEY,HEJUD,MS WORD,21893/93,Preliminary objection dismissed (Art. 35) Admi...,...,CASELAW;JUDGMENTS;GRANDCHAMBER;ENG,ENG,"[21893/93, 788/60, 5583/72, 10148/82, 1994/31]",False,GRANDCHAMBER,TUR,,,21893;93,"[5493/72, 5310/71, 7654/76, 9697/82, 11889/85,..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ECLI:CE:ECHR:2007:0215JUD004385498,341080,491.113433837891,1281,1,001-79488,CASE OF SOYLU v. TURKEY,HEJUD,MS WORD,43854/98,No violation of Art. 3;No violation of Art. 8;...,...,CASELAW;JUDGMENTS;CHAMBER;ENG,ENG,"[43854/98, 26973/95, 23423/94, 8811/02, 8813/0...",False,CHAMBER,TUR,,,43854;98,[]
ECLI:CE:ECHR:1998:0424JUD002318494,443191,317.281280517578,1281,1,001-58162,CASE OF SELÇUK AND ASKER v. TURKEY,HEJUD,MS WORD,23184/94;23185/94,Preliminary objection rejected;Violation of Ar...,...,CASELAW;JUDGMENTS;CHAMBER;ENG,ENG,"[23184/94, 23185/94, 1994/57]",False,CHAMBER,TUR,,,23184;94;23185;94,"[14038/88, 21893/93, 21987/93, 23878/94, 23186..."
ECLI:CE:ECHR:2012:0216JUD000908904,366147,708.819396972656,1281,1,001-109119,CASE OF KONTSEVYCH v. UKRAINE,HEJUD,MS WORD,9089/04,Violation of Article 6 - Right to a fair trial...,...,CASELAW;JUDGMENTS;CHAMBER;ENG,ENG,"[9089/04, 8415/02, 40450/04, 75139/01, 48757/9...",False,CHAMBER,UKR,,,9089;04,[]
ECLI:CE:ECHR:2016:1020JUD003400007,457459,1202.60241699219,1281,1,001-167557,CASE OF VINNIYCHUK v. UKRAINE,HEJUD,MS WORD,34000/07,Violation of Article 8 - Right to respect for ...,...,CASELAW;JUDGMENTS;CHAMBER;ENG,ENG,"[34000/07, 18639/03, 31365/96, 50963/99, 21447...",False,CHAMBER,UKR,,,34000;07,[]


### Step 3 -- Outgoing citations (generation 0 -> 1)

Each row corresponds to an annotated judgment (i.e. generation 0).

Each column corresponds to a cited judgment (i.e. generation 1).

In [6]:
reload()

# Find outgoing citations of generation 0
df_outgoing_citations, outgoing_citations_ecli_map = await helpers.construct_outgoing_citations_dataframe(
    df_metadata, generations=[0]
)

# Convert ECLI to case names.
df_outgoing_citations = helpers.convert_ecli_to_case_names(
    df_outgoing_citations, metadata_annotations, outgoing_citations_ecli_map
)

>> Start iteration for 783 APPNOs.
>> Start iteration for 159 APPNOs.
>> Done, could not improve further. Identified 624 judgments in 783 APPNOs.

Generation(s) [0] cited 505 unique cases.


In [7]:
df_outgoing_citations

Unnamed: 0_level_0,CASE OF ŠKRTIĆ v. CROATIA,CASE OF İPEK v. TURKEY,CASE OF ĐORĐEVIĆ v. CROATIA,CASE OF ČONKA v. BELGIUM,CASE OF ĆOSIĆ v. CROATIA,CASE OF ÖZTÜRK v. TURKEY,CASE OF ÖNERYILDIZ v. TURKEY,CASE OF ÇETİN AND OTHERS v. TURKEY [Extracts],CASE OF ÇAKICI v. TURKEY,CASE OF ZWIERZYNSKI v. POLAND,...,CASE OF AKTAŞ v. TURKEY,CASE OF AKSOY v. TURKEY,CASE OF AKMAN v. TURKEY,CASE OF AKKUŞ v. TURKEY,CASE OF AKIMOVA v. AZERBAIJAN,CASE OF AKDİVAR AND OTHERS v. TURKEY (ARTICLE 50),CASE OF AHMET ÖZKAN AND OTHERS v. TURKEY,CASE OF ACHOUR v. FRANCE,CASE OF A. v. THE UNITED KINGDOM,CASE OF A. v. CROATIA
case_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CASE OF AHMET ÖZKAN AND OTHERS v. TURKEY,False,False,False,False,False,False,False,False,True,False,...,True,True,False,False,False,True,False,False,False,False
CASE OF AKDİVAR AND OTHERS v. TURKEY,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
CASE OF AKSAKAL v. TURKEY,False,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
CASE OF ALIVERDIYEV v. RUSSIA,False,False,False,False,True,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
CASE OF ALTUN v. TURKEY,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CASE OF ZRILIĆ v. CROATIA,False,False,False,False,True,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
CASE OF ÖZTOPRAK AND OTHERS v. TURKEY,False,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,True,False,False,False,False
CASE OF ĆOSIĆ v. CROATIA,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
CASE OF ŞAYLI v. TURKEY,False,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,True,False,False,False,False


NB: notice 86 rows instead of the previous 90 rows. I excluded cases without ciations.

### Step 4 -- Incoming citations (generation -1 -> 0)

Each row corresponds to an incoming citation (i.e. generation -1).

Each column corresponds to an annotated judgment (i.e. generation 0).

In [65]:
reload()

# All unique applicaiton numbers of generation 0
appnos_gen_0 = set()
for appnos in df_metadata['appno']:
    for appno in appnos.split(';'):
        appnos_gen_0.add(appno)

# Fetch metadata of judgments that cite generation 0
metadata_icoming_citations = await api.hudoc_judgments_incoming_citations_metadata(
    by='appno',
    cases=appnos_gen_0,
    delay=0.02
)

>> Start iteration for 114 APPNOs.
>> Start iteration for 55 APPNOs.
>> Done, could not improve further. Found incoming citations for 59 judgments in 114 APPNOs.


In [9]:
# Create a map of all ECLIs that cite any appno in generation 0

ecli_map_incoming_citations = dict()
for judgments in metadata_icoming_citations:
    for judgment in judgments:
        ecli_map_incoming_citations[judgment['ecli']] = judgment

print(f"There are {len(ecli_map_incoming_citations)} judgments that cite a judgment of generation 0")

There are 283 judgments that cite a judgment of generation 0


In [74]:
reload()

# Construct DataFrame of incoming citations to generation 0

df_incoming_citations = pd.DataFrame(index=ecli_map_incoming_citations.keys(), columns=df_metadata.index)

for ecli, metadata in ecli_map_incoming_citations.items():

    cited_cases = metadata['sclappnos'].split(';')

    # Only keep citation if case is in generation 0
    cited_cases = [case for case in cited_cases if case in appnos_gen_0]

    # Convert appno citations to ECLI (so that they can be added to the DF)
    cited_cases = [metadata['ecli'] for metadata in await api.hudoc_judgments_metadata(
        by='appno', cases=cited_cases, output=False
    )]

    for cited_ecli in cited_cases:
        # Only keep citation if case is in generation 0
        if cited_ecli in df_outgoing_citations.index:
            df_incoming_citations.at[ecli, cited_ecli] = True

df_incoming_citations

ecli,ECLI:CE:ECHR:2001:0118JUD002487694,ECLI:CE:ECHR:2001:0118JUD002528994,ECLI:CE:ECHR:2001:0118JUD002515494,ECLI:CE:ECHR:2001:0118JUD002488294,ECLI:CE:ECHR:1996:0916JUD002189393,ECLI:CE:ECHR:1997:1128JUD002318694,ECLI:CE:ECHR:2009:1022JUD000357206,ECLI:CE:ECHR:2010:1202JUD003085603,ECLI:CE:ECHR:2009:0115JUD002826106,ECLI:CE:ECHR:2018:1127JUD004456011,...,ECLI:CE:ECHR:2006:0202JUD003324096,ECLI:CE:ECHR:2006:0202JUD003324396,ECLI:CE:ECHR:2006:0202JUD003324796,ECLI:CE:ECHR:2006:0202JUD003621197,ECLI:CE:ECHR:2007:0215JUD003785097,ECLI:CE:ECHR:2007:0215JUD004385498,ECLI:CE:ECHR:1998:0424JUD002318494,ECLI:CE:ECHR:2012:0216JUD000908904,ECLI:CE:ECHR:2016:1020JUD003400007,ECLI:CE:ECHR:2017:1214JUD001995707
ECLI:CE:ECHR:2021:0622JUD004016507,,,,,,,,,,,...,,,,,,,,,,
ECLI:CE:ECHR:2019:0110JUD006528613,,,,,,,,,,,...,,,,,,,,,,
ECLI:CE:ECHR:2016:0119JUD002708113,,,,,,,,,,,...,,,,,,,,,,
ECLI:CE:ECHR:2015:0707JUD006012511,,,,,,,,,,,...,,,,,,,,,,
ECLI:CE:ECHR:2013:1024JUD004442508,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ECLI:CE:ECHR:2006:1207JUD002975904,,,,,,,,,,,...,,,,,,,,,,
ECLI:CE:ECHR:2006:1207JUD001489804,,,,,,,,,,,...,,,,,,,,,,
ECLI:CE:ECHR:2006:1116JUD003929902,,,,,,,,,,,...,,,,,,,,,,
ECLI:CE:ECHR:2012:1023JUD002460404,,,,,,,,,,,...,,,,,,,,,,


In [11]:
reload()

# Convert generation 0 (columns) to case names

names_gen_0 = [metadata['docname'] for metadata in await api.hudoc_judgments_metadata(
    by='ecli',
    cases=list(df_incoming_citations.columns),
    output=False
)]

for ecli, name in zip(df_incoming_citations.columns, names_gen_0):
    df_incoming_citations.rename({ecli: name}, inplace=True, axis=1)

In [12]:
reload()

# Convert generation -1 (index) to case names
names_gen_m1 = [metadata['docname'] for metadata in await api.hudoc_judgments_metadata(
    by='ecli',
    cases=list(df_incoming_citations.index),
    output=False
)]

for ecli, name in zip(df_incoming_citations.index, names_gen_m1):
    df_incoming_citations.rename({ecli: name}, inplace=True, axis=0)

In [13]:
df_incoming_citations

ecli,CASE OF COSTER v. THE UNITED KINGDOM,CASE OF LEE v. THE UNITED KINGDOM,CASE OF JANE SMITH v. THE UNITED KINGDOM,CASE OF BEARD v. THE UNITED KINGDOM,CASE OF AKDİVAR AND OTHERS v. TURKEY,CASE OF MENTEŞ AND OTHERS v. TURKEY,CASE OF PAULIC v. CROATIA,CASE OF KRYVITSKA AND KRYVITSKYY v. UKRAINE,CASE OF ĆOSIĆ v. CROATIA,CASE OF POPOV AND OTHERS v. RUSSIA,...,CASE OF AĞTAŞ v. TURKEY,CASE OF ŞAYLI v. TURKEY,CASE OF ÖZTOPRAK AND OTHERS v. TURKEY,CASE OF KUMRU YILMAZ AND OTHERS v. TURKEY,CASE OF AKSAKAL v. TURKEY,CASE OF SOYLU v. TURKEY,CASE OF SELÇUK AND ASKER v. TURKEY,CASE OF KONTSEVYCH v. UKRAINE,CASE OF VINNIYCHUK v. UKRAINE,CASE OF DAKUS v. UKRAINE
CASE OF ADZHIGITOVA AND OTHERS v. RUSSIA,,,,,,,,,,,...,,,,,,,,,,
CASE OF KHADIJA ISMAYILOVA v. AZERBAIJAN,,,,,,,,,,,...,,,,,,,,,,
CASE OF SOW v. BELGIUM,,,,,,,,,,,...,,,,,,,,,,
CASE OF V.M. AND OTHERS v. BELGIUM,,,,,,,,,,,...,,,,,,,,,,
CASE OF BAKLANOV v. UKRAINE,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CASE OF MACINKOVIC v. CROATIA,,,,,,,,,,,...,,,,,,,,,,
CASE OF SAMIJA v. CROATIA,,,,,,,,,,,...,,,,,,,,,,
CASE OF MUZEVIC v. CROATIA,,,,,,,,,,,...,,,,,,,,,,
CASE OF NİHAYET ARICI AND OTHERS v. TURKEY,,,,,,,,,,,...,,,,,,,,,,


### Step 5 -- Nodes and edges of generations -1, 0 and 1

In [64]:
df_incoming_citations.loc['CASE OF ZOUBOULIDIS v. GREECE  (No. 2)']['CASE OF LARKOS v. CYPRUS']

True

In [14]:
# Nodes

names_gen_m1 = list(df_incoming_citations.index)
names_gen_0 = list(df_incoming_citations.columns)
names_gen_1 = list(df_outgoing_citations.columns)

nodes = list(set(names_gen_m1 + names_gen_0 + names_gen_1))

df_nodes = pd.DataFrame({
    'Label': nodes
})

df_nodes.index.name = 'Id'

# Boolean for which generation(s) the judgment belongs to (can be multiple)
df_nodes['generation -1'] = df_nodes['Label'].map(lambda _name: _name in names_gen_m1)
df_nodes['generation 0'] = df_nodes['Label'].map(lambda _name: _name in names_gen_0)
df_nodes['generation 1'] = df_nodes['Label'].map(lambda _name: _name in names_gen_1)

# Construct a single identifier that includes all generations
def generation_identifier(label: str):
    row = df_nodes[df_nodes['Label'] == label]

    identifier = []
    for generation in ['-1', '0', '1']:
        if row[f'generation {generation}'].bool():
            identifier.append(generation)

    return ' '.join(identifier)

df_nodes['generation identifier'] = df_nodes['Label'].map(generation_identifier)

if save:
    df_nodes.to_csv('./visualization/hudoc/nodes_gen_m1_0_1.csv')

df_nodes

Unnamed: 0_level_0,Label,generation -1,generation 0,generation 1,generation identifier
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,CASE OF SAMIJA v. CROATIA,True,False,False,-1
1,CASE OF J.M. v. THE UNITED KINGDOM,True,False,False,-1
2,CASE OF FYODOROV AND FYODOROVA v. UKRAINE,False,False,True,1
3,CASE OF TALAT TEPE v. TURKEY,True,False,False,-1
4,CASE OF AMROLLAHI v. DENMARK,False,False,True,1
...,...,...,...,...,...
771,CASE OF VINCENT v. FRANCE,True,False,False,-1
772,CASE OF AZIMOV v. RUSSIA,True,False,False,-1
773,CASE OF BOCHAN v. UKRAINE (No. 2),False,False,True,1
774,CASE OF RANINEN v. FINLAND,False,False,True,1


In [13]:
# Edges

df_edges = pd.DataFrame(columns=['Source', 'Source Label', 'Target', 'Target Label', 'Type'])

for df in [df_incoming_citations, df_outgoing_citations]:

    for case_name, cases in df.iterrows():
        cited_cases = cases[cases == True].index.tolist()

        for cited_case_name in cited_cases:

            df_edges = pd.concat([df_edges, pd.DataFrame({
                'Source': df_nodes[df_nodes['Label'] == case_name].index[0],
                'Target': df_nodes[df_nodes['Label'] == cited_case_name].index[0],
                'Source Label': case_name,
                'Target Label': cited_case_name,
                'Type': 'Directed',
            }, index=[0])], ignore_index=True)

if save:
    df_edges.to_csv('./visualization/hudoc/edges_gen_m1_0_1.csv', index=False)

df_edges

Unnamed: 0,Source,Source Label,Target,Target Label,Type
0,625,CASE OF ALTUN v. TURKEY,757,CASE OF MENTEŞ AND OTHERS v. TURKEY,Directed
1,625,CASE OF ALTUN v. TURKEY,165,CASE OF YÖYLER v. TURKEY,Directed
2,625,CASE OF ALTUN v. TURKEY,713,CASE OF SELÇUK AND ASKER v. TURKEY,Directed
3,377,CASE OF ALEKSEYEV AND OTHERS v. RUSSIA,350,CASE OF KOZAK v. POLAND,Directed
4,524,CASE OF HASANALI ALIYEV AND OTHERS v. AZERBAIJAN,293,CASE OF KRYVITSKA AND KRYVITSKYY v. UKRAINE,Directed
...,...,...,...,...,...
1673,232,CASE OF ŠKRTIĆ v. CROATIA,498,CASE OF McCANN v. THE UNITED KINGDOM,Directed
1674,232,CASE OF ŠKRTIĆ v. CROATIA,178,CASE OF IATRIDIS v. GREECE (ARTICLE 41),Directed
1675,232,CASE OF ŠKRTIĆ v. CROATIA,710,CASE OF CONNORS v. THE UNITED KINGDOM,Directed
1676,232,CASE OF ŠKRTIĆ v. CROATIA,430,CASE OF BJEDOV v. CROATIA,Directed
