In [31]:
import json
import requests
from string import Template

# In-memory cache of HUDOC metadata, keyed by item id, so that a case cited on
# several pages (or in several guides) is only requested once per kernel session.
cases_cache = {}

def get_metadata_for_case_id(case_id: str, timeout: float = 30.0):
    """Return the HUDOC metadata columns for a single document.

    Args:
        case_id: HUDOC item id, e.g. "001-222891".
        timeout: Seconds to wait for the HUDOC server before giving up.

    Returns:
        The "columns" dict of the first search result (keys such as "docname",
        "appno", "languageisocode"). The same dict object is stored in
        ``cases_cache`` and returned on later calls, so callers should treat it
        as read-only.

    Raises:
        requests.RequestException: on network failure, timeout or HTTP error status.
        IndexError: if HUDOC returns no result for ``case_id``.
    """
    # A cache hit returns the full metadata dict, exactly like a cache miss does.
    if case_id in cases_cache:
        return cases_cache[case_id]

    url_template = Template('https://hudoc.echr.coe.int/app/query/results?query=((itemid%3A\"$case_id\"))&select=sharepointid,rank,echrranking,languagenumber,itemid,docname,doctype,application,appno,conclusion,importance,originatingbody,typedescription,kpdate,kpdateastext,documentcollectionid,documentcollectionid2,languageisocode,extractedappno,isplaceholder,doctypebranch,respondent,advopidentifier,advopstatus,ecli,appnoparts,sclappnos,ECHRConcepts&sort=&start=0&length=20&rankingModelId=11111111-0000-0000-0000-000000000000')
    url = url_template.substitute(case_id=case_id)
    res = requests.get(url, timeout=timeout)
    # Fail on HTTP errors here instead of on a confusing JSON decode error below.
    res.raise_for_status()
    results = res.json()["results"]
    if not results:
        raise IndexError(f"HUDOC returned no results for item id {case_id!r}")

    metadata = results[0]["columns"]
    cases_cache[case_id] = metadata
    return metadata


# Smoke test: fetch the metadata of one known HUDOC item and pretty-print it
# to see which fields the endpoint returns.
metadata = get_metadata_for_case_id("001-222891")
print(json.dumps(metadata, indent=2))

{
  "isplaceholder": "False",
  "kpdateastext": "30/11/2022 00:00:00",
  "documentcollectionid2": "CASELAW;DECISIONS;DECGRANDCHAMBER;FRA;FRE",
  "ecli": "ECLI:CE:ECHR:2022:1130DEC000801916",
  "conclusion": "Exceptions pr\u00e9liminaires jointes au fond;Partiellement recevable;Partiellement irrecevable (Art. 35) Conditions de recevabilit\u00e9;(Art. 35-1) \u00c9puisement des voies de recours internes;(Art. 35-3-a) Manifestement mal fond\u00e9;(Art. 35-3-a) Ratione materiae",
  "typedescription": "8",
  "appno": "8019/16;43800/14;28525/20",
  "echrranking": "1411",
  "doctype": "HFDEC",
  "advopidentifier": null,
  "appnoparts": "8019;16;43800;14;28525;20",
  "application": "MS WORD",
  "advopstatus": null,
  "originatingbody": "8",
  "sclappnos": "21894/93;61498/08;55721/07;11157/04;15162/05;45653/99;788/60;52207/99;41090/98;48347/08;72967/01;59532/00;11676/04;23380/09;20914/07;43370/04;27693/95;13216/05;41794/04;6780/74;6950/75;25781/94;34382/97;3321/67;3322/67;3323/67;3344/67;46113/9

In [56]:
def find_eng_version(metadata, timeout: float = 30.0):
    """Look up an English-language HUDOC document for the same application.

    The first application number in ``metadata["appno"]`` is used to query
    HUDOC; the first English result whose item id starts with "001-" is taken
    as the English counterpart.

    Args:
        metadata: HUDOC metadata columns of the source document; "appno" and
            "docname" are read.
        timeout: Seconds to wait for the HUDOC search request.

    Returns:
        ``(item_id, docname)`` of the English document, or ``("NaN", "NaN")``
        when no application number is available or no English version is found.

    Raises:
        requests.RequestException: if the search request itself fails.
    """
    not_found = ("NaN", "NaN")

    # "appno" may be missing or null; without a "<number>/<year>" pair there is
    # nothing to search for, so return the sentinel instead of crashing.
    first_app_number = (metadata.get("appno") or "").split(";")[0]
    parts = first_app_number.split("/")
    if len(parts) < 2:
        return not_found
    fst = parts[0]
    snd = parts[1]

    url = f"https://hudoc.echr.coe.int/app/query/results?query=(contentsitename=ECHR)%20AND%20((appno.keyword%3A%22{fst}%2F{snd}%22)%20OR%20((advopidentifier.keyword%3A%22{fst}%2F{snd}%22)%20AND%20doctype%3AADV*)%20OR%20((extractedappno.keyword%3A%22{fst}%2F{snd}%22)%20AND%20doctype%3APR))&select=itemid,appno,extractedappno,documentcollectionid,kpdate,languageisocode,isplaceholder,advopidentifier&sort=&start=0&length=500"
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()

    # Keep English documents only. The id must *start* with "001-": a plain
    # substring test would also accept ids such as "003-7001-...".
    data = [
        item for item in res.json()["results"]
        if item["columns"].get("languageisocode") == "ENG"
        and str(item["columns"].get("itemid") or "").startswith("001-")
    ]
    try:
        print("Data:\n", json.dumps(data, indent=2))
        print("App nr", fst, snd)
        eng_id = data[0]["columns"]["itemid"]
        new_metadata = get_metadata_for_case_id(eng_id)
        doc_name = new_metadata["docname"]
        print(f"English version found: {doc_name}")
        print(f"French version: {metadata['docname']}")
        return eng_id, doc_name
    except Exception:
        # Best effort: no English hit, or its metadata could not be fetched.
        # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupt working.
        return not_found

In [33]:
import json
import pandas as pd

from utils.fetch_pdf import fetch_pdf_content
import PyPDF2

def get_associated_cases_df(guide_url: str):
    """Extract, page by page, the HUDOC cases hyperlinked from a guide PDF.

    Every link annotation with a URI action is inspected; the text after the
    last "=" of the URI is treated as a HUDOC item id and resolved to its
    document name through ``get_metadata_for_case_id``.

    Args:
        guide_url: URL of the guide PDF, passed to ``fetch_pdf_content``.

    Returns:
        A DataFrame with one row per PDF page and the columns "page_number"
        (0-based), "citations" (JSON object mapping item id to document name)
        and "page_text" (text extracted from the page).
    """
    import logging

    logger = logging.getLogger(__name__)

    content = fetch_pdf_content(guide_url)
    reader = PyPDF2.PdfReader(content)
    key = '/Annots'
    uri = '/URI'
    ank = '/A'
    # Rows are collected in a list and turned into a DataFrame once at the end;
    # appending to a DataFrame inside the loop copies the whole frame each time.
    records = []

    for i, page in enumerate(reader.pages):
        page_object = page.get_object()
        page_text = page.extract_text()
        page_citations = {}

        if key in page_object.keys():
            for annotation in page_object[key]:
                annotation_obj = annotation.get_object()
                try:
                    if uri in annotation_obj[ank].keys():
                        url = annotation_obj[ank][uri]
                        case_id = url.split('=')[-1]
                        metadata = get_metadata_for_case_id(case_id)
                        docname = metadata["docname"]
                        if metadata["languageisocode"] == "FRE":
                            # Informational only: prints the English counterpart;
                            # the returned value is not stored here.
                            find_eng_version(metadata)
                        page_citations[case_id] = docname
                        print(f"Page {i}: {docname}")
                        print()
                except Exception as exc:
                    # Best effort: annotations without a URI action and links
                    # that do not resolve to a HUDOC document are skipped.
                    logger.debug("Skipping annotation on page %s: %r", i, exc)
        records.append({"page_number": i, "citations": json.dumps(page_citations), "page_text": page_text})

    return pd.DataFrame(records, columns=["page_number", "citations", "page_text"])

In [34]:
# Build the per-page citation table for the Article 1 guide and display it.
# This downloads the PDF and calls get_metadata_for_case_id for each URI link found.
associated_cases_df = get_associated_cases_df("https://ks.echr.coe.int/documents/d/echr-ks/guide_art_1_eng")
associated_cases_df

Page 4: CASE OF IRELAND v. THE UNITED KINGDOM

Page 4: CASE OF JERONOVIČS v. LATVIA

Page 4: CASE OF KONSTANTIN MARKIN v. RUSSIA 

Page 4: CASE OF KONSTANTIN MARKIN v. RUSSIA 

Page 4: CASE OF BOSPHORUS HAVA YOLLARI TURİZM VE TİCARET ANONİM ŞİRKETİ v. IRELAND

Page 4: CASE OF N.D. AND N.T. v. SPAIN

Page 4: CASE OF GRZĘDA v. POLAND

Page 5: CASE OF CATAN AND OTHERS v. THE REPUBLIC OF MOLDOVA AND RUSSIA

Page 5: CASE OF CATAN AND OTHERS v. THE REPUBLIC OF MOLDOVA AND RUSSIA

English version found: SLOVENIA v. CROATIA
French version: SLOVENIA v. CROATIA
Page 5: SLOVÉNIE c. CROATIE

Page 5: CASE OF SVETOVA AND OTHERS v. RUSSIA

English version found: UKRAINE AND THE NETHERLANDS v. RUSSIA
French version: UKRAINE AND THE NETHERLANDS v. RUSSIA
Page 5: UKRAINE ET PAYS-BAS c. RUSSIE

Page 6: BANKOVIĆ AND OTHERS v. BELGIUM AND OTHERS

Page 6: BANKOVIĆ AND OTHERS v. BELGIUM AND OTHERS

English version found: UKRAINE v. RUSSIA (RE CRIMEA)
French version: UKRAINE v. RUSSIA (RE CRIMEA)
Page 6: UKRA

Unnamed: 0,page_number,citations,page_text
0,0,{},\n \n \nGuide on A rticle 1 \nof the Europe...
1,1,{},Guide on Article 1 of the Convention – Obligat...
2,2,{},Guide on Article 1 of the Convention – Obligat...
3,3,{},Guide on Article 1 of the Convention – Obligat...
4,4,"{""001-57506"": ""CASE OF IRELAND v. THE UNITED K...",Guide on Article 1 of the Convention – Obligat...
5,5,"{""001-114082"": ""CASE OF CATAN AND OTHERS v. TH...",Guide on Article 1 of the Convention – Obligat...
6,6,"{""001-22099"": ""BANKOVI\u0106 AND OTHERS v. BEL...",Guide on Article 1 of the Convention – Obligat...
7,7,"{""001-222891"": ""UKRAINE ET PAYS-BAS c. RUSSIE""...",Guide on Article 1 of the Convention – Obligat...
8,8,"{""001-61875"": ""CASE OF ASSANIDZE v. GEORGIA"", ...",Guide on Article 1 of the Convention – Obligat...
9,9,"{""001-44291"": ""MARKOVIC et AUTRES contre l'ITA...",Guide on Article 1 of the Convention – Obligat...


In [6]:
import re
import pandas as pd

def simplify_text_for_entailment(text: str):
    """Reduce ``text`` to a compact, case-insensitive form for substring matching.

    Whitespace runs are collapsed, the text is lower-cased, and every character
    that is not a word character (letter, digit or underscore) is dropped, so
    the result contains no spaces or punctuation.
    """
    lowered = re.sub(r'\s+', ' ', text).lower()
    return re.sub(r'\W', '', lowered)

def get_possible_citations_for_paragraphs_df(paragraphs_df: pd.DataFrame, associated_cases_df: pd.DataFrame):
    """Attach to each paragraph the citations of every PDF page it appears on.

    A paragraph is considered to be on a page when the normalised form of its
    first 200 characters or of its last 100 characters occurs in the normalised
    page text. Both ends are checked because a paragraph can span two pages.

    Args:
        paragraphs_df: DataFrame with a "paragraph" column.
        associated_cases_df: DataFrame with "page_text" and "citations"
            (JSON object per page) columns.

    Returns:
        A copy of ``paragraphs_df`` with an added "possible_citations" column
        holding a JSON object that maps item id to document name.
    """
    pdf = paragraphs_df.copy()

    # Normalising a page and decoding its citations does not depend on the
    # paragraph, so do it once per page instead of once per (paragraph, page).
    pages = [
        (simplify_text_for_entailment(page_text), json.loads(citations))
        for page_text, citations in zip(associated_cases_df["page_text"], associated_cases_df["citations"])
    ]

    possible_citations_column = []
    for par in pdf["paragraph"]:
        if not isinstance(par, str):
            # Missing paragraph (e.g. NaN from the CSV): nothing to match.
            par = ""
        fragments = {
            simplify_text_for_entailment(par[0:200]),
            simplify_text_for_entailment(par[-100:]),
        }
        # An empty fragment is a substring of every page and would attach every
        # citation of the guide to this paragraph.
        fragments.discard("")

        possible_citations = {}
        for cleaned_page_text, page_citations in pages:
            if any(fragment in cleaned_page_text for fragment in fragments):
                possible_citations.update(page_citations)

        possible_citations_column.append(json.dumps(possible_citations))

    pdf["possible_citations"] = possible_citations_column
    return pdf

In [7]:
# Load the parsed guide paragraphs and keep only those of the Article 1 guide.
paragraphs_df = pd.read_csv("data/echr_case_law_guides.csv")
paragraphs_df = paragraphs_df[paragraphs_df["guide_id"] == "guide_art_1_eng"]

# Match each paragraph against the per-page citation table built for the same guide
# (associated_cases_df must already exist in the kernel) and display the result.
combined_df = get_possible_citations_for_paragraphs_df(paragraphs_df, associated_cases_df)
combined_df

Unnamed: 0,paragraph,guide_id,paragraph_id,possible_citations
6637,"As provided by Article 1, the engagement under...",guide_art_1_eng,1,"{""001-114082"": ""CASE OF CATAN AND OTHERS v. TH..."
6638,"In the Convention context, the term jurisdicti...",guide_art_1_eng,2,"{""001-114082"": ""CASE OF CATAN AND OTHERS v. TH..."
6639,"Historically, the text drawn up by the Committ...",guide_art_1_eng,3,"{""001-114082"": ""CASE OF CATAN AND OTHERS v. TH..."
6640,The adoption of Article 1 of the Convention wa...,guide_art_1_eng,4,"{""001-22099"": ""BANKOVI\u0106 AND OTHERS v. BEL..."
6641,The travaux préparatoires go on to note that t...,guide_art_1_eng,5,"{""001-22099"": ""BANKOVI\u0106 AND OTHERS v. BEL..."
...,...,...,...,...
6790,"Media reports, on the other hand, are to be tr...",guide_art_1_eng,154,"{""001-207626"": ""UKRAINE c. RUSSIE (CRIM\u00c9E..."
6791,The direct evidence of witnesses is also taken...,guide_art_1_eng,155,"{""001-207626"": ""UKRAINE c. RUSSIE (CRIM\u00c9E..."
6792,The Court may also rely on witness statements ...,guide_art_1_eng,156,"{""001-207626"": ""UKRAINE c. RUSSIE (CRIM\u00c9E..."
6793,There is no need for direct evidence from alle...,guide_art_1_eng,157,"{""001-207626"": ""UKRAINE c. RUSSIE (CRIM\u00c9E..."


In [8]:
from guide_parser import GuideParser

# One GuideParser per case-law guide, keyed by guide id; each key repeats the
# guide_id passed to its parser. Some entries pass extra keyword arguments
# (remove_patterns, starting_string, url).
# NOTE(review): presumably starting_string marks where the guide body begins and
# url overrides the default download location — confirm in guide_parser.
parsers = {
    "guide_art_1_eng": GuideParser(guide_id="guide_art_1_eng", remove_patterns=["Concepts of “jurisdiction” and imputability"]),
    "guide_art_2_eng": GuideParser(guide_id="guide_art_2_eng"),
    "guide_art_3_eng": GuideParser(guide_id="guide_art_3_eng", starting_string="1.  The Court’s approach to the interpretation"),
    "guide_art_4_eng": GuideParser(guide_id="guide_art_4_eng"),
    "guide_art_5_eng": GuideParser(guide_id="guide_art_5_eng"),
    "guide_art_6_civil_eng": GuideParser(guide_id="guide_art_6_civil_eng"),
    "guide_art_6_criminal_eng": GuideParser(guide_id="guide_art_6_criminal_eng"),
    "guide_art_7_eng": GuideParser(guide_id="guide_art_7_eng", starting_string="1.  The guarantee enshrined in Article 7"),
    "guide_art_8_eng": GuideParser(guide_id="guide_art_8_eng"),
    "guide_art_9_eng": GuideParser(guide_id="guide_art_9_eng"),
    "guide_art_10_eng": GuideParser(guide_id="guide_art_10_eng"),
    "guide_art_11_eng": GuideParser(guide_id="guide_art_11_eng"),
    "guide_art_12_eng": GuideParser(guide_id="guide_art_12_eng", starting_string="1.  Article 12 of the Convention guarantees"),
    "guide_art_13_eng": GuideParser(guide_id="guide_art_13_eng"),
    "guide_art_14_art_1_protocol_12_eng": GuideParser(guide_id="guide_art_14_art_1_protocol_12_eng"),
    "guide_art_15_eng": GuideParser(guide_id="guide_art_15_eng"),
    "guide_art_17_eng": GuideParser(guide_id="guide_art_17_eng"),
    "guide_art_18_eng": GuideParser(guide_id="guide_art_18_eng"),
    "Admissibility_guide_ENG": GuideParser(guide_id="Admissibility_guide_ENG", url="https://www.echr.coe.int/documents/d/echr/", starting_string="1.  The  system  of  protection  of  fundamental  rights"),
    "guide_art_46_eng": GuideParser(guide_id="guide_art_46_eng", starting_string="1.  One of the most significant features of the Convention"),
    "guide_art_1_protocol_1_eng": GuideParser(guide_id="guide_art_1_protocol_1_eng"),
    "guide_art_2_protocol_1_eng": GuideParser(guide_id="guide_art_2_protocol_1_eng"),
    "guide_art_3_protocol_1_eng": GuideParser(guide_id="guide_art_3_protocol_1_eng"),
    "guide_art_2_protocol_4_eng": GuideParser(guide_id="guide_art_2_protocol_4_eng", starting_string="1.  Article 2 of Protocol No. 4 guarantees three"),
    "guide_art_3_protocol_4_eng": GuideParser(guide_id="guide_art_3_protocol_4_eng", starting_string="1.  Article 3, Protocol No. 4 guarantees two"),
    "guide_art_4_protocol_4_eng": GuideParser(guide_id="guide_art_4_protocol_4_eng"),
    "guide_art_1_protocol_7_eng": GuideParser(guide_id="guide_art_1_protocol_7_eng", starting_string="1.  Protocol No. 7 to the European Convention"),
    "guide_art_2_protocol_7_eng": GuideParser(guide_id="guide_art_2_protocol_7_eng", starting_string="1.  Article 2  of  Protocol  No.  7  complements  the  guarantees"),
    "guide_art_4_protocol_7_eng": GuideParser(guide_id="guide_art_4_protocol_7_eng", starting_string="1.  Protocol No. 7 to the Convention"),
    "guide_data_protection_eng": GuideParser(guide_id="guide_data_protection_eng", starting_string="1.  Technological progress has led to a quantum"),
    "guide_environment_eng": GuideParser(guide_id="guide_environment_eng", starting_string="1.  The positive obligation on States to take"),
    "guide_immigration_eng": GuideParser(guide_id="guide_immigration_eng", starting_string="1.  The present  document is"),
    "guide_mass_protests_eng": GuideParser(guide_id="guide_mass_protests_eng", starting_string="1.  The  present  Guide  analyses  the  Court’s"),
    "guide_prisoners_rights_eng": GuideParser(guide_id="guide_prisoners_rights_eng", starting_string="1.  The Court is frequently called upon to"),
    "guide_lgbti_rights_eng": GuideParser(guide_id="guide_lgbti_rights_eng", starting_string="1.  The  Convention  is  a  living  instrument  which"),
    "guide_social_rights_eng": GuideParser(guide_id="guide_social_rights_eng", starting_string="1.  The Convention as adopted in 1950 reflected"),
    "guide_terrorism_eng": GuideParser(guide_id="guide_terrorism_eng", starting_string="1.   Since its first ever judgment Lawless v. Ireland")
}

In [10]:
from pathlib import Path

# Single checkpoint file: progress is read from and written to the SAME path,
# so an interrupted run resumes from what was actually saved.
citations_csv_path = Path("data/echr_case_law_guides_with_possible_citations.csv")

paragraphs_df = pd.read_csv("data/echr_case_law_guides.csv")
if citations_csv_path.exists():
    df = pd.read_csv(citations_csv_path)
else:
    # First run: nothing has been processed yet.
    df = pd.DataFrame(columns=["guide_id"])

for key, parser in parsers.items():
    # Skip guides whose paragraphs are already present in the checkpoint.
    if key in df["guide_id"].values:
        print(f"Already completed {key}")
        continue

    pdf_copy = paragraphs_df[paragraphs_df["guide_id"] == key]
    associated_cases_df = get_associated_cases_df(parser.url)
    combined_df = get_possible_citations_for_paragraphs_df(pdf_copy, associated_cases_df)

    df = pd.concat([df, combined_df], ignore_index=True)
    # Drop the "Unnamed: N" index columns that earlier CSV round-trips introduced.
    df = df.loc[:, ~df.columns.str.startswith('Unnamed')]
    print("Completed:", key)

    # Save after every guide so an interrupted run loses at most one guide.
    df.to_csv(citations_csv_path, index=False)
df

Already completed guide_art_1_eng
Already completed guide_art_2_eng
Already completed guide_art_3_eng
Already completed guide_art_4_eng
Already completed guide_art_5_eng
Already completed guide_art_6_civil_eng
Already completed guide_art_6_criminal_eng
Already completed guide_art_7_eng
Already completed guide_art_8_eng
Already completed guide_art_9_eng
Already completed guide_art_10_eng
Already completed guide_art_11_eng
Already completed guide_art_12_eng
Already completed guide_art_13_eng
Already completed guide_art_14_art_1_protocol_12_eng
Already completed guide_art_15_eng
Already completed guide_art_17_eng
Already completed guide_art_18_eng
Completed: Admissibility_guide_ENG
Completed: guide_art_46_eng
Completed: guide_art_1_protocol_1_eng
Completed: guide_art_2_protocol_1_eng
Completed: guide_art_3_protocol_1_eng
Completed: guide_art_2_protocol_4_eng
Completed: guide_art_3_protocol_4_eng
Completed: guide_art_4_protocol_4_eng
Completed: guide_art_1_protocol_7_eng
Completed: guide_a

Unnamed: 0,paragraph,guide_id,paragraph_id,possible_citations
0,"As provided by Article 1, the engagement under...",guide_art_1_eng,1,"{""001-114082"": ""CASE OF CATAN AND OTHERS v. TH..."
1,"In the Convention context, the term jurisdicti...",guide_art_1_eng,2,"{""001-114082"": ""CASE OF CATAN AND OTHERS v. TH..."
2,"Historically, the text drawn up by the Committ...",guide_art_1_eng,3,"{""001-114082"": ""CASE OF CATAN AND OTHERS v. TH..."
3,The adoption of Article 1 of the Convention wa...,guide_art_1_eng,4,"{""001-22099"": ""BANKOVI\u0106 AND OTHERS v. BEL..."
4,The travaux préparatoires go on to note that t...,guide_art_1_eng,5,"{""001-22099"": ""BANKOVI\u0106 AND OTHERS v. BEL..."
...,...,...,...,...
6790,"As regards family reunion, see the thematic Ca...",guide_terrorism_eng,128,"{""001-105612"": ""CASE OF AL-JEDDA v. THE UNITED..."
6791,"Under Article 15 of the Convention, in the eve...",guide_terrorism_eng,129,"{""001-105612"": ""CASE OF AL-JEDDA v. THE UNITED..."
6792,It transpires from the Court’s case-law that t...,guide_terrorism_eng,130,"{""001-105612"": ""CASE OF AL-JEDDA v. THE UNITED..."
6793,The Court had ruled that terrorism in Northern...,guide_terrorism_eng,131,"{""001-105612"": ""CASE OF AL-JEDDA v. THE UNITED..."


In [62]:
# Reload the per-paragraph citation table written by the guide-processing loop and display it.
df = pd.read_csv("data/echr_case_law_guides_with_possible_citations.csv")
df

Unnamed: 0,paragraph,guide_id,paragraph_id,possible_citations
0,"As provided by Article 1, the engagement under...",guide_art_1_eng,1,"{""001-114082"": ""CASE OF CATAN AND OTHERS v. TH..."
1,"In the Convention context, the term jurisdicti...",guide_art_1_eng,2,"{""001-114082"": ""CASE OF CATAN AND OTHERS v. TH..."
2,"Historically, the text drawn up by the Committ...",guide_art_1_eng,3,"{""001-114082"": ""CASE OF CATAN AND OTHERS v. TH..."
3,The adoption of Article 1 of the Convention wa...,guide_art_1_eng,4,"{""001-22099"": ""BANKOVI\u0106 AND OTHERS v. BEL..."
4,The travaux préparatoires go on to note that t...,guide_art_1_eng,5,"{""001-22099"": ""BANKOVI\u0106 AND OTHERS v. BEL..."
...,...,...,...,...
6790,"As regards family reunion, see the thematic Ca...",guide_terrorism_eng,128,"{""001-105612"": ""CASE OF AL-JEDDA v. THE UNITED..."
6791,"Under Article 15 of the Convention, in the eve...",guide_terrorism_eng,129,"{""001-105612"": ""CASE OF AL-JEDDA v. THE UNITED..."
6792,It transpires from the Court’s case-law that t...,guide_terrorism_eng,130,"{""001-105612"": ""CASE OF AL-JEDDA v. THE UNITED..."
6793,The Court had ruled that terrorism in Northern...,guide_terrorism_eng,131,"{""001-105612"": ""CASE OF AL-JEDDA v. THE UNITED..."


In [63]:
# Flatten the per-paragraph citation objects into one {item id: document name}
# dict covering every guide; a later occurrence of an id overwrites an earlier one.
citations = {}

for raw_citations in df["possible_citations"]:
    citations.update(json.loads(raw_citations))

len(citations)

6337

In [64]:
# For every cited document that HUDOC reports as French ("FRE"), try to find the
# English version; unresolved documents are recorded with the "NaN" sentinel.
french_to_english = []

for citation in citations.keys():
    metadata = get_metadata_for_case_id(citation)
    # .get: a document without a language code is simply not French.
    if metadata.get("languageisocode") != "FRE":
        continue

    try:
        eng_id, doc_name = find_eng_version(metadata)
    except Exception:
        # Best effort, but only for ordinary errors: a bare `except` would also
        # swallow KeyboardInterrupt and make this long loop impossible to stop.
        eng_id, doc_name = "NaN", "NaN"

    french_to_english.append({"fre_id": citation, "eng_id": eng_id, "fre_docname": citations[citation], "eng_docname": doc_name})

print(len(french_to_english))

Data:
 [
  {
    "columns": {
      "appno": "54155/16",
      "languageisocode": "ENG",
      "kpdate": "2020-11-18T00:00:00",
      "advopidentifier": "",
      "documentcollectionid": "CASELAW;DECISIONS;DECGRANDCHAMBER;ENG",
      "isplaceholder": "False",
      "extractedappno": "54155/16;44574/98;45133/98;48316/99;60642/08;29003/07;1/91;45/94;25/15;65/09;32/93;2/82;496/10;40998/98;176/13;348/12;156/13;373/14;67/12;10/13;200/13;6780/74;6950/75;9944/82;38263/08;5809/08;51357/07;8007/77;25781/94;26126/07;11209/10;33917/12;13999/05;56665/09;57381/00;13255/07;18030/11;8895/10;35841/02;53984/00;2269/06;57691/09;19719/10;29381/09;32684/09;62543/00;65731/01;65900/01;54012/10;24922/12",
      "itemid": "001-206897",
      "rank": "13.631217"
    }
  }
]
App nr 54155 16
English version found: SLOVENIA v. CROATIA
French version: SLOVÉNIE c. CROATIE
Data:
 [
  {
    "columns": {
      "isplaceholder": "False",
      "appno": "8019/16;43800/14;28525/20",
      "advopidentifier": null,
      "k

In [66]:
# Number of French-language citations that were examined.
print(len(french_to_english))

# Number of entries for which no English version was found ("NaN" sentinel).
nan_values = sum(1 for entry in french_to_english if entry["eng_id"] == "NaN")
print(nan_values)

# List the matched pairs whose French title is more than 6 characters longer than
# the English one (after dropping the "AFFAIRE " / "CASE OF " prefixes), as a
# quick way to spot suspicious matches.
for entry in french_to_english:
    english_name = entry["eng_docname"].replace("CASE OF ", "")
    french_name = entry["fre_docname"].replace("AFFAIRE ", "")
    has_match = entry["eng_id"] != "NaN"
    if has_match and len(french_name) - len(english_name) > 6:
        print(f"French: {french_name}")
        print(f"English: {english_name}")
        print()

1467
270
French: LOIZIDOU c. TURQUIE (EXCEPTIONS PRÉLIMINAIRES)
English: LOIZIDOU AGAINST TURKEY

French: EL-MASRI c. « L'EX-RÉPUBLIQUE YOUGOSLAVE DE MACÉDOINE »
English: EL-MASRI AGAINST NORTH MACEDONIA

French: LERAY, GUILCHER, AMEON, MARGUE et MAD contre la FRANCE
English: LERAY AND OTHERS v. FRANCE

French: PEREIRA HENRIQUES et AUTRES contre le LUXEMBOURG
English: PEREIRA HENRIQUES v. LUXEMBOURG

French: SEGUIN contre la FRANCE
English: SEGUIN v. FRANCE

French: MOGOS contre la ROUMANIE
English: MOGOS v. ROMANIA

French: REFAH PARTISI (PARTI DE PROSPERITE) ET AUTRES contre la TURQUIE
English: REFAH PARTISI (THE WELFARE PARTY) AND OTHERS v. TURKEY

French: LILLY FRANCE S.A. contre la FRANCE
English: LILLY FRANCE v. FRANCE

French: MONTCORNET DE CAUMONT contre la FRANCE
English: MONTCORNET DE CAUMONT v. FRANCE

French: COLOZZA et RUBINAT c. ITALIE
English: COLOZZA v. ITALY

French: MEHMET ULUSOY ET AUTRES c. TURQUIE
English: ULUSOY v. TURKEY

French: JOLIE et autres contre la BELGIQU

In [67]:
# Persist the French -> English mapping (including the "NaN" sentinel rows) and display it.
french_to_english_df = pd.DataFrame(french_to_english)
french_to_english_df.to_csv("data/french_to_english_citations.csv", index=False)
french_to_english_df

Unnamed: 0,fre_id,eng_id,fre_docname,eng_docname
0,001-207014,001-206897,SLOVÉNIE c. CROATIE,SLOVENIA v. CROATIA
1,001-222891,001-222889,UKRAINE ET PAYS-BAS c. RUSSIE,UKRAINE AND THE NETHERLANDS v. RUSSIA
2,001-207626,001-207622,UKRAINE c. RUSSIE (CRIMÉE),UKRAINE v. RUSSIA (RE CRIMEA)
3,001-92366,001-141152,AFFAIRE STEPHENS c. MALTE (N° 1),CASE OF STEPHENS v. MALTA (No. 1)
4,001-173457,001-186258,AFFAIRE VASILICIUC c. RÉPUBLIQUE DE MOLDOVA,CASE OF VASILICIUC AGAINST THE REPUBLIC OF MOL...
...,...,...,...,...
1462,001-65292,001-4991,AFFAIRE PŁOSKI c. POLOGNE,PLOSKI v. POLAND
1463,001-62076,001-57518,AFFAIRE LAWLESS c. IRLANDE (N° 3),CASE OF LAWLESS v. IRELAND (No. 3)
1464,001-62563,001-192162,AFFAIRE AKSOY c. TURQUIE,CASE OF A. AND OTHERS AGAINST TURKEY AND 204 O...
1465,001-62376,001-829,AFFAIRE BRANNIGAN ET McBRIDE c. ROYAUME-UNI,BRANNIGAN ; McBRIDE v. THE UNITED KINGDOM


In [70]:
def get_mapping(id: str):
    """Return the English counterpart recorded for a French HUDOC item id.

    Args:
        id: HUDOC item id to look up in the "fre_id" column of
            ``french_to_english_df``.

    Returns:
        ``(eng_id, eng_docname)`` of the first matching row, or ``None`` when
        the id is not in the table or no English version was found for it.
    """
    mapping = french_to_english_df[french_to_english_df["fre_id"] == id]
    if mapping.empty:
        return None

    first_match = mapping.iloc[0]
    eng_id = first_match["eng_id"]
    # "Not found" is stored as the string "NaN", but it becomes a real NaN once
    # the table has been round-tripped through CSV; treat both as missing.
    if pd.isna(eng_id) or eng_id == "NaN":
        return None
    return eng_id, first_match["eng_docname"]

# Spot checks of get_mapping on three ids; each prints either an
# (eng_id, eng_docname) tuple or None.
print(get_mapping("001-222891"))
print(get_mapping("001-222892"))
print(get_mapping("001-22293"))

('001-222889', 'UKRAINE AND THE NETHERLANDS v. RUSSIA')
('001-223162', 'CASE OF C8 (CANAL 8) v. FRANCE')
None


In [71]:
# Rewrite each paragraph's citations so that French documents are replaced by
# their English counterpart wherever get_mapping knows one; all other entries
# are kept unchanged.
df = pd.read_csv("data/echr_case_law_guides_with_possible_citations.csv")

for row_label, raw_citations in df["possible_citations"].items():
    mapped_citations = {}
    for item_id, doc_name in json.loads(raw_citations).items():
        english = get_mapping(item_id)
        if not english:
            mapped_citations[item_id] = doc_name
            continue
        mapped_citations[english[0]] = english[1]

    df.at[row_label, "possible_eng_citations"] = json.dumps(mapped_citations)

df.to_csv("data/echr_case_law_guides_with_possible_eng_citations.csv", index=False)

In [73]:
# Show, for every paragraph whose citations changed, the original JSON followed
# by the version with English replacements.
df = pd.read_csv("data/echr_case_law_guides_with_possible_eng_citations.csv")

for original, translated in zip(df["possible_citations"], df["possible_eng_citations"]):
    if translated != original:
        print(original)
        print(translated)

{"001-114082": "CASE OF CATAN AND OTHERS v. THE REPUBLIC OF MOLDOVA AND RUSSIA", "001-207014": "SLOV\u00c9NIE c. CROATIE", "001-222654": "CASE OF SVETOVA AND OTHERS v. RUSSIA", "001-222891": "UKRAINE ET PAYS-BAS c. RUSSIE"}
{"001-114082": "CASE OF CATAN AND OTHERS v. THE REPUBLIC OF MOLDOVA AND RUSSIA", "001-206897": "SLOVENIA v. CROATIA", "001-222654": "CASE OF SVETOVA AND OTHERS v. RUSSIA", "001-222889": "UKRAINE AND THE NETHERLANDS v. RUSSIA"}
{"001-114082": "CASE OF CATAN AND OTHERS v. THE REPUBLIC OF MOLDOVA AND RUSSIA", "001-207014": "SLOV\u00c9NIE c. CROATIE", "001-222654": "CASE OF SVETOVA AND OTHERS v. RUSSIA", "001-222891": "UKRAINE ET PAYS-BAS c. RUSSIE"}
{"001-114082": "CASE OF CATAN AND OTHERS v. THE REPUBLIC OF MOLDOVA AND RUSSIA", "001-206897": "SLOVENIA v. CROATIA", "001-222654": "CASE OF SVETOVA AND OTHERS v. RUSSIA", "001-222889": "UKRAINE AND THE NETHERLANDS v. RUSSIA"}
{"001-114082": "CASE OF CATAN AND OTHERS v. THE REPUBLIC OF MOLDOVA AND RUSSIA", "001-207014": "SL