In [1]:
import re

In [30]:
# Citation elements that should be stripped out of a parsed citation.
reporter_exclude_list = ["(QL)"]
# Series designators as they appear after a reporter abbreviation
# (e.g. "CCC (3d)").
# NOTE(review): not referenced by the code visible in this excerpt --
# presumably intended for later parsing work; confirm before relying on it.
series_include_list = ["(2d)", "(3d)", "(4th)", "(5th)", "(6th)", "(7th)", "(8th)", "(9th)"]

# Sample input blocks: each string holds several parallel citations separated
# by a long dash (—) with a space on either side.
sample_citation_list_1 = "[1990] ACS no 115 — [1990] SCJ No 115 (QL) — [1990] CarswellOnt 119 — JE 90-1684 — 11 WCB (2d) 342 — 50 CRR 206 — 60 CCC (3d) 161 — 80 CR (3d) 317 — 36 QAC 161 — 43 OAC 1 — AZ-90111118 — 116 NR 241"
sample_citation_list_2 = "[1991] ACS no 83 — [1991] SCJ No 83 (QL) — [1991] SJ No 83 (QL) — [1991] CarswellAlta 192 — EYB 1991-66887 — JE 91-1713 — 14 WCB (2d) 266 — 18 CRR (2d) 210 — 68 CCC (3d) 1 — 8 CR (4th) 277 — 9 CR (4th) 277 — 120 AR 161 — 83 Alta LR (2d) 193 — [1992] 1 WWR 97 — 8 WAC 161 — AZ-91111102 — 130 NR 277"
sample_citation_list_3 = "[1991] ACS no 90 — 31 MVR (2d) 137 — [1991] SCJ No 90 (QL) — [1991] CarswellBC 280 — JE 91-1764 — 14 WCB (2d) 206 — 7 CRR (2d) 1 — 5 BCAC 161 — 67 CCC (3d) 481 — 8 CR (4th) 82 — 61 BCLR (2d) 145 — 11 WAC 161 — AZ-91111105 — 131 NR 1"
sample_citation_list_4 = "[1985] ACS no 74 — [1985] SCJ No 74 (QL) — [1985] CarswellOnt 887 — JE 86-88 — 17 Admin LR 89 — [1986] DLQ 89 — 7 CHRR 3102 — 9 CCEL 185 — 12 OAC 241 — 34 ACWS (2d) 109 — 23 DLR (4th) 321 — 64 NR 161 — 52 OR (2d) 799"

When the user copies a block of text containing parallel citations from CanLII, each citation in the block tends to take the following format:

> Year/Volume | Reporter | Pinpoint

There are some exceptions. Specifically, a handful of reporters out of Quebec don't adopt this convention, which makes parsing these text blocks more difficult than it would be otherwise.

In [32]:
def isolate_parallel_citations(other_citations: str) -> list:
    '''
    Return the list of parallel citations found in a pasted block of text.

    The block is split into individual citations as follows:

    * If it contains a long dash (—), it is treated as a CanLII-style
      block and split on the long dash.
    * Otherwise, if it contains a comma, it is treated as a Westlaw-style
      block and split on the comma.
    * Otherwise, the whole block is treated as a single citation.

    Each citation is stripped of surrounding whitespace, and blank
    citations (e.g. from a trailing separator or an empty input) are
    dropped. Within each citation:

    * elements in the exclude list (currently only "(QL)") are removed;
    * a bracketed element immediately followed by an element starting with
      "Carswell" loses its brackets (see McGill 9e 3.8.3), so
      "[1990] CarswellOnt 119" becomes "1990 CarswellOnt 119".

    The function does not yet remove superfluous periods from Westlaw
    citations, nor does it distinguish between the parts of a citation
    (reporter, year, volume, etc.).

    Parameters:
        other_citations: the block of text pasted by the user.

    Returns:
        A list of cleaned citation strings, in their original order. The
        list is empty when the input contains no citation text.
    '''

    # Citation elements to exclude. This list is (very likely) incomplete.
    reporter_exclude_list = ["(QL)"]

    # Pick exactly one separator. The long dash takes priority so that a
    # CanLII block that happens to contain a comma is not re-split on commas.
    if "—" in other_citations:
        raw_citations = other_citations.split("—")
    elif "," in other_citations:
        # TODO: remove superfluous periods from Westlaw citations.
        raw_citations = other_citations.split(",")
    else:
        # No separator at all: the input is (at most) a single citation.
        raw_citations = [other_citations]

    reporter_list_parsed = []

    for raw_citation in raw_citations:
        citation = raw_citation.strip()
        if not citation:
            continue

        # Build a filtered copy instead of removing elements from the list
        # while iterating over it, which would skip the following element.
        elements = [
            element
            for element in citation.split(" ")
            if element not in reporter_exclude_list
        ]
        if not elements:
            continue

        # Only elements that have a successor can precede "Carswell...", so
        # the last element is never inspected (avoids an IndexError when a
        # bracketed element ends the citation).
        for position, element in enumerate(elements[:-1]):
            if (
                element.startswith("[")
                and element.endswith("]")
                and elements[position + 1].startswith("Carswell")
            ):
                elements[position] = element[1:-1]

        reporter_list_parsed.append(" ".join(elements))

    return reporter_list_parsed


In [55]:
from api_calls import call_api_jurisprudence
from data.mcgill.reporter_data import neutral_citations_ca,\
    preferred_reporters, authoritative_reporters

def check_preferred_reporters(citation: str):
    '''
    Locate preferred and authoritative reporter abbreviations in a citation.

    The abbreviation of a reporter is taken to be the first element of
    each entry in preferred_reporters and authoritative_reporters. An
    abbreviation counts as located when it appears in the citation with a
    space before it and either a space or a hyphen directly after it.

    Returns a tuple of two lists: the preferred abbreviations located in
    the citation, followed by the authoritative abbreviations located in
    the citation. Each list follows the order of its source collection.
    '''
    preferred_abbreviations = [entry[0] for entry in preferred_reporters]
    authoritative_abbreviations = [entry[0] for entry in authoritative_reporters]

    # NOTE(review): this prints the full authoritative_reporters collection
    # on every call -- looks like a leftover debugging aid; confirm whether
    # it is still wanted.
    print(authoritative_reporters)

    def _appears_in_citation(abbreviation):
        # The leading space keeps the match from starting in the middle of
        # a longer token.
        spaced = f" {abbreviation} "
        hyphenated = f" {abbreviation}-"
        return spaced in citation or hyphenated in citation

    located_preferred = [
        abbreviation
        for abbreviation in preferred_abbreviations
        if _appears_in_citation(abbreviation)
    ]
    located_authoritative = [
        abbreviation
        for abbreviation in authoritative_abbreviations
        if _appears_in_citation(abbreviation)
    ]

    return located_preferred, located_authoritative


In [57]:

# Split the second sample block into its individual parallel citations.
processed_citation_list_2 = isolate_parallel_citations(sample_citation_list_2)

# For each citation, print the value returned by check_preferred_reporters.
for citation in processed_citation_list_2:
    print(check_preferred_reporters(citation))



[]
([], [])
[]
([], [])
[]
([], [])
[]
([], [])
[]
([], [])
[]
([], [])
[]
([], [])
[]
([], [])
[]
([], [])
[]
([], [])
[]
([], [])
[]
(['AR'], [])
[]
(['Alta LR'], [])
[]
(['WWR'], [])
[]
([], [])
[]
([], [])
[]
([], [])
