In [23]:
import requests
import os
import utils
import json
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
from numpy import linalg as LA
import math

# Display Pretty JSON

In [2]:
def pretty_json(dictionary):   
    formatted_json = json.dumps(dictionary, indent=4, sort_keys=True) # type 'str'
    print(formatted_json)

# Using the CAPAPI

The Caselaw Access Project API, also known as CAPAPI, serves all official US court cases published in books from 1658 to 2018. The collection includes over six million cases scanned from the Harvard Law Library shelves.

In [24]:
"""
case_json: request response object
Returns court id
"""
def get_name(case_json):
    return case_json["name"]

"""
case_json: request response object
Returns court id
"""
def get_id(case_json):
    return case_json["id"]


"""
case_json: request response object
Returns full-text of case
"""
def get_full_text(case_json):
    return case_json["casebody"]["data"]["opinions"][0]['text']

### Retreive single case by ID

In [3]:
single_case = utils.get_request_caselaw('https://api.case.law/v1/cases/1589229/?full_case=true').json() # actually a 'dict'
pretty_json(single_case)

{
    "casebody": {
        "data": {
            "attorneys": [
                "Harold Sheats, Martin H. Peabody, for plaintiff in error.",
                "Wilson, Branch ,& Barwick, M. Cook Barwick, Thomas S. Bentley, contra."
            ],
            "corrections": "",
            "head_matter": "38245.\nFULTON COUNTY CIVIL COURT v. ELZEY.\nDecided April 14, 1960.\nHarold Sheats, Martin H. Peabody, for plaintiff in error.\nWilson, Branch ,& Barwick, M. Cook Barwick, Thomas S. Bentley, contra.",
            "judges": [
                "Townsend, Carlisle, 'and Frankum, JJ., concur."
            ],
            "opinions": [
                {
                    "author": "Gardner, Presiding Judge.",
                    "text": "Gardner, Presiding Judge.\nThis court has, on innumerable occasions, held that where an employee is injured in the scope of his employment and the evidence before the State Board of Workmen\u2019s Compensation reflects that such was the case, an appellate c

### Phrase Search

In [25]:
case_json = utils.get_request_caselaw('https://api.case.law/v1/cases/?search="civil court"&full_case=true').json()
case_json

{'count': 13901,
 'next': 'https://api.case.law/v1/cases/?cursor=eyJwIjogWzEzLjE0ODEwNywgNjIyNDI1NjAwMDAwLCA4ODIwNzhdfQ%3D%3D&full_case=true&search=%22civil+court%22',
 'previous': None,
 'results': [{'id': 1589229,
   'url': 'https://api.case.law/v1/cases/1589229/',
   'name': 'FULTON COUNTY CIVIL COURT v. ELZEY',
   'name_abbreviation': 'Fulton County Civil Court v. Elzey',
   'decision_date': '1960-04-14',
   'docket_number': '38245',
   'first_page': '520',
   'last_page': '523',
   'citations': [{'type': 'official', 'cite': '101 Ga. App. 520'}],
   'volume': {'volume_number': '101',
    'barcode': '32044078445228',
    'url': 'https://api.case.law/v1/volumes/32044078445228/'},
   'reporter': {'full_name': 'Georgia Appeals Reports',
    'id': 519,
    'url': 'https://api.case.law/v1/reporters/519/'},
   'court': {'name_abbreviation': 'Ga. Ct. App.',
    'slug': 'ga-ct-app',
    'id': 8892,
    'name': 'Court of Appeals of Georgia',
    'url': 'https://api.case.law/v1/courts/ga-ct-a

### Full-text Search

In [None]:
utils.get_request_caselaw('https://api.case.law/v1/cases/?search=university published false article libel&full_case=true&ordering=decision_date').json()

In [None]:
test_case_response.json()

### Add a Date Range Filter

In [19]:
'&decision_date_min=1990-12-30&decision_date_max=1995-12-30'

'&decision_date_min=1990-12-30&decision_date_max=1995-12-30'

# Useful Data Structures

In [39]:
# The parts of the response we are interested in are the case name, case text, judge opinions, 
# and laws / regulations cited in the case.

data = list()
for result in case_json['results']:
    
    case_id = get_id(result)
    case_name = get_name(result)
    case_text = get_full_text(result)
    
    result_dict = {'case_id' : case_id, 'case_name' : case_name, 'text' : case_text}
    data.append(result_dict)

In [40]:
num_cases = case_json['count']
num_cases

13901

In [44]:
print("Loaded {} case transcripts".format(num_cases))
print("Each case transcript is a dictionary with the following keys...")
print(data[0].keys())


# Here, we will assign an index for each movie_id. This index will help us access data in numpy matrices.
case_id_to_index = {case_id:index for index, case_id in enumerate([d['case_id'] for d in data])}

# We will also need a dictionary maping movie names to movie ids
case_name_to_id = {name:case_id for name, case_id in zip([d['case_name'] for d in data],
                                                     [d['case_id'] for d in data])}
case_id_to_name = {v:k for k,v in case_name_to_id.items()}

# and because it might be useful...
case_name_to_index = {name:case_id_to_index[case_name_to_id[name]] for name in [d['case_name'] for d in data]}
case_index_to_name = {v:k for k,v in case_name_to_index.items()}

Loaded 13901 case transcripts
Each case transcript is a dictionary with the following keys...
dict_keys(['case_id', 'case_name', 'text'])


# Similarity Metric