## This file creates a pandas implementation of the functions we have in terms_in_cases file
### It uses data from previous teams, and our new data from 4/9

In [2]:
#general imports
import json
import numpy as np
import pandas as pd

#this code makes your editing experience using both python files 
#and notebooks a lot easier
%load_ext autoreload
#written to make things easier
%aimport utility 
%autoreload 1



imported utility.py



In [3]:
all_cases, all_appeals = utility.combine_cases()

In [4]:
len(all_appeals)+len(all_cases)

7011

In [5]:

def pd_number_of_cases_per_keywords(case_data, list_of_keywords):
    # This function takes as input: a pandas dataframe with the case data, and a list of keywords to search for. 
    # It searches through the headnote and text for the keywords
    # Note: It is NOT caps sensitive.
    # See pd_number_in_headnote if you want to only search the headnote.
    headnotes = case_data["headnote"]
    headnotes = headnotes.str.contains("|".join(list_of_keywords),regex=True,case=False)
    texts = case_data["text"]
    texts = texts.str.contains("|".join(list_of_keywords),regex=True,case=False)
    all_cases = case_data[headnotes|texts]
    return all_cases
    
    

In [6]:
def pd_number_in_headnote(case_data, list_of_keywords):
    # This function takes as input: a pandas dataframe with the case data, and a list of keywords to search for. 
    # It searches through the headnote for the keywords
    # Note: It is NOT caps sensitive.
    # See pd_number_of_cases_per_keyword if you want to search through the headnote and text.
    headnotes = case_data["headnote"]
    headnotes = headnotes.str.contains("|".join(list_of_keywords),regex=True,case=False)
    all_cases = case_data[headnotes]
    return all_cases
    

### Now we look for informant related cases in the data

In [7]:
important_keywords = ["confidential informant", " ci ", "snitch", "informant", "gang informant"]

In [8]:
result_cases = pd_number_of_cases_per_keywords(all_cases, important_keywords)
print(len(result_cases))

79


In [9]:
result_appeals = pd_number_of_cases_per_keywords(all_appeals, important_keywords)
print(len(result_appeals))

146


### Now we look for reversal related cases in the data

In [10]:
rev = ["reversed","reversing"]
result_reversed_cases = pd_number_of_cases_per_keywords(all_cases,rev)
result_reversed_appeals = pd_number_of_cases_per_keywords(all_appeals,rev)
print(len(result_reversed_cases))

620


In [11]:
print(len(result_reversed_appeals))

1445


In [12]:
reversed_headnote_cases = pd_number_in_headnote(all_cases,rev)
reversed_headnote_appeals = pd_number_in_headnote(all_appeals,rev)
print(len(reversed_headnote_cases))

37


In [13]:
print(len(reversed_headnote_appeals))

102


### Now we look for civil cases in the data

In [14]:
civ = ["civil ", "civil action","civil,"]
result_civil_cases = pd_number_of_cases_per_keywords(all_cases,civ)
result_civil_appeals = pd_number_of_cases_per_keywords(all_appeals,civ)
print(len(result_civil_cases))

1156


In [15]:
print(len(result_civil_appeals))

2316


In [16]:
civil_headnote_cases = pd_number_in_headnote(all_cases,civ)
civil_headnote_appeals = pd_number_in_headnote(all_appeals,civ)
print(len(civil_headnote_cases))

951


In [17]:
print(len(civil_headnote_appeals))

2136


### Now we look for dissents in the cases

In [18]:
dis = ["dissent","dissenting"] 
result_dissent_cases = pd_number_of_cases_per_keywords(all_cases,dis)
result_dissent_appeals = pd_number_of_cases_per_keywords(all_appeals,dis)
print(len(result_dissent_cases))

278


In [19]:
print(len(result_dissent_appeals))

329


In [20]:
dissent_headnote_cases = pd_number_in_headnote(all_cases,dis)
dissent_headnote_appeals = pd_number_in_headnote(all_appeals,dis)
print(len(dissent_headnote_cases))

147


In [21]:
print(len(dissent_headnote_appeals))

216


### Now we look for footnotes in the data

In [22]:
ft = ["Footnote"]
foots_c = pd_number_of_cases_per_keywords(all_cases,ft)
foots_a = pd_number_of_cases_per_keywords(all_appeals,ft)
print(len(foots_c))
print(len(foots_a))

154
288


In [23]:
all_cases.loc[8,"text"]

8    \nThe plaintiffs appeal from a judgment of the...
8    BY THE COURT. The judgment on the aggravated r...
Name: text, dtype: object

### Deeper on informant cases

In [24]:
print("in cases:",len(result_cases))
print("in appeals:",len(result_appeals))

civil_informant_cases = pd_number_in_headnote(result_cases,civ)
civil_informant_appeals = pd_number_in_headnote(result_appeals,civ)
print("civil and informant cases:",len(civil_informant_cases))
print("civil and informant appeals:",len(civil_informant_appeals))

reversed_informant_cases = pd_number_of_cases_per_keywords(result_cases,rev)
reversed_informant_appeals = pd_number_of_cases_per_keywords(result_appeals,rev)
print("reversed and informant cases:",len(reversed_informant_cases))
print("reversed and informant appeals:",len(reversed_informant_appeals))

reversed_civil_informant_cases = pd_number_in_headnote(reversed_informant_cases,civ)
reversed_civil_informant_appeals = pd_number_in_headnote(reversed_informant_appeals,civ)
civil_reversed_informant_cases = pd_number_of_cases_per_keywords(civil_informant_cases,rev)
civil_reversed_informant_appeals = pd_number_of_cases_per_keywords(civil_informant_appeals,rev)
print("reversed, civil, informant cases:",len(reversed_civil_informant_cases),len(civil_reversed_informant_cases))
print("reversed, civil, informant appeals:",len(reversed_civil_informant_appeals),len(civil_reversed_informant_appeals))

# idk dissent?
dissent_informant_cases = pd_number_of_cases_per_keywords(result_cases,dis)
dissent_informant_appeals = pd_number_of_cases_per_keywords(result_appeals,dis)
print("dissent and informant cases:",len(dissent_informant_cases))
print("dissent and informant appeals:",len(dissent_informant_appeals))



in cases: 79
in appeals: 146
civil and informant cases: 10
civil and informant appeals: 9
reversed and informant cases: 28
reversed and informant appeals: 69
reversed, civil, informant cases: 2 2
reversed, civil, informant appeals: 1 1
dissent and informant cases: 15
dissent and informant appeals: 19


### Deeper on cases with dissent

In [25]:
print(len(result_dissent_cases))
print(len(result_dissent_appeals))

278
329


In [26]:
dissent_reversed_cases = pd_number_of_cases_per_keywords(result_dissent_cases,rev)
dissent_reversed_appeals = pd_number_of_cases_per_keywords(result_dissent_appeals,rev)
print(len(dissent_reversed_cases))
print(len(dissent_reversed_appeals))

91
130


In [29]:
conclu = pd_number_of_cases_per_keywords(result_dissent_cases,["Conclusion."])
print(len(conclu))
for i in range(len(conclu)):
    text = conclu["text"].values[i]
    index = text.find("Conclusion.")
    conclu["text"].values[i] = text[index:]
dissents = pd_number_of_cases_per_keywords(conclu,dis)
print(len(dissents))

222
119
