# Testing Overall Function

In [1]:
import numpy as np
import pandas as pd
import re
from datetime import date
import jsmith_acquire

In [2]:
#Set the pdf_paths
#Both reports are read from the same folder, so the folder is defined once here.
#Change DOWNLOADS_DIR if the PDFs are stored somewhere else on your machine.
DOWNLOADS_DIR = '/Users/johnathonsmith/Downloads'
criminal_pdf_path = f'{DOWNLOADS_DIR}/CR.PEND_Zavala_1-21-2022.pdf'
civil_pdf_path = f'{DOWNLOADS_DIR}/CV.PEND.CASE_Dimmit_1-21-2022.pdf'

In [3]:
#Test function with criminal case pdf
crim_df = jsmith_acquire.build_dataframe(criminal_pdf_path)

Collected Data From 59 Cases.


In [4]:
crim_df

Unnamed: 0,County,Cause Number,File Date,Docket Date,Defendant Name,Attorney Name,Bondsman Name,Offense,Status
0,Zavala,00-00-00000-ZCR,08/23/2019,,,,,,
1,Zavala,03-12-02906-ZCR,12/30/2009,,"PUENTE, LISA MARIE",,,BURGLARY OF HABITATION,
2,Zavala,04-06-02918-ZCR,01/26/2010,,"RIOS, OSVALDO",,,BURGLARY OF BUILDING,
3,Zavala,04-06-02920-ZCR,01/29/2010,,"RIOS, OSVALDO","BAGLEY, MICHAEL",,BURGLARY OF BUILDING,
4,Zavala,07-04-03066-ZCR,04/30/2007,,"HERRERA, JOSE NICOLAS","PADILLA, ALFRED",SAENZ BAIL BOND,INDECENCY W/CHILD SEXUAL CONTACT,
5,Zavala,07-10-03078-ZCR,06/13/2008,,"RUIZ, DAVID",,TAVO BAIL BONDS,POSS CS PG 1 <1G,
6,Zavala,08-03-03092-ZCR,05/22/2008,,"RUIZ, DAVID","PADILLA, ALFRED",TAVO BAIL BONDS,DRIVING WHILE INTOXICATED 3RD OR MO,
7,Zavala,08-03-03094-ZCR,03/26/2008,,"LOPEZ JR, FELIX M","FRAUSTO, JOHNNY",,POSS CS PG 1 >=1G<4G,
8,Zavala,09-02-03164-ZCR,03/04/2009,,"MIRABAL, JOSE LUIS",,,"POSS MARIJ >2,000LBS",
9,Zavala,09-02-03170-ZCR,03/05/2009,,"GONZALEZ, GISEL ALEJANDRA",,TAVO BAIL BONDS,POSS MARIJ <2OZ,


In [5]:
#Test function with civil case pdf
civil_df = jsmith_acquire.build_dataframe(civil_pdf_path)

Collected Data From 644 Cases.


In [6]:
civil_df

Unnamed: 0,County,Cause Number,File Date,Cause of Action,Docket Date,Docket Type,ANS File,CR Number,Plaintiff Name,Plaintiff Attorney,Defendant Name,Defendant Attorney
0,Dimmit,00-03-01927-TX,03/21/2000,TAX-BEFORE 1.1.2008,,,,,"[JOHN DAVID BRADY, ELIZABETH BRADY]",[],[],[]
1,Dimmit,00-04-01934-TX,04/20/2000,TAX SUIT,,,,,[STATE OF TEXAS COUNTY OF DI],[DANIEL P. O'CONNOR],[ALICIA RANGEL GOMEZ],[]
2,Dimmit,00-04-01938-TX,04/20/2000,TAX SUIT,,,,,[STATE OF TEXAS COUNTY OF DI],[JAMES E. CABELLO],"[LUCINDA LEDESMA, PATRICIA LEDESMA, M E GARY T...",[]
3,Dimmit,00-04-09284-CV,04/17/2000,"SUIT ON CONTRACTS,NOTES,",,,,,"[FIRST SELECT, INC.]",[AUBYN JR. SHETTLE],[CANDIDO R. DEANDA],[]
4,Dimmit,00-05-01948-TX,01/19/2021,TAX DELINQUENCY,,,,,[CARRIZO SPRINGS CONSOLIDATE],[JAMES E. CABELLO],"[GUMECINDO GUTIERREZ, ET. AL.]",[]
...,...,...,...,...,...,...,...,...,...,...,...,...
639,Dimmit,99-10-01886-TX,10/22/1999,TAX-BEFORE 1.1.2008,,,,,"[STATE OF TEXAS, COUNTY OF D]",[JAMES E. CABELLO],"[DIMMIT LAND COMPANY, ET AL]",[]
640,Dimmit,99-10-01888-TX,10/22/1999,TAX DELINQUENCY,,,,,[STATE OF TEXAS COUNTY OF DI],[HAROLD D PUTMAN],"[DIMMIT LAND COMPANY, ET AL, WILLIAM STEINHOFF...","[JOHN W. PETRY, AUGUST LINNARTZ JR.]"
641,Dimmit,99-10-01890-TX,12/29/2020,TAX DELINQUENCY,,,,,[STATE OF TEXAS COUNTY OF DI],[HAROLD D PUTMAN],"[DIMMIT LAND COMPANY, ET AL, A.L. STOVER, GRAC...","[AUGUST LINNARTZ JR., JOHN W. PETRY, JOHN W. P..."
642,Dimmit,99-10-01892-TX,12/29/2020,TAX DELINQUENCY,,,,,[STATE OF TEXAS COUNTY OF DI],[HAROLD D PUTMAN],"[DIMMIT LAND COMPANY, ET AL, H. H. STROW, JOSI...","[AUGUST LINNARTZ JR., JOHN W. PETRY, JOHN W. P..."


# Begin Preparing Dataframes

Now that all the data can be gathered from both types of PDFs, we can begin preparing the dataframes. They have asked for several fields that are not included in the PDFs, so I will need to add them manually. Things both dataframes will need:

__Case Type:__

Criminal, Civil, or Tax. __Complete__
    
    
__On Track:__

True, False. If a docket date is not listed or is more than 3 months behind the current date, the case is not on track and this value will be False. Otherwise, True. __Complete__


__Status:__

Pending, In Progress, Ready To Dispose, Disposed. This field represents the status of the case. All new entries will default to Pending, but each entry can be updated later. __Complete__


__Date Disposed:__

This field will hold the date that the case was disposed. Although this was not specifically asked for, it will allow us to track how long cases are taking to be disposed over time. __Complete__


__Notes:__

This is where they can write in their notes. The values in this field will be carried over so that no work is lost. __Complete__

# Case Type

I can determine the case type by looking at the cause number. If it contains 'TX', it's a tax case. If it contains 'CV', it's civil. If it contains 'CR', it's criminal. Anything that matches none of these is assumed to be civil.

In [7]:
def get_case_type(value):
    """
    This function looks at the cause number and determines what type of case it is. The markers are checked
    in the order 'TX', 'CV', 'CR', and the first one found decides the type. If the given string doesn't meet
    any of the criteria, it assumes the case is Civil. A missing cause number (None, NaN, or any other
    non-string value) also falls back to Civil instead of raising an error.
    
    Parameter:
        -value: A string for the cause number
        
    Returns:
        -type: A string for Civil, Criminal, or Tax
    """
    
    #A missing cause number is not a string and cannot be searched, so use the default type.
    if not isinstance(value, str):
        return 'Civil'
    
    #Check for TX type first
    if 'TX' in value:
        return 'Tax'
    elif 'CV' in value:
        return 'Civil'
    elif 'CR' in value:
        return 'Criminal'
    else:
        #Since there are many civil cases that don't follow the same formatting, 
        #I will assume that anything not matching above is a civil case.
        return 'Civil'


In [8]:
civil_df['Case Type'] = civil_df['Cause Number'].apply(get_case_type)

In [9]:
civil_df.head()

Unnamed: 0,County,Cause Number,File Date,Cause of Action,Docket Date,Docket Type,ANS File,CR Number,Plaintiff Name,Plaintiff Attorney,Defendant Name,Defendant Attorney,Case Type
0,Dimmit,00-03-01927-TX,03/21/2000,TAX-BEFORE 1.1.2008,,,,,"[JOHN DAVID BRADY, ELIZABETH BRADY]",[],[],[],Tax
1,Dimmit,00-04-01934-TX,04/20/2000,TAX SUIT,,,,,[STATE OF TEXAS COUNTY OF DI],[DANIEL P. O'CONNOR],[ALICIA RANGEL GOMEZ],[],Tax
2,Dimmit,00-04-01938-TX,04/20/2000,TAX SUIT,,,,,[STATE OF TEXAS COUNTY OF DI],[JAMES E. CABELLO],"[LUCINDA LEDESMA, PATRICIA LEDESMA, M E GARY T...",[],Tax
3,Dimmit,00-04-09284-CV,04/17/2000,"SUIT ON CONTRACTS,NOTES,",,,,,"[FIRST SELECT, INC.]",[AUBYN JR. SHETTLE],[CANDIDO R. DEANDA],[],Civil
4,Dimmit,00-05-01948-TX,01/19/2021,TAX DELINQUENCY,,,,,[CARRIZO SPRINGS CONSOLIDATE],[JAMES E. CABELLO],"[GUMECINDO GUTIERREZ, ET. AL.]",[],Tax


In [10]:
#Check that nothing went wrong
civil_df['Case Type'].value_counts()

Tax      400
Civil    244
Name: Case Type, dtype: int64

In [11]:
crim_df['Case Type'] = crim_df['Cause Number'].apply(get_case_type)

In [12]:
crim_df.head()

Unnamed: 0,County,Cause Number,File Date,Docket Date,Defendant Name,Attorney Name,Bondsman Name,Offense,Status,Case Type
0,Zavala,00-00-00000-ZCR,08/23/2019,,,,,,,Criminal
1,Zavala,03-12-02906-ZCR,12/30/2009,,"PUENTE, LISA MARIE",,,BURGLARY OF HABITATION,,Criminal
2,Zavala,04-06-02918-ZCR,01/26/2010,,"RIOS, OSVALDO",,,BURGLARY OF BUILDING,,Criminal
3,Zavala,04-06-02920-ZCR,01/29/2010,,"RIOS, OSVALDO","BAGLEY, MICHAEL",,BURGLARY OF BUILDING,,Criminal
4,Zavala,07-04-03066-ZCR,04/30/2007,,"HERRERA, JOSE NICOLAS","PADILLA, ALFRED",SAENZ BAIL BOND,INDECENCY W/CHILD SEXUAL CONTACT,,Criminal


In [13]:
#Check that nothing went wrong
crim_df['Case Type'].value_counts()

Criminal    59
Name: Case Type, dtype: int64

# On Track

I will use the Docket Date to determine if a case is on track or not. If the docket date is within three months of the current date, it will be considered on track. However, if it is not, or it is missing entirely, the case will be considered not on track. Since the criminal cases don't contain a docket date, I may need to use the file date. Or, I could add a column for docket date, but just assume they're all behind until they can be manually updated later.

In [14]:
#Build a function to calculate the days passed and determine if a case is on track or not
def check_on_track(value, today=None, max_days=90):
    """
    This function takes in a case Docket Date and calculates the number of whole days between it and the
    current date. If that number is greater than max_days (90 days, about 3 months, by default), or if the
    docket date is missing, then this function returns False to indicate that a case is NOT on track.
    Otherwise, it returns True. A docket date in the future gives a negative number of days and is on track.
    
    Parameters:
        -value: The case Docket Date. This can be a date string such as '01/21/2022' or any other value
                that pd.to_datetime accepts. A blank string, None, NaN, or NaT counts as a missing date.
        -today: Optional. The date to measure from, in any form that pd.to_datetime accepts. Defaults to
                the current date.
        -max_days: Optional. The largest number of days passed that is still considered on track.
        
    Returns:
        -Boolean: True or False
        
    Raises:
        -Any error pd.to_datetime raises for a non-blank value that it cannot parse as a date.
    """
    
    #Check for docket date. If it is None or a blank string, return False
    if value is None or (isinstance(value, str) and not value.strip()):
        return False
    
    #Convert current value to datetime object
    docket_date = pd.to_datetime(value)
    
    #NaN and NaT both convert to NaT. Without this check the comparison at the end would be False
    #for a missing date, and the case would wrongly be reported as on track.
    if pd.isna(docket_date):
        return False
    
    #Get the date to measure from as a datetime object (today's date unless one was given)
    reference_date = pd.to_datetime(date.today() if today is None else today)
    
    #Calculate whole days passed as an integer
    days_passed = (reference_date - docket_date) // pd.Timedelta('1d')
    
    #If days passed > max_days, case is not on track
    return bool(days_passed <= max_days)

In [15]:
#Test the function
civil_df['On Track'] = civil_df['Docket Date'].apply(check_on_track)

In [16]:
civil_df

Unnamed: 0,County,Cause Number,File Date,Cause of Action,Docket Date,Docket Type,ANS File,CR Number,Plaintiff Name,Plaintiff Attorney,Defendant Name,Defendant Attorney,Case Type,On Track
0,Dimmit,00-03-01927-TX,03/21/2000,TAX-BEFORE 1.1.2008,,,,,"[JOHN DAVID BRADY, ELIZABETH BRADY]",[],[],[],Tax,False
1,Dimmit,00-04-01934-TX,04/20/2000,TAX SUIT,,,,,[STATE OF TEXAS COUNTY OF DI],[DANIEL P. O'CONNOR],[ALICIA RANGEL GOMEZ],[],Tax,False
2,Dimmit,00-04-01938-TX,04/20/2000,TAX SUIT,,,,,[STATE OF TEXAS COUNTY OF DI],[JAMES E. CABELLO],"[LUCINDA LEDESMA, PATRICIA LEDESMA, M E GARY T...",[],Tax,False
3,Dimmit,00-04-09284-CV,04/17/2000,"SUIT ON CONTRACTS,NOTES,",,,,,"[FIRST SELECT, INC.]",[AUBYN JR. SHETTLE],[CANDIDO R. DEANDA],[],Civil,False
4,Dimmit,00-05-01948-TX,01/19/2021,TAX DELINQUENCY,,,,,[CARRIZO SPRINGS CONSOLIDATE],[JAMES E. CABELLO],"[GUMECINDO GUTIERREZ, ET. AL.]",[],Tax,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
639,Dimmit,99-10-01886-TX,10/22/1999,TAX-BEFORE 1.1.2008,,,,,"[STATE OF TEXAS, COUNTY OF D]",[JAMES E. CABELLO],"[DIMMIT LAND COMPANY, ET AL]",[],Tax,False
640,Dimmit,99-10-01888-TX,10/22/1999,TAX DELINQUENCY,,,,,[STATE OF TEXAS COUNTY OF DI],[HAROLD D PUTMAN],"[DIMMIT LAND COMPANY, ET AL, WILLIAM STEINHOFF...","[JOHN W. PETRY, AUGUST LINNARTZ JR.]",Tax,False
641,Dimmit,99-10-01890-TX,12/29/2020,TAX DELINQUENCY,,,,,[STATE OF TEXAS COUNTY OF DI],[HAROLD D PUTMAN],"[DIMMIT LAND COMPANY, ET AL, A.L. STOVER, GRAC...","[AUGUST LINNARTZ JR., JOHN W. PETRY, JOHN W. P...",Tax,False
642,Dimmit,99-10-01892-TX,12/29/2020,TAX DELINQUENCY,,,,,[STATE OF TEXAS COUNTY OF DI],[HAROLD D PUTMAN],"[DIMMIT LAND COMPANY, ET AL, H. H. STROW, JOSI...","[AUGUST LINNARTZ JR., JOHN W. PETRY, JOHN W. P...",Tax,False


In [17]:
civil_df['On Track'].value_counts()

False    595
True      49
Name: On Track, dtype: int64

# Status

This field represents the status of the case. All new entries will default to Pending, but each entry can be updated later.

In [18]:
civil_df['Status'] = 'Pending'

In [19]:
civil_df

Unnamed: 0,County,Cause Number,File Date,Cause of Action,Docket Date,Docket Type,ANS File,CR Number,Plaintiff Name,Plaintiff Attorney,Defendant Name,Defendant Attorney,Case Type,On Track,Status
0,Dimmit,00-03-01927-TX,03/21/2000,TAX-BEFORE 1.1.2008,,,,,"[JOHN DAVID BRADY, ELIZABETH BRADY]",[],[],[],Tax,False,Pending
1,Dimmit,00-04-01934-TX,04/20/2000,TAX SUIT,,,,,[STATE OF TEXAS COUNTY OF DI],[DANIEL P. O'CONNOR],[ALICIA RANGEL GOMEZ],[],Tax,False,Pending
2,Dimmit,00-04-01938-TX,04/20/2000,TAX SUIT,,,,,[STATE OF TEXAS COUNTY OF DI],[JAMES E. CABELLO],"[LUCINDA LEDESMA, PATRICIA LEDESMA, M E GARY T...",[],Tax,False,Pending
3,Dimmit,00-04-09284-CV,04/17/2000,"SUIT ON CONTRACTS,NOTES,",,,,,"[FIRST SELECT, INC.]",[AUBYN JR. SHETTLE],[CANDIDO R. DEANDA],[],Civil,False,Pending
4,Dimmit,00-05-01948-TX,01/19/2021,TAX DELINQUENCY,,,,,[CARRIZO SPRINGS CONSOLIDATE],[JAMES E. CABELLO],"[GUMECINDO GUTIERREZ, ET. AL.]",[],Tax,False,Pending
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
639,Dimmit,99-10-01886-TX,10/22/1999,TAX-BEFORE 1.1.2008,,,,,"[STATE OF TEXAS, COUNTY OF D]",[JAMES E. CABELLO],"[DIMMIT LAND COMPANY, ET AL]",[],Tax,False,Pending
640,Dimmit,99-10-01888-TX,10/22/1999,TAX DELINQUENCY,,,,,[STATE OF TEXAS COUNTY OF DI],[HAROLD D PUTMAN],"[DIMMIT LAND COMPANY, ET AL, WILLIAM STEINHOFF...","[JOHN W. PETRY, AUGUST LINNARTZ JR.]",Tax,False,Pending
641,Dimmit,99-10-01890-TX,12/29/2020,TAX DELINQUENCY,,,,,[STATE OF TEXAS COUNTY OF DI],[HAROLD D PUTMAN],"[DIMMIT LAND COMPANY, ET AL, A.L. STOVER, GRAC...","[AUGUST LINNARTZ JR., JOHN W. PETRY, JOHN W. P...",Tax,False,Pending
642,Dimmit,99-10-01892-TX,12/29/2020,TAX DELINQUENCY,,,,,[STATE OF TEXAS COUNTY OF DI],[HAROLD D PUTMAN],"[DIMMIT LAND COMPANY, ET AL, H. H. STROW, JOSI...","[AUGUST LINNARTZ JR., JOHN W. PETRY, JOHN W. P...",Tax,False,Pending


# Date Disposed

This field will hold the date that the case was disposed. Although this was not specifically asked for, it will allow us to track how long cases are taking to be disposed over time. Note that the prepare function built later in this notebook names this column `Disposed Date`.

In [20]:
#Just add the column for now.
civil_df['Date Disposed'] = ''

In [21]:
civil_df

Unnamed: 0,County,Cause Number,File Date,Cause of Action,Docket Date,Docket Type,ANS File,CR Number,Plaintiff Name,Plaintiff Attorney,Defendant Name,Defendant Attorney,Case Type,On Track,Status,Date Disposed
0,Dimmit,00-03-01927-TX,03/21/2000,TAX-BEFORE 1.1.2008,,,,,"[JOHN DAVID BRADY, ELIZABETH BRADY]",[],[],[],Tax,False,Pending,
1,Dimmit,00-04-01934-TX,04/20/2000,TAX SUIT,,,,,[STATE OF TEXAS COUNTY OF DI],[DANIEL P. O'CONNOR],[ALICIA RANGEL GOMEZ],[],Tax,False,Pending,
2,Dimmit,00-04-01938-TX,04/20/2000,TAX SUIT,,,,,[STATE OF TEXAS COUNTY OF DI],[JAMES E. CABELLO],"[LUCINDA LEDESMA, PATRICIA LEDESMA, M E GARY T...",[],Tax,False,Pending,
3,Dimmit,00-04-09284-CV,04/17/2000,"SUIT ON CONTRACTS,NOTES,",,,,,"[FIRST SELECT, INC.]",[AUBYN JR. SHETTLE],[CANDIDO R. DEANDA],[],Civil,False,Pending,
4,Dimmit,00-05-01948-TX,01/19/2021,TAX DELINQUENCY,,,,,[CARRIZO SPRINGS CONSOLIDATE],[JAMES E. CABELLO],"[GUMECINDO GUTIERREZ, ET. AL.]",[],Tax,False,Pending,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
639,Dimmit,99-10-01886-TX,10/22/1999,TAX-BEFORE 1.1.2008,,,,,"[STATE OF TEXAS, COUNTY OF D]",[JAMES E. CABELLO],"[DIMMIT LAND COMPANY, ET AL]",[],Tax,False,Pending,
640,Dimmit,99-10-01888-TX,10/22/1999,TAX DELINQUENCY,,,,,[STATE OF TEXAS COUNTY OF DI],[HAROLD D PUTMAN],"[DIMMIT LAND COMPANY, ET AL, WILLIAM STEINHOFF...","[JOHN W. PETRY, AUGUST LINNARTZ JR.]",Tax,False,Pending,
641,Dimmit,99-10-01890-TX,12/29/2020,TAX DELINQUENCY,,,,,[STATE OF TEXAS COUNTY OF DI],[HAROLD D PUTMAN],"[DIMMIT LAND COMPANY, ET AL, A.L. STOVER, GRAC...","[AUGUST LINNARTZ JR., JOHN W. PETRY, JOHN W. P...",Tax,False,Pending,
642,Dimmit,99-10-01892-TX,12/29/2020,TAX DELINQUENCY,,,,,[STATE OF TEXAS COUNTY OF DI],[HAROLD D PUTMAN],"[DIMMIT LAND COMPANY, ET AL, H. H. STROW, JOSI...","[AUGUST LINNARTZ JR., JOHN W. PETRY, JOHN W. P...",Tax,False,Pending,


# Notes

This is where they can write in their notes. The values in this field will be carried over so that no work is lost.

In [22]:
#Just add the column for now
civil_df['Notes'] = ''

In [23]:
civil_df

Unnamed: 0,County,Cause Number,File Date,Cause of Action,Docket Date,Docket Type,ANS File,CR Number,Plaintiff Name,Plaintiff Attorney,Defendant Name,Defendant Attorney,Case Type,On Track,Status,Date Disposed,Notes
0,Dimmit,00-03-01927-TX,03/21/2000,TAX-BEFORE 1.1.2008,,,,,"[JOHN DAVID BRADY, ELIZABETH BRADY]",[],[],[],Tax,False,Pending,,
1,Dimmit,00-04-01934-TX,04/20/2000,TAX SUIT,,,,,[STATE OF TEXAS COUNTY OF DI],[DANIEL P. O'CONNOR],[ALICIA RANGEL GOMEZ],[],Tax,False,Pending,,
2,Dimmit,00-04-01938-TX,04/20/2000,TAX SUIT,,,,,[STATE OF TEXAS COUNTY OF DI],[JAMES E. CABELLO],"[LUCINDA LEDESMA, PATRICIA LEDESMA, M E GARY T...",[],Tax,False,Pending,,
3,Dimmit,00-04-09284-CV,04/17/2000,"SUIT ON CONTRACTS,NOTES,",,,,,"[FIRST SELECT, INC.]",[AUBYN JR. SHETTLE],[CANDIDO R. DEANDA],[],Civil,False,Pending,,
4,Dimmit,00-05-01948-TX,01/19/2021,TAX DELINQUENCY,,,,,[CARRIZO SPRINGS CONSOLIDATE],[JAMES E. CABELLO],"[GUMECINDO GUTIERREZ, ET. AL.]",[],Tax,False,Pending,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
639,Dimmit,99-10-01886-TX,10/22/1999,TAX-BEFORE 1.1.2008,,,,,"[STATE OF TEXAS, COUNTY OF D]",[JAMES E. CABELLO],"[DIMMIT LAND COMPANY, ET AL]",[],Tax,False,Pending,,
640,Dimmit,99-10-01888-TX,10/22/1999,TAX DELINQUENCY,,,,,[STATE OF TEXAS COUNTY OF DI],[HAROLD D PUTMAN],"[DIMMIT LAND COMPANY, ET AL, WILLIAM STEINHOFF...","[JOHN W. PETRY, AUGUST LINNARTZ JR.]",Tax,False,Pending,,
641,Dimmit,99-10-01890-TX,12/29/2020,TAX DELINQUENCY,,,,,[STATE OF TEXAS COUNTY OF DI],[HAROLD D PUTMAN],"[DIMMIT LAND COMPANY, ET AL, A.L. STOVER, GRAC...","[AUGUST LINNARTZ JR., JOHN W. PETRY, JOHN W. P...",Tax,False,Pending,,
642,Dimmit,99-10-01892-TX,12/29/2020,TAX DELINQUENCY,,,,,[STATE OF TEXAS COUNTY OF DI],[HAROLD D PUTMAN],"[DIMMIT LAND COMPANY, ET AL, H. H. STROW, JOSI...","[AUGUST LINNARTZ JR., JOHN W. PETRY, JOHN W. P...",Tax,False,Pending,,


# Build Prepare Function

Build a function to prepare the case dataframes and add columns as necessary.

In [24]:
def prepare_case_dataframe(df):
    """
    Add the extra tracking columns to a freshly built case dataframe.
    
    The columns are written onto the dataframe that is passed in, and that same dataframe is returned.
    Only use this on a newly created dataframe: Status and the other added columns are set back to their
    starting values, so anything already stored in them would be overwritten.
    
    Parameter:
        - df: The newly created case dataframe. Can be civil or criminal. It needs to have
              'Cause Number' and 'Docket Date' columns.
        
    Returns:
        - df: The same dataframe, but with new columns added.
    """
    
    #Case Type is worked out from the cause number
    cause_numbers = df['Cause Number']
    df['Case Type'] = cause_numbers.apply(get_case_type)
    
    #On Track is worked out from the docket date
    docket_dates = df['Docket Date']
    df['On Track'] = docket_dates.apply(check_on_track)
    
    #Every case starts out as Pending
    df['Status'] = 'Pending'
    
    #The remaining columns all start out as empty strings
    blank_columns = ('File Has Image', 'Need File', 'Disposed Date', 'Finding', 'Finding Date')
    for column_name in blank_columns:
        df[column_name] = ''
    
    return df

# Test Prepare Function

Test the above function on both the criminal and civil case dataframes. Make sure it works for both.

In [25]:
import jsmith_prepare

In [26]:
#Get New Dataframes
crim_df = jsmith_acquire.build_dataframe(criminal_pdf_path)
civil_df = jsmith_acquire.build_dataframe(civil_pdf_path)

Collected Data From 59 Cases.
Collected Data From 644 Cases.


In [27]:
#Test on criminal dataframe first
crim_df = jsmith_prepare.prepare_case_dataframe(crim_df)

In [28]:
crim_df

Unnamed: 0,County,Cause Number,File Date,Docket Date,Defendant Name,Attorney Name,Bondsman Name,Offense,Status,Case Type,On Track,File Has Image,Need File,Disposed Date,Finding,Finding Date
0,Zavala,00-00-00000-ZCR,08/23/2019,,,,,,Pending,Criminal,False,,,,,
1,Zavala,03-12-02906-ZCR,12/30/2009,,"PUENTE, LISA MARIE",,,BURGLARY OF HABITATION,Pending,Criminal,False,,,,,
2,Zavala,04-06-02918-ZCR,01/26/2010,,"RIOS, OSVALDO",,,BURGLARY OF BUILDING,Pending,Criminal,False,,,,,
3,Zavala,04-06-02920-ZCR,01/29/2010,,"RIOS, OSVALDO","BAGLEY, MICHAEL",,BURGLARY OF BUILDING,Pending,Criminal,False,,,,,
4,Zavala,07-04-03066-ZCR,04/30/2007,,"HERRERA, JOSE NICOLAS","PADILLA, ALFRED",SAENZ BAIL BOND,INDECENCY W/CHILD SEXUAL CONTACT,Pending,Criminal,False,,,,,
5,Zavala,07-10-03078-ZCR,06/13/2008,,"RUIZ, DAVID",,TAVO BAIL BONDS,POSS CS PG 1 <1G,Pending,Criminal,False,,,,,
6,Zavala,08-03-03092-ZCR,05/22/2008,,"RUIZ, DAVID","PADILLA, ALFRED",TAVO BAIL BONDS,DRIVING WHILE INTOXICATED 3RD OR MO,Pending,Criminal,False,,,,,
7,Zavala,08-03-03094-ZCR,03/26/2008,,"LOPEZ JR, FELIX M","FRAUSTO, JOHNNY",,POSS CS PG 1 >=1G<4G,Pending,Criminal,False,,,,,
8,Zavala,09-02-03164-ZCR,03/04/2009,,"MIRABAL, JOSE LUIS",,,"POSS MARIJ >2,000LBS",Pending,Criminal,False,,,,,
9,Zavala,09-02-03170-ZCR,03/05/2009,,"GONZALEZ, GISEL ALEJANDRA",,TAVO BAIL BONDS,POSS MARIJ <2OZ,Pending,Criminal,False,,,,,


In [29]:
#Test on civil dataframe
civil_df = jsmith_prepare.prepare_case_dataframe(civil_df)

In [30]:
civil_df

Unnamed: 0,County,Cause Number,File Date,Cause of Action,Docket Date,Docket Type,ANS File,CR Number,Plaintiff Name,Plaintiff Attorney,Defendant Name,Defendant Attorney,Case Type,On Track,Status,File Has Image,Need File,Disposed Date,Finding,Finding Date
0,Dimmit,00-03-01927-TX,03/21/2000,TAX-BEFORE 1.1.2008,,,,,"[JOHN DAVID BRADY, ELIZABETH BRADY]",[],[],[],Tax,False,Pending,,,,,
1,Dimmit,00-04-01934-TX,04/20/2000,TAX SUIT,,,,,[STATE OF TEXAS COUNTY OF DI],[DANIEL P. O'CONNOR],[ALICIA RANGEL GOMEZ],[],Tax,False,Pending,,,,,
2,Dimmit,00-04-01938-TX,04/20/2000,TAX SUIT,,,,,[STATE OF TEXAS COUNTY OF DI],[JAMES E. CABELLO],"[LUCINDA LEDESMA, PATRICIA LEDESMA, M E GARY T...",[],Tax,False,Pending,,,,,
3,Dimmit,00-04-09284-CV,04/17/2000,"SUIT ON CONTRACTS,NOTES,",,,,,"[FIRST SELECT, INC.]",[AUBYN JR. SHETTLE],[CANDIDO R. DEANDA],[],Civil,False,Pending,,,,,
4,Dimmit,00-05-01948-TX,01/19/2021,TAX DELINQUENCY,,,,,[CARRIZO SPRINGS CONSOLIDATE],[JAMES E. CABELLO],"[GUMECINDO GUTIERREZ, ET. AL.]",[],Tax,False,Pending,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
639,Dimmit,99-10-01886-TX,10/22/1999,TAX-BEFORE 1.1.2008,,,,,"[STATE OF TEXAS, COUNTY OF D]",[JAMES E. CABELLO],"[DIMMIT LAND COMPANY, ET AL]",[],Tax,False,Pending,,,,,
640,Dimmit,99-10-01888-TX,10/22/1999,TAX DELINQUENCY,,,,,[STATE OF TEXAS COUNTY OF DI],[HAROLD D PUTMAN],"[DIMMIT LAND COMPANY, ET AL, WILLIAM STEINHOFF...","[JOHN W. PETRY, AUGUST LINNARTZ JR.]",Tax,False,Pending,,,,,
641,Dimmit,99-10-01890-TX,12/29/2020,TAX DELINQUENCY,,,,,[STATE OF TEXAS COUNTY OF DI],[HAROLD D PUTMAN],"[DIMMIT LAND COMPANY, ET AL, A.L. STOVER, GRAC...","[AUGUST LINNARTZ JR., JOHN W. PETRY, JOHN W. P...",Tax,False,Pending,,,,,
642,Dimmit,99-10-01892-TX,12/29/2020,TAX DELINQUENCY,,,,,[STATE OF TEXAS COUNTY OF DI],[HAROLD D PUTMAN],"[DIMMIT LAND COMPANY, ET AL, H. H. STROW, JOSI...","[AUGUST LINNARTZ JR., JOHN W. PETRY, JOHN W. P...",Tax,False,Pending,,,,,


# Begin Troubleshooting

In [2]:
#create new civil case pdf path
#NOTE(review): '%20' is the URL-encoded form of a space. Confirm the file on disk really has this
#literal name and was not saved as 'CV.PEND.CASE_Dimmit 4.11.2022.pdf'.
civil_path = '/Users/johnathonsmith/Downloads/CV.PEND.CASE_Dimmit%204.11.2022.pdf'

In [3]:
#NOTE(review): the TypeError recorded for this cell says build_dataframe() is missing a required
#positional argument named 'content'. The one-argument form worked in the earlier cells, so the
#jsmith_acquire signature has presumably changed since then - confirm and pass the new argument.
civil_df = jsmith_acquire.build_dataframe(civil_path)

TypeError: build_dataframe() missing 1 required positional argument: 'content'