In [9]:
import pandas as pd
import sqlite3
import sqlalchemy
import os
import re

In [2]:
!ls 'data'

 case_level.csv
 case_level.xlsx
 Chamrajnagar_8_Addl_Kollegal.rar
'CIVIL JUDGE AND JMFC, YELANDUR & PRL. CIVIL JUDGE AND JMFC COURT, CHAMARAJANAGAR'
'CIVIL JUDGE AND JMFC, YELANDUR _ PRL. CIVIL JUDGE AND JMFC COURT, CHAMARAJANAGAR.rar'
 hearing_level.xlsx
'Population Status Final.xlsx'
'PRL. CIVIL JUDGE AND JMFC, KOLLEGAL.rar'
'SENIOR CIVIL JUDGE AND CJM COURT, CHAMARAJANAGAR'
'SENIOR CIVIL JUDGE AND CJM COURT, CHAMARAJANAGAR.rar'
'SENIOR CIVIL JUDGE AND JMFC GUNDLUPET.rar'
'SENIOR CIVIL JUDGE AND JMFC, KOLLEGAL.rar'


In [3]:
# Folder whose case and hearing files are processed in this notebook.
folder_path = 'data/SENIOR CIVIL JUDGE AND CJM COURT, CHAMARAJANAGAR/'

Each folder contains files with `CaseInfo` in their name; these hold the case data. In addition, there may be files with `HistoryTableRows` in their name, which hold the corresponding hearing data.

Each file is marked with a date representing when it was scraped.

In [19]:
# os.listdir returns entries in arbitrary order; sort them once so the files
# are always combined in the same order, and reuse the listing for both lists.
folder_entries = sorted(os.listdir(folder_path))
cases_files = [os.path.join(folder_path, fn) for fn in folder_entries if 'CaseInfo' in fn]
hearing_files = [os.path.join(folder_path, fn) for fn in folder_entries if 'HistoryTableRows' in fn]

In [20]:
def extract_dates_from_filename(fn, regex=r"\b\d{2}\w{3}\d{4}"):
    r'''
    Extract the first date-like token out of a cases or hearings filename.

    Args:
        fn(str): filename to be parsed
        regex(str, default=\b\d{2}\w{3}\d{4}): The regex to match the dates;
            the default matches two digits, three word characters and four
            digits (e.g. 01Jul2018) starting at a word boundary

    Returns:
        The first match reported by re.findall, or None when nothing in fn
        matches the regex.
    '''
    # The docstring is a raw string so that \b and \d stay literal text
    # instead of being interpreted as string escape sequences.
    matches = re.findall(regex, fn)
    return matches[0] if matches else None

In [21]:
# Date embedded in the name of every cases file, in listing order.
list(map(extract_dates_from_filename, cases_files))

['01Jul2018', '30Jun2018']

In [22]:
# Pick the first cases file so its raw contents can be inspected.
fn = cases_files[0]
fn

'data/SENIOR CIVIL JUDGE AND CJM COURT, CHAMARAJANAGAR/DistrictDataCaseInfoE1(01Jul2018Excel).csv'

In [25]:
# Print only the first line (the CSV header). Iterating the file handle reads
# lazily, so the rest of the file is never loaded into memory.
with open(fn, 'r') as datafile:
    for line in datafile:
        print(line)
        break

"Id","CombinedCaseNumber","CaseNumber","CaseType","Year","CourtName","CourtHallNumber","Bench","DateFiled","CaseClassification","OrderType","Petitioner","PetitionerAdvocate","Respondent","RespondentAdvocate","CurrentStage","CurrentStatus","District","LastActionTaken","LatestOrder","BeforeHonarbleJudges","LastPostedFor","LastDateOfAction","NextHearingDate","LowerCourtName","LowerCourtCaseNumber","LowerCourtOtherDetails","LowerCourtDisposalDate","CaseGroup","LastSyncTime","RespondentType","PetitionerType","PresentedOn","BenchCategory","CaseOriginatedFrom","ListedTimes","Act","DisposalDate","LastListedOn","CaseCategory","CurrentPosition","NextListingPurpose","Purpose","FilingNumber","SerialNumber","cnr_number","CaseUpdateOn","PoliceStationName","NextListingCourt","RegistrationDate","ActionDate","NextListingDate","StageName","PostingStage","ListingDate","NextListingTime","DepartmentName","LowerCourtJudgmentDate","PresentDate","StampNumber","DateOfHearing","LowerCourtDistrict","LowerCourtJu

In [26]:
def combine_data(filenames, outfilename):
    '''
    Combine the data of cases and hearing into a single file for analysis

    The first line of each input file is treated as its header. Only the
    header of the first non-empty file is written; the header line of every
    later file is dropped and its remaining lines are appended.

    Args:
        filenames(list of str): paths of the files to concatenate, in order
        outfilename(str): path of the combined file (overwritten if present)

    Returns:
        bool: always True once every file has been written
    '''
    header_written = False
    with open(outfilename, 'w') as outputfile:
        for fn in filenames:
            with open(fn, 'r') as datafile:
                header = datafile.readline()
                if not header:
                    # Empty file: nothing to copy, and it must not count as
                    # the file that supplied the header.
                    continue
                if not header_written:
                    outputfile.write(header if header.endswith('\n') else header + '\n')
                    header_written = True
                # Stream line by line instead of buffering the whole file.
                for line in datafile:
                    # A file without a trailing newline would otherwise have
                    # its last row glued to the first row of the next file.
                    outputfile.write(line if line.endswith('\n') else line + '\n')
    return True

In [77]:
# Last component of folder_path once surrounding slashes are stripped.
folder_path.strip('/').split('/')[-1]

'SENIOR CIVIL JUDGE AND CJM COURT, CHAMARAJANAGAR'

In [28]:
# Lower-case the folder name and join its whitespace-separated words with underscores.
'_'.join('SENIOR CIVIL JUDGE AND CJM COURT, CHAMARAJANAGAR'.lower().split())

'senior_civil_judge_and_cjm_court,_chamarajanagar'

In [78]:
def generate_filename(folder_path, ext):
    '''
    Build the name of the CSV file that stores the combined data of a folder.

    The last component of folder_path is lower-cased and its
    whitespace-separated words are joined with underscores.

    Args:
        folder_path(str): The folder path from where data is being processed
        ext(str): Suffix appended to the name, e.g. cases or hearings

    Returns:
        str: filename of the form <folder name>_<ext>.csv
    '''
    last_component = folder_path.strip('/').rsplit('/', 1)[-1]
    words = last_component.lower().split()
    return '{}_{}.csv'.format('_'.join(words), ext)

In [80]:
# Example: filename generated for the hearings data of folder_path.
generate_filename(folder_path, 'hearings')

'senior_civil_judge_and_cjm_court,_chamarajanagar_hearings.csv'

In [29]:
# Derive the output name from generate_filename instead of a hand-typed copy,
# so the path always follows the naming scheme for folder_path.
combine_data(cases_files, 'data/' + generate_filename(folder_path, 'cases'))

True

In [32]:
# Derive the output name from generate_filename instead of a hand-typed copy,
# so the path always follows the naming scheme for folder_path.
combine_data(hearing_files, 'data/' + generate_filename(folder_path, 'hearings'))

True

In [45]:
# Load the combined cases and hearings CSV files into DataFrames.
case_data = pd.read_csv('data/senior_civil_judge_and_cjm_court,_chamarajanagar_cases.csv')
hearing_data = pd.read_csv('data/senior_civil_judge_and_cjm_court,_chamarajanagar_hearings.csv')

In [34]:
def filter_out_columns(data, max_nan_ratio=.5, drop_prefix='udef'):
    '''
    Keep only the columns that are worth analysing.

    A column is dropped when its name starts with drop_prefix or when the
    share of missing values in it is not below max_nan_ratio.

    Args:
        data(pandas.DataFrame): frame to filter
        max_nan_ratio(float, default=.5): a column is kept only if its ratio
            of missing values is strictly below this threshold
        drop_prefix(str, default=udef): columns whose name starts with this
            prefix are always dropped

    Returns:
        pandas.DataFrame: data restricted to the kept columns, in their
        original order
    '''
    # Ratio of missing values per column, computed for the whole frame at once.
    nan_ratios = data.isnull().mean()
    kept_columns = [
        col for col in data.columns
        # Non-string labels cannot carry the prefix, so they are never
        # excluded by name (and no longer crash on .startswith).
        if not (isinstance(col, str) and col.startswith(drop_prefix))
        and nan_ratios[col] < max_nan_ratio
    ]
    return data[kept_columns]

In [46]:
# (rows, columns) of the combined cases data.
case_data.shape

(4907, 85)

In [47]:
# (rows, columns) of the combined hearings data.
hearing_data.shape

(85154, 38)

In [84]:
# Scratch check of a conditional expression: a False condition yields 'hearings'.
'cases' if False else 'hearings'

'hearings'

In [48]:
# SQLAlchemy engine for the SQLite database file daksh_db.sqlite.
engine = sqlalchemy.create_engine('sqlite:///daksh_db.sqlite')

In [81]:
# Filter the columns of both frames, then store each result in its SQLite
# table (replacing any existing table) and in a processed CSV file.
processed_case_files = filter_out_columns(case_data)
processed_hearing_files = filter_out_columns(hearing_data)

outputs = (
    ('cases', processed_case_files,
     'data/senior_civil_judge_and_cjm_court,_chamarajanagar_cases_processed.csv'),
    ('hearing', processed_hearing_files,
     'data/senior_civil_judge_and_cjm_court,_chamarajanagar_hearings_processed.csv'),
)
for table_name, frame, csv_path in outputs:
    frame.to_sql(name=table_name, if_exists='replace', con=engine, chunksize=10000)
    frame.to_csv(csv_path)

In [82]:
# Read every row of the cases table back from the database.
pd.read_sql('select * from cases;', engine)

Unnamed: 0,index,Id,CombinedCaseNumber,CaseNumber,CaseType,Year,CourtName,CourtHallNumber,DateFiled,Petitioner,...,UnderActs,UnderSections,CourtState,CourtType,CourtDistrict,CourtComplex,FirstHearingDate,ParsingYear,Njdg_Judge_Name,Full_Identifier
0,0,9c83a4f3-2f80-4ded-82fe-d042c42513e6,EX - Execution Petition Under Order-0169-2002,169,EX - Execution Petition Under Order,2002,"SENIOR CIVIL JUDGE AND CJM COURT, CHAMARAJANAGAR",206,2002-06-29,1) PUTTAMALLAMMA ALAMMA DEAD BY LR,...,,,Karnataka,Court Establishment,Chamrajanagar,"SENIOR CIVIL JUDGE AND CJM COURT, CHAMARAJANAGAR",2011-10-29,2002,,Karnataka--Chamrajanagar--SENIOR CIVIL JUDGE A...
1,1,3be745f5-8d3e-4d12-a46c-9bb46b9f0745,EX - Execution Petition Under Order-0075-2002,75,EX - Execution Petition Under Order,2002,"SENIOR CIVIL JUDGE AND CJM COURT, CHAMARAJANAGAR",206,2002-07-02,1) DEAD BY LRS 1LAKSHMINARASIMHA2S RAVISHANKAR...,...,U/O 21 RULE 2 OF CPC,.,Karnataka,Court Establishment,Chamrajanagar,"SENIOR CIVIL JUDGE AND CJM COURT, CHAMARAJANAGAR",2012-01-10,2002,,Karnataka--Chamrajanagar--SENIOR CIVIL JUDGE A...
2,2,3589a09b-a233-4981-96c1-c4d4da678cfe,EX - Execution Petition Under Order-0368-2002,368,EX - Execution Petition Under Order,2002,"SENIOR CIVIL JUDGE AND CJM COURT, CHAMARAJANAGAR",206,2002-09-18,1) MAHADEVAPPA,...,U/O 21 RULE 2 OF CPC,.,Karnataka,Court Establishment,Chamrajanagar,"SENIOR CIVIL JUDGE AND CJM COURT, CHAMARAJANAGAR",2011-09-23,2002,,Karnataka--Chamrajanagar--SENIOR CIVIL JUDGE A...
3,3,dc958992-1312-4ce8-ba13-491245e6ecf1,EX - Execution Petition Under Order-0044-2002,44,EX - Execution Petition Under Order,2002,"SENIOR CIVIL JUDGE AND CJM COURT, CHAMARAJANAGAR",206,2002-06-29,1) DEAD BY LRS AGIDDAMMABMANGALAMMACMAHADEVA G...,...,,,Karnataka,Court Establishment,Chamrajanagar,"SENIOR CIVIL JUDGE AND CJM COURT, CHAMARAJANAGAR",2011-10-14,2002,,Karnataka--Chamrajanagar--SENIOR CIVIL JUDGE A...
4,4,9a0ef0ef-50d4-4585-b0e1-fe71bb317495,EX - Execution Petition Under Order-0367-2002,367,EX - Execution Petition Under Order,2002,"SENIOR CIVIL JUDGE AND CJM COURT, CHAMARAJANAGAR",206,2002-09-18,1) SIDDA MALLAPPA,...,U/O 21 RULE 2 OF CPC,.,Karnataka,Court Establishment,Chamrajanagar,"SENIOR CIVIL JUDGE AND CJM COURT, CHAMARAJANAGAR",2012-02-28,2002,SENIOR CIVIL JUDGE AND CJM,Karnataka--Chamrajanagar--SENIOR CIVIL JUDGE A...
5,5,53386f82-acb9-4e85-9fcd-69d3e93212ec,EX - Execution Petition Under Order-0366-2002,366,EX - Execution Petition Under Order,2002,"SENIOR CIVIL JUDGE AND CJM COURT, CHAMARAJANAGAR",206,2002-09-18,1) BASAVAIAH,...,U/O 21 RULE 2 OF CPC,.,Karnataka,Court Establishment,Chamrajanagar,"SENIOR CIVIL JUDGE AND CJM COURT, CHAMARAJANAGAR",2012-01-16,2002,SENIOR CIVIL JUDGE AND CJM,Karnataka--Chamrajanagar--SENIOR CIVIL JUDGE A...
6,6,d2501b6c-5bdf-4e54-aa53-627b9f643fc5,EX - Execution Petition Under Order-0047-2002,47,EX - Execution Petition Under Order,2002,"SENIOR CIVIL JUDGE AND CJM COURT, CHAMARAJANAGAR",206,2002-07-02,1) VIJAYA BANK,...,,,Karnataka,Court Establishment,Chamrajanagar,"SENIOR CIVIL JUDGE AND CJM COURT, CHAMARAJANAGAR",2011-11-16,2002,SENIOR CIVIL JUDGE AND CJM,Karnataka--Chamrajanagar--SENIOR CIVIL JUDGE A...
7,7,241960ce-c10c-4e44-a706-356774bf2a31,EX - Execution Petition Under Order-0049-2002,49,EX - Execution Petition Under Order,2002,"SENIOR CIVIL JUDGE AND CJM COURT, CHAMARAJANAGAR",206,2002-06-29,1) GURU SIDDAPPA,...,U/O 21 RULE 2 OF CPC,.,Karnataka,Court Establishment,Chamrajanagar,"SENIOR CIVIL JUDGE AND CJM COURT, CHAMARAJANAGAR",2012-02-28,2002,,Karnataka--Chamrajanagar--SENIOR CIVIL JUDGE A...
8,8,3f0ae084-07a4-4451-8115-22757cb81172,EX - Execution Petition Under Order-0099-2002,99,EX - Execution Petition Under Order,2002,"SENIOR CIVIL JUDGE AND CJM COURT, CHAMARAJANAGAR",206,2002-07-19,1) PUTTASWAMAPPA,...,U/O 21 RULE 2 OF CPC,.,Karnataka,Court Establishment,Chamrajanagar,"SENIOR CIVIL JUDGE AND CJM COURT, CHAMARAJANAGAR",2002-10-26,2002,,Karnataka--Chamrajanagar--SENIOR CIVIL JUDGE A...
9,9,be3af713-c7ef-4d57-93b1-5c565d047201,EX - Execution Petition Under Order-0301-2002,301,EX - Execution Petition Under Order,2002,"SENIOR CIVIL JUDGE AND CJM COURT, CHAMARAJANAGAR",206,2002-07-24,1) MAHADEVAMMA,...,U/O 21 RULE 2 OF CPC,.,Karnataka,Court Establishment,Chamrajanagar,"SENIOR CIVIL JUDGE AND CJM COURT, CHAMARAJANAGAR",2011-11-26,2002,SENIOR CIVIL JUDGE AND CJM,Karnataka--Chamrajanagar--SENIOR CIVIL JUDGE A...
