### This notebook demonstrates how to extract the transcripts from the Supreme Court case `.json` files

In [19]:
import os
import json

def load_files_as_json(directory):
    """Load the JSON case files found directly inside ``directory``.

    Regular files whose names end in ``.js`` or ``.json`` are parsed with
    ``json.load`` and stored under the value of their top-level
    ``'caseName'`` field. Problem files are reported with ``print`` and
    skipped rather than aborting the whole load:

    * files that are not valid JSON,
    * documents that are not JSON objects or have no ``'caseName'`` field,
    * documents whose case name was already loaded from an earlier file.

    Args:
        directory: Path of the folder to scan (sub-folders are not visited).

    Returns:
        A dict mapping each case name to its parsed JSON document.
    """
    file_contents = {}
    # Sorted so that, when two files share a case name, the one that is kept
    # does not depend on the arbitrary order os.listdir() returns.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(('.js', '.json')):
            continue
        file_path = os.path.join(directory, filename)
        # A sub-directory can carry a matching suffix too; open() would fail.
        if not os.path.isfile(file_path):
            continue
        with open(file_path, 'r', encoding='utf-8') as file:
            try:
                data = json.load(file)
            except json.JSONDecodeError:
                print(f"Error decoding JSON from file: {filename}")
                continue
        # Valid JSON is not necessarily an object carrying a case name.
        if not isinstance(data, dict) or 'caseName' not in data:
            print(f"Skipping file without a 'caseName' field: {filename}")
            continue
        casename = data['caseName']
        if casename in file_contents:
            print(f"Warning! Duplicate case names in {directory}")
            continue
        file_contents[casename] = data
    return file_contents

# Folder handed to the loader; every case file in it is read up front and
# kept in files_data, keyed by case name.
case_directory = "./data/lawyer/2018/"
files_data = load_files_as_json(directory=case_directory)

In [30]:
def get_lines(case_transcripts_dict: dict, verbose=False, all_text=False):
    """Flatten the first transcript of a case into one ``"speaker: text"`` string.

    Only the first entry of ``case_transcripts_dict['caseTranscripts']`` is
    read. Each element of its ``'transcript'`` list contributes one
    ``"<speakerName>: <text>"`` piece, and the pieces are joined with single
    spaces.

    Args:
        case_transcripts_dict: Parsed case document. A missing, ``None`` or
            empty ``'caseTranscripts'`` entry yields an empty string.
        verbose: When true, also print each piece as it is produced.
        all_text: When false (the default), only the first element of a
            turn's ``'textObjs'`` list is used. When true, the ``'text'`` of
            every element is included, joined with single spaces.

    Returns:
        The joined transcript string, or ``''`` when there is no transcript.
    """
    transcripts = case_transcripts_dict.get('caseTranscripts') or []
    if not transcripts:
        return ''

    lines = []
    for turn in transcripts[0]['transcript']:
        text_objs = turn['textObjs']
        # A turn without any text objects has nothing to contribute; skip it
        # instead of failing on text_objs[0].
        if not text_objs:
            continue
        if all_text:
            speaker_text = ' '.join(obj['text'] for obj in text_objs)
        else:
            speaker_text = text_objs[0]['text']
        line = turn['speakerName'] + ": " + speaker_text
        lines.append(line)
        if verbose:
            print(line)
    return ' '.join(lines)

# Build the flattened transcript for every loaded case. case_transcript is
# rebound on each pass, so only the last case's transcript remains afterwards.
for case_name, case_dict in files_data.items():
    case_transcript = get_lines(case_dict, verbose=False)