In [4]:
import json
import pandas as pd

# Load the CUAD JSON file
with open('../data/raw/CUADv1.json', 'r', encoding='utf-8') as f:
    cuad = json.load(f)

# Check what keys exist at the top
print("Top-level keys:", cuad.keys())

# Access the 'data' list
cuad_data = cuad['data']
print(f"\nTotal contracts: {len(cuad_data)}")

# View the structure of the first contract
first = cuad_data[0]
print("\nFirst contract title:", first['title'])
print("First paragraph text (preview):", first['paragraphs'][0]['context'][:300])


Top-level keys: dict_keys(['version', 'data'])

Total contracts: 510

First contract title: LIMEENERGYCO_09_09_1999-EX-10-DISTRIBUTOR AGREEMENT
First paragraph text (preview): EXHIBIT 10.6

                              DISTRIBUTOR AGREEMENT

         THIS  DISTRIBUTOR  AGREEMENT (the  "Agreement")  is made by and between Electric City Corp.,  a Delaware  corporation  ("Company")  and Electric City of Illinois LLC ("Distributor") this 7th day of September, 1999.

        


In [5]:
# Collect contract text and associated questions/answers
records = []

for contract in cuad_data:
    for para in contract['paragraphs']:
        context = para['context']
        for qa in para['qas']:
            question = qa['question']
            is_impossible = qa['is_impossible']
            answers = qa['answers'] if not is_impossible else []
            answer_texts = [a['text'] for a in answers]

            records.append({
                "contract_text": context,
                "question": question,
                "answers": answer_texts,
                "is_impossible": is_impossible
            })

# Create a DataFrame
df = pd.DataFrame(records)
df.head(5)


Unnamed: 0,contract_text,question,answers,is_impossible
0,EXHIBIT 10.6\n\n ...,Highlight the parts (if any) of this contract ...,[DISTRIBUTOR AGREEMENT],False
1,EXHIBIT 10.6\n\n ...,Highlight the parts (if any) of this contract ...,"[Distributor, Electric City Corp., Electric Ci...",False
2,EXHIBIT 10.6\n\n ...,Highlight the parts (if any) of this contract ...,"[7th day of September, 1999.]",False
3,EXHIBIT 10.6\n\n ...,Highlight the parts (if any) of this contract ...,[The term of this Agreement shall be ten (10...,False
4,EXHIBIT 10.6\n\n ...,Highlight the parts (if any) of this contract ...,[The term of this Agreement shall be ten (10...,False


In [6]:
df['question'].value_counts()


question
Highlight the parts (if any) of this contract related to "Document Name" that should be reviewed by a lawyer. Details: The name of the contract                                                                                                                                                                                                                                                                                                                                                                                           510
Highlight the parts (if any) of this contract related to "Minimum Commitment" that should be reviewed by a lawyer. Details: Is there a minimum order size or minimum amount or units per-time period that one party must buy from the counterparty under the contract?                                                                                                                                                                                                           

In [7]:
gov_law_df = df[df['question'] == 'What is the Governing Law?'].copy()

# Show a few examples
gov_law_df[['contract_text', 'answers']].head(3)


Unnamed: 0,contract_text,answers
