In [6]:
import json
from collections import Counter

docs_metadata = json.load(open('../data/DocumentCloud/documents.json'))

STOP_WORDS = ['Detroit', 'Cleveland', 'Akron', 'Ohio', 'Allegheny', 'Cuyahoga', 'Summit County', 'Wayne County', 'Michigan' ]
def chicago_filter(doc_json):
    for word in STOP_WORDS:
        if word.lower() in doc_json['source'].lower():
            return False
    return True
    
filtered_docs = filter(chicago_filter, docs_metadata)
chicago_docs = [x for x in filtered_docs]

TEXT_DATABASE_PATH = "../data/DocumentCloud/text/"

In [7]:
def get_docs_for_source(source, title_keyword="", only_title=False):
    filter_func = lambda x:  source.lower() in x['source'].lower()
    if title_keyword:
        filter_func = lambda x:  source.lower() in x['source'].lower() and title_keyword in x['title'].lower()
    source_filter = filter(filter_func, chicago_docs)
    if not only_title:
        source_docs = [x for x in source_filter]
    else:
        source_docs = [x['title'] for x in source_filter]
    print("Total documents for " +  str(source) + ": " + str(len(source_docs)))
    return source_docs

In [8]:
school_docs = get_docs_for_source("Chicago Public Schools", title_keyword="agenda")

Total documents for Chicago Public Schools: 30


In [9]:
agenda_docs = [doc['title'] for doc in school_docs]
agenda_docs

['Board of Education 2019-12-11 - Agenda For The Board Of Education',
 'Board of Education 2019-11-20 - Agenda For The Board Of Education',
 'Board of Education 2019-03-27 - Agenda For The Board Of Education',
 'Board of Education 2019-09-25 - Agenda For The Board Of Education',
 'Special board meeting 2019-09-18 - Agenda For The Special Board Meeting Of The Board Of Education',
 'Board of Education 2019-07-24 - Agenda For The Board Of Education Meeting',
 'Board of Education 2019-08-28 - Agenda For The Board Of Education',
 'Finance and Audit Committee 2019-07-24 - Agenda For The Chicago Board Of Education Finance And Audit Committee Meeting',
 'Board of Education 2019-04-24 - Agenda For The Board Of Education',
 'Board of Education 2019-06-26 - Agenda For The Board Of Education',
 'Board of Education 2019-05-22 - Agenda For The Board Of Education',
 'Finance and Audit Committee 2019-04-24 - Agenda For The Chicago Board Of Education Finance And Audit Committee Meeting',
 'Board of Edu

In [10]:
import random

source_counts = {}

def search_source_text(phrase, source, title_keyword="", repeated=False):
    results = []
    matching_docs = []
    source_counts.clear()
    for doc in get_docs_for_source(source, title_keyword=title_keyword):
        r = search_doc_text(phrase, doc, repeated)
        if r != "No results found":
            results.append(r) 
            matching_docs.append(doc)
    print("%d documents had matches" % len(results))
    print(source_counts)
    i = 0
    for r in results:
        print("----------------------\n" + r)
        i += 1
        if i > 50:
            return matching_docs
    return matching_docs
            

def search_doc_text(phrases, doc, repeated):
    if isinstance(phrases, str):
        phrases = [phrases]
    fname = TEXT_DATABASE_PATH + doc['id'] + ".txt"
    with open(fname, "r") as f:
        searchlines = f.readlines()
    matches = []
    for i, line in enumerate(searchlines):
        if not repeated:
            for p in phrases:
                if p in line:
                    matches.append((i, "".join(searchlines[i:i+3])))
        else:
            if phrases[0] in line[:50]:
                all_match = True
                for l in searchlines[i+1:i+3]:
                    if phrases[0] not in l[:50]:
                        all_match = False
                if all_match:
                    matches.append((i, "".join(searchlines[i:i+5])))
    if len(matches) > 0:
        source = doc['source']
        if source in source_counts:
            source_counts[source] += 1
        else:
            source_counts[source] = 1;
        result = "Title: %s, Source: %s, Matches: %d \n %s \n" % (doc['title'], source, len(matches), doc['canonical_url'])
        if not repeated and False:
            for m in matches:
                result += "Line %d: \n %s \n, %s" % (m[0], m[1], doc['canonical_url'])
        else:
            for m in random.sample(matches, 1):
                result += "Line %d: \n %s \n, %s" % (m[0], m[1], doc['canonical_url'])
        return result
    return "No results found"
   
        


In [14]:
contract_docs = search_source_text("LEAP", "Chicago Public Schools")

Total documents for Chicago Public Schools: 295
11 documents had matches
{'Chicago Public Schools': 11}
----------------------
Title: Board of Education 2019-07-24 - Proceedings, Source: Chicago Public Schools, Matches: 3 
 https://www.documentcloud.org/documents/6433867-Board-of-Education-2019-07-24-Proceedings.html 
Line 3551: 
 Cooperative Educational Service Agency and LEAP Innovations will continue to provide two stages of
professional development to schools in order to provide entry points that align to a school's prior
experience in personalized learning, as describe below.
 
, https://www.documentcloud.org/documents/6433867-Board-of-Education-2019-07-24-Proceedings.html
----------------------
Title: Board of Education 2015-08-26 - Proceedings, Source: Chicago Public Schools, Matches: 3 
 https://www.documentcloud.org/documents/5666332-Board-of-Education-2015-08-26-Proceedings.html 
Line 7951: 
 15-0826-PR4 Authorize the First Renewal Agreement with LEAP Innovations for
Personal

In [48]:
search_source_text("PUBLIC COMMENT", "")


Total documents for : 19515
477 documents had matches
{'Chicago Board of Ethics': 3, 'Chicago Transit Authority': 72, "Municipal Employees' Annuity and Benefit Fund of Chicago": 87, 'Chicago Park District': 44, 'Illinois International Port District': 10, 'Chicago Infrastructure Trust': 21, 'Illinois Gaming Board': 103, "Chicago Firemen's Annuity and Benefit Fund": 4, 'Cook County Local Records Commission': 15, 'Illinois Investment Policy Board': 1, 'Illinois Lottery Control Board': 7, 'Illinois Attorney General - Public Access Bureau': 1, 'Illinois Labor Relations Board': 64, 'Cook County Government': 1, 'Illinois Department of Public Health': 2, 'Illinois Complete Count Commission': 8, 'Cook County Health and Hospitals System': 19, 'City Bureau': 12, 'Chicago Public Library': 1, 'Chicago Department of Planning and Development': 2}
----------------------
Title: Board of Directors 2019-10-19 - Minutes, Source: Chicago Board of Ethics, Matches: 1 
 https://www.documentcloud.org/documents

[{'id': '6572191-Board-of-Directors-2019-10-19-Minutes',
  'title': 'Board of Directors 2019-10-19 - Minutes',
  'access': 'public',
  'pages': 12,
  'description': None,
  'source': 'Chicago Board of Ethics',
  'created_at': 'Tue, 10 Dec 2019 12:33:48 +0000',
  'updated_at': 'Tue, 10 Dec 2019 12:34:04 +0000',
  'canonical_url': 'https://www.documentcloud.org/documents/6572191-Board-of-Directors-2019-10-19-Minutes.html',
  'language': 'eng',
  'file_hash': 'b07f1996d474725ac05693ffc0acf5a4f6dcd094',
  'display_language': 'eng',
  'resources': {'pdf': 'https://assets.documentcloud.org/documents/6572191/Board-of-Directors-2019-10-19-Minutes.pdf',
   'text': 'https://assets.documentcloud.org/documents/6572191/Board-of-Directors-2019-10-19-Minutes.txt',
   'thumbnail': 'https://assets.documentcloud.org/documents/6572191/pages/Board-of-Directors-2019-10-19-Minutes-p1-thumbnail.gif',
   'search': 'https://www.documentcloud.org/documents/6572191/search.json?q={query}',
   'print_annotations':

In [9]:
school_contract_docs = search_source_text(["GRANT RECOMMENDATION REPORT"], "Justice")

Total documents for Justice: 295
28 documents had matches
{'Illinois Criminal Justice Information Authority': 28}
----------------------
Title: Authority Budget Committee 2019-12-12 - Agenda & Materials, Source: Illinois Criminal Justice Information Authority, Matches: 6 
 https://www.documentcloud.org/documents/6569561-Authority-Budget-Committee-2019-12-12-Agenda.html 
Line 2006: 
 BUDGET COMMITTEE GRANT RECOMMENDATION REPORT
Program Name:

 
, https://www.documentcloud.org/documents/6569561-Authority-Budget-Committee-2019-12-12-Agenda.html
----------------------
Title: Authority Budget Committee 2019-08-15 - Supplmental, Source: Illinois Criminal Justice Information Authority, Matches: 1 
 https://www.documentcloud.org/documents/6269492-Authority-Budget-Committee-2019-08-15-Supplmental.html 
Line 52: 
 BUDGET COMMITTEE GRANT RECOMMENDATION REPORT
Program Name:

 
, https://www.documentcloud.org/documents/6269492-Authority-Budget-Committee-2019-08-15-Supplmental.html
-----------------

In [5]:
pattern_docs = search_source_text([": "], "Cook County Health and Hospitals", repeated=True)

Total documents for Cook County Health and Hospitals: 1744
344 documents had matches
{'Cook County Health and Hospitals System': 344}
----------------------
Title: Finance Committee 2019-11-15 - 11/15/19 Finance Committee Meeting Minutes, Source: Cook County Health and Hospitals System, Matches: 3 
 https://www.documentcloud.org/documents/6568790-Finance-Committee-2019-11-15-11-15-19-Finance.html 
Line 4272: 
 Average Days in Accounts Receivable: Total accounts receivable over average daily revenue
Discharged Not Finally Billed Days: Total charges of discharge not finally billed over average daily revenue
Claims Initial Denials Percentage: Percentage of claims denied initially compared to total claims submitted.
* Source HFMA Key Hospital Statistics and Ratio Margins – Posted 2014
** (Best Practice Target)-Moody’s report, August 2017 47.8 days
 
, https://www.documentcloud.org/documents/6568790-Finance-Committee-2019-11-15-11-15-19-Finance.html
----------------------
Title: Finance Com

In [7]:
import re

def get_legislation(doc):
    fname = TEXT_DATABASE_PATH + doc['id'] + ".txt"
    with open(fname, "r") as f:
        doc_text = f.read()
        
    matches = re.split('(^\d\d-\d\d\d\d$)', doc_text, flags=re.MULTILINE)
    print(len(matches))
    
    legislation = []
    for i in range(1, len(matches), 2):
        legislation.append((matches[i], matches[i+1]))
       
    print(legislation[0])
    print(len(legislation))
    return legislation

def get_contract_info(legislation):
    contract_info = {"Contract ID": legislation[0]}
    if "PROPOSED CONTRACT AMENDMENT" in legislation[1]:
        contract_info["type"] = "PROPOSED CONTRACT AMENDMENT"
    elif "PROPOSED CONTRACT" in legislation[1]:
        contract_info["type"] = "PROPOSED CONTRACT"
    else:
        return {}
    lines = legislation[1].splitlines()
    
    for l in lines:
        components = l.split(": ")
        if len(components) > 1:
            contract_info[components[0]] = ": ".join(components[1:])
    return contract_info

contracts = []
for d in contract_docs:
    for l in get_legislation(d):
        contract_info = get_contract_info(l)
        if contract_info:
            contracts.append(contract_info)
    
# get_contract_info(get_legislation(contract_docs[0])[2])

7
('19-6779', '\nCOMMITTEE MINUTES\n\nApproval of the minutes from the meeting of 10/22/2019\nA motion was made by Commissioner Deer,\n19-6779. The motion carried by the following vote:\n\nseconded\n\nby\n\nCommissioner\n\nSims,\n\nAyes:\n\nArroyo, Anaya, Britton, Deer, K. Morrison, S. Morrison, Silvestri and Sims (8)\n\nAbsent:\n\nMoore (1)\n\n')
3
973
('19-4912', '\nSponsored by: LARRY SUFFREDIN, Cook County Board of Commissioners\n2\n\n Board of Commissioners\n\nJournal of Proceedings\n\nSeptember 5, 2019\n\nPROPOSED ORDINANCE AMENDMENT\nNOTICE OF ADDITIONAL DUTIES OR COSTS\nBE IT ORDAINED, by the Cook County Board of Commissioners, that Chapter 2, ADMINISTRATION,\nARTICLE III, COUNTY BOARD, DIVISION 1, GENERALLY, SECTION 2-71 to 2-100 of the Cook\nCounty Code is hereby amended as Follows:\nSection 2-79. - Additional duties or cost; statement required.\nIf an ordinance, ordinance amendment, resolution, or motion will impose additional duties or cost to\nthe work of a County departme

IndexError: list index out of range

In [8]:
print(len(contracts))
print(contracts[0].keys())

298
dict_keys(['Contract ID', 'type', 'Presented by', 'Department(s)', 'Vendor', 'Request', 'Original Contract Period', 'Proposed Amendment Type', 'Proposed Contract Period', 'Total Current Contract Amount Authority', 'Original Approval (Board or Procurement)', 'Increase Requested', 'Previous Board Increase(s)', 'Previous Chief Procurement Officer Increase(s)', 'Previous Board Renewals', 'Previous Chief Procurement Officer Renewals', 'Previous Board Extension(s)', 'Previous Chief Procurement Officer Extension(s)', 'Potential Fiscal Impact', 'Accounts', 'Contract Number(s)', 'Summary'])


In [9]:
key_counts = {}
for c in contracts:
    for k in c.keys():
        if k in key_counts:
            key_counts[k] += 1
        else:
            key_counts[k] = 1

print(len(key_counts))

sorted_keys = [k[0] for k in sorted(key_counts.items(), key=lambda item: item[1], reverse=True)]

print(sorted_keys)

for k in keys:
    if len(k) > 50:
        print(k)

118
['Contract ID', 'type', 'Department(s)', 'Presented by', 'Request', 'Vendor', 'Contract Number(s)', 'Accounts', 'Good(s) or Service(s)', 'Summary', 'Original Contract Period', 'Total Current Contract Amount Authority', 'Potential Fiscal Impact', 'Original Approval (Board or Procurement)', 'This Increase Requested', 'Previous Board Increase(s) or Extension(s)', 'Proposed Contract Period Extension', 'Previous Chief Procurement Officer Increase(s) or Extension(s)', 'Contract period', 'Contract Value', 'Proposed Amendment Type', 'Proposed Contract Period', 'Increase Requested', 'Previous Chief Procurement Officer Increase(s)', 'Previous Board Renewals', 'Previous Board Extension(s)', 'Previous Chief Procurement Officer Renewals', 'Previous Board Increase(s)', 'Potential Fiscal Year Budget Impact', 'Previous Chief Procurement Officer Extension(s)', 'Previous Chief Procurement Officer Extentiosn(s)', 'District(s)', 'Estimated Fiscal Impact', 'Proposed Contract Extension Period', 'Section

NameError: name 'keys' is not defined

In [327]:
import csv

#keys = contracts[0].keys()
with open('contracts.csv', 'w') as output_file:
    dict_writer = csv.DictWriter(output_file, sorted_keys)
    dict_writer.writeheader()
    dict_writer.writerows(contracts)

In [270]:
# Section extraction
def get_caps_lines(doc):
    fname = TEXT_DATABASE_PATH + doc['id'] + ".txt"
    with open(fname, "r") as f:
        searchlines = f.readlines()
    headers = []
    for i, line in enumerate(searchlines):
        if line.isupper():
            headers.append(line.strip('\n').strip())
    return headers

def get_source_caps_lines(source):
    source_docs = get_docs_for_source(source)
    allheaders = []
    for doc in source_docs:
        allheaders += get_caps_lines(doc)
    c = Counter(allheaders)
    sorted(c)
    return c.most_common()

In [113]:
get_caps_lines(gaming_docs[1])
get_source_caps_lines("Chicago Teachers Pension Fund")

Total documents for Chicago Teachers Pension Fund: 33


[('N/A', 778),
 ('M', 354),
 ('EDUC GENERAL', 118),
 ('OF THE', 96),
 ('JUNE 20', 91),
 ('OCTOBER 17', 72),
 ('GLOBAL EQUITY', 60),
 ('SPEC SVCS SUP', 55),
 ('DOMESTIC EQUITY', 53),
 ('FIXED INCOME', 50),
 ('PROCEEDINGS', 48),
 ('ROLL CALL', 46),
 ('AMOUNT', 45),
 ('VENDOR', 44),
 ('DESCRIPTION', 44),
 ('REAL ESTATE', 40),
 ('PRIVATE EQUITY', 40),
 ('INFRASTRUCTURE', 40),
 ('ATTENDEES', 37),
 ('DFA ISCV', 27),
 ('WAMCO FICP', 26),
 ('AGENDA ITEMS', 26),
 ('CTPF (TO OFFSET OVRPYMT)', 25),
 ("PUBLIC SCHOOL TEACHERS' PENSION AND RETIREMENT FUND OF CHICAGO", 24),
 ('AGENDA', 24),
 ('AT&T', 22),
 ('MSCI EAFE', 20),
 ('MANAGER OF MANAGER COMPOSITE', 20),
 ('SUB TEACHER POSTAL', 18),
 ('FY 2019', 16),
 ('FY 2017', 16),
 ('FY 2018', 16),
 ('M9', 15),
 ('DISCUSSIONS', 15),
 ('M7', 13),
 ('CURIE METRO H S', 13),
 ('MAY 16', 13),
 ('PHYSICIANS, DISABILITY APPLICANTS', 12),
 ('DISCUSSIONS/ACTIONS', 12),
 ('REPORT OF THE COMMITTEE ON CLAIMS AND SERVICE CREDITS', 11),
 ('PUBLIC PARTICIPATION', 10),


In [10]:
# Section extraction
def get_caps_lines(doc):
    fname = TEXT_DATABASE_PATH + doc['id'] + ".txt"
    with open(fname, "r") as f:
        searchlines = f.readlines()
    headers = []
    for i, line in enumerate(searchlines):
        if line.isupper():
            headers.append(line.strip('\n').strip())
    return headers

def get_source_caps_lines(source):
    source_docs = get_docs_for_source(source)
    allheaders = []
    for doc in source_docs:
        allheaders += get_caps_lines(doc)
    c = Counter(allheaders)
    sorted(c)
    return c.most_common()

In [11]:
school_docs = get_docs_for_source("Chicago Public Schools", title_keyword="agenda")
boe_agendas = [doc for doc in school_docs if "Finance" not in doc['title'] and "Special" not in doc['title']]
len(boe_agendas)
all_agendas = set([d['title'] for d in boe_agendas])

Total documents for Chicago Public Schools: 30


In [12]:
school_contract_docs = search_source_text("TRANSFER OF FUNDS", "Chicago Public Schools", title_keyword="agenda")
with_transfer = set([d['title'] for d in school_contract_docs])

Total documents for Chicago Public Schools: 30
19 documents had matches
{'Chicago Public Schools': 19}
----------------------
Title: Board of Education 2019-12-11 - Agenda For The Board Of Education, Source: Chicago Public Schools, Matches: 1 
 https://www.documentcloud.org/documents/6572197-Board-of-Education-2019-12-11-Agenda-For-The.html 
Line 2145: 
 TRANSFER OF FUNDS
Various Units and Objects
THE CHIEF EXECUTIVE OFFICER RECOMMENDS THE FOLLOWING:
 
, https://www.documentcloud.org/documents/6572197-Board-of-Education-2019-12-11-Agenda-For-The.html
----------------------
Title: Board of Education 2019-11-20 - Agenda For The Board Of Education, Source: Chicago Public Schools, Matches: 2 
 https://www.documentcloud.org/documents/6553016-Board-of-Education-2019-11-20-Agenda-For-The.html 
Line 1648: 
 TRANSFER OF FUNDS
Various Units and Objects
THE CHIEF EXECUTIVE OFFICER RECOMMENDS THE FOLLOWING:
 
, https://www.documentcloud.org/documents/6553016-Board-of-Education-2019-11-20-Agenda-Fo

In [47]:
no_transfer = all_agendas - with_transfer

In [56]:
[d['canonical_url'] for d in boe_agendas if d['title'] in no_transfer]

['https://www.documentcloud.org/documents/6512035-Board-of-Education-2018-03-21-Agenda-For-The.html',
 'https://www.documentcloud.org/documents/6512034-Board-of-Education-2018-01-24-Agenda-For-The.html',
 'https://www.documentcloud.org/documents/6512031-Board-of-Education-2018-02-28-Agenda-For-The.html']

In [4]:
import re

ACTION_ID = "ACTION ID"

def extract_transfers(action_id, action_text):
    transfers = re.split('^([0-9]+)\.', action_text, flags=re.MULTILINE)
    
    transfer_list = []
    for i in range(1, len(transfers), 2):
        transfer_list.append((transfers[i], transfers[i+1]))
    
    print(transfer_list[-1])
    print(len(transfer_list))
    print(set([i for i in range(int(transfer_list[-1][0]))]) - set([int(x[0]) for x in transfer_list]))
    
    transfer_rows = []
    for t in transfer_list:
        transfer_info = {"Number": t[0], "Action": action_id}
        sections = t[1].split("\n\n")
        clean = [x for x in sections if x.strip() and x.strip() != action_id and not x.isnumeric()]
        # num = len([x for x in sections if x.strip() and x.strip() != action_id])
        #print(len(clean))
        for i, x in enumerate(clean):
            if i == 0:
                name = x.replace("\nRationale:", "")
                transfer_info["Name"] = name
            elif i == 1:
                transfer_info["Rationale"] = x
            elif i == 2:
                transfer_from = x
                split_x = x.split("Amount:")
                if len(split_x) > 1:
                    transfer_info["Amount"] = split_x[1]
                    transfer_from = split_x[0]
                for j, from_line in enumerate(transfer_from.split("\n")):
                    transfer_info["From" + str(j)] = from_line
            elif i == 3:
                transfer_to = x
                split_x = x.split("Amount:")
                if len(split_x) > 1:
                    transfer_info["Amount"] = split_x[1]
                    transfer_to = split_x[0]
                for j, to_line in enumerate(transfer_to.split("\n")):
                    transfer_info["To" + str(j)] = to_line
            elif i > 3:
                split_x = x.split("Amount:")
                if len(split_x) > 1:
                    transfer_info["Amount"] = split_x[1]
        transfer_rows.append(transfer_info)
    return transfer_rows
    

def extract_actions(doc):
    fname = TEXT_DATABASE_PATH + doc['id'] + ".txt"
    with open(fname, "r") as f:
        doc_text = f.read()
        
    actions = re.split('(^\d\d-\d\d\d\d-[a-zA-Z][a-zA-Z]\d$)', doc_text, flags=re.MULTILINE)
    action_list = []
    for i in range(1, len(actions), 2):
        action_list.append((actions[i], actions[i+1]))
    
    transfer_actions = set()
    for a in action_list:
        if "TRANSFER OF FUNDS" in a[1]:
            transfer_actions.add(a[0])
    
    transfers = []
    for action_id in transfer_actions:
        action_text = ""
        for a in action_list:
            if a[0] == action_id:
                action_text += a[1]
        transfers += extract_transfers(action_id, action_text)
    return transfers
        
            
        
    

In [5]:
transfers = extract_actions(school_contract_docs[0])

NameError: name 'school_contract_docs' is not defined

In [200]:
key_counts = {}
for c in transfers:
    for k in c.keys():
        if k in key_counts:
            key_counts[k] += 1
        else:
            key_counts[k] = 1
print(key_counts)

{'Number': 1026, 'Action': 1026, 'Name': 1026, 'Rationale': 1026, 'From0': 1026, 'From1': 1017, 'From2': 1016, 'From3': 1016, 'From4': 1016, 'From5': 1016, 'To0': 1026, 'To1': 1024, 'To2': 1012, 'To3': 1012, 'To4': 1009, 'To5': 1009, 'Amount': 1024, 'From6': 147, 'To6': 55, 'From7': 22, 'From8': 4}


In [201]:
import csv

keys = key_counts.keys()
with open('transfers.csv', 'w') as output_file:
    dict_writer = csv.DictWriter(output_file, keys)
    dict_writer.writeheader()
    dict_writer.writerows(transfers)

In [196]:
print(transfers[0])
i = 0
for t in transfers:
    if len(t) > 5:
        print(t)

{'Number': '1', 'Action': '19-1211-EX1', 'Name': 'Transfer from Early College and Career - City Wide to Early College and Career - City Wide', 'Rationale': 'Transfer funds for transportation', 'From0': 'Transfer From:', 'From1': '13727 Early College and Career - City Wide', 'From2': '369 Title I - School Improvement Carl Perkins', 'From3': '57915 Miscellaneous - Contingent Projects', 'From4': '119035 Other Instruction Purposes - Miscellaneous', 'From5': '474567 Special Student Needs-C. Perkins Fy20', 'To0': 'Transfer To:', 'To1': '13727 Early College and Career - City Wide', 'To2': '369 Title I - School Improvement Carl Perkins', 'To3': '54210 Pupil Transportation', 'To4': '141501 Cte - Project Lead The Way', 'To5': '474567 Special Student Needs-C. Perkins Fy20', 'Amount': ' $1,000'}
{'Number': '1', 'Action': '19-1211-EX1', 'Name': 'Transfer from Early College and Career - City Wide to Early College and Career - City Wide', 'Rationale': 'Transfer funds for transportation', 'From0': 'Tr

In [132]:
for d in school_contract_docs[:1]:
    print(d['title'])
    print(d['canonical_url'])
    actions = extract_actions(d)
    #print([a for a in actions if len(a) < 12])

Board of Education 2019-12-11 - Agenda For The Board Of Education
https://www.documentcloud.org/documents/6572197-Board-of-Education-2019-12-11-Agenda-For-The.html
[('1', '\n\nTransfer from Early College and Career - City Wide to Early College and Career - City Wide\nRationale:\n\nTransfer funds for transportation\n\nTransfer From:\n13727 Early College and Career - City Wide\n369 Title I - School Improvement Carl Perkins\n57915 Miscellaneous - Contingent Projects\n119035 Other Instruction Purposes - Miscellaneous\n474567 Special Student Needs-C. Perkins Fy20\n\nTransfer To:\n13727 Early College and Career - City Wide\n369 Title I - School Improvement Carl Perkins\n54210 Pupil Transportation\n141501 Cte - Project Lead The Way\n474567 Special Student Needs-C. Perkins Fy20\n\nAmount: $1,000\n\n'), ('2', '\n\nTransfer from Diverse Learner Service Delivery to Diverse Learner Service Delivery\nRationale:\n\nSupplies for DRS processing of requests, supplies for professional development sessio

In [46]:
contract_docs = search_source_text("capital improvement", "Chicago Public Schools", title_keyword="agenda")
with_contract = set([d['title'] for d in contract_docs])
with_contract

Total documents for Chicago Public Schools: 30
1 documents had matches
{'Chicago Public Schools': 1}
----------------------
Title: Board of Education 2018-06-27 - Agenda For The Board Of Education, Source: Chicago Public Schools, Matches: 2 
 https://www.documentcloud.org/documents/6512032-Board-of-Education-2018-06-27-Agenda-For-The.html 
Line 38334: 
 with the City of Chicago to support capital improvement projects.

COMPENSATION.
 
, https://www.documentcloud.org/documents/6512032-Board-of-Education-2018-06-27-Agenda-For-The.html


{'Board of Education 2018-06-27 - Agenda For The Board Of Education'}

In [34]:
def extract_agreements(doc):
    fname = TEXT_DATABASE_PATH + doc['id'] + ".txt"
    with open(fname, "r") as f:
        doc_text = f.read()
        
    actions = re.split('(^\d\d-\d\d\d\d-[a-zA-Z][a-zA-Z]\d$)', doc_text, flags=re.MULTILINE)
    action_list = []
    for i in range(1, len(actions), 2):
        action_list.append((actions[i], actions[i+1]))
        
    agreement_actions = set()
    for a in action_list:
        if "AGREEMENT" in a[1]:
            agreement_actions.add(a[0])
    
    agreement = []
    for action_id in transfer_actions:
        action_text = ""
        for a in action_list:
            if a[0] == action_id:
                action_text += a[1]
        transfers += extract_transfers(action_id, action_text)
    return transfers

In [35]:
agreements = extract_agreements(contract_docs[0])

{'19-1120-PR1', '19-1120-EX2', '19-1120-PR5', '19-1120-PR9', '19-1120-ED1', '19-1120-PR2', '19-1120-PR8', '19-1120-PR3'}
