In [1]:
import os, re
import numpy as np
import pandas as pd

In [2]:
# Gridley-specific header info
all_section_headers = [
            'CALL TO ORDER',
            'ROLL CALL',
            'PLEDGE OF ALLEGIANCE',
            'INVOCATION',
            'PROCLAMATIONS',
            'INTRODUCTION OF NEW OR PROMOTED EMPLOYEES',
            'COMMUNITY PARTICIPATION FORUM',
            'ANNOUNCEMENT OF NEW EMPLOYEES AND PROMOTIONS',
            'CONSENT AGENDA',
            'PUBLIC HEARING',
            'ITEMS FOR COUNCIL CONSIDERATION',
            'CITY STAFF AND COUNCIL COMMITTEE REPORTS',
            'POTENTIAL FUTURE CITY COUNCIL ITEMS',
            'CLOSED SESSION',
            'ADJOURNMENT',
            'NOTE 1',
            'NOTE 2'
        ]
footer_start = 'GRIDLEY CITY COUNCIL AGENDA'

In [3]:
# get all Gridley agendas

data_dir =  '../data/docs/gridley/'
required_substrings = ['Gridley', 'Agenda', 'City-Council', '.txt']

matches_substrings = lambda s: all([ss in s for ss in required_substrings])
doc_list = [s for s in os.listdir(data_dir) if matches_substrings(s)]
doc_paths = [os.path.join(data_dir, doc) for doc in doc_list]

In [4]:
def parse_txt(doc):

    # the number of pages is the last character on the first page
    page_break_re = re.compile('\f')
    page_breaks = [m.start() for m in page_break_re.finditer(doc)]
    #num_pages = int(doc[:page_breaks[0]].replace('\n','').replace(' ', '')[-1])
    page_count_re = re.compile('Page\s[\d]\sof\s[\d]')
    n0, n1 = zip(*[[int(d) for d in s[5:].split(' of ')] for s in page_count_re.findall(doc)])
    num_pages = n0[np.where(np.array(n0)==np.array(n1))[0][0]]
    
    # trim extraneous pages and remove page footers
    agenda = doc[:page_breaks[num_pages-1]+1]
    agenda = re.sub('{}[^()]*\f'.format(footer_start), '', agenda)    
    
    # get section breaks in document
    all_section_starts = [agenda.find(h) for h in all_section_headers]
    section_heads, section_starts = zip(*[(k,t) for k, t in zip(all_section_headers, all_section_starts) if t>0])
    
    # sort sections
    sort_idx = np.argsort(section_starts)
    headers = list(np.array(section_heads)[sort_idx])
    breaks = list(np.array(section_starts)[sort_idx]) + [len(agenda)]    
    
    # get section ranges
    section_ranges = [('HEADER', 0, breaks[0])]
    section_ranges.extend([(headers[i], breaks[i], breaks[i+1]) for i in range(len(headers))])
    
    # store section
    sections = {head: agenda[start:end].replace(head,'') for head, start, end in section_ranges}
        
    return sections

In [5]:
docs = []
for path in doc_paths:
    with open(path, 'r') as f:
        doc = f.read()
    sections = parse_txt(doc)
    docs.append(sections)

In [8]:
for k in pd.DataFrame(docs)['CLOSED SESSION']:
    print(k)

 
 

4. 

Conference with Labor Negotiators pursuant to Government Code 54957.6 to meet 
and confer with International Brotherhood of Electrical Workers and Gridley Police 
Officers Association 


 - None 
 

 – None 
 

  
 

6. 

Conference with Legal Counsel concerning existing litigation pursuant to 
Government Code 54956.9 – Blackshire vs. City of Gridley, et al, Butte County 
Superior Court Case No. 19CV01639 


nan
 - None 
 

 – 
 

Discussion to fill Council Vacancy 
Approval of FY 17/18 Audit 
Fire Vehicle Purchase 

3/4/2019 
3/18/2019 
3/18/2019 

1. 

2. 

Government Code 54757 - Public Employee Performance Evaluation, Interim Police 
Chief 
 
*Government Code 54956.9 – Conference with Legal Counsel concerning existing 
litigation – The matter of Jose Jimenez, Claim No. NCWA-557377. 

 

nan
nan
 

9. 

Government Code 54956.9: Conference with Legal Counsel and Gridley City staff to 
discuss various claims presented against the City of Gridley. Claimants: Jaswinder 
Kaur; 