# LaTeX Automation for NYCHA Waste Individual Action Plans

This notebook contains code needed to create, populate, and write to LaTeX files numerous components of NYCHA's Individualized Waste-Management Action Plans (...or the same product under several similar titles). In addition, this notebook contains methods to compile two versions of the reports for each consolidation:
1. Complete, formatted reports for final presentation
2. Reports that have certain formatting elements removed for the purpose of presentation to development staff
The definition and implementation of these compilation methods, along with necessary imports and setup comands, is located at the head of the document.

To run these methods on your local machine, a few utilities and programs need to be installed, and their paths inserted at locations noted below. These utilities are:
1. pdflatex -- Compiles LaTeX source code into a PDF. Installed as part of most LaTeX distributions.
2. pdfjam -- Manipulates existing PDF files and pdflatex outputs. Often installed alongside LaTeX distributions, but may require custom installation.
3. ghostscript -- Compresses PDFs to reduce file size. Installed via Homebrew or MacPorts (though other methods may be available).


## Getting Started

First, run the __following four code blocks.__ Take special care to ensure that all required packages import correctly.

In [2]:
#System imports
import sys
import glob
import os
import subprocess
import shutil
import datetime


#Data manipulation imports
import pandas as pd
import numpy as np
import pandas as pd
import numpy as np
import math

#File and text manipulation imports
from PIL import Image
from pdf2image import convert_from_path
import re
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
from pylatexenc.latexencode import unicode_to_latex
from pylatexenc.latexencode import UnicodeToLatexEncoder

In [3]:
#Set Global Vars and Options
os.chdir('/Users/kyleslugg/Documents/NYCHA/Production')
pd.set_option('display.max_columns', None)
fha_tds_list =['226', '283', '212', '226', '283', '212', '213', '274', '275', '260', '273', '284', '209']

In [4]:
#Create Core Methods
def get_standardized_names(drop_fha=True):
    '''Reads multiple development and consolidation name lists, applies a fuzzy matching routine
    to resolve discrepancies in those lists, and corrects known errors of that matching process 
    discovered during plan generation. Returns dictionaries containing data associated with
    (1) consolidations and (2) developments, with TDS numbers as keys.
    
    Parameters:
        drop_fha (bool): Boolean indicating whether FHA properties should be dropped from the
        resulting dictionaries.
    
    Returns:
        tuple: a tuple "(consolidations, developments)" containing two dictionaries, which
        ontain data associated with (1) consolidations and (2) developments. Keys are TDS numbers. 
    '''
    
    databook_names = pd.read_csv('DATA/name_tables/dev_data_book_name.csv')
    databook_names['CONS_TDS'] = databook_names['CONS_TDS'].apply(lambda x: str(int(x)).zfill(3))
    databook_names['TDS'] = databook_names['TDS'].apply(lambda x: str(int(x)).zfill(3))
    staff_names = pd.read_csv('DATA/name_tables/staff_cons_name.csv')
    staff_names['RC_Name'] = staff_names['RC Name']
    
    if drop_fha:
        databook_names = databook_names.query(f'TDS not in {fha_tds_list}')
    
    consolidations = {}
    developments = {}

    for row in databook_names.itertuples():
        consolidations[row.CONS_TDS] = {'name':row.CONS_NAME, 'alternates':[row.MANAGED_BY]}
        developments[row.TDS] = {'name':row.DEV_NAME, 'name_alternates':[], 'cons_tds':row.CONS_TDS}
        
    def find_closest_fuzzy_match(name, comp_df, comp_col_name, return_col_name):
        #Matches names in the two namesets
        values = comp_df[comp_col_name].unique()
        comp_df_copy = pd.DataFrame(data=values, index=[i for i in range(0,len(values))], columns=[comp_col_name])

        '''
        def strip_name(x):
            string = str(x).lower()
            string = string.replace('consolidated','')
            string = string.replace('consolidation', '')
            string = string.replace('houses', '')
            return string

        comp_df_copy['partial_ratio'] = comp_df_copy[comp_col_name].apply(lambda x: fuzz.partial_ratio(strip_name(name), strip_name(x)))
        highest_match = comp_df_copy['partial_ratio'].max()

        matches = comp_df_copy.loc[comp_df_copy['partial_ratio']==highest_match, 'CONS_NAME']

        if matches.shape[0] == 1:
            return matches.iloc[0]
        else:
            print(matches)
            return 'ZZZ MULTIPLE MATCHES FOUND'

        '''
        return process.extractOne(str(name).lower(), values.tolist())[0]
    
    staff_names['NAME_MATCH'] = staff_names['RC Name'].apply(lambda x: find_closest_fuzzy_match(x, databook_names, 'CONS_NAME', 'CONS_NAME'))

    match_corrections = {'Justice Sonia Sotomayor  Consolidated': 'SOTOMAYOR HOUSES CONSOLIDATED',
                        'Murphy Consolidated': ''
                        }

    def make_corrections(row, index_col, data_col, dictionary):
        #Applies manual corrections, using a dictionary with the structure {Name in Dataset: Standardized Name}
        if str(row[index_col]).strip() in dictionary.keys():
            return dictionary[row[index_col]]
        else:
            return row[data_col]

    staff_names['AMENDED_MATCHES'] = staff_names.apply(lambda row: make_corrections(row, 'RC Name', 'NAME_MATCH', match_corrections), axis=1)

    staff_names = staff_names.merge(databook_names[['CONS_NAME', 'CONS_TDS']], left_on='AMENDED_MATCHES', right_on='CONS_NAME', how='left')

    for row in staff_names.itertuples():
        try:
            consolidations[row.CONS_TDS]['alternates'].append(row.RC_Name)
        except:
            print(f'TDS #{row.CONS_TDS} raised an exception.')
    
    #From vehicle data...
    consolidation_corrections = {'Brooklyn Borough Management':'N/A',
                            'LaGuardia Houses':'LA GUARDIA CONSOLIDATED',
                            'Hylan':'BUSHWICK CONSOLIDATED',
                            'Manhattan Property Management':'N/A',
                            'NYCHA - Brooklyn Property Mgmt':'N/A',
                            'Queens-Staten Island Borough Manag':'N/A',
                            'Webster-Morrisania Houses': 'WEBSTER CONSOLIDATED',
                            'NGO':'N/A',
                            'Millbrook Houses':'MILL BROOK CONSOLIDATED',
                            'Van Dyke Houses':'VAN DYKE I',
                            'UPACA':'JACKIE ROBINSON CONSOLIDATED',
                            'Department of Mixed Finance Asset':'N/A',
                            'Ocean Hill-Saratoga Village':'OCEAN HILL CONSOLIDATED',
                            'nan':'N/A',
                            'L.E.S. II/Campos':'LOWER EAST SIDE CONSOLIDATED',
                            'St. Marys Park/Moore': "SAINT MARY'S PARK CONSOLIDATED",
                            'Seth Low/Glenmore Plaza':'SETH LOW CONSOLIDATED',
                            'Woodson/Van Dyke II':'WOODSON',
                            'Beach 41st Street/Oceanside':'BEACH 41ST STREET-BEACH CHANNEL DRIVE',
                            'CONEY ISLAND' : 'SURFSIDE GARDENS CONSOLIDATED',
                            'BLAND' : 'LATIMER GARDENS CONSOLIDATED',
                            'GRAVESEND' : "O'DWYER GARDENS CONSOLIDATED",
                            'LES 2' : 'LOWER EAST SIDE CONSOLIDATED',
                            'OCEAN BAY' : 'BEACH 41ST STREET-BEACH CHANNEL DRIVE',
                            'OCEANBAY' : 'BEACH 41ST STREET-BEACH CHANNEL DRIVE',
                            "ST. MARY'S" : "SAINT MARY'S PARK CONSOLIDATED",
                            "ST. NICHOLAS" : 'SAINT NICHOLAS',
                            'UNION AVE. CON.' : 'UNION AVENUE CONSOLIDATED',
                            'WILLIAM REID' : 'REID APARTMENTS CONSOLIDATED',
                            'MURPHY CONSOLIDATED':'1010 EAST 178TH STREET',
                            'Murphy Consolidated':'1010 EAST 178TH STREET'}
    
    consolidations['NaN'] = {'name':'N/A',
                            'alternates':[]}
    for key, value in consolidation_corrections.items():
        for key_c, value_c in consolidations.items():
            if value_c['name'] == value: 
                try:
                    consolidations[key_c]['alternates'].append(key)
                except:
                    consolidations[key_c]['alternates'] = key
    
    for key, value in consolidations.items():
        for key_dev, value_dev in developments.items():
            if key == value_dev['cons_tds']:
                try:
                    value['developments'].append(key_dev)
                except:
                    value['developments'] = [key_dev]
    
    return(consolidations, developments)

#LOAD OVERVIEW DATA
def load_overview_data(drop_fha=True):
    '''Loads overview data, from the DATA folder.
    
    Parameters:
        drop_fha (bool): Determines whether FHA properties are dropped from the 
        relevant consolidations (in particular, Baisley Park -- TDS #091)
    
    Returns:
        dataFrame: A Pandas dataframe containing overview data on each development'''
    
    overview_data = pd.read_csv('DATA/overview_table_data.csv')
    overview_data['CONS_TDS'] = overview_data['CONS_TDS'].apply(lambda x: str(x).zfill(3))
    overview_data['TDS'] = overview_data['TDS'].apply(lambda x: str(x).zfill(3))
    if drop_fha:
        overview_data = overview_data.query(f'TDS not in {fha_tds_list}')
    
    return overview_data

#Character Substitutions for LaTeX -- set and define "clean" method
def clean_text(text):
    '''Replaces LaTeX-unfriendly characters in the provided block of text.
    
    Parameters:
        text (str): A block of text to be cleaned.
    
    Returns:
        str: A cleaned block of text ready to be inserted into a LaTeX document.'''
    
    substitutions = {'“':"``",
                '”': "''",
                '’':"'",
                ' ':' ',
                '–':'--',
                ' ':' ',
                '\xa0':' ',
                '&':r'\&',
                    ':':':',
                    '#':'\#'}
    
    for key, value in substitutions.items():
        text = text.replace(key, value)
        
    return text

In [5]:
#Generate list of consolidations and developments
consolidations, developments = get_standardized_names()

#Count developments per consolidation
counts ={}
for key, value in developments.items():
    if value['cons_tds'] not in counts.keys():
        counts[value['cons_tds']] = {'developments':[key],
                             'count':1}
    else:
        counts[value['cons_tds']]['developments'].append(key)
        counts[value['cons_tds']]['count']+=1
        

count_list = [value['count'] for key, value in counts.items()]
high_count_cons = [key for key, value in counts.items() if value['count']>=8]

#Load core data and create authoritative list of consolidations
overview_data = load_overview_data()
cons_list = overview_data['CONS_TDS'].unique().tolist()

TDS #nan raised an exception.


### Generate reports using existing files
Use the following methods to generate Individual Action Plans for some or all consolidations _without altering any report components generated using the methods below_. __First, run the following code block to define the compliation method.__

Please note that reports will not compile if components generated below in this notebook have not been generated at least once. If that is the case, please run all blocks in the following sections before generating the IAPs.



In [None]:
def compile_latex_file(tds, counts=counts, spreadlist=None, no_sections=False):
    '''When provided a TDS number and related information, generates the appropriate IAP as a PDF.
    
    Parameters:
        tds (str): A three-character string of the consolidation TDS number for which a report is to be generated.
        
        counts (dict): A dictionary containing the number of developments in each consolidation. Defaults to counts.
        
        spreadlist (list): A list on consolidation TDS numbers for which reports are to be generated
            as two-page spreads. Defaults to None.
        
        no_sections (bool): If True, reports are generated without section divider pages and the 
            "Letter from Chair" placeholder.
    
    Returns:
        None'''
    #SET UTILITY PATHS HERE
    pdflatex_path = '/usr/local/texlive/2018/bin/x86_64-darwin/pdflatex'
    ghostscript_path = '/usr/local/bin/gs'
    pdfjam_path = '/usr/local/texlive/2018/texmf-dist/scripts/pdfjam/pdfjam'
    if no_sections == False:
        if counts[tds]['count'] <= 4:
            with open('REPORT_TEMPLATE/report.tex', 'r') as file_handle:
                text = file_handle.read()

            new_text = text.replace('$tds_number$', str(tds))

            with open(f'REPORTS/LaTeX/{tds}_report.tex', 'w') as outfile:
                outfile.write(new_text)

        else:
            with open('REPORT_TEMPLATE/report_long.tex', 'r') as file_handle:
                text = file_handle.read()

            new_text = text.replace('$tds_number$', str(tds))

            with open(f'REPORTS/LaTeX/{tds}_report.tex', 'w') as outfile:
                outfile.write(new_text)

        subprocess.check_call([pdflatex_path, '-output-directory', 'REPORTS/LaTeX', f'REPORTS/LaTeX/{tds}_report.tex'])
        subprocess.check_call([pdflatex_path, '-output-directory', 'REPORTS/LaTeX', f'REPORTS/LaTeX/{tds}_report.tex'])

        #Be sure to install ghostscript (to compress pdfs), or comment out next line. Available via homebrew.
        subprocess.check_call([ghostscript_path, '-sDEVICE=pdfwrite', '-dCompatibilityLevel=1.5', '-dNOPAUSE', '-dQUIET', '-dBATCH', f'-sOutputFile=REPORTS/{tds}_report.pdf', f'REPORTS/LaTeX/{tds}_report.pdf'])
        #if tds in spreadlist:
            #handle = subprocess.Popen([pdfjam_path, '--nup 2x1', "--openright 'true'", f'REPORTS/{tds}_report.pdf', f'--outfile REPORTS/{tds}_spread.pdf', '--landscape', '--no-tidy'], stdout=subprocess.PIPE,stderr=subprocess.PIPE) 
            #err = handle.communicate() 
            #print(err)
            #subprocess.check_call([pdfjam_path, '--nup 2x1', "--openright 'true'", "--frame 'true'", f'REPORTS/{tds}_report.pdf', f'--outfile {tds}_spread.pdf', '--landscape'])
    else:
        if counts[tds]['count'] <= 4:
            with open('REPORT_TEMPLATE/report_nosec.tex', 'r') as file_handle:
                text = file_handle.read()

            new_text = text.replace('$tds_number$', str(tds))

            with open(f'REPORTS/NO_SECTIONS/LaTeX/{tds}_report.tex', 'w') as outfile:
                outfile.write(new_text)

        else:
            with open('REPORT_TEMPLATE/report_long_nosec.tex', 'r') as file_handle:
                text = file_handle.read()

            new_text = text.replace('$tds_number$', str(tds))

            with open(f'REPORTS/NO_SECTIONS/LaTeX/{tds}_report.tex', 'w') as outfile:
                outfile.write(new_text)

        subprocess.check_call([pdflatex_path, '-output-directory', 'REPORTS/NO_SECTIONS/LaTeX', f'REPORTS/NO_SECTIONS/LaTeX/{tds}_report.tex'])
        subprocess.check_call([pdflatex_path, '-output-directory', 'REPORTS/NO_SECTIONS/LaTeX', f'REPORTS/NO_SECTIONS/LaTeX/{tds}_report.tex'])

        #Be sure to install ghostscript (to compress pdfs), or comment out next line. Available via homebrew.
        subprocess.check_call([ghostscript_path, '-sDEVICE=pdfwrite', '-dCompatibilityLevel=1.5', '-dNOPAUSE', '-dQUIET', '-dBATCH', f'-sOutputFile=REPORTS/NO_SECTIONS/{tds}_report.pdf', f'REPORTS/NO_SECTIONS/LaTeX/{tds}_report.pdf'])
        #if tds in spreadlist:
            #handle = subprocess.Popen([pdfjam_path, '--nup 2x1', "--openright 'true'", f'REPORTS/{tds}_report.pdf', f'--outfile REPORTS/{tds}_spread.pdf', '--landscape', '--no-tidy'], stdout=subprocess.PIPE,stderr=subprocess.PIPE) 
            #err = handle.communicate() 
            #print(err)


    pass

#### Full Reports

By default, reports will be generated for all consolidations. To generate reports only for select consolidation, un-comment the line "consolidation_list = [...]", and insert the desired consolidation TDS numbers into this list as strings (e.g., ['073', '056', '111']).

In [None]:
#Create necessary directories and copy working template to use in compilation
if not os.path.exists('REPORTS'):
    os.makedirs('REPORTS')

if not os.path.exists('REPORTS/LaTeX'):
    os.makedirs('REPORTS/LaTeX')

os.system("cp REPORT_TEMPLATE/content.tex REPORTS/LaTeX")
os.system("cp REPORT_TEMPLATE/preface.tex REPORTS/LaTeX")
os.system("cp REPORT_TEMPLATE/content_long.tex REPORTS/LaTeX")

#Set list of consolidations for which reports should be compiled. 
#Recall that consolidations.keys() contains ALL CONSOLIDATIONS

consolidation_list = consolidations.keys()
#consolidation_list = ['073','127','067','003']


error_list = []
for tds in consolidation_list:
    try:
        compile_latex_file(tds, spreadlist=consolidation_list)
    except:
        print(f'{tds} raised exception: {sys.exc_info()[0]}')
        error_list.append(tds)

filelist = [f for f in os.listdir('REPORTS/LaTeX') if not f.endswith(".tex")]
save_files = [f'{tds}_report.log' for tds in error_list]

for f in filelist:
    if f not in save_files:
        os.remove(os.path.join('REPORTS/LaTeX', f))


#### Short Reports (for site visits)

Generate reports without section headings and unfinished pages using the following cell. Consolidation for which reports are to be generated are defined as above.

In [None]:
if not os.path.exists('REPORTS/NO_SECTIONS'):
    os.makedirs('REPORTS/NO_SECTIONS')

if not os.path.exists('REPORTS/NO_SECTIONS/LaTeX'):
    os.makedirs('REPORTS/NO_SECTIONS/LaTeX')

os.system("cp REPORT_TEMPLATE/content_nosec.tex REPORTS/NO_SECTIONS/LaTeX")
os.system("cp REPORT_TEMPLATE/preface.tex REPORTS/NO_SECTIONS/LaTeX")
os.system("cp REPORT_TEMPLATE/content_long_nosec.tex REPORTS/NO_SECTIONS/LaTeX")


consolidation_list = consolidations.keys()
#consolidation_list = ['024', '028']
error_list = []
for tds in consolidation_list:
    try:
        compile_latex_file(tds, spreadlist=consolidation_list, no_sections=True)
    except:
        print(f'{tds} raised exception: {sys.exc_info()[0]}')
        error_list.append(tds)

filelist = [f for f in os.listdir('REPORTS/NO_SECTIONS/LaTeX') if not f.endswith(".tex")]
save_files = [f'{tds}_report.log' for tds in error_list]

for f in filelist:
    if f not in save_files:
        os.remove(os.path.join('REPORTS/NO_SECTIONS/LaTeX', f))

## Generating Report Text, Tables, and Graphics

The following methods generate various components of the individual action plans, either by converting source documents into a LaTeX-friendly form or by loading and analyzing data in CSV files to create tables. These methods are sorted into several section, depending on their function.

### Parsing and Processing Text Blocks
These methods read and process Microsoft Word files such that they can be included in the reports' LaTeX source. 
__Please run the following block prior to running any particular conversion method.__

In [10]:
#Lightly modified version of example found at http://etienned.github.io/posts/extract-text-from-word-docx-simply/

try:
    from xml.etree.cElementTree import XML
except ImportError:
    from xml.etree.ElementTree import XML
import zipfile


"""
Module that extract text from MS XML Word document (.docx).
(Inspired by python-docx <https://github.com/mikemaccana/python-docx>)
"""

WORD_NAMESPACE = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'
PARA = WORD_NAMESPACE + 'p'
TEXT = WORD_NAMESPACE + 't'


def get_docx_text(path):
    """
    Take the path of a docx file as argument, return the text in unicode.
    """
    document = zipfile.ZipFile(path)
    try:
        xml_content = document.read('word/document.xml')
    except:
        xml_content = document.read('word/document2.xml')
        
    document.close()
    tree = XML(xml_content)

    paragraphs = []
    for paragraph in tree.getiterator(PARA):
        texts = [node.text
                 for node in paragraph.getiterator(TEXT)
                 if node.text]
        if texts:
            paragraphs.append(''.join(texts))

    return '\n\n'.join(paragraphs)

#### Preface -- What is an IAP?

The following two cells 1) define the method used to process text for the IAP preface and 2) run that method for each consolidation.

In [11]:
def load_preface_data():
    
    about_text = clean_text(get_docx_text('TEXT/preface_text/about_IAPs.docx'))
    staff_names = pd.read_excel('DATA/Dev_Staff_Names.xlsx')
    candidate_list = []

    for key, value in consolidations.items():
        candidate_list.append(str(value['name']).upper())
        for item in value['alternates']:
            candidate_list.append(item.upper())

    def get_cons_name(name):
        match = process.extractOne(str(name).upper(), candidate_list)[0]
        #print(match)
        for key, value in consolidations.items():
            if (match.upper() == value['name'].upper()) or (match.upper() in [val.upper() for val in value['alternates']]):
                return value['name']

        return '!!!NOT FOUND'

    def get_tds_from_name(x):
        for key, value in consolidations.items():
            if str(value['name']).upper().strip() == x.upper().strip():
                return key

        return 'N/A'

    staff_names['CONS_MATCH'] = staff_names['CONS'].apply(lambda x: get_cons_name(x))
    staff_names['CONS_TDS'] = staff_names['CONS_MATCH'].apply(lambda x: get_tds_from_name(x))
    
    return (staff_names, about_text)


def make_preface_text(tds, preface_data, about_text):
    cons_data = preface_data[preface_data['CONS_TDS'] == tds]
    
    latex_block = r'''\chapter{\textcolor{darkBlue}{Preface}}

    \section{Letter from the Chair}\label{sec:Section1}
    \clearpage
    {\fontfamily{phv}\selectfont
    \section{What is an Individual Action Plan?}'''+'\n\n'+about_text
    
    #Following is no longer included:
    '''
    Below is a list of %s Management Personnel as of August 2020:
    \begin{itemize}
    \item Operations VP: %s
    \item %s Borough Director: %s
    \item Regional Asset Manager: %s
    \item Property Manager: %s
    \item Superintendent: %s
    \end{itemize}
    }'''
    
    '''
    data = [cons name, ops vp, borough name, borough dir, RAM, PM, super]
    preface_data = []
    
    preface_data.append(str(cons_data['CONS_MATCH'].iloc[0]).title())
    
    for col in ['OPS_VP', 'BORO', 'BORO_DIR', 'RAM', 'PM', 'PMS']:
        preface_data.append(cons_data[col].iloc[0])
    '''
    
    '''
    with open(f'TEXT/preface_text/{tds}_preface.tex', 'w') as file_handle:
        file_handle.write(clean_text(latex_block % tuple(preface_data)))
    '''
    with open(f'TEXT/preface_text/{tds}_preface.tex', 'w') as file_handle:
        file_handle.write(clean_text(latex_block))


In [13]:
preface_data = load_preface_data()
for tds in consolidations.keys():
    try:
        make_preface_text(tds, *preface_data)
    except:
        print(f'{tds} raised an exception')

#### Overview Text

In [94]:
def make_overview_text(cons_tds):
    '''Loads and cleans Overview text (found in ...[cons_tds]_Overview.docx) for each consolidation.
    
    Parameters:
        cons_tds (str): The relevant consolidation TDS number as a three-character string.
    
    Returns:
        None'''
    
    u = UnicodeToLatexEncoder(non_ascii_only = True, unknown_char_policy = (lambda x: ' '))
    header = re.compile(r'''(([\w\-\'\’]*\s)*(Overview))\s*(:{0,2})\s*''')
    linebreaks = re.compile(r'[\n]+')
    
    overview_text = get_docx_text(f'TEXT/overview_text/{cons_tds}_Overview.docx')
    overview_text = clean_text(overview_text)
    
    if len(header.findall(overview_text)) == 0:
            overview_text = overview_text
    else:
        try:
            overview_text = overview_text.replace(header.findall(overview_text)[0],'')
            
        except:
            overview_text = overview_text.replace(header.findall(overview_text)[0][0],'')
    
    if overview_text[0] == ':':
        overview_text = overview_text[1:]
    
    overview_text = re.sub(linebreaks, r"\\par \\vspace{.7\\baselineskip}", overview_text.strip())
    with open(f'TEXT/overview_text/{cons_tds}_overview.tex', 'w') as file_handle:
        #file_handle.write(u.unicode_to_latex(overview_text))
        file_handle.write(overview_text)
    
    #return overview_text
    pass

In [95]:
for tds in consolidations.keys():
    try:
        make_overview_text(tds)
    except FileNotFoundError:
            pass
    
    #except:
     #   print(f"{tds} raised error")

#### Analysis Text

In [98]:
def make_analysis_text(cons_tds):
    '''Loads and cleans Analysis text (found in ...[cons_tds]_Analysis.docx) for each consolidation.
    
    Parameters:
        cons_tds (str): The relevant consolidation TDS number as a three-character string.
    
    Returns:
        None'''
    
    analysis_text = get_docx_text(f'TEXT/analysis_text/{cons_tds}_Analysis.docx')

    header = re.compile(r'''(([\w\-\'\’\(\)]*\s)*(Analysis)):{0,1}\s*''')
    
    analysis_text = clean_text(analysis_text)

    section_headings = {'Inspection and Collection Requirement':['Inspection and Collection Requirements',
                                                                 'Inspection and Collection Requirement',
                                                                 'Collection and Inspection Requirements',
                                                                'Collection and Inspection Requirement'],
                        'Removal or Storage Requirement':['Removal or Storage Requirements',
                                                          'Removal or Storage Requirement',
                                                          'Removal and Storage Requirements',
                                                         'Removal and Storage Requirement',
                                                         'Storage or Removal Requirement',
                                                          'Storage and Removal Requirements',
                                                         'Storage and Removal Requirement',
                                                         'Removal or Storage Requirement '],
                       'Additional Context':['Additional Context']}
    
    for heading, variants in section_headings.items():
        for variant in variants:
            if variant in analysis_text:
                analysis_text = analysis_text.replace(variant, r'\textbf{%s}' % (heading))
                break

    #if len(header.findall(analysis_text)) == 0:
     #   pass
    #else:
    try:
        analysis_text = analysis_text.replace(header.findall(analysis_text)[0][0],'')

    except:
        analysis_text = analysis_text.replace(header.findall(analysis_text)[0],'')
        
    if analysis_text[0] == ':':
        analysis_text = analysis_text[1:]

    latex_block = analysis_text

    with open(f'TEXT/analysis_text/{cons_tds}_analysis.tex', 'w') as file_handle:
        file_handle.write(latex_block)
        
    pass

In [99]:
for tds in consolidations.keys():
    try:
        make_analysis_text(tds)
    except FileNotFoundError:
        pass
    #except:
     #   print(f"{tds} raised error")

## Prepare Maps

The two following cells import context maps for each consolidation and split the images in two, to be placed on opposing pages.

__PLEASE NOTE: This operation takes a relatively long time, and does not need to be repeated frequently. Skip as necessary to maximize efficiency.__

In [17]:
# Set asset map path
#asset_map_path = f"MAPS/asset_maps/{cons_tds}_asset_map.png"

In [18]:
# Split context map into two pages
def process_context_map(cons_tds):
    '''Divides the context map for the supplied consolidation in half, exporting each as a separate file.
    
    Parameters:
        cons_tds (str): The relevant consolidation TDS number as a three-character string.
    
    Returns:
        None'''
    
    image = Image.open(f'MAPS/context_maps/{cons_tds}_context_map.png')
    width, height = image.size

    bb1 = (0,0,width/2,height)
    bb2 = (width/2, 0, width, height)

    img_1 = image.crop(bb1)
    img_2 = image.crop(bb2)

    img_1.save(f'MAPS/context_maps/{cons_tds}_context_1.png', format="PNG")
    img_2.save(f'MAPS/context_maps/{cons_tds}_context_2.png', format="PNG")
    
    pass

In [19]:
for tds in consolidations.keys():
    try:
        process_context_map(tds)
    except:
        pass

## Produce Tables

The following subsections produce each of the tables found in the IAPs. In general, each section contains two methods: one to load and process the relevant data, and another that inserts those data into a table.

Tables are, with some exceptions, built using LaTeX's tabular environment. The first block of text establishes the number of columns in the table; the width of each column, if specified; and how text is positioned in each column. Next, each row (including the header row) is created as a raw string, with placeholders (%s) for each piece of data to be inserted; these data are later supplied as a tuple of strings. Once the table is completely formed, the tabular (or similar) environment is ended with the command \end{tabular}.

#### Make Overview Table

In [17]:
def make_overview_table(cons_tds, overview_data=overview_data):
    '''Creates overview table, located in the introduction section.
    
    Parameters:
        cons_tds (str): The relevant consolidation TDS number as a three-character string.
        
        overview_data (dataFrame): A Pandas dataFrame containing core data on developments
    
    Returns:
        None'''
    
    cons_data = overview_data.loc[overview_data['CONS_TDS']== cons_tds]
    
    overview_table = ''

    overview_frame = r'''
    \resizebox{\textwidth}{!}{
    \begin{tabular}{l|c|c|c|c|c|c|}
    \cline{2-7}
                                                                           & \cellcolor{ccteal}{\color[HTML]{FFFFFF} TDS \#} & \cellcolor{ccteal}{\color[HTML]{FFFFFF} Stairhalls \#} & \cellcolor{ccteal}{\color[HTML]{FFFFFF} Units}  & \cellcolor{ccteal}{\color[HTML]{FFFFFF} Households} & \cellcolor{ccteal}{\color[HTML]{FFFFFF} Official Population} & \cellcolor{ccteal}{\color[HTML]{FFFFFF} Average Family Size} \\ \hline

    '''

    development_template = r'''\multicolumn{1}{|l|}{\cellcolor{ccteallight}%s}        & %s                                                   & %s                            & %s                                                   & %s                                                           & %s                                                                & %s                                                                \\ \hline'''


    overview_table += overview_frame

    for row in cons_data.itertuples():
        dev_name = clean_text(row.DEV_NAME.title())
        dev_tds = row.TDS
        stairhalls = int(row.STAIRHALLS)
        units = f"{int(row.TOTAL_APTS):,d}" #Adds thousands comma sep.
        total_hhs = row.TOTAL_HH
        official_population = row.TOTAL_POP
        avg_family_size = row.AVG_FAMILY_SIZE

        overview_table += development_template % (dev_name, dev_tds, stairhalls, units, total_hhs, official_population, avg_family_size)

    overview_table += r'''
    \end{tabular}
    }
    '''
    
    with open(f'TABLES/overview_table/{cons_tds}_overview_table.tex', 'w') as file_handle:
        file_handle.write(overview_table)
    

In [18]:
overview_data = load_overview_data()
for tds in consolidations.keys():
    make_overview_table(tds, overview_data)

#### Typology Table

To associate developments with typologies, the following routines 1) load two sources of typology data and other data related to developments' physical characteristics, 2) matches detailed typologies to icons used in the Connected Communities plan, and 3) produces a table showing development names, typologies, and typology icons.

Note the structure of the tabular environment here: text-icon blocks are split into one or two lines, depending on the size of the consolidation. Each line is then embedded in an outer one-by-two table, so that the entire assembly is inserted into the document as a unit. 


In [19]:
def load_typology_data(drop_fha=True):
    #Cleaning and Shaping Data
    typ_1 = pd.read_csv('DATA/typologies_1.csv')
    typ_2 = pd.read_csv('DATA/typologies_2.csv')

    typ_1.columns = ['CONS_NAME', 'DEV_NAME', 'TDS', 'TYPOLOGY']
    typ_2.columns = ['CONS_NAME', 'CONS_TDS', 'DEV_NAME', 'TDS', 'METHOD', 
                     'CONSTRUCTION_DATE', 'BLDG_AGE', 'STORIES', 'BLDG_COVERAGE_SQFT', 'OPEN_SPACE_RATIO', 'SCATTERED_SITE_FLAG']

    def make_dates(date_col):
        date = str(date_col).split('/')
        try:
            if int(date[2]) > 18:
                return datetime.date(int(f'19{date[2]}'), int(date[0]), int(date[1]))
            else:
                return datetime.date(int(f'20{date[2]}'), int(date[0]), int(date[1]))
        except IndexError:
            return datetime.date(1900,1,1)

    typ_2['CONSTRUCTION_DATE'] = typ_2['CONSTRUCTION_DATE'].apply(lambda x: make_dates(x))
    typ_2['SCATTERED_SITE_FLAG'] = typ_2['SCATTERED_SITE_FLAG'].apply(lambda x: x == 'YES')
    typ_2.loc[typ_2['SCATTERED_SITE_FLAG']=='YES','SCATTERED_SITE_FLAG'] = 1

    typology = typ_1.merge(typ_2[['CONS_TDS', 'TDS', 'METHOD',
                                 'CONSTRUCTION_DATE', 'BLDG_AGE', 
                                 'STORIES', 'BLDG_COVERAGE_SQFT', 
                                 'OPEN_SPACE_RATIO', 'SCATTERED_SITE_FLAG']], how='left', on='TDS')

    typology['CONS_TDS'] = typology['CONS_TDS'].apply(lambda x: str(int(x)).zfill(3))
    typology['PREWAR'] = typology['CONSTRUCTION_DATE'].apply(lambda x: x < datetime.date(1945,1,1))
    
    if drop_fha:
        typology = typology.query(f'TDS not in {fha_tds_list}')
    #Adding Typology Icons

    typ_icons = [r'\rootpath/IMAGES/typology_earlytower.png', r'\rootpath/IMAGES/typology_towerpark.png', r'\rootpath/IMAGES/typology_prewar.png', r'\rootpath/IMAGES/typology_scatteredsite.png']
    typ_dict = {}
    [typ_dict.setdefault(key, '') for key in typology['TYPOLOGY'].unique().tolist()]

    typ_dict['1 - High-rise in the park'] = typ_icons[1]
    typ_dict['2 - Mid-rise in the park'] = typ_icons[1]
    typ_dict['3 - Low-rise in the park'] = typ_icons[0]
    typ_dict['4 - Context Towers'] = typ_icons[3]
    typ_dict['5 - Context Mid-rises'] = typ_icons[2]
    typ_dict['6 - Walkups & Brownstones'] = typ_icons[2]

    typ_header = re.compile(r'\d\s-\s')

    typology['TYP_NAME'] = typology['TYPOLOGY'].apply(lambda x: typ_header.sub('', str(x)))
    typology['IMAGE_PATH'] = typology['TYPOLOGY'].apply(lambda x: typ_dict[x])
    
    return typology

In [20]:
def make_typology_table_block(cons_tds, typ_data):
    cons_data = typ_data[typ_data['CONS_TDS'] == cons_tds]
    num_devs = cons_data.shape[0]
    
    if num_devs < 5:
        block_1 = cons_data
    
    elif num_devs >=5 and num_devs < 7:
        block_1 = cons_data.iloc[0:3]
        block_2 = cons_data.iloc[3:]
    
    else:
        block_1 = cons_data.iloc[0:4]
        block_2 = cons_data.iloc[4:]
    
    len_1 = block_1.shape[0]
    
    try:
        len_2 = block_2.shape[0]
    except:
        len_2 = 0
    
    headers = {1:r"\begin{tabular}{m{1.5in} m{2in}}"+'\n',
              2:r"\begin{tabular}{m{1.25in} m{2in} m{.1in} m{1.25in} m{2in}}"+'\n',
              3:r"\begin{tabular}{m{1.25in} m{1.5in} m{.2in} m{1.25in} m{1.5in} m{.2in} m{1.25in} m{1.5in}}"+'\n',
              4:r"\begin{tabular}{m{1.25in} m{1.25in} m{.2in} m{1.25in} m{1.25in} m{.2in} m{1.25in} m{1.25in} m{.2in} m{1.25in} m{1.25in}}"+'\n'}
         
    lines = {1:r'''\textbf{%s:} {%s} & \includegraphics[height=2in]{%s}'''+'\n'+r'\end{tabular}',
            2:r'''\textbf{%s:} {%s} & \includegraphics[height=2in]{%s} & & \textbf{%s:} {%s} & \includegraphics[height=2in]{%s}'''+'\n'+r'\end{tabular}',
            3:r'''\textbf{%s:} {%s} & \includegraphics[height=1.5in]{%s} & & \textbf{%s:} {%s} & \includegraphics[height=1.5in]{%s} & & \textbf{%s:} {%s} & \includegraphics[height=1.5in]{%s}'''+'\n'+r'\end{tabular}',
            4:r'''\textbf{%s:} {%s} & \includegraphics[height=1.5in]{%s} & & \textbf{%s:} {%s} & \includegraphics[height=1.5in]{%s} & & \textbf{%s:} {%s} & \includegraphics[height=1.5in]{%s}& & \textbf{%s:} {%s} & \includegraphics[height=1.5in]{%s}'''+'\n'+r'\end{tabular}'}
    
    
    data_1 = []
    data_2 = []
    
    for row in block_1.itertuples():
        data_1.append(clean_text(str(row.DEV_NAME).title()))
        data_1.append(str(row.TYP_NAME).replace('&', '\&'))
        data_1.append(row.IMAGE_PATH)
    
    if len_2 > 0:
        for row in block_2.itertuples():
            data_2.append(clean_text(str(row.DEV_NAME.title())))
            data_2.append(str(row.TYP_NAME).replace('&', '\&'))
            data_2.append(row.IMAGE_PATH)
    
    # Assembling Nested Tables
    latex_block = ''
    if num_devs >= 2:
        latex_block += r'''\begin{table}[H]
        \resizebox{.9\textwidth}{!}{
        \begin{tabular}{c}
        '''
    else:
        latex_block += r'''\begin{table}[H]
        \begin{tabular}{c}
        '''
    
    latex_block += headers[len_1]
    latex_block += lines[len_1] % tuple(data_1)
    
    if len_2 > 0:
        latex_block += r'''\\
        '''
        latex_block += headers[len_2]
        latex_block += lines[len_2] % tuple(data_2)
    
    if num_devs >= 2:
        latex_block += r'''\end{tabular}}
        \end{table}'''
    else:
        latex_block += r'''\end{tabular}
        \end{table}'''
    
    with open(f'TABLES/typology_table/{cons_tds}_typology.tex', 'w') as file_handle:
        file_handle.write(latex_block)

A note: upon generating tables, execptions can be expected for TDS numbers 210 and 128.

In [21]:
typology = load_typology_data()

for tds in consolidations.keys():
    try:
        make_typology_table_block(tds, typology)
    except:
        print(f'{tds} raised an exception.')

  res_values = method(rvalues)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


210 raised an exception.
128 raised an exception.
NaN raised an exception.


#### Waste Services and Assets

In [15]:
bulk_container_counts = pd.read_csv('DATA/BULK_CONTAINER_COUNTS.csv')
bulk_container_counts.drop(columns=bulk_container_counts.columns[3:], inplace=True)

In [16]:
def load_wsa_data():
    wsa_data = pd.read_csv('DATA/WASTE_SERVICES_ASSETS.csv')
    wsa_data['TDS'] = wsa_data['DEV_TDS'].apply(lambda x: str(x).zfill(3))
    wsa_data['DEV_TDS'] = wsa_data['DEV_TDS'].apply(lambda x: str(x).zfill(3))
    wsa_data['INT_COMP_DATE'] = pd.to_datetime(wsa_data['INT_COMP_INSTALL_DATE'], errors='ignore')
    
    waste_collection_days = pd.read_csv('DATA/WASTE_COLLECTION_SCHEDULE.csv')
    waste_collection_days['DEV_TDS'] = waste_collection_days['DEV_TDS'].apply(lambda x: str(int(x)).zfill(3))
    day_abbreviations = {'Mon':'M',
                        'Tue':'T',
                        'Wed':'W',
                        'Thu':'Th',
                        'Fri':'F',
                        'Sat':'Sa',
                        'Sun':'Su'}
    
    def convert_days(x):
        new_x = ''
        try:
            if ',' in str(x):
                for part in str(x).split(','):
                    new_x += day_abbreviations[part.strip()]

            else:
                new_x = day_abbreviations[str(x).strip()]

            return new_x
        except:
            pass
    
    for col in ['FREQ_REFUS','FREQ_RECYC','FREQ_BULK']:
        waste_collection_days[col] = waste_collection_days[col].apply(lambda x: convert_days(x))
    
    
    def get_date(x):
        try:
            return x.strftime('%Y')
        except:
            return ' '

    extcomp_data = pd.read_csv('DATA/EXT_COMPACTORS.csv')
    extcomp_data['TDS'] = extcomp_data['LOCATION'].apply(lambda x: x.split('.')[0])
    extcomp_data['INSTALLDATE'] = pd.to_datetime(extcomp_data['INSTALLDATE'])
    extcomp_data.head()
    groupby = extcomp_data.groupby('TDS').agg({'ASSETNUM':'count', 'INSTALLDATE': max}).reset_index()
    groupby['EXT_COMP_YEAR'] = groupby['INSTALLDATE'].apply(lambda x: get_date(x))
    groupby['TDS'] = groupby['TDS'].apply(lambda x: str(x).zfill(3))
    
    #Adding number of bulk containers
    bulk_container_counts = pd.read_csv('DATA/BULK_CONTAINER_COUNTS.csv')
    candidate_list = []
    for key, value in developments.items():
        candidate_list.append(str(value['name']).upper())
        for item in value['name_alternates']:
            candidate_list.append(item.upper())

    def get_dev_name(name):
        match = process.extractOne(str(name).upper(), candidate_list)[0]
        #print(match)
        for key, value in developments.items():
            if (match.upper() == value['name'].upper()) or (match.upper() in [val.upper() for val in value['name_alternates']]):
                return value['name']

        return '!!!NOT FOUND'

    bulk_container_counts['DEV_NAME'] = bulk_container_counts['DEV_NAME'].apply(lambda x: get_dev_name(x))
    wsa_data = wsa_data.merge(bulk_container_counts, on='DEV_NAME', how='left')

    def get_count(x):
        try:
            return (str(int(x)))
        except:
            return '0'

    wsa_data['COUNT'] = wsa_data['COUNT'].apply(lambda x: get_count(x))
    
    wsa_data = wsa_data.merge(waste_collection_days, on='DEV_TDS', how='left')
    wsa_data = wsa_data.merge(groupby, on='TDS', how='left')
    
    #wsa_data[wsa_data['EXT_COMP'] != 0]

    return wsa_data

In [11]:
def make_waste_services_table(cons_tds, wsa_data, counts_dict=counts):
    dev_list = counts_dict[cons_tds]['developments']
    if cons_tds == '091':
        cons_data = wsa_data.query(f"TDS in {dev_list}").iloc[1:]
    else:
        cons_data = wsa_data.query(f"TDS in {dev_list}")
    num_devs = counts_dict[cons_tds]['count']
    
    bulk_pickup_site = ''
    
    for dev in cons_data.itertuples():
        if pd.isna(dev.BULK_HAULER):
            pass
        else:
            bulk_pickup_site = str(dev.DEV_NAME).title()
    
    def make_waste_services_block(num_cols, block_data, bulk_pickup_site = bulk_pickup_site):
        col_format = r'X|'
        header = r'\begin{tabularx}{\textwidth}{V{1.5in}|'+col_format*(num_cols)+r'''}
    \cline{2-%s}
                                                                                       '''% (num_cols)+r'& \cellcolor{ccorange}{\color[HTML]{FFFFFF} %s}'*num_cols+r' \\ \hline'+'\n'
        hh_waste_line = r'\multicolumn{1}{|V{1.5in}|}{\cellcolor{ccorangelight}Household Waste (DSNY)}               '+r'& %s'*num_cols+r'\\ \hline'+'\n'
        bulk_waste_line = r'\multicolumn{1}{|V{1.5in}|}{\cellcolor{ccorangelight}Bulk Waste}                  '+r'& %s'*num_cols+r' \\ \hline'+'\n'
        norm_recycling_line = r'\multicolumn{1}{|V{1.5in}|}{\cellcolor{ccorangelight}%s}                   '+r'& DSNY Curb Setout; collected %s'*num_cols + r'\\ \hline'+'\n'
        special_recycling_line = r'\multicolumn{1}{|V{1.5in}|}{\cellcolor{ccorangelight}%s}                   '+r'& %s'*num_cols +r'\\ \hline' + '\n'
        
        latex_block = r''''''
        latex_block += header % tuple(block_data['DEV_NAME'].apply(lambda x: clean_text(str(x).title())).tolist())
        
        
        hh_waste_data = []
        bulk_waste_data = []
        ewaste_data = []
        textiles_data = []
    
        if bulk_pickup_site == '':
            for dev in block_data.itertuples():
                if dev.CURBSIDE == 1:
                    hh_waste_data.append(f'Curbside Pickup {dev.FREQ_REFUS}')
                elif dev.SHARE == 1:
                    hh_waste_data.append(f'Transfer to {clean_text(str(dev.SHARE_SITE).title())}')
                else:
                    if True:
                        if (dev.EXT_COMP_BE == 1) and (dev.COMPACTOR_YARDS == 1):
                            hh_waste_data.append(f'{int(dev.EXT_COMP_BE)} exterior compactor in {int(dev.COMPACTOR_YARDS)} waste yard; collected as requested by staff')
                        elif (dev.COMPACTOR_YARDS == 1):
                            hh_waste_data.append(f'{int(dev.EXT_COMP_BE)} exterior compactors in {int(dev.COMPACTOR_YARDS)} waste yard; collected as requested by staff')
                        else:
                            hh_waste_data.append(f'{int(dev.EXT_COMP_BE)} exterior compactors in {int(dev.COMPACTOR_YARDS)} waste yards; collected as requested by staff')
                    else:
                        if (dev.EXT_COMP_BE == 1) and (dev.COMPACTOR_YARDS == 1):
                            hh_waste_data.append(f'{int(dev.EXT_COMP_BE)} exterior compactor in {int(dev.COMPACTOR_YARDS)} waste yard; last replaced {dev.EXT_COMP_YEAR}')
                        elif (dev.COMPACTOR_YARDS == 1):
                            hh_waste_data.append(f'{int(dev.EXT_COMP_BE)} exterior compactors in {int(dev.COMPACTOR_YARDS)} waste yard; last replaced {dev.EXT_COMP_YEAR}')
                        else:
                            hh_waste_data.append(f'{int(dev.EXT_COMP_BE)} exterior compactors in {int(dev.COMPACTOR_YARDS)} waste yards; last replaced {dev.EXT_COMP_YEAR}')
                    
                if pd.isna(dev.BULK_HAULER):
                    if int(dev.BULK_SITES) == 0:
                        bulk_waste_data.append(f"Transferred for Pickup")
                    elif int(dev.BULK_SITES) == 1:
                        bulk_waste_data.append(f"One Bulk Waste Holding Site; Transferred for Pickup")
                    else:
                        bulk_waste_data.append(f"{dev.BULK_SITES} Bulk Waste Holding Sites; Transferred for Pickup")
                else:
                    if int(dev.BULK_SITES) == 1:
                        bulk_waste_data.append(f"One Bulk Waste Holding; Picked up by {dev.BULK_HAULER}")
                    elif int(dev.BULK_SITES) > 1:
                        bulk_waste_data.append(f"{dev.BULK_SITES} Bulk Waste Holding Sites; Picked up by {dev.BULK_HAULER}")
                    else:
                        bulk_waste_data.append(f"Picked up by {dev.BULK_HAULER}")

                if dev.ECYCLE == 1:
                    ewaste_data.append('Previously available through ECycle')
                else:
                    ewaste_data.append('N/A')

                if dev.REFASHION == 1:
                    textiles_data.append('Previously available through Refashion')
                else:
                    textiles_data.append('N/A')
        else:
            for dev in block_data.itertuples():
                if dev.CURBSIDE == 1:
                    hh_waste_data.append(f'Curbside Pickup {dev.FREQ_REFUS}')
                elif dev.SHARE == 1:
                    hh_waste_data.append(f'Transfer to {clean_text(str(dev.SHARE_SITE).title())}')
                else:
                    if True: 
                        if (dev.EXT_COMP_BE == 1) and (dev.COMPACTOR_YARDS == 1):
                            hh_waste_data.append(f'{int(dev.EXT_COMP_BE)} exterior compactor in {int(dev.COMPACTOR_YARDS)} waste yard; collected as requested by staff')
                        elif (dev.COMPACTOR_YARDS == 1):
                            hh_waste_data.append(f'{int(dev.EXT_COMP_BE)} exterior compactors in {int(dev.COMPACTOR_YARDS)} waste yard; collected as requested by staff')
                        else:
                            hh_waste_data.append(f'{int(dev.EXT_COMP_BE)} exterior compactors in {int(dev.COMPACTOR_YARDS)} waste yards; collected as requested by staff')
                    else:
                        if (dev.EXT_COMP_BE == 1) and (dev.COMPACTOR_YARDS == 1):
                            hh_waste_data.append(f'{int(dev.EXT_COMP_BE)} exterior compactor in {int(dev.COMPACTOR_YARDS)} waste yard; last replaced {dev.EXT_COMP_YEAR}')
                        elif (dev.COMPACTOR_YARDS == 1):
                            hh_waste_data.append(f'{int(dev.EXT_COMP_BE)} exterior compactors in {int(dev.COMPACTOR_YARDS)} waste yard; last replaced {dev.EXT_COMP_YEAR}')
                        else:
                            hh_waste_data.append(f'{int(dev.EXT_COMP_BE)} exterior compactors in {int(dev.COMPACTOR_YARDS)} waste yards; last replaced {dev.EXT_COMP_YEAR}')
                    
                if pd.isna(dev.BULK_HAULER):
                    if int(dev.BULK_SITES) == 0:
                        bulk_waste_data.append(f"Transferred to {bulk_pickup_site} for Pickup")
                    elif int(dev.BULK_SITES) == 1:
                        bulk_waste_data.append(f"One Bulk Waste Holding Site; Transferred to {bulk_pickup_site} for Pickup")
                    else:
                        bulk_waste_data.append(f"{dev.BULK_SITES} Bulk Waste Holding Sites; Transferred to {bulk_pickup_site} for Pickup")
                else:
                    if int(dev.BULK_SITES) == 1:
                        bulk_waste_data.append(f"One Bulk Waste Holding Site; Picked up by {dev.BULK_HAULER}")
                    elif int(dev.BULK_SITES) > 1:
                        bulk_waste_data.append(f"{dev.BULK_SITES} Bulk Waste Holding Sites; Picked up by {dev.BULK_HAULER}")
                    else:
                        bulk_waste_data.append(f"Picked up by {dev.BULK_HAULER}")

                if dev.ECYCLE == 1:
                    ewaste_data.append('Previously available through ECycle')
                else:
                    ewaste_data.append('N/A')

                if dev.REFASHION == 1:
                    textiles_data.append('Previously available through Refashion')
                else:
                    textiles_data.append('N/A')
        
        latex_block += hh_waste_line % tuple(hh_waste_data)
        latex_block += bulk_waste_line % tuple(bulk_waste_data)
        latex_block += norm_recycling_line % tuple(['Recycling: Paper and Cardboard']+block_data['FREQ_RECYC'].tolist())
        latex_block += norm_recycling_line % tuple(['Recycling: Metal, Glass and Plastic']+block_data['FREQ_RECYC'].tolist())
        latex_block += special_recycling_line % tuple(['Recycling: Mattresses']+['N/A' for i in range(0, num_cols)])
        latex_block += special_recycling_line % tuple(['Recycling: E-Waste']+ewaste_data)
        latex_block += special_recycling_line % tuple(['Recycling: Textiles']+textiles_data)
        latex_block += r'\end{tabularx}'
        
        return latex_block
    
    if num_devs <= 4:
        num_cols = num_devs
        block_data = cons_data
        #print(cons_data)
        with open(f'TABLES/waste_services/{cons_tds}_waste_services.tex', 'w') as file_handle:
            file_handle.write(make_waste_services_block(num_cols, block_data))
        
    elif num_devs > 4:
        num_cols_1 = math.ceil(num_devs/2)
        num_cols_2 = (num_devs-num_cols_1)
        block_data_1 = cons_data.iloc[0:num_cols_1]
        block_data_2 = cons_data.iloc[num_cols_1:]
        
        with open(f'TABLES/waste_services/{cons_tds}_waste_services_1.tex', 'w') as file_handle:
            file_handle.write(make_waste_services_block(num_cols_1, block_data_1))
            
        with open(f'TABLES/waste_services/{cons_tds}_waste_services_2.tex', 'w') as file_handle:
            file_handle.write(make_waste_services_block(num_cols_2, block_data_2))
    
    pass

In [12]:
wsa_data = load_wsa_data()

for tds in consolidations.keys():
    try:
        make_waste_services_table(tds, wsa_data)
    except:
        print(f'{tds} raised exception')

NameError: name 'load_wsa_data' is not defined

In [26]:
def make_waste_assets_table(cons_tds, wsa_data, counts_dict=counts):
    dev_list = counts_dict[cons_tds]['developments']
    if cons_tds == '091':
        cons_data = wsa_data.query(f"TDS in {dev_list}").iloc[1:]
    else:
        cons_data = wsa_data.query(f"TDS in {dev_list}")
    num_devs = counts_dict[cons_tds]['count']
    
#    header = r'''
#    \begin{tabular}{V{.25\columnwidth}|V{.15\columnwidth}|V{.15\columnwidth}|V{.25\columnwidth}|V{.15\columnwidth}|}
#\cline{2-5}
#                                                                                              & \cellcolor{ccorangelight}{\color[HTML]{000000} Internal Compactors} & \cellcolor{ccorangelight}{\color[HTML]{000000} External Compactors} & \cellcolor{ccorangelight}{\color[HTML]{000000} Other External Assets}   & \cellcolor{ccorangelight}{\color[HTML]{000000} Recycling Bins\tnote{1}} \\ \hline'''+'\n'
#    line_format = r'\multicolumn{1}{|V{.25\columnwidth}|}{\cellcolor{ccorange}{\color[HTML]{FFFFFF} %s}}        & %s                                                & %s                                                                  & %s & %s                                                            \\ \hline'+'\n'
    
    header = r'''
    \begin{tabular}{V{.15\columnwidth}|V{.15\columnwidth}|V{.15\columnwidth}|V{.15\columnwidth}|V{.15\columnwidth}|V{.15\columnwidth}|V{.15\columnwidth}|}
\cline{2-6}
                                                                                              & \cellcolor{ccorangelight}{\color[HTML]{000000} Internal Compactors} & \cellcolor{ccorangelight}{\color[HTML]{000000} External Compactors}  & \cellcolor{ccorangelight}{\color[HTML]{000000} Bulk Containers} & \cellcolor{ccorangelight}{\color[HTML]{000000} Cardboard Balers} & \cellcolor{ccorangelight}{\color[HTML]{000000} Mattress Containers} & \cellcolor{ccorangelight}{\color[HTML]{000000} Recycling Bins\tnote{1}} \\ \hline'''+'\n'
    line_format = r'\multicolumn{1}{|V{.15\columnwidth}|}{\cellcolor{ccorange}{\color[HTML]{FFFFFF} %s}}        & %s    & %s                                               & %s           & %s      & %s                                                             & %s                                                             \\ \hline'+'\n'
    
    
    latex_block = r''''''
    latex_block += header
    
    for dev in cons_data.itertuples():
        line_data = []
        line_data.append(clean_text(str(dev.DEV_NAME).title()))
        
        if (dev.INT_COMP == 0):
            int_comp_string = '0'
        elif pd.isna(dev.INT_COMP_DATE):
            int_comp_string = str(int(dev.INT_COMP))
        else:
            int_comp_string = f'{str(int(dev.INT_COMP))}; last replaced {str(dev.INT_COMP_DATE.year)}'
        
        line_data.append(int_comp_string)
        
        if pd.isna(dev.EXT_COMP_YEAR):
            line_data.append(str(int(dev.EXT_COMP_BE)))
        else:
            line_data.append(f"{int(dev.EXT_COMP_BE)}; last replaced {dev.EXT_COMP_YEAR}")
        
        #if (dev.BULK_CRUSHERS == 0) and (dev.BALERS == 0)... REDO THIS ONCE DATA ARE COMPLETE
        #line_data.append('PLACEHOLDER UNTIL DATA ARE COMPLETE')
        try:
            line_data.append(str(int(dev.COUNT)))
        except:
            line_data.append(str(dev.COUNT))
        
        line_data.append(str(int(dev.BALERS)))
        
        line_data.append(str(int(0)))
        
        line_data.append(str(int(dev.RECYCLING_BINS)))
        
        latex_block += line_format % tuple(line_data)
    
    latex_block += r'\end{tabular}'
    
    with open(f'TABLES/waste_assets/{cons_tds}_waste_assets.tex', 'w') as file_handle:
        file_handle.write(latex_block)    
    pass
    

In [27]:
wsa_data = load_wsa_data()
for tds in consolidations.keys():  
    try:
        make_waste_assets_table(tds, wsa_data)
    except:
        print(f'{tds} raised exception')

NaN raised exception


#### Consolidation Assets

Similarly to the typologies table above, this section makes use of nested tables to ensure a consistent layout. In this case, the name and number of each vehicle type is positioned above an icon of that vehicle in a 1-by-two table; these tables are then arranged side-by-side in a single-row table.

In [29]:
def load_vehicle_data():
    vehicle_data = pd.read_excel('DATA/vehicle_inventory.xlsx')
    vehicle_data['CONS'] = vehicle_data['WORK LOCATION'].apply(lambda x: str(x).replace('NYCHA-',''))

    candidate_list = []
    for key, value in consolidations.items():
        candidate_list.append(str(value['name']).upper())
        for item in value['alternates']:
            candidate_list.append(item.upper())

    def get_cons_name(name):
        match = process.extractOne(str(name).upper(), candidate_list)[0]
        #print(match)
        for key, value in consolidations.items():
            if (match.upper() == value['name'].upper()) or (match.upper() in [val.upper() for val in value['alternates']]):
                return value['name']

        return '!!!NOT FOUND'


    def get_tds_from_name(x):
        for key, value in consolidations.items():
            if str(value['name']).upper().strip() == x.upper().strip():
                return key

        return 'N/A'
    
    
    def get_vehicle_type(x):
        
        if type(x) != str:
            return 'N/A'
        
        van_keys = ['VAN', 'SPRINTER', 'ECONOLINE', 'TRANSIT']
        truck_keys = ['PICK-UP', 'F250', 'F450', 'SIERRA', 'RANGER', 'Pick-Up']
        for key in van_keys:
            if key in x:
                return 'VAN'

        for key in truck_keys:
            if key in x:
                return 'TRUCK'

        return 'OTHER'
    
    vehicle_data['CONS_MATCH'] = vehicle_data['CONS'].apply(lambda x: get_cons_name(str(x)))

    vehicle_data['TDS'] = vehicle_data['CONS_MATCH'].apply(lambda x: get_tds_from_name(str(x)))
    #vehicle_data['CONS_TDS'] = vehicle_data['TDS'].apply(lambda x: developments[x]['cons_tds'] if x in developments.keys() else np.NaN)
    vehicle_data['TYPE'] = vehicle_data['DESCRIPTION'].apply(lambda x: get_vehicle_type(x))
    
    return vehicle_data

In [30]:
def load_horticultural_data():
    equip_table = pd.read_csv('DATA/hort_equipment.csv')
    equip_types = pd.read_csv('DATA/hort_equipment_types.csv')
    equip_table.rename(columns={'MODL':'MODEL'}, inplace=True)
    equip_table = equip_table.merge(equip_types, on=['MAKE', 'MODEL'], how='left')
    
    candidate_list = []
    for key, value in consolidations.items():
        candidate_list.append(str(value['name']).upper())
        for item in value['alternates']:
            candidate_list.append(item.upper())

    def get_cons_name(name):
        match = process.extractOne(str(name).upper(), candidate_list)[0]
        #print(match)
        for key, value in consolidations.items():
            if (match.upper() == value['name'].upper()) or (match.upper() in [val.upper() for val in value['alternates']]):
                return value['name']

        return '!!!NOT FOUND'


    def get_tds_from_name(x):
        for key, value in consolidations.items():
            if str(value['name']).upper().strip() == x.upper().strip():
                return key

        return 'N/A'
    
    def clean_consolidations(x):
        correction_dict = {'': 'N/A',
                            'Loaner': 'N/A',
                            'ATLANTIC TERMINAL': 'WYCKOFF GARDENS CONSOLIDATED',
                            'Assignment':'N/A',
                            'BAYCHESTER':'N/A',
                            'FLEET (GSD)' : 'N/A',
                            'FLEET LOANER' : 'N/A',
                            'LOANER' : 'N/A',
                            'Loaner' : 'N/A',
                            'MAR' : 'N/A',
                            'MRST' : 'N/A',
                            'MURPHY CONSOL.' : '1010 EAST 178TH STREET',
                            'MURPHY CONSOLIDATED' : '1010 EAST 178TH STREET',
                            'MURPHY' : '1010 EAST 178TH STREET',
                            "TBD" : 'N/A',
                            'loaner':'N/A'}
        if str(x).strip() in correction_dict.keys():
            return correction_dict[str(x).strip()]
        else:
            return x
        
    equip_table['CONS'] = equip_table['LOCATION'].apply(lambda x: get_cons_name(clean_consolidations(x)))
    equip_table['CONS_TDS'] = equip_table['CONS'].apply(lambda x: get_tds_from_name(x))
    
    return equip_table

In [31]:
def make_consolidation_assets_table(cons_tds, vehicle_data, hort_data):
    cons_vehicle_data = vehicle_data[vehicle_data['TDS'] == cons_tds]
    cons_hort_data = hort_data[hort_data['CONS_TDS'] == cons_tds]
    
    block_template = r'''\begin{tabular}{m{.25\columnwidth}m{.25\columnwidth}m{.25\columnwidth}m{.25\columnwidth}}
    {\color{ccorange} %s Trucks} & {\color{ccorange} %s Skid Steers} & {\color{ccorange} %s Tractors} & {\color{ccorange} %s Sweepers} \\
    \includegraphics[width=.15\columnwidth]{\rootpath/IMAGES/truck.png}  & \includegraphics[width=.15\columnwidth]{\rootpath/IMAGES/bobcat.png} & \includegraphics[width=.15\columnwidth]{\rootpath/IMAGES/tractor.png} & \includegraphics[width=.15\columnwidth]{\rootpath/IMAGES/road-sweeper.png}                         
    \end{tabular}'''
    
    num_trucks = cons_vehicle_data[cons_vehicle_data['TYPE'] == 'TRUCK'].shape[0]
    num_vans = cons_vehicle_data[cons_vehicle_data['TYPE'] == 'VAN'].shape[0]
    num_skidsteers = cons_hort_data[cons_hort_data['TYPE'] == 'SKIDSTEER'].shape[0]
    num_mowers = cons_hort_data[cons_hort_data['TYPE'] == 'MOWER'].shape[0]
    num_tractors = cons_hort_data.query(f"TYPE in {['TRACTOR', 'TOOLCAT']}").shape[0]
    num_sweepers = cons_hort_data.query(f"TYPE in {['SWEEPER', 'VAC']}").shape[0]
    
    block_data = [str(num_trucks), str(num_skidsteers), str(num_tractors), str(num_sweepers)]
    
    with open(f'TABLES/consolidation_assets/{cons_tds}_consolidation_assets.tex', 'w') as file_handle:
        file_handle.write(block_template % tuple(block_data))

In [32]:
vehicle_data = load_vehicle_data()
hort_data = load_horticultural_data()

for tds in consolidations.keys():
    try:
        make_consolidation_assets_table(tds, vehicle_data, hort_data)
    except:
        print(f'{tds} raised exception')


#### Waste Calculator

In [20]:
def add_waste_cols(overview_data):
    
    try:
        diversion_rates = pd.read_csv('DIVERSION_RATES.csv')
    except:
        pass
    
    conversion_factors = {'units_to_tons_day': 0.0025,
                         'cy_per_ton': {'trash': 21.05,
                                        'trash_actual': 0,
                                       'MGP': 18.02,
                                       'cardboard': 26.67,
                                       'paper': 6.19,
                                       'organics': 4.32,
                                       'ewaste': 5.65,
                                       'textiles': 13.33},
                         'gallons_per_cy': 201.974,
                         'gallons_per_64gal': 64,
                         'gallons_per_40lb_bag': 44,
                         'cy_per_44gal_bag':0.174,
                         'cy_per_cardboard_bale':0.193}

    waste_percentages = {'trash': .26,
                         'trash_actual':.894,
                        'MGP': .19,
                        'cardboard': .07,
                        'paper': .07,
                        'organics':.32,
                        'ewaste': .01,
                        'textiles': .08}

    capture_rates = {'trash_primary': .75,
                    'trash_secondary': .25,
                    'mgp': .30,
                    'cardboard': .50,
                    'paper': .20}
    
    def get_capture_rate(TDS, CONS_TDS):
        try:
            trash_primary = diversion_rates.loc[diversion_rates['TDS']==TDS]['TRASH_PRIMARY']
            trash_secondary = diversion_rates.loc[diversion_rates['TDS']==TDS]['TRASH_SECONDARY']
        except:
            try:
                trash_primary = diversion_rates.loc[diversion_rates['CONS_TDS']==CONS_TDS]['TRASH_PRIMARY']
                trash_secondary = diversion_rates.loc[diversion_rates['CONS_TDS']==CONS_TDS]['TRASH_SECONDARY']
            except:
                trash_primary = .75
                trash_secondary = .25
        
        return [trash_primary, trash_secondary]
    
    overview_data['WASTE_TONS_DAY'] = overview_data['CURRENT_APTS'].apply(lambda x: x * conversion_factors['units_to_tons_day'])
    overview_data['TRASH_PRIMARY'] = overview_data.apply(lambda row: get_capture_rate(row['TDS'], row['CONS_TDS'])[0], axis=1)
    overview_data['TRASH_SECONDARY'] = overview_data.apply(lambda row: get_capture_rate(row['TDS'], row['CONS_TDS'])[1], axis=1)
    
    for key, value in waste_percentages.items():
        overview_data[f'{key.upper()}_CY'] = overview_data['WASTE_TONS_DAY'].apply(lambda x: x * value * conversion_factors['cy_per_ton'][key])
        overview_data[f'{key.upper()}_TONS'] = overview_data['WASTE_TONS_DAY'].apply(lambda x: x * value)
    
    overview_data['TRASH_ACTUAL_CY'] = (overview_data['TRASH_CY']+
                                           overview_data['MGP_CY']+
                                           overview_data['CARDBOARD_CY']+
                                           overview_data['PAPER_CY']+
                                           overview_data['ORGANICS_CY']+
                                           overview_data['EWASTE_CY']+
                                           overview_data['TEXTILES_CY'])-(overview_data['MGP_CY']*capture_rates['mgp']+
                                                                         overview_data['CARDBOARD_CY']*capture_rates['cardboard']+
                                                                         overview_data['PAPER_CY']*capture_rates['paper'])

    overview_data['TRASH_CHUTE_CY'] = overview_data['TRASH_ACTUAL_CY']*overview_data['TRASH_PRIMARY']
    overview_data['TRASH_CHUTE_TONS'] = overview_data['TRASH_ACTUAL_TONS']*overview_data['TRASH_PRIMARY']
    overview_data['TRASH_CHUTE_SAUSAGE'] = ((overview_data['TRASH_CHUTE_CY'])/conversion_factors['cy_per_ton']['trash'])*(2000/40)
    overview_data['TRASH_DROP_CY'] = overview_data['TRASH_ACTUAL_CY']*overview_data['TRASH_SECONDARY']
    overview_data['TRASH_DROP_TONS'] = overview_data['TRASH_ACTUAL_TONS']*overview_data['TRASH_SECONDARY']
    overview_data['TRASH_DROP_BINS'] = overview_data['TRASH_DROP_CY']*conversion_factors['gallons_per_cy']/64
    overview_data['CAPTURED_MGP_TONS_WEEK'] = overview_data['MGP_TONS']*capture_rates['mgp']*7
    overview_data['CAPTURED_CARDBOARD_TONS_WEEK'] = overview_data['CARDBOARD_TONS']*capture_rates['cardboard']*7
    overview_data['CAPTURED_PAPER_TONS_WEEK'] = overview_data['PAPER_TONS']*capture_rates['paper']*7
    overview_data['MGP_BAGS_WEEK'] = overview_data['MGP_CY']*capture_rates['mgp']*7/conversion_factors['cy_per_44gal_bag']
    overview_data['PAPER_BAGS_WEEK'] = overview_data['PAPER_CY']*capture_rates['paper']*7/conversion_factors['cy_per_44gal_bag']
    overview_data['CARDBOARD_BALES_WEEK'] = overview_data['CARDBOARD_CY']*capture_rates['cardboard']*7/conversion_factors['cy_per_cardboard_bale']
    
    actual_tonnage = pd.read_csv('DATA/WASTE_TONNAGE_2017.csv').dropna()
    overview_data = overview_data.merge(actual_tonnage, on='DEV_NAME', how='left')
    
    return overview_data

In [59]:
def make_waste_distribution_table(cons_tds, overview_data, wsa_data):
    cons_data = overview_data[overview_data['CONS_TDS'] == cons_tds]
    num_devs = cons_data.shape[0]
    
    if num_devs == 1:
        num_cols = num_devs
    elif num_devs > 4:
        num_cols_1 = math.ceil(num_devs/2)
        num_cols_2 = (num_devs-num_cols_1)+1
    else:
        num_cols = num_devs+1
    
    if num_devs != 1:
        cons_data.loc['Total']= cons_data.sum(numeric_only=True, axis=0)
        cons_data.loc['Total','DEV_NAME'] = 'Total'
    
    def make_waste_distribution_table_block(cons_data, num_cols):
        dev_col_format = r'X|'

        opening = r'''
        \begin{tabularx}{\textwidth}{V{1.25in}|%s}
        \cline{2-%s}
        ''' % (dev_col_format*num_cols, (num_cols+1))

        top_row = r'''
                                                                       '''+(r"& \multicolumn{1}{V{1.25in}|}{\cellcolor{ccorange}{\color[HTML]{FFFFFF}%s}}"*(num_cols))+r"\tnhl"+'\n'

        standard_row = r"\multicolumn{1}{|V{1.25in}|}{\cellcolor{ccorangelight}%s}                 "+(r"& %s                                    ")*num_cols+r"\tnhl"+'\n'

        captured_row = r"\multicolumn{1}{|Y{1.25in}|}{\cellcolor{ccorangelight}Captured / Week (tons)\tnote{4}}                        "+(r"& %s                                    ")*num_cols+r"\tnhl"+'\n'

        #chute_row = r"\multicolumn{1}{|Y{1.25in}|}{\cellcolor{ccorangelight}Trash Chutes\tnote{3}}                 "+(r"& %s tons [%s 40 lbs. sausage bags]      "*num_cols)+r"\tnhl"+'\n'
        chute_row = r"\multicolumn{1}{|Y{1.25in}|}{\cellcolor{ccorangelight}Trash Chutes\tnote{3}}                 "+(r"& %s sausage bags      "*num_cols)+r"\tnhl"+'\n'

        #dropsite_row = r"\multicolumn{1}{|Y{1.25in}|}{\cellcolor{ccorangelight}Drop Sites\tnote{4}}                 "+(r"& %s tons [%s 64-gal. bins]      "*num_cols)+r"\tnhl"+'\n'
        dropsite_row = r"\multicolumn{1}{|Y{1.25in}|}{\cellcolor{ccorangelight}Drop Sites\tnote{4}}                 "+(r"& %s 64-gal. bins      "*num_cols)+r"\tnhl"+'\n'

        OET_row = r"\multicolumn{1}{|V{1.25in}|}{\cellcolor{ccorangelight}%s / Day (CY)}              "+(r"& %s                                    "*num_cols)+r"\tnhl"+'\n'

        #recycling_row = r"\multicolumn{1}{|V{1.25in}|}{\cellcolor{ccorangelight}%s \tnote{5}}                 "+(r"& %s tons [%s 44-gal. bags]                                   ")*num_cols+r"\tnhl"+'\n'
        recycling_row = r"\multicolumn{1}{|V{1.25in}|}{\cellcolor{ccorangelight}%s \tnote{6}}                 "+(r"& %s 44-gal. bags                                   ")*num_cols+r"\tnhl"+'\n'

        #cardboard_row = r"\multicolumn{1}{|V{1.25in}|}{\cellcolor{ccorangelight}%s \tnote{5}}                 "+(r"& %s tons [%s bales]                                   ")*num_cols+r"\tnhl"+'\n'
        cardboard_row = r"\multicolumn{1}{|V{1.25in}|}{\cellcolor{ccorangelight}%s \tnote{6}}                 "+(r"& %s bales                                   ")*num_cols+r"\tnhl"+'\n'


        def make_trash_text(row, text_var, cy_col, other_col):
            text_var.append(round(row[cy_col],1))
            text_var.append(round(row[other_col], 1))
            pass

        latex_block = r'\textbf{Projected Daily Trash Volumes}'
        latex_block += opening
        latex_block += top_row % tuple(cons_data['DEV_NAME'].apply(lambda x: clean_text(str(x).title())).tolist())
        latex_block += standard_row % tuple([r"Waste Generated / Day (Tons)\tnote{1}"]+[round(item, 1) for item in cons_data['WASTE_TONS_DAY'].tolist()])
        latex_block += standard_row % tuple([r"Trash / Day (tons)\tnote{2}"]+cons_data['TRASH_ACTUAL_TONS'].apply(lambda x: str(round(x,1))).tolist())

        trash_chute_text = []
        dropsite_text = []

        #cons_data.apply(lambda row: make_trash_text(row, trash_chute_text, 'TRASH_CHUTE_TONS', 'TRASH_CHUTE_SAUSAGE'), axis=1)
        #cons_data.apply(lambda row: make_trash_text(row, dropsite_text, 'TRASH_DROP_TONS', 'TRASH_DROP_BINS'), axis=1)

        #latex_block += chute_row % tuple(trash_chute_text)
        #latex_block += dropsite_row % tuple(dropsite_text)
        latex_block += chute_row % tuple([round(item, 1) for item in cons_data['TRASH_CHUTE_SAUSAGE'].tolist()])
        latex_block += dropsite_row % tuple([round(item, 1) for item in cons_data['TRASH_DROP_BINS'].tolist()])
        latex_block += standard_row % tuple([r"Est. Drop Sites \tnote{5}"]+cons_data['BLDGS'].apply(lambda x: str(int(x))).tolist())

        
        latex_block += r"\end{tabularx}\bigskip"
        
        latex_block += r'\textbf{Projected Weekly Recycling Volumes}'
        latex_block += opening
        latex_block += top_row % tuple(cons_data['DEV_NAME'].apply(lambda x: clean_text(str(x).title())).tolist())

        mgp_text = []
        cardboard_text= []
        paper_text = []

        #cons_data.apply(lambda row: make_trash_text(row, mgp_text, 'CAPTURED_MGP_TONS_WEEK', 'MGP_BAGS_WEEK'), axis=1)

        #cons_data.apply(lambda row: make_trash_text(row, cardboard_text, 'CAPTURED_CARDBOARD_TONS_WEEK', 'CARDBOARD_BALES_WEEK'), axis=1)

        #cons_data.apply(lambda row: make_trash_text(row, paper_text, 'CAPTURED_PAPER_TONS_WEEK', 'PAPER_BAGS_WEEK'), axis=1)



        latex_block += recycling_row % tuple([r"Metal, Glass, Plastic Captured / Week (tons)"]+[round(item, 1) for item in cons_data['MGP_BAGS_WEEK'].tolist()])
        #latex_block += captured_row % tuple(cons_data['CAPTURED_MGP_CY'].apply(lambda x: str(round(x,2))).tolist())
        latex_block += cardboard_row % tuple([r"Cardboard Captured / Week (tons)"]+[round(item, 1) for item in cons_data['CARDBOARD_BALES_WEEK'].tolist()])
        #latex_block += captured_row % tuple(cons_data['CAPTURED_CARDBOARD_CY'].apply(lambda x: str(round(x,2))).tolist())
        latex_block += recycling_row % tuple([r"Paper Captured / Week (tons)"]+[round(item, 1) for item in cons_data['PAPER_BAGS_WEEK'].tolist()])
        #latex_block += captured_row % tuple(cons_data['CAPTURED_PAPER_CY'].apply(lambda x: str(round(x,2))).tolist())

        #latex_block += OET_row % tuple(['Organics']+cons_data['ORGANICS_CY'].apply(lambda x: str(round(x,2))).tolist())
        #latex_block += OET_row % tuple(['E-Waste']+cons_data['EWASTE_CY'].apply(lambda x: str(round(x,2))).tolist())
        #latex_block += OET_row % tuple(['Textiles']+cons_data['TEXTILES_CY'].apply(lambda x: str(round(x,2))).tolist())

        latex_block += r"\end{tabularx}"

        return latex_block
    
    if num_devs<= 4:
        latex_block = make_waste_distribution_table_block(cons_data, num_cols)

        with open(f'TABLES/waste_distribution_table/{cons_tds}_wd_table.tex', 'w') as file_handle:
            file_handle.write(latex_block)
    
    else:
        latex_block_1 = make_waste_distribution_table_block(cons_data.iloc[0:num_cols_1], num_cols_1)
        latex_block_2 = make_waste_distribution_table_block(cons_data.iloc[num_cols_1:], num_cols_2)
        
        with open(f'TABLES/waste_distribution_table/{cons_tds}_wd_table_1.tex', 'w') as file_handle:
            file_handle.write(latex_block_1)
        with open(f'TABLES/waste_distribution_table/{cons_tds}_wd_table_2.tex', 'w') as file_handle:
            file_handle.write(latex_block_2)
            
    '''

    text_block = r''''''

    text_line_multi = r"{%s}: This development has %s apartment units and %s stairhalls.\\"

    text_line_singular = r"{%s}: This development has %s apartment units and one stairhall.\\"

    for row in cons_data.itertuples():

        if int(row.STAIRHALLS) == 1:
            text_block += text_line_singular % (clean_text(row.DEV_NAME.title()), int(row.CURRENT_APTS))
        else:
            text_block += text_line_multi % (clean_text(row.DEV_NAME.title()), int(row.CURRENT_APTS), int(row.STAIRHALLS))


    with open(f'TEXT/waste_distribution_bottom/{cons_tds}_wd_bottom.tex', 'w') as file_handle:
        file_handle.write(text_block)
    
    '''
    with open(f'TEXT/waste_distribution_bottom/{cons_tds}_wd_bottom.tex', 'w') as file_handle:
        file_handle.write('')  
        
    top_block_template = r'''
    Quantifying how much waste is generated at each consolidation will inform how well current assets and services serve current needs, and what additional elements are necessary for each consolidation to operate as efficiently as possible.
    
    %s has %s 30-cubic yard exterior compactors. %s'''
    
    #wsa_data = load_wsa_data()
    wsa_data = wsa_data.query(f"TDS in {counts[cons_tds]['developments']}")
    extcomp_total = int(wsa_data['EXT_COMP_BE'].sum())
    #print(cons_data['TRASH_ACTUAL_CY'])
    #print(cons_data['TRASH_ACTUAL_CY'].sum())
    #days_to_fill = extcomp_total*(cons_data['TRASH_ACTUAL_CY'].sum())/(30)
    weight_df = cons_data[['TDS','DEV_NAME','TONS_PER_CONTAINER']].dropna()
    
    if weight_df.shape[0] > 0:
        weight_at_collection = round(weight_df['TONS_PER_CONTAINER'].mean(),1)
        weight_text = f'On average, the exterior compactors at this consolidation contain {weight_at_collection} tons of waste at the time of collection. DSNY prefers compactors to contain more than 7 tons and up to 12 tons at collection. The closer to 12 tons, the more efficient collection is for both DSNY and the consolidation.'
    else:
        weight_at_collection = None
        weight_text = 'The average weight of DSNY collections at this consolidation are unknown.'
    
    top_block_data = []
    top_block_data.append(clean_text(str(consolidations[cons_tds]['name']).title()))
    
    if extcomp_total > 0:
        top_block_data.append(f'({str(extcomp_total)})')
    else:
        top_block_data.append('no')
        
    top_block_data.append(weight_text)
    
    with open(f'TEXT/waste_distribution_top/{cons_tds}_wd_top.tex', 'w') as file_handle:
        file_handle.write(top_block_template % tuple(top_block_data))
    
    #print(top_block_data)
    
    

    

In [21]:
add_waste_cols(load_overview_data()).to_csv('OVERVIEW_WITH_WASTE.csv')

In [60]:
overview_data = add_waste_cols(load_overview_data())
wsa_data = load_wsa_data()
for tds in consolidations.keys():
    try:
        make_waste_distribution_table(tds, overview_data, wsa_data)
    except:
        print(f'Exception raised by {tds}')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


Exception raised by NaN


#### Make Capital Improvements Table

In [36]:
def load_asset_data():
    asset_data = {'fwd': ['In-Sink Food Grinders', pd.read_csv('DATA/capital_fwd.csv')],
                  'ehd': ['Enlarged Hopper Doors', pd.read_csv('DATA/capital_ehd.csv')],
                  'int_compactor':['Interior Compactor Replacement', pd.read_csv('DATA/capital_intcom.csv')],
                  'wasteyard':['Waste Yard Redesign', pd.read_csv('DATA/capital_wasteyard.csv')]}

    for value in asset_data.values():
        value[1].columns = [item.strip() for item in value[1].columns]

    asset_data['wasteyard'][1]['ESTIMATE'] = asset_data['wasteyard'][1]['TOT_EST']
    asset_data['wasteyard'][1]['COST'] = np.nan
    
    def year_to_string(year):
        if pd.isna(year):
            return 'N/A'
        else:
            if int(year) <= 2022:
                return str(int(year))
            elif (int(year) > 2022) & (int(year) <= 2025):
                return '2023-2025'
            elif (int(year)>2025) and (int(year)<=2030):
                return '2026-2030'
            else:
                return 'After 2030'
    
    asset_data['fwd'][1]['_YEAR'] = asset_data['fwd'][1]['EST_YEAR'].apply(lambda x: year_to_string(x))
    asset_data['ehd'][1]['_YEAR'] = asset_data['ehd'][1]['CYEAR'].apply(lambda x: year_to_string(x))
    asset_data['int_compactor'][1]['_YEAR'] = asset_data['int_compactor'][1]['CYEAR'].apply(lambda x: year_to_string(x))
    asset_data['wasteyard'][1]['_YEAR'] = asset_data['wasteyard'][1]['CONS_CYEAR'].apply(lambda x: year_to_string(x))
    return asset_data

In [37]:
def make_capital_table(cons_tds, asset_data, overview_data=overview_data):
    cons_data = overview_data[overview_data['CONS_TDS'] == cons_tds]
    num_devs = cons_data.shape[0]

    def make_capital_table_block(block_data, num_devs):
        dev_col_format = r'X|'
        header = r'''
        \begin{tabularx}{\textwidth}{r|%s}
        \cline{2-%s}
        ''' % ((dev_col_format*num_devs), num_devs)

        top_row = r"\multicolumn{1}{l|}{}                                                        "+r"& \cellcolor{ccorange}{\color[HTML]{FFFFFF}%s} "*num_devs+r"\\ \hline"+"\n"

        project_block = r"\multicolumn{1}{|V{.2\columnwidth}|}{\cellcolor{ccorangelight}%s}          "+(r"&                                                                  "*num_devs)+r"\\"+r'''
        \multicolumn{1}{|r|}{\cellcolor{ccorangelight}\textit{Status}}                '''+(r"& %s                                                         "*num_devs)+r'''\\
        \multicolumn{1}{|r|}{\cellcolor{ccorangelight}\textit{%s}}                  '''+("& %s                                                     "*num_devs)+r"\\ \hline"+"\n"

        devs = block_data['DEV_NAME'].apply(lambda x: str(x).upper()).tolist()
        devs_title = block_data['DEV_NAME'].apply(lambda x: clean_text(str(x).title())).tolist()
        latex_block = ''
        latex_block += header
        latex_block += top_row % tuple(block_data['DEV_NAME'].apply(lambda x: clean_text(str(x).title())).tolist())

        for asset in asset_data.keys():
            asset_df = asset_data[asset][1]
            #print(asset_data[asset][0])
            #print(devs)
            #print(asset_df['DEVELOPMENT'].tolist())
            if any((dev in asset_df['DEVELOPMENT'].tolist()) for dev in devs):
                status_list = []
                year_list = []

                for dev in devs:
                    if dev in asset_df['DEVELOPMENT'].tolist():
                        #print(dev)
                        if pd.isna(asset_df.loc[asset_df['DEVELOPMENT']== dev,'STATUS'].iloc[0]):
                            status_list.append('Not Yet Scheduled')
                        else:
                            status_list.append(str(asset_df.loc[asset_df['DEVELOPMENT']== dev, 'STATUS'].iloc[0]).title())

                        #print(asset_df.loc[asset_df['DEVELOPMENT']== dev, 'STATUS'])
                        #print(asset_df.loc[asset_df['DEVELOPMENT']== dev, 'COST'])
                        #try:
                        year_list.append(str(asset_df.loc[asset_df['DEVELOPMENT']== dev,'_YEAR'].iloc[0]))
                    #except:
                            #year_list.append('TBD')

                    else:
                        status_list.append('N/A')
                        year_list.append(' ')

                asset_block = project_block % tuple([asset_data[asset][0]]+status_list+['Year Planned']+year_list)

                latex_block += asset_block

        latex_block += r"\end{tabularx}"

        return latex_block
    
    
    if num_devs <= 4:
        num_cols = num_devs
        block_data = cons_data
        
        with open(f"TABLES/capital_projects_table/{cons_tds}_capital_projects.tex", 'w') as file_handle:
            file_handle.write(make_capital_table_block(block_data, num_cols))
        
    elif num_devs > 4:
        num_cols_1 = math.ceil(num_devs/2)
        num_cols_2 = (num_devs-num_cols_1)
        block_data_1 = cons_data.iloc[0:num_cols_1]
        block_data_2 = cons_data.iloc[num_cols_1:]
        
        with open(f"TABLES/capital_projects_table/{cons_tds}_capital_projects_1.tex", 'w') as file_handle:
            file_handle.write(make_capital_table_block(block_data_1, num_cols_1))
            
        with open(f"TABLES/capital_projects_table/{cons_tds}_capital_projects_2.tex", 'w') as file_handle:
            file_handle.write(make_capital_table_block(block_data_2, num_cols_2))
    
    pass
    

In [38]:
asset_data= load_asset_data()
overview_data = load_overview_data()
for tds in consolidations.keys(): 
    make_capital_table(tds, asset_data, overview_data)

#### Make Staff Table

In [79]:
def load_staff_data(name_dict):
    #Read budgeted staff and formula allocation
    dev_staff = pd.read_csv('DATA/staff_for_table.csv')
    dev_staff.fillna(0,inplace=True)
    
    def find_cons_tds(name, name_dict):
        corrections = {'East River CONSOLIDATED':'009',
                      'FOREST CONSOLIDATION': '059',
                      'WILSON CONSOLIDATED': '112',
                      'WOODSON CONSOLIDATED': '182',
                      'BEACH 41ST STREET CONSOLIDATION':'165',
                      'TAYLOR-WYTHE CONSOLIDATED': '234'}
        for key, value in name_dict.items():
            if (name == value['name']) | (name in value['alternates']):
                return key
            else:
                try:
                    return corrections[name]
                except:
                    pass
            
    dev_staff['CONS_TDS'] = dev_staff['Consolidation'].apply(lambda x: find_cons_tds(x, name_dict))
    dev_staff['CONS_NAME'] = dev_staff['CONS_TDS'].apply(lambda x: name_dict[x]['name'] if x is not None else 'NO NAME FOUND')
    #Note: Staff list missing for Armstrong, Ft. Washington, and Williams Plaza, as well as scatter-site third-party-managed consolidations
    dev_staff.to_csv('DATA/_TROUBLESHOOTING_DEV_STAFF.csv')
    #Read budgeted staff and actuals
    actuals_data = pd.read_csv('DATA/Staffing_Analysis/DEVHC.csv')
    actuals_data.fillna(0, inplace=True)
    actuals_data = actuals_data[actuals_data['RC Name'].apply(lambda x: "total" not in str(x).lower()) & actuals_data['Department'].apply(lambda x: "total" not in str(x).lower())]
    actuals_data['CONS_TDS'] = actuals_data['RC Name'].apply(lambda x: find_cons_tds(x, name_dict))
    actuals_data['CONS_NAME'] = actuals_data['CONS_TDS'].apply(lambda x: name_dict[x]['name'] if x is not None else 'NO NAME FOUND')

    def convert_neg(x):
        try:
            return int(x)
        except:
            return int('-'+str(x).replace('(','').replace(')',''))

    actuals_data['VARIANCE'] = actuals_data['Unnamed: 5'].apply(lambda x: convert_neg(x))
    actuals_data['ACT'] = actuals_data['13']
    
    table_frame = pd.read_csv('DATA/Table_Keys.csv')
    actuals_keys = pd.read_csv('DATA/Staffing_Analysis/DEVHC_CODES.csv')
    
    actuals_data = actuals_data.merge(actuals_keys, how='left', left_on='CST_NAME', right_on='TITLE_NAME')
    for column in ['Current Modified', 'ACT', 'VARIANCE']:
        actuals_data[column] = actuals_data[column].astype(int)
        
    actuals_data.to_csv('DATA/_TROUBLESHOOTING_ACTUALS.csv')
    
    return (dev_staff, actuals_data, table_frame, actuals_keys)

In [80]:
def make_staff_table(cons_tds, dev_staff, actuals_data, table_frame, actuals_keys):
    #Fetching staff data for consolidation
    cons_data = dev_staff.loc[dev_staff['CONS_TDS'] == cons_tds]
    if cons_data.shape[0] == 0:
        with open(f'TABLES/staff_table/{cons_tds}_staff_table.tex', 'w') as file_handle:
            file_handle.write('')
        return(f'Consolidation {cons_tds} not found in staffing data.')
    #print(cons_tds)
    #print(dev_staff)
    
    # Isolate and process actuals data for consolidation
    try:
        cons_actuals = actuals_data[actuals_data['CONS_TDS'] == cons_tds]
    except:
        print(f'{cons_tds} not found in actuals.')
        return np.NaN
    
    cons_actuals = cons_actuals[['CONS_NAME', 'CONS_TDS', 'Current Modified', 'ACT', 
                                 'CODE_KEY', 'CODE_NAME']].groupby(by='CODE_KEY', as_index=False).agg({'CONS_NAME': 'first',
                                                                                                       'CONS_TDS': 'first',
                                                                                                     'Current Modified':sum,
                                                                                                     'ACT':sum,
                                                                                                     'CODE_NAME':'first'})
    cons_actuals
    cons_actuals.loc['Total']= cons_actuals.sum(numeric_only=True, axis=0)
    cons_actuals.loc['Total','CODE_KEY'] = 11
    cons_actuals.loc['Total','CODE_NAME'] = 'TOT'
    
    for row in cons_actuals.itertuples():
        cons_data[f'{row.CODE_NAME}_ACT'] = row.ACT
    #print(cons_data)
    #Setting up table and transposing data
    cons_table_frame = table_frame
    cons_table_frame['Formula'] = cons_table_frame['FORMULA_KEY'].iloc[:-1].apply(lambda key: cons_data[key].iloc[0])
    cons_table_frame['Budgeted'] = cons_table_frame['BUDG_KEY'].apply(lambda key: cons_data[key].iloc[0])
    cons_table_frame['Actual'] = cons_table_frame['ACTUALS_KEY'].iloc[:-2].apply(lambda key: cons_data[key].iloc[0] if key in cons_data.columns else 0)

    
    #Simplifying table
    cons_table = cons_table_frame[['CHART_LINE', 'Formula', 'Budgeted', 'Actual']]
    #print(cons_table)
    
    #Defining LaTeX table format
    
    def make_staff_table_block(staff_data):
    
        table_template = r'''
        \begin{tabular}{l|c|c|c|}
        \cline{2-4}
                                                                                     & \cellcolor{ccfuschia}{\color[HTML]{FFFFFF} Formula Allocation \tnote{1}} & \cellcolor{ccfuschia}{\color[HTML]{FFFFFF} Budgeted} & \cellcolor{ccfuschia}{\color[HTML]{FFFFFF} Actual Staff (June 2020)} \\ \hline
        \multicolumn{1}{|l|}{\cellcolor{ccfuschialight}Employees}                      & %s                                                      & %s                                                                & %s                                                        \\ \hline
        \multicolumn{1}{|l|}{\cellcolor{ccfuschialight}Property Manager}               & %s                                                      & %s                                                                & %s                                                       \\ \hline
        \multicolumn{1}{|l|}{\cellcolor{ccfuschialight}Asst. Property Manager}         & %s                                                      & %s                                                                & %s                                                       \\ \hline
        \multicolumn{1}{|l|}{\cellcolor{ccfuschialight}Secretaries}                    & %s                                                      & %s                                                                & %s                                                      \\ \hline
        \multicolumn{1}{|l|}{\cellcolor{ccfuschialight}Housing Assistants}             & %s                                                      & %s                                                                & %s                                                      \\ \hline
        \multicolumn{1}{|l|}{\cellcolor{ccfuschialight}Superintendent}                 & %s                                                      & %s                                                                & %s                                                      \\ \hline
        \multicolumn{1}{|l|}{\cellcolor{ccfuschialight}Assistant Superintendent}       & %s                                                      & %s                                                                & %s                                                      \\ \hline
        \multicolumn{1}{|l|}{\cellcolor{ccfuschialight}Supervisor of Caretakers (SOC)} & %s                                                      & %s                                                                & %s                                                      \\ \hline
        \multicolumn{1}{|l|}{\cellcolor{ccfuschialight}Supervisor of Grounds (SOG)}    & %s                                                      & %s                                                                & %s                                                      \\ \hline
        \multicolumn{1}{|l|}{\cellcolor{ccfuschialight}Maintenance Workers}            & %s                                                      & %s                                                                & %s                                                       \\ \hline
        \multicolumn{1}{|l|}{\cellcolor{ccfuschialight}Caretakers X}                   & %s                                                      & %s                                                                &                                                       \\ \cline{1-3}
        \multicolumn{1}{|l|}{\cellcolor{ccfuschialight}Caretakers J\tnote{2}}                   &                                                       & %s                                                                &                                                         \\ \cline{1-1} \cline{3-3}
        \multicolumn{1}{|l|}{\cellcolor{ccfuschialight}Caretakers G}                   & \multirow{-2}{*}{%s}                                                      & %s                                     & \multirow{-3}{*}{%s \tnote{3}}                           \\ \hline
        \end{tabular}
        
        '''

        values = []

        def extract_data_through_mw(row):
            [values.append(item) for item in [str(int(row['Formula'])), 
                                              str(int(row['Budgeted'])), 
                                              str(int(row['Actual']))]]
            pass

        #Processing through Maintenance Worker
        staff_data.iloc[0:-3].apply(lambda row: extract_data_through_mw(row), axis=1)

        #Processing Caretakers
        values.append(str(int(staff_data.iloc[-3, 1])))
        values.append(str(int(staff_data.iloc[-3, 2])))
        values.append(str(int(staff_data.iloc[-2, 2])))
        values.append(str(int(staff_data.iloc[-2, 1])))
        values.append(str(int(staff_data.iloc[-1, 2])))
        values.append(str(int(staff_data.iloc[-3, 3])))

        return table_template % tuple(values)
    
    #Make and export LaTeX code
    with open(f'TABLES/staff_table/{cons_tds}_staff_table.tex', 'w') as file_handle:
        file_handle.write(make_staff_table_block(cons_table))

In [81]:
def load_staff_description_text():
    text = get_docx_text('TEXT/WM_Role_Descriptions.docx')
    return clean_text(text)

In [82]:
staff_data = load_staff_data(consolidations)
#staff_descriptions = load_staff_description_text()

for tds in consolidations.keys():
    make_staff_table(tds, *staff_data)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


### Make Analysis Layout

In [47]:
# Process File Names
def process_analysis_graphic_paths():
    
    def clean_paths(path_list):
        clean_path_list = []
        for path in path_list:
            if (' ' in path.split('/')[-1]) or ('&' in path.split('/')[-1]):
                os.rename(path, path.replace(' ','-').replace('&','-'))
                clean_path_list.append(path)
            else:
                clean_path_list.append(path)
            
        return clean_path_list
            
    cons_bar_charts_raw = list(glob.glob('WORK_ORDER_ANALYSIS/Consolidation_BarCharts/png/*'))
    cons_bar_charts = clean_paths(cons_bar_charts_raw)
    
    tds_nums = [path.split('/')[-1].split('_')[0].zfill(3) for path in cons_bar_charts]

    cons_chart_paths = {}
    for pair in list(zip(tds_nums, cons_bar_charts)):
        cons_chart_paths[pair[0]] = pair[1]
    
    dev_chart_paths = {}

    dev_bar_charts_raw = glob.glob('WORK_ORDER_ANALYSIS/Development_BarCharts/png/*')
    dev_bar_charts = clean_paths(dev_bar_charts_raw)
    
    dev_tds_nums = [path.split('/')[-1].split('_')[0].zfill(3) for path in dev_bar_charts]

    for pair in list(zip(dev_tds_nums, dev_bar_charts)):
        dev_chart_paths[pair[0]] = {'Development_BarCharts': pair[1]}

    for directory in ['Dev_Interior_Comp_Repair_BarCharts', 'Dev_Exterior_Comp_Repair_BarCharts']:
        paths_raw = glob.glob(f'WORK_ORDER_ANALYSIS/{directory}/png/*')
        paths = clean_paths(paths_raw)
        tds_nums = [path.split('/')[-1].split('_')[3].zfill(3) for path in paths]

        for pair in list(zip(tds_nums, paths)):
            try:
                dev_chart_paths[pair[0]][directory] = pair[1]
            except:
                dev_chart_paths[pair[0]] = {directory: pair[1]}
                
    return (cons_chart_paths, dev_chart_paths)


In [48]:
def make_image_layout(tds, cons_chart_paths, dev_chart_paths, cons_dict):
    analysis_image_layout = r''''''
    
    cons_devs = cons_dict[tds]['developments']
    
    dev_bar_paths = []
    dev_int_paths = []
    dev_ext_paths = []
    
    for dev in cons_devs:
        try:
            dev_bar_paths.append(dev_chart_paths[dev]['Development_BarCharts'])
        except:
            pass
        
        try:
            dev_int_paths.append(dev_chart_paths[dev]['Dev_Interior_Comp_Repair_BarCharts'])
        except:
            pass
        
        try:
            dev_ext_paths.append(dev_chart_paths[dev]['Dev_Exterior_Comp_Repair_BarCharts'])
        except:
            pass
    
    #Adding consolidation and development bar charts
    bar_charts_heading = r'''\begin{center}
                                \tablehead{\hspace{1cm}\\}
                                \tabletail{\hspace{1cm}\\}
                                \begin{supertabular}{p{0.5\textwidth}p{0.5\textwidth}}
                                \shrinkheight{1in}
                                \multicolumn{2}{p{\textwidth}}{The following bar charts show how frequently various types of maintenance issue -- including compactor-related problems, pest problems, and plumbing issues -- occur in compactor locations consolidation-wide as well as at major developments.} \\
                                \multicolumn{2}{c}{\includegraphics[width=0.6\textwidth]{\rootpath/'''+cons_chart_paths[tds]+r'''}} \\
                                '''
    
    if len(dev_bar_paths) > 1:
        analysis_image_layout += bar_charts_heading
        
        i = 0
        while i < len(dev_bar_paths):
            if (len(dev_bar_paths)-i) >= 2:
                analysis_image_layout += r'''\includegraphics[width=0.45\textwidth]{\rootpath/'''+dev_bar_paths[i]+r'''} & \includegraphics[width=0.45\textwidth]{\rootpath/'''+dev_bar_paths[i+1]+r'''} \\
                                        '''
            else:
                analysis_image_layout += r'''\includegraphics[width=0.45\textwidth]{\rootpath/'''+dev_bar_paths[i]+r'''} &  \hspace{1cm} \\
                                        '''
            
            i += 2
        analysis_image_layout += r'\end{supertabular}'+'\n'+r'\end{center}'+'\n'
        
    elif len(dev_bar_paths) == 1:
        analysis_image_layout += bar_charts_heading
        analysis_image_layout += r'''\multicolumn{2}{c}{\includegraphics[width=0.6\textwidth]{\rootpath/'''+dev_bar_paths[0]+r'''}} \\
                                    \end{supertabular}
                                    \end{center}
                                    '''
    else:
        analysis_image_layout += bar_charts_heading.replace(' as well as at major developments','').replace('bar charts show','bar chart shows')
        analysis_image_layout += r'\end{supertabular}'+'\n'+r'\end{center}'+'\n'
        
    
    #Adding interior compactor section, including tables
    int_comp_heading = r'''
                        \begin{center}
                        \tablehead{\hspace{1cm}\\}
                        \tabletail{\hspace{1cm}\\}
                        \begin{supertabular}{p{0.5\textwidth}p{0.5\textwidth}}
                        \multicolumn{2}{p{\textwidth}}{The following figures highlight repairs conducted in interior compactor locations at each major development, as well as within up to five buildings at each development.} \\
                        '''
    if len(dev_int_paths) > 1:
        analysis_image_layout += int_comp_heading
        
        i = 0
        while i < len(dev_int_paths):
            if (len(dev_int_paths)-i) >= 2:
                analysis_image_layout += r'''\includegraphics[width=0.45\textwidth]{\rootpath/'''+dev_int_paths[i]+r'''} & \includegraphics[width=0.45\textwidth]{\rootpath/'''+dev_int_paths[i+1]+r'''} \\
                                        '''
            else:
                analysis_image_layout += r'''\includegraphics[width=0.45\textwidth]{\rootpath/'''+dev_int_paths[i]+r'''} &  \hspace{1cm} \\
                                        '''
            i += 2
            
        analysis_image_layout += r'\multicolumn{2}{c}{\input{\rootpath/WORK_ORDER_ANALYSIS/Dev_Interior_Comp_Repair_Tables/\tds_repair_table}} \\'+'\n'
        analysis_image_layout += r'\end{supertabular}'+'\n'+r'\end{center}'+'\n'
        
    elif len(dev_int_paths) == 1:
        analysis_image_layout += int_comp_heading
        analysis_image_layout += r'''\multicolumn{2}{c}{\includegraphics[width=0.6\textwidth]{\rootpath/'''+dev_int_paths[0]+r'''}} \\
                                    \multicolumn{2}{c}{\input{\rootpath/WORK_ORDER_ANALYSIS/Dev_Interior_Comp_Repair_Tables/\tds_repair_table}} \\
                                    \end{supertabular}
                                    \end{center}
                                    '''
    else:
        analysis_image_layout += int_comp_heading.replace('at each major development, as well as within','in').replace('figures highlight','tables highlight')
        analysis_image_layout += r'\multicolumn{2}{c}{\input{\rootpath/WORK_ORDER_ANALYSIS/Dev_Interior_Comp_Repair_Tables/\tds_repair_table}} \\'+'\n'
        analysis_image_layout += r'''\end{supertabular}
                                    \end{center}
                                    '''
    
    #Adding exterior compactor charts
    ext_comp_heading = r'''
                        \begin{center}
                        \tablehead{\hspace{1cm}\\}
                        \tabletail{\hspace{1cm}\\}
                        \begin{supertabular}{p{0.5\textwidth}p{0.5\textwidth}}
                        \multicolumn{2}{p{\textwidth}}{The following charts examine repairs made at exterior compactor locations at major developments.} \\
                        '''
    if len(dev_ext_paths) > 1:
        analysis_image_layout += ext_comp_heading
        
        i = 0
        while i < len(dev_ext_paths):
            if (len(dev_ext_paths)-i) >= 2:
                analysis_image_layout += r'''\includegraphics[width=0.45\textwidth]{\rootpath/'''+dev_ext_paths[i]+r'''} & \includegraphics[width=0.45\textwidth]{\rootpath/'''+dev_ext_paths[i+1]+r'''} \\
                                        '''
            else:
                analysis_image_layout += r'''\includegraphics[width=0.45\textwidth]{\rootpath/'''+dev_ext_paths[i]+r'''} &  \hspace{1cm} \\
                                        '''
            
            i += 2
        analysis_image_layout += r'\end{supertabular}'+'\n'+r'\end{center}'+'\n'
        
    elif len(dev_ext_paths) == 1:
        analysis_image_layout += ext_comp_heading.replace('charts examine', 'chart examines').replace(' at major developments','')
        analysis_image_layout += r'''\multicolumn{2}{c}{\includegraphics[width=0.6\textwidth]{\rootpath/'''+dev_ext_paths[0]+r'''}} \\
                                    \end{supertabular}
                                    \end{center}
                                    '''
    else:
        pass
        
    with open(f'WORK_ORDER_ANALYSIS/image_layouts/{tds}_layout.tex', 'w') as file_handle:
        file_handle.write(analysis_image_layout)

In [49]:
cons_chart_paths, dev_chart_paths = process_analysis_graphic_paths()
for tds in consolidations.keys():
    try:
        make_image_layout(tds, cons_chart_paths, dev_chart_paths, consolidations)
    except:
        print(f'{tds} raised exception')

210 raised exception
128 raised exception
NaN raised exception


### Assembling Appendices

#### Site Plans

In [44]:
def process_site_plans():
    plan_format = re.compile(r'[A-z0-9\s]+_[0-9]{3}_[A-z\s]+.pdf')
    site_plan_pdf_candidates = glob.glob('APPENDICES/site_plans/*/*/*.*', recursive=True)
    site_plan_pdfs = [path for path in site_plan_pdf_candidates if bool(plan_format.match(path.split('/')[-1]))]

    for f in site_plan_pdf_candidates:
        if f not in site_plan_pdfs:
            os.remove(f)
        else:
            tds = f.split('/')[-1].split('_')[1]
            try:
                file = convert_from_path(f, dpi=300, single_file=True)[0]
                path = f'APPENDICES/site_plans/{tds}.png'
                height = file.height
                width = file.width
                if file.width > file.height:
                    file = file.rotate(90, expand=True)#.resize((height, width))
                if file.height > 3300:
                    scale_ratio = 3300/float(file.height)
                    new_height = 3300
                    new_width = int(float(file.width)*scale_ratio)
                    file = file.resize((new_width, new_height))
                file.save(path, 'PNG')
                #print(f"saved {tds}")
            except:
                print(f'{tds} raised exception')
                
    site_plans = glob.glob('APPENDICES/site_plans/*.png')
    dev_list = [path.split('/')[-1].split('.')[0] for path in site_plans]
    for cons, content in consolidations.items():
        img_list = []
        pdf_filename = f'APPENDICES/site_plans/{cons}.pdf'
        for dev in content['developments']:
            if dev in dev_list:
                img_list.append(Image.open(f'APPENDICES/site_plans/{dev}.png'))
        if len(img_list)==1:
            img_list[0].save(pdf_filename, 'PDF', resolution=300.0)
        elif len(img_list)>1:
            img_list[0].save(pdf_filename, 'PDF', resolution=300.0, save_all=True, append_images=img_list[1:])
        else:
            pass
        

In [45]:
site_plans = glob.glob('APPENDICES/site_plans/*.png')
dev_list = [path.split('/')[-1].split('.')[0] for path in site_plans]
for cons, content in consolidations.items():
    img_list = []
    pdf_filename = f'APPENDICES/site_plans/{cons}.pdf'
    try:
        for dev in content['developments']:
            if dev in dev_list:
                img_list.append(Image.open(f'APPENDICES/site_plans/{dev}.png'))
    except:
        print(f'{cons} raised exception')
    if len(img_list)==1:
        img_list[0].save(pdf_filename, 'PDF', resolution=300.0)
    elif len(img_list)>1:
        img_list[0].save(pdf_filename, 'PDF', resolution=300.0, save_all=True, append_images=img_list[1:])
    else:
        pass
    

NaN raised exception


#### Floorplans

In [48]:
floor_plan_paths = glob.glob('APPENDICES/floorplans/*')

candidate_list = []
for key, value in developments.items():
    candidate_list.append(str(value['name']).upper())
    for item in value['name_alternates']:
        candidate_list.append(item.upper())

def get_dev_name(name):
    match = process.extractOne(str(name).upper(), candidate_list)[0]
    #print(match)
    for key, value in developments.items():
        if (match.upper() == value['name'].upper()) or (match.upper() in [val.upper() for val in value['name_alternates']]):
            return value['name']

    return '!!!NOT FOUND'


def get_tds_from_name(x):
    for key, value in developments.items():
        if x == value['name']:
            return key
        
    return 'N/A'

floor_plan_names = [get_dev_name(path.split('/')[-1].replace('.pdf','')) for path in floor_plan_paths]
floor_plan_tds = [get_tds_from_name(name) for name in floor_plan_names]

floor_plans = pd.DataFrame(data=list(zip(floor_plan_tds, floor_plan_names, floor_plan_paths)), columns=['TDS', 'NAME', 'PATH'])

In [49]:
pd.set_option('display.max_rows', None)
floor_plans.to_csv('APPENDICES/floorplans/floor_plans_for_screening.csv')

### Making and Compiling LaTeX Files

In [17]:
#pdfjam --nup 2x1 --openright 'true' --frame 'true' REPORTS/tds_report.pdf --outfile tds_spread.pdf --landscape

359 raised exception: <class 'subprocess.CalledProcessError'>
210 raised exception: <class 'subprocess.CalledProcessError'>
128 raised exception: <class 'subprocess.CalledProcessError'>
NaN raised exception: <class 'KeyError'>
