Updates the data descriptions and Data Sharing Hub forms for conformity results.

In [25]:
import os
import csv
from pathlib import Path, PurePath

import docx
import docx2pdf

In [26]:
# Conformity cycle identifier. The generator functions below read characters
# 1-2 as the two-digit vintage year and the last two characters as the quarter.
cycle_code = 'C20Q4'
# Release date written as "<Month> <day>, <year>". It is split on whitespace
# downstream: the first token is used as the month, the third as the year.
release_date = 'January 14, 2021'
# Maps each year code (part of the dataset directory name) to its analysis year.
analysis_years = {
  '100': '2015',
  '200': '2020',
  '300': '2025',
  '400': '2030',
  '600': '2040',
  '700': '2050'
}
# Word template used to build the data description document.
template1 = './templates/DataDescriptionTemplate.docx'
# Word template used to build the Data Sharing Hub (DSH) form.
template2 = './templates/DSHFormTemplate.docx'
# Base results directory: datasets are searched for under "<dir_results>/output"
# and the generated DSH forms are saved directly into it.
dir_results = '..'

In [27]:
def copy_trip_totals(from_csv, to_table, analysis_year):
    """Copy trip totals from a CSV file into one column of a document table.

    The column at index 3 of ``to_table`` is filled in: its first cell (the
    header) receives ``analysis_year`` and each CSV row, in file order, is
    written to the next cell down as a rounded integer with thousands
    separators (e.g. ``1,235``).

    Args:
        from_csv: Path to the CSV file. Column names are supplied here
            (``matrix_id``, ``trip_type``, ``trips``), so every line of the
            file, including the first, is treated as data.
        to_table: Table object exposing ``column_cells(idx)``. Each cell that
            is written to must already hold a paragraph with at least one
            run, because the text of the first run is overwritten in place.
        analysis_year: Text placed in the column's header cell.

    Raises:
        IndexError: If the CSV has more rows than the column has cells below
            the header, or a target cell has no paragraph/run to write to.
        ValueError: If a ``trips`` value cannot be parsed as a number.
    """
    col_trip_totals = to_table.column_cells(3)
    col_trip_totals[0].paragraphs[0].runs[0].text = analysis_year
    # The csv module expects files opened with newline='' so that it performs
    # its own newline handling (needed for newlines inside quoted fields).
    with open(from_csv, newline='') as f:
        reader = csv.DictReader(f, fieldnames=['matrix_id', 'trip_type', 'trips'])
        # Row 0 of the column is the header, so data rows start at index 1.
        for i, row in enumerate(reader, start=1):
            cell = col_trip_totals[i]
            trip_total = round(float(row['trips']))
            cell.paragraphs[0].runs[0].text = '{:,}'.format(trip_total)

In [28]:
def generate_data_description(cycle_code, year_code, release_date):
    """Build the data description PDF for one analysis year.

    Fills the text tags in the ``template1`` Word document, copies the trip
    totals from the dataset's CSV files into two of the template's tables,
    saves the document inside the dataset directory, converts it to PDF and
    finally deletes the intermediate DOCX file.

    Args:
        cycle_code: Conformity cycle code, e.g. ``'C20Q4'``.
        year_code: Key of ``analysis_years`` identifying the analysis year.
        release_date: Release date formatted like ``'January 14, 2021'``
            (month name, day, four-digit year separated by whitespace).

    Raises:
        KeyError: If ``year_code`` is not a key of ``analysis_years``.
        FileNotFoundError: If nothing under ``<dir_results>/output`` matches
            the cycle code and year code.
    """
    release_parts = release_date.split()
    replacement_text = {
      'RELEASE_DATE': release_date,
      'ANALYSIS_YEAR': analysis_years[year_code],
      'RELEASE_MONTH': release_parts[0],
      'RELEASE_YEAR': release_parts[2],
      'CYCLE_CODE_L': cycle_code.lower(),
      'CYCLE_CODE_U': cycle_code.upper(),
      'YEAR_CODE': year_code
    }

    # Locate the dataset directory up front so a missing dataset is reported
    # clearly. Path.glob yields matches in no guaranteed order, so the matches
    # are sorted to make the selection deterministic when several exist.
    pattern = 'output/{}_{}*'.format(cycle_code.lower(), year_code)
    dir_matches = sorted(Path(dir_results).glob(pattern))
    if not dir_matches:
        raise FileNotFoundError(
            'No dataset matching {!r} found under {!r}'.format(pattern, dir_results)
        )
    dir_dataset = dir_matches[0]

    # Replace generic text in template
    # NOTE(review): tags are matched one run at a time, so a tag that the
    # template stores split across two runs would be left untouched -- confirm
    # the template keeps each tag inside a single run.
    template = docx.Document(template1)
    for paragraph in template.paragraphs:
        for run in paragraph.runs:
            for tag, replacement in replacement_text.items():
                if tag in run.text:
                    run.text = run.text.replace(tag, replacement)

    # Fill in trip totals (tables 3 and 4 of the template hold the all-trip
    # and work-trip totals respectively).
    analysis_year = replacement_text['ANALYSIS_YEAR']
    pth_trip_totals = dir_dataset.joinpath('trips/totals.csv')
    copy_trip_totals(pth_trip_totals, template.tables[3], analysis_year)
    pth_wrktrip_totals = dir_dataset.joinpath('trips/work_trips/totals.csv')
    copy_trip_totals(pth_wrktrip_totals, template.tables[4], analysis_year)

    # Save as PDF: write a temporary DOCX next to the data, convert it, then
    # remove the DOCX so only the PDF remains.
    f_out = 'DataDescription_{}_{}_{}{}.docx'.format(
      cycle_code.upper(),
      replacement_text['ANALYSIS_YEAR'],
      replacement_text['RELEASE_MONTH'],
      replacement_text['RELEASE_YEAR']
    )
    pth_out = dir_dataset.joinpath(f_out)
    template.save(pth_out)
    docx2pdf.convert(pth_out)
    os.remove(pth_out)

In [29]:
def _replace_tags_in_paragraphs(paragraphs, replacement_text):
    """Replace every tag found in the runs of ``paragraphs`` with its value."""
    for paragraph in paragraphs:
        for run in paragraph.runs:
            for tag, replacement in replacement_text.items():
                if tag in run.text:
                    run.text = run.text.replace(tag, replacement)


def generate_dsh_form(cycle_code, year_code, release_date):
    """Build the Data Sharing Hub (DSH) form for one analysis year.

    Fills the text tags in the ``template2`` Word document, both in its body
    paragraphs and in the cells of column index 1 of every table, then saves
    the result in ``dir_results`` as
    ``DSHForm_<vintage year>_<quarter>_AnalysisYear_<year>_InputsAndOutputs.docx``.

    Args:
        cycle_code: Conformity cycle code such as ``'C20Q4'``; characters 1-2
            are used as the two-digit vintage year and the last two
            characters as the quarter.
        year_code: Key of ``analysis_years`` identifying the analysis year.
        release_date: Release date formatted like ``'January 14, 2021'``
            (month name, day, four-digit year separated by whitespace).

    Raises:
        KeyError: If ``year_code`` is not a key of ``analysis_years``.
        FileNotFoundError: If nothing under ``<dir_results>/output`` matches
            the cycle code and year code.
    """
    # Path.glob yields matches in no guaranteed order, so the matches are
    # sorted to make the selection deterministic, and an empty result is
    # reported explicitly instead of surfacing as a bare IndexError.
    pattern = 'output/{}_{}*'.format(cycle_code.lower(), year_code)
    dir_matches = sorted(Path(dir_results).glob(pattern))
    if not dir_matches:
        raise FileNotFoundError(
            'No dataset matching {!r} found under {!r}'.format(pattern, dir_results)
        )
    dir_dataset = dir_matches[0]

    release_parts = release_date.split()
    # NOTE(review): .stem drops anything after the last '.' in the matched
    # name -- confirm dataset directory names never contain a dot, otherwise
    # .name would be the right attribute here.
    location = PurePath('M:/catslib/modelprod').joinpath(
        '{}/results/output/{}'.format(cycle_code.lower(), dir_dataset.stem)
    )
    replacement_text = {
      '_LOCATION': str(location),
      'ANALYSIS_YEAR': analysis_years[year_code],
      'RELEASE_MONTH': release_parts[0],
      'RELEASE_YEAR': release_parts[2],
      'CYCLE_CODE_L': cycle_code.lower(),
      'CYCLE_CODE_U': cycle_code.upper(),
      'YEAR_CODE': year_code,
      '_QUARTER': cycle_code[-2:].upper(),
      'VINTAGE_YEAR': '20' + cycle_code[1:3]
    }

    # Replace generic text in template
    template = docx.Document(template2)
    _replace_tags_in_paragraphs(template.paragraphs, replacement_text)

    # Replace generic text in template tables (only column index 1 is scanned)
    for table in template.tables:
        for cell in table.column_cells(1):
            _replace_tags_in_paragraphs(cell.paragraphs, replacement_text)

    # Save DOCX
    f_out = 'DSHForm_{}_{}_AnalysisYear_{}_InputsAndOutputs.docx'.format(
      replacement_text['VINTAGE_YEAR'],
      replacement_text['_QUARTER'],
      replacement_text['ANALYSIS_YEAR']
    )
    pth_out = PurePath(dir_results).joinpath(f_out)
    template.save(pth_out)




In [30]:
# Produce the data description PDF and the DSH form for every analysis year.
for year_code in analysis_years:
    print('Generating {} {} docs...'.format(cycle_code, year_code))
    for build_doc in (generate_data_description, generate_dsh_form):
        build_doc(cycle_code, year_code, release_date)
print('Finished.')

Generating C20Q4 100 docs...


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

Generating C20Q4 200 docs...



HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

Generating C20Q4 300 docs...



HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

Generating C20Q4 400 docs...



HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

Generating C20Q4 600 docs...



HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

Generating C20Q4 700 docs...



HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

Finished.
