In [1]:
import base64
import json
import os
from pathlib import Path

import pandas as pd
import simplejson
from bs4 import BeautifulSoup
from bs4.formatter import HTMLFormatter
from jinja2 import Environment, FileSystemLoader
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

### Get any additional Parameters

In [2]:
# Periods to exclude from the report, given as one semicolon-separated string.
# Example with several periods: "2022-23;2023-24;2021-23;2019-23"
PeriodRemovals = "2023-24".split(";")
PeriodRemovals

['2023-24']

### Get All Data

In [3]:
# Read the indicator data (decoding the JSON-encoded 'Changes' and 'Previous_Row'
# columns), keep only Stirling's "Real_Annual" rows, then drop every period listed
# in PeriodRemovals.
AllData = (
    pd.read_csv(
        'Data Files//Indicator Data.csv',
        converters={'Changes': json.loads, 'Previous_Row': json.loads},
    )
    .query(f'LocalAuthority == "Stirling" & `Data Type` == "Real_Annual"')
    .loc[lambda frame: ~frame['Period'].isin(PeriodRemovals)]
)

### Create Custom LatestData Based on Parameters

In [4]:
# Within each (Data Type, LocalAuthority, Code) group keep the last row once sorted
# by Period, then narrow the frame to the columns the report needs.
GroupKeys = ['Data Type', 'LocalAuthority', 'Code']
LatestData = (
    AllData
    .sort_values(by=GroupKeys + ['Period'])
    .groupby(GroupKeys)
    .tail(1)[['Code', 'Period', 'Value', 'FamilyRank', 'ScotRank', 'Previous_Row', 'Changes']]
)

In [5]:

# Read the indicator metadata and left-join it onto LatestData (Code -> Code_Sortable)
# so each row carries its title, format template, denominator and report grouping.
Info = pd.read_csv('Data Files//Indicator Information.csv')
InfoColumns = [
    'Code_Sortable',
    'Title',
    'Format_Python',
    'AdditionalAxisDenominator_Python',
    'SubGroup_PythonReport',
    'Group_PythonReport',
]
LatestData = LatestData.merge(
    Info[InfoColumns],
    how='left',
    left_on=['Code'],
    right_on=['Code_Sortable'],
    suffixes=('_latest', '_info'),
)

# Build the formatted value string for a single row
def formatvalue(df):
    """Return the display string for one row's value.

    Despite the parameter name, `df` is a single row: the function is used with
    ``DataFrame.apply(formatvalue, axis=1)``. Any mapping with the keys below
    also works.

    Keys read from `df`:
        Format_Python: a ``str.format`` template applied to the scaled value.
        Value: the raw value.
        AdditionalAxisDenominator_Python: divisor applied to ``Value`` before
            formatting.

    Returns:
        The formatted string.

    The row is deliberately left unmodified: pandas does not support functions
    that mutate the object handed to them by ``apply``, and the caller already
    assigns the returned values to the 'Value_Formatted' column.
    """
    return df['Format_Python'].format(df['Value'] / df['AdditionalAxisDenominator_Python'])

# Display string for every latest-period row.
LatestData['Value_Formatted'] = LatestData.apply(formatvalue, axis='columns')

# Give every historical row its format template and denominator too, so the same
# formatter can be applied across the full time series.
FormatColumns = Info[['Code_Sortable', 'Format_Python', 'AdditionalAxisDenominator_Python']]
AllData = AllData.merge(
    FormatColumns,
    how='left',
    left_on=['Code'],
    right_on=['Code_Sortable'],
    suffixes=('_latest', '_info'),
)
AllData['Value_Formatted'] = AllData.apply(formatvalue, axis='columns')
# For each indicator build {Period: [FamilyRank, Value_Formatted]} over every period
# present in AllData, and attach it to the matching LatestData row.
#
# A single groupby pass replaces two string-built `query` calls per indicator: it
# scans AllData once instead of twice per code, and it does not break when a code
# contains a double quote. Groups keep AllData's row order (sort=False), so the
# periods appear in the same order as before.
timeseriesbycode = {
    Code: {
        Period: [FamilyRank, ValueFormatted]
        for Period, FamilyRank, ValueFormatted in zip(
            CodeRows['Period'], CodeRows['FamilyRank'], CodeRows['Value_Formatted']
        )
    }
    for Code, CodeRows in AllData.groupby('Code', sort=False)
}

# One entry per LatestData row, in row order; a code with no history gets an empty dict.
results = [timeseriesbycode.get(Code, {}) for Code in LatestData['Code']]
LatestData['timeseries'] = results

# Key every latest-period row by its indicator code: {Code: {column: value, ...}}
LatestByCode = LatestData.set_index('Code')
LatestDataDict = LatestByCode.to_dict(orient="index")

# Example lookup: every field held for one indicator
LatestDataDict['C&L 01']

{'Period': '2021-22',
 'Value': 14.5581,
 'FamilyRank': 8.0,
 'ScotRank': 30.0,
 'Previous_Row': {'Value': 218.9586,
  'Numerator': 3683348.5,
  'Denominator': 16822.0,
  'ScotRank': 31.0,
  'ScotPct': 0.96875,
  'FamilyRank': 8.0,
  'FamilyPct': 1.0},
 'Changes': {'ScotRank_ChangeSincePrevious': 1.0,
  'ScotPct_ChangeSincePrevious': 0.03125,
  'FamilyRank_ChangeSincePrevious': -0.0,
  'FamilyPct_ChangeSincePrevious': -0.0,
  'ScotRank_ChangeSinceFirst': -20.0,
  'ScotPct_ChangeSinceFirst': -0.625,
  'FamilyRank_ChangeSinceFirst': -5.0,
  'FamilyPct_ChangeSinceFirst': -0.625,
  'Value_ChangeSincePrevious': -204.4005,
  'Numerator_ChangeSincePrevious': 198482.7999999998,
  'Denominator_ChangeSincePrevious': 249833.0,
  'Value_ChangeSinceFirst': 11.6251,
  'Numerator_ChangeSinceFirst': 1081889.7999999998,
  'Denominator_ChangeSinceFirst': -687412.0,
  'PercentChange_AimAdjusted_SincePrevious': 0.9335120885866095,
  'PercentChange_AimAdjusted_SinceFirst': -3.963552676440505},
 'Code_Sorta

### Load Latest Data Grouped by Groups and SubGroups into Dictionary for Use When Creating Overview Pages

In [6]:
# Define function to retrieve a specified nested value from the LatestDataDict
def getValueFromDict(Code, ParentField, Field) :
    """Return ``LatestDataDict[Code][ParentField][Field]``, or None if any level is missing.

    Args:
        Code: top-level key of LatestDataDict (an indicator code).
        ParentField: key of the nested dictionary to look in, e.g. 'Changes'
            or 'Previous_Row'.
        Field: key inside that nested dictionary.

    Returns:
        The stored value, or None when `Code`, `ParentField` or `Field` is
        absent or an intermediate value is not a dictionary. Previously only a
        missing `Field` gave None; a missing `Code` or `ParentField` raised
        AttributeError on None.
    """
    node = LatestDataDict
    for key in (Code, ParentField, Field):
        if not isinstance(node, dict):
            return None
        node = node.get(key)
    return node

# Independent copy of LatestData restricted to the columns used on the overview pages
OverviewSourceColumns = [
    'Group_PythonReport',
    'SubGroup_PythonReport',
    'Code_Sortable',
    'Title',
    'Period',
    'Value_Formatted',
    'Format_Python',
    'AdditionalAxisDenominator_Python',
    'FamilyRank',
    'ScotRank',
]
GroupedLatestData = LatestData.loc[:, OverviewSourceColumns].copy()

# Each [parent, field] pair names one nested value to pull out of LatestDataDict;
# every pair becomes its own column called '<parent>_<field>'.
FieldsRequired = [
    ['Changes', 'Value_ChangeSincePrevious'],
    ['Changes', 'PercentChange_AimAdjusted_SincePrevious'],
    ['Changes', 'FamilyRank_ChangeSincePrevious'],
    ['Changes', 'ScotRank_ChangeSincePrevious'],
    ['Previous_Row', 'Value'],
    ['Previous_Row', 'FamilyRank'],
    ['Previous_Row', 'ScotRank'],
]
for ParentField, Field in FieldsRequired:
    GroupedLatestData[f'{ParentField}_{Field}'] = GroupedLatestData.apply(
        lambda row: getValueFromDict(row['Code_Sortable'], ParentField, Field),
        axis='columns',
    )

# Replace the raw previous-period value with its display string, using the row's own
# format template and denominator.
def _formatpreviousvalue(row):
    """Return the formatted string for one row's 'Previous_Row_Value'."""
    scaled = row['Previous_Row_Value'] / row['AdditionalAxisDenominator_Python']
    return row['Format_Python'].format(scaled)

GroupedLatestData['Previous_Row_Value'] = GroupedLatestData.apply(_formatpreviousvalue, axis='columns')

# Final column selection and order for the overview data; the format template and
# denominator columns are dropped here.
OverviewColumnOrder = [
    'Group_PythonReport',
    'SubGroup_PythonReport',
    'Code_Sortable',
    'Title',
    'Period',
    'Value_Formatted',
    'Previous_Row_Value',
    'Changes_Value_ChangeSincePrevious',
    'Changes_PercentChange_AimAdjusted_SincePrevious',
    'FamilyRank',
    'Previous_Row_FamilyRank',
    'Changes_FamilyRank_ChangeSincePrevious',
    'ScotRank',
    'Previous_Row_ScotRank',
    'Changes_ScotRank_ChangeSincePrevious',
]
GroupedLatestData = GroupedLatestData[OverviewColumnOrder]

# Build the nested dictionary used by the overview templates:
#   {Group: {SubGroup: {Code: {field: value, ...}}}}
# Rows are selected with boolean masks rather than string-built `query` expressions,
# so group, sub-group and code values containing a double quote no longer break the
# lookup.
IndicatorDetailColumns = [
    'Title', 'Period', 'Value_Formatted', 'Previous_Row_Value',
    'Changes_Value_ChangeSincePrevious', 'Changes_PercentChange_AimAdjusted_SincePrevious',
    'FamilyRank', 'Previous_Row_FamilyRank', 'Changes_FamilyRank_ChangeSincePrevious',
    'ScotRank', 'Previous_Row_ScotRank', 'Changes_ScotRank_ChangeSincePrevious',
]

ISCategories = GroupedLatestData['Group_PythonReport'].unique()
GroupedLatestDataDict = {}
for Category in ISCategories :
    InCategory = GroupedLatestData['Group_PythonReport'] == Category
    CategoryDict = {}
    GroupedLatestDataDict[f'{Category}'] = CategoryDict

    SubCategories = GroupedLatestData.loc[InCategory, 'SubGroup_PythonReport'].unique()
    for SubCategory in SubCategories :
        InSubCategory = InCategory & (GroupedLatestData['SubGroup_PythonReport'] == SubCategory)
        SubCategoryDict = {}
        CategoryDict[f'{SubCategory}'] = SubCategoryDict

        Indicators = GroupedLatestData.loc[InSubCategory, 'Code_Sortable'].unique()
        for Indicator in Indicators :
            # As before, the indicator's fields come from the first row carrying this
            # code anywhere in the frame (not only within the current sub-group).
            IndicatorRows = GroupedLatestData.loc[GroupedLatestData['Code_Sortable'] == Indicator, IndicatorDetailColumns]
            SubCategoryDict[f'{Indicator}'] = list(IndicatorRows.to_dict('index').values())[0]

GroupedLatestDataDict


{'Economy': {'Leisure Facilities & Attractions': {'C&L 01': {'Title': 'Cost per Attendance at Sports Facilities',
    'Period': '2021-22',
    'Value_Formatted': '£ 14.56',
    'Previous_Row_Value': '£ 218.96',
    'Changes_Value_ChangeSincePrevious': -204.4005,
    'Changes_PercentChange_AimAdjusted_SincePrevious': 0.9335120885866095,
    'FamilyRank': 8.0,
    'Previous_Row_FamilyRank': 8.0,
    'Changes_FamilyRank_ChangeSincePrevious': -0.0,
    'ScotRank': 30.0,
    'Previous_Row_ScotRank': 31.0,
    'Changes_ScotRank_ChangeSincePrevious': 1.0},
   'C&L 02': {'Title': 'Cost per Library Visit',
    'Period': '2021-22',
    'Value_Formatted': '£ 5.28',
    'Previous_Row_Value': '£ 3.13',
    'Changes_Value_ChangeSincePrevious': 2.1454999999999997,
    'Changes_PercentChange_AimAdjusted_SincePrevious': -0.6854194620152066,
    'FamilyRank': 5.0,
    'Previous_Row_FamilyRank': 4.0,
    'Changes_FamilyRank_ChangeSincePrevious': -1.0,
    'ScotRank': 22.0,
    'Previous_Row_ScotRank': 11

### Set up the Jinja2 Environment, Define the Page-Creation and savetopdf Functions, then Create All Pages from Templates

In [7]:
# Jinja2 environment whose templates are loaded from the 'Templates_Jinja2' directory
TemplateLoader = FileSystemLoader('Templates_Jinja2')
env = Environment(loader=TemplateLoader)

### Define Repeated Functions

In [8]:
# BeautifulSoup output formatter shared by every generated page: 4-space indentation
formatter = HTMLFormatter(indent=4)

def formatandwrite (htmlout,layoutfilename):
    """Pretty-print rendered HTML and write it into the Pages_FinalReport folder.

    Args:
        htmlout: the rendered HTML string.
        layoutfilename: file name (not a path) to write inside
            'Pages_FinalReport/'; an existing file is overwritten.
    """
    prettifiedhtml = BeautifulSoup(htmlout, "html.parser").prettify(formatter=formatter)
    # `with` closes the file even if the write fails. The explicit encoding makes
    # non-ASCII characters come out the same on every platform instead of
    # depending on the locale default.
    # NOTE(review): assumes the templates declare/expect UTF-8 - confirm.
    with open("Pages_FinalReport/" + layoutfilename, "w", encoding="utf-8") as outputfile:
        outputfile.write(prettifiedhtml)

def _renderandwrite(layoutfilename, **context):
    """Render the named template with `context` and write the result via formatandwrite."""
    htmlout = env.get_template(layoutfilename).render(**context)
    formatandwrite(htmlout, layoutfilename)

def CreateGroupPage(layoutfilename) :
    """Render a group page; the template receives LatestDataDict as `LatestData`."""
    _renderandwrite(layoutfilename, LatestData = LatestDataDict)

def CreateOverviewPage(layoutfilename):
    """Render an overview page; the template receives `GroupedLatestDataDict`."""
    _renderandwrite(layoutfilename, GroupedLatestDataDict = GroupedLatestDataDict)

def CreateContentsPage(layoutfilename):
    """Render the contents page; the template receives no variables."""
    _renderandwrite(layoutfilename)

def savetopdf (htmlpath,pdfpath) :
    """Print a local HTML file to PDF with headless Chrome.

    Args:
        htmlpath: path of the HTML file to print (relative or absolute).
        pdfpath: path the PDF is written to. Its parent directory is created
            if it does not exist. (Previously this argument was ignored and
            the PDF was written next to the HTML file as '<htmlpath>.pdf'.)
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--headless')

    # Parameters of the DevTools `Page.printToPDF` command. The keys used before
    # ("recentDestinations", "isLandscapeEnabled", "isHeaderFooterEnabled", ...)
    # belong to Chrome's print-preview settings and are not parameters of this
    # command; these are the equivalents it does define.
    print_settings = {
        "landscape": True,
        "displayHeaderFooter": False,
        # printToPDF applies a default page margin when none is given.
        # NOTE(review): zeroing top/bottom is intended to address the blank band
        # reported at the top and bottom of the PDFs - confirm on real output.
        "marginTop": 0,
        "marginBottom": 0,
    }

    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=chrome_options)
    try:
        # as_uri() builds a well-formed file:// URL (drive letters, spaces) on every OS.
        driver.get(Path(os.path.abspath(htmlpath)).as_uri())
        pdf_data = driver.execute_cdp_cmd("Page.printToPDF", print_settings)
    finally:
        # quit() ends the whole browser session; it runs even if printing fails.
        driver.quit()

    os.makedirs(os.path.dirname(os.path.abspath(pdfpath)), exist_ok=True)
    with open(pdfpath, "wb") as f:
        f.write(base64.b64decode(pdf_data['data']))

### Create HTML Pages from Jinja2 Templates

In [9]:
# Render every template in the template folder into Pages_FinalReport, choosing the
# renderer from the template's file name. Templates matching none of the patterns
# are skipped.
TemplateDirectory = 'Templates_Jinja2'
for filename in os.listdir(TemplateDirectory):
    if '.Group_' in filename :
        CreateGroupPage(filename)
    elif '.Overview' in filename:
        CreateOverviewPage(filename)
    elif filename == '00.FrontCover.html':
        # The front cover takes its own variables and is written as rendered,
        # without the pretty-printing step used for the other pages.
        template = env.get_template(filename)
        htmlout = template.render(ReportYear="2021/22", ReportTitle="Local Government Benchmarking Framework", ReportTitleDescription="Stirling Council by Theme", ImgPath="https://user-images.githubusercontent.com/104562880/184315997-fbdde8f9-a331-49b1-82c9-7fb8ebba86e2.png")
        # `with` closes the file even if the write fails, and the handle no longer
        # reuses the loop variable's name. Explicit encoding avoids depending on
        # the platform default.
        # NOTE(review): assumes the templates expect UTF-8 - confirm.
        with open("Pages_FinalReport/" + filename, "w", encoding="utf-8") as coverfile:
            coverfile.write(htmlout)
    elif '.Contents' in filename:
        CreateContentsPage(filename)

### Write pages to PDF (WIP)

In [10]:
"""
#This works when used with the installed chromedriver. One issue remains where the output pdfs have a white space at top and bottom that cannot be removed by use of no header tags etc. pdf does not register it as an actual header. Issue does not exist if printed manually in chrome.

HTMLDirectory = os.fsencode('Pages_FinalReport')
for file in os.listdir(HTMLDirectory):
    filename = os.fsdecode(file)
    if '.html' in filename :     
        htmlpath = f'Pages_FinalReport//{filename}'
        pdffilename = filename.replace('.html','.pdf')
        pdfpath = f'Pages_PDF//{pdffilename}'
        savetopdf(htmlpath,pdfpath)
"""

"\n#This works when used with the installed chromedriver. One issue remains where the output pdfs have a white space at top and bottom that cannot be removed by use of no header tags etc. pdf does not register it as an actual header. Issue does not exist if printed manually in chrome.\n\nHTMLDirectory = os.fsencode('Pages_FinalReport')\nfor file in os.listdir(HTMLDirectory):\n    filename = os.fsdecode(file)\n    if '.html' in filename :     \n        htmlpath = f'Pages_FinalReport//{filename}'\n        pdffilename = filename.replace('.html','.pdf')\n        pdfpath = f'Pages_PDF//{pdffilename}'\n        savetopdf(htmlpath,pdfpath)\n"