In [935]:
%load_ext lab_black

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


In [936]:
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
import functools
import json
import re
from typing import Optional, List

import altair as alt
from dataclasses_json import dataclass_json
import numpy as np
import pandas as pd

In [937]:
output_dir = "/home/pvh/Documents/code/pvh-forks/presidential-employment-stimulus/data"

### Data structure

Each department has a total budget and total opportunities target. 

The overall programme has outcome targets
1. Jobs created
2. Jobs retained
3. Livelihoods supported

Each department has a "blurb" describing their programme.

Within each department there are multiple programmes that can contribute to each of these targets.

Each programme has a demographic split of outcomes, with gender and youth percentages.

Each programme has a per-province split of outcomes.

Files:

`Consolidated data (Dec) - Presidential Employment Stimulus.xlsx` - December sheet

`Consolidated Presidential Employment Stimulus Reporting Template.xlsx` - January sheet

In [938]:
december_excel = "Consolidated data (Dec) - Presidential Employment Stimulus.xlsx"
january_excel = "Consolidated Presidential Employment Stimulus Reporting Template.xlsx"
doh_january_excel = (
    "DoH_Consolidated Appointment of Nurses - Verified by Provinces.xlsx"
)

jan_opportunities_excel = (
    "Dashboard input_PES targets and opportunities per month 170321 categories.xlsx"
)
opportunity_targets_df = pd.read_excel(
    jan_opportunities_excel, sheet_name="Targets", header=None
)
opportunity_achievements_df = pd.read_excel(
    jan_opportunities_excel, sheet_name="Trends", header=None
)

opportunity_type_df = pd.concat(
    [opportunity_targets_df.iloc[2:56, 1], opportunity_targets_df.iloc[2:56, 4]], axis=1
).set_index(1)

december_sheets = pd.ExcelFile(december_excel).sheet_names
january_sheets = pd.ExcelFile(january_excel).sheet_names

provinces = [
    "Eastern Cape",
    "Free State",
    "Gauteng",
    "KwaZulu-Natal",
    "Limpopo",
    "Mpumalanga",
    "North West",
    "Northern Cape",
    "Western Cape",
]
province_abbreviations = ["EC", "FS", "GP", "KZN", "LP", "NW", "NC", "WC"]

```
interface DepartmentMonth {
  month: number // 202101
  name: string // Basic Education
  lead: string // Strengthening the learning environment in schools
  paragraph: string
  sections: Array<{
    name: string // Budget allocated to date
    matrics: Array<{
      name: string // Educational and general assistants
      type: 'currency' | 'count'
      value: number
      valueTarget?: number
      time?: {
        name: string // spend
        values: Array<{
          month: number // 202101
          value: number
        }>
      }
      gender?: {
        name: string // opportunities
        values: Array<{
          gender: 'female' | 'male'
          value: number
        }>
      }
      age?: {
        name: string // opportunities
        values: Array<{
          age: string // 18-35
          value: number
        }>
      }
      province?: {
        name: string // opportunities
        values: Array<{
          province: 'EC' | 'FS' | 'GP' | 'KZN' | 'LP' | 'MP' | 'NC' | 'NW' | 'WC'
        }>
      }
    }>
  }>
}
```

## 

In [939]:
SectionEnum = Enum(
    "Section", "targets budget_allocated job_opportunities jobs_retain livelihoods"
)

MetricTypeEnum = Enum("MetricType", "currency count")

ProvinceEnum = Enum("Province", "EC FS GP KZN LP MP NC NW WC")

province_to_abbrev = {
    "Free State": "FS",
    "Gauteng": "GP",
    "KwaZulu-Natal": "KZN",
    "Limpopo": "LP",
    "Mpumalanga": "MP",
    "North West": "NW",
    "Northern Cape": "NC",
    "Western Cape": "WC",
    "Eastern Cape": "EC",
}

GenderEnum = Enum("Gender", "Male Female")


@dataclass_json
@dataclass
class TimeValue:
    month: int  # encoding month as in 202101
    name: str  # human readable time period name
    value: int


@dataclass_json
@dataclass
class AgeValue:
    age_category: str  # 18-35 or youth?
    value: int


@dataclass_json
@dataclass
class GenderValue:
    gender: str  # enum: 'female' or 'male'
    value: int


@dataclass_json
@dataclass
class ProvinceValue:
    province: str  # enum: 'EC' | 'FS' | 'GP' | 'KZN' | 'LP' | 'MP' | 'NC' | 'NW' | 'WC'
    value: int


@dataclass_json
@dataclass
class TimeValues:
    name: str
    values: List["TimeValue"]


@dataclass_json
@dataclass
class ProvinceValues:
    name: str
    values: List["ProvinceValue"]


@dataclass_json
@dataclass
class AgeValues:
    name: str
    values: List["AgeValue"]


@dataclass_json
@dataclass
class GenderValues:
    name: str
    values: List["GenderValue"]


@dataclass_json
@dataclass
class Metric:
    name: str
    metric_type: str  # enum of 'currency', 'count'
    value: int
    time: Optional[TimeValues]
    gender: Optional[GenderValues]
    age: Optional[AgeValues]
    province: Optional[ProvinceValues]
    value_target: int = -1


@dataclass_json
@dataclass
class Section:
    name: str
    section_type: str  # enum of 'targets', 'budget_allocated', 'job_opportunities', 'jobs_retain', 'livelihoods'
    metrics: List["Metric"]


@dataclass_json
@dataclass
class Department:
    month: int  # the month of latest data
    name: str
    sheet_name: str
    lead: str
    paragraph: str
    target_lines: Optional[List[int]]
    achievement_lines: Optional[List[int]]
    sections: List["Section"]


@dataclass_json
@dataclass
class DepartmentValue:
    department: str  # TODO: should be enum
    value: int


@dataclass_json
@dataclass
class DepartmentValues:
    name: str
    values: List["DepartmentValue"]


@dataclass_json
@dataclass
class OverviewMetric:
    name: str
    metric_type: str  # enum of 'currency', 'count'
    value: int
    time: Optional[TimeValues]
    #     department: DepartmentValues
    value_target: int = -1


@dataclass_json
@dataclass
class OverviewSection(Section):
    metrics: List["OverviewMetric"]


@dataclass_json
@dataclass
class Overview:
    month: int
    name: str  # Would normally be "Programme Overview"
    lead: str
    paragraph: str
    overview_sections: List["OverviewSection"]


@dataclass_json
@dataclass
class Everything:
    overview: Overview
    departments: List["Department"]

# Top level structure

In [940]:
all_data = Everything(
    overview=Overview(
        month=202103,
        name="Programme overview",
        lead="About the programme",
        paragraph="nothing yet",
        overview_sections=[],
    ),
    departments=[],
)

## DTIC

In [956]:
department_name = "Trade, Industry and Competition"
sheet_name = "DTIC"

department = Department(
    month=202012,
    name=department_name,
    sheet_name=sheet_name,
    lead="Piloting new models for re-shoring and expanding global business services",
    paragraph="The Global Business Services Sector has an impressive track record. Established in 2006/7 to provide offshore customer service delivery, the sector has built from a low base to achieve an average year-on-year export revenue growth of at least 20% since 2014.",
    sections=[],
    target_lines=[38, 39],
    achievement_lines=[38, 39],
)

budget_target = 120_000 * 1000
opportunities_target = 8_000
opportunities_achieved = opportunity_achievements_df.iloc[
    department.achievement_lines[0] : department.achievement_lines[1], 13
]

# Programme targets for this department
section = Section(
    name="Programme targets for this department",
    section_type=SectionEnum.targets.name,
    metrics=[
        Metric(
            name="Budget",
            metric_type=MetricTypeEnum.currency.name,
            value=-1,
            value_target=budget_target,
            time=None,
            gender=None,
            age=None,
            province=None,
        ),
        Metric(
            name="Programme beneficiaries",
            metric_type=MetricTypeEnum.count.name,
            value=opportunities_achieved,
            value_target=opportunities_target,
            time=None,
            gender=None,
            age=None,
            province=None,
        ),
    ],
)

department.sections.append(section)

december_df = pd.read_excel(december_excel, sheet_name=sheet_name, header=None)

oct_nov_spend = december_df.iloc[48, 1]
dec_spend = december_df.iloc[49, 1]

programme_name = "Global Business Services Sector expansion"

# budget allocated to date

# removed this section for now
# section = Section(
#     name="Budget allocated to date",
#     section_type=SectionEnum.budget_allocated.name,
#     metrics=[
#         Metric(
#             name=programme_name,
#             metric_type=MetricTypeEnum.currency.name,
#             value=dec_spend,
#             value_target=budget_target,
#             time=TimeValues(
#                 name="Spent over time",
#                 values=[
#                     TimeValue(month=202011, name="Nov '20", value=oct_nov_spend),
#                     TimeValue(month=202012, name="Dec '20", value=dec_spend),
#                 ],
#             ),
#             gender=None,
#             age=None,
#             province=None,
#         )
#     ],
# )

# department.sections.append(section)

# Job opportunities created to date

total_programme_jobs = december_df.iloc[28:37, 1].sum()
percentage_male = december_df.iloc[41, 1]
percentage_female = december_df.iloc[42, 1]
percentage_youth = december_df.iloc[43, 1]


by_province_df = december_df.iloc[29:37, :2].set_index(0)

province_values = ProvinceValues(name="Opportunities by Province", values=[])
for province in list(by_province_df.index):
    pv = ProvinceValue(
        province=province_to_abbrev[province], value=int(by_province_df.loc[province])
    )
    province_values.values.append(pv)

section = Section(
    name="Job opportunities created to date",
    section_type=SectionEnum.job_opportunities.name,
    metrics=[
        Metric(
            name=programme_name,
            metric_type=MetricTypeEnum.count.name,
            value=total_programme_jobs,
            time=TimeValues(
                name="Employed over time",
                values=[
                    TimeValue(month=202012, name="Dec '20", value=total_programme_jobs)
                ],
            ),
            gender=GenderValues(
                name="Opportunities by Gender",
                values=[
                    GenderValue(gender=GenderEnum.Male.name, value=percentage_male),
                    GenderValue(gender=GenderEnum.Female.name, value=percentage_female),
                ],
            ),
            age=AgeValues(
                name="Percentage of opportunities going to 18-35 year olds",
                values=[AgeValue(age_category="18-35", value=percentage_youth)],
            ),
            province=province_values,
        )
    ],
)

department.sections.append(section)

all_data.departments.append(department)
print(
    department.to_json(indent=2),
    file=open(output_dir + "/" + sheet_name + ".json", "w"),
)
# print(department.to_json(indent=2))

## DBE

In [943]:
department_name = "Basic Education"
sheet_name = "DBE"

department = Department(
    month=202103,
    name=department_name,
    sheet_name=sheet_name,
    lead="Teachers assistants and other support for schools",
    paragraph="A key priority identified in the National Development Plan is the improvement of quality education, skills development, and innovation. One intervention that has seen some experimentation in South Africa, with significant potential to scale nationally, is the use of school assistants to strengthen the learning environment. An important rationale for school assistants is the need to support teachers in the classroom, freeing up time for teaching and providing additional support to learners to improve education outcomes.",
    sections=[],
    target_lines=[2, 5],
    achievement_lines=[3, 6],
)

budget_target = 7_000_000 * 1000
opportunities_target = 344_933


december_df = pd.read_excel(
    december_excel,
    sheet_name=sheet_name,
    header=None,
)
january_df = pd.read_excel(
    january_excel,
    sheet_name=sheet_name,
    header=None,
)

assistants_budget = january_df.iloc[42, 1]
post_saving_budget = january_df.iloc[42, 2]

# Programme targets for this department
section = Section(
    name="Programme targets for this department",
    section_type=SectionEnum.targets.name,
    metrics=[
        Metric(
            name="Total budget",
            metric_type=MetricTypeEnum.currency.name,
            value=budget_target,
            time=None,
            gender=None,
            age=None,
            province=None,
        ),
#         Metric(
#             name="Budget for Education and General Assistant Posts",
#             metric_type=MetricTypeEnum.currency.name,
#             value=assistants_budget,
#             time=None,
#             gender=None,
#             age=None,
#             province=None,
#         ),
#         Metric(
#             name="Budget for Saving Vulnerable Posts",
#             metric_type=MetricTypeEnum.currency.name,
#             value=post_saving_budget,
#             time=None,
#             gender=None,
#             age=None,
#             province=None,
#         ),
        Metric(
            name="Programme beneficiaries",
            metric_type=MetricTypeEnum.count.name,
            value=opportunities_target,
            time=None,
            gender=None,
            age=None,
            province=None,
        ),
    ],
)

department.sections.append(section)

# NOTE: removed programme spends for now
# programme_spends = [
#     dict(
#         name="Education and general assistants",
#         amounts=[december_df.iloc[44, 1], january_df.iloc[44, 1]],
#     ),
#     dict(
#         name="Vulnerable  posts saved",
#         amounts=[december_df.iloc[44, 2], january_df.iloc[44, 2]],
#     ),
# ]

# metrics = []
# months = [202101, 202103]
# month_names = ["Jan '21", "Mar '21"]

# for programme in programme_spends:
#     values = []
#     total_value = 0
#     for i, value in enumerate(programme["amounts"]):
#         values.append(TimeValue(month=months[i], name=month_names[i], value=value))
#         total_value += value
#     metric = Metric(
#         name=programme["name"],
#         metric_type=MetricTypeEnum.currency.name,
#         value=total_value,
#         value_target=budget_target,
#         time=TimeValues(name="Spent over time", values=values),
#         gender=None,
#         age=None,
#         province=None,
#     )
#     metrics.append(metric)

# programme_name = "Global Business Services Sector expansion"

# # budget allocated to date
# section = Section(
#     name="Budget allocated to date",
#     section_type=SectionEnum.budget_allocated.name,
#     metrics=metrics,
# )

# department.sections.append(section)

# Job opportunities created to date

ed_assistants_by_prov_df = january_df.iloc[10:19, :2].set_index(0)
gen_assistants_by_prov_df = january_df.iloc[10:19, :3].drop(1, axis=1).set_index(0)

job_opportunity_programmes = [
    dict(
        name="Education assistants",
        totals=[december_df.iloc[19, 1], january_df.iloc[19, 1]],
        province=ProvinceValues(
            name="Opportunities by Province",
            values=[
                ProvinceValue(
                    province=province_to_abbrev[province],
                    value=ed_assistants_by_prov_df.loc[province],
                )
                for province in list(ed_assistants_by_prov_df.index)
            ],
        ),
    ),
    dict(
        name="General assistants",
        totals=[december_df.iloc[19, 2], january_df.iloc[19, 2]],
        province=ProvinceValues(
            name="Opportunities by Province",
            values=[
                ProvinceValue(
                    province=province_to_abbrev[province],
                    value=gen_assistants_by_prov_df.loc[province],
                )
                for province in list(gen_assistants_by_prov_df.index)
            ],
        ),
    ),
]

metrics = []
for programme in job_opportunity_programmes:
    metric = Metric(
        name=programme["name"],
        metric_type=MetricTypeEnum.count.name,
        time=TimeValues(
            name="Employed over time",
            values=[
                TimeValue(month=months[i], name=month_names[i], value=value)
                for i, value in enumerate(programme["totals"])
            ],
        ),
        gender=None,
        age=None,
        province=programme["province"],
        value=programme["totals"][-1],
    )
    metrics.append(metric)

section = Section(
    name="Job opportunities created to date",
    section_type=SectionEnum.job_opportunities.name,
    metrics=metrics,
)

department.sections.append(section)

# Jobs retained

jobs_retained_over_time = [december_df.iloc[19, 3], january_df.iloc[19, 3]]
jobs_retained_over_time_df = (
    january_df.iloc[10:19, :4].drop([1, 2], axis=1).set_index(0)
)
section = Section(
    name="Jobs Retained",
    section_type=SectionEnum.jobs_retain.name,
    metrics=[
        Metric(
            name="Vulnerable posts saved",
            metric_type=MetricTypeEnum.count.name,
            time=TimeValues(
                name="Jobs saved over time",
                values=[
                    TimeValue(
                        month=months[i],
                        name=month_names[i],
                        value=[
                            value for i, value in enumerate(jobs_retained_over_time)
                        ],
                    )
                ],
            ),
            value=jobs_retained_over_time[-1],
            gender=None,
            age=None,
            province=ProvinceValues(
                name="Posts saved by Province",
                values=[
                    ProvinceValue(
                        province=province_to_abbrev[province],
                        value=gen_assistants_by_prov_df.loc[province],
                    )
                    for province in list(gen_assistants_by_prov_df.index)
                ],
            ),
        )
    ],
)

department.sections.append(section)

all_data.departments.append(department)
print(
    department.to_json(indent=2),
    file=open(output_dir + "/" + sheet_name + ".json", "w"),
)
# print(department.to_json(indent=2))

## DSD

In [944]:
department_name = "Social Development"
sheet_name = "DSD"

department = Department(
    month=202103,
    name=department_name,
    sheet_name=sheet_name,
    lead="Income support to practitioners and to the implementation of Covid compliance measures",
    paragraph="Livelihoods from the provision of Early Childhood Development services were severely disrupted by the pandemic, with providers facing challenges with re-opening. There are costs associated with doing so safely, and some parents can no longer afford to pay fees as a result of job losses.",
    sections=[],
    target_lines=[5, 8],
    achievement_lines=[6, 9],
)


budget_target = 588_728 * 1000
beneficiaries_target = 111_142


december_df = pd.read_excel(
    december_excel,
    sheet_name=sheet_name,
    header=None,
)
january_df = pd.read_excel(
    january_excel,
    sheet_name=sheet_name,
    header=None,
)

social_workers_budget = january_df.iloc[42, 1]
registration_support_budget = int(
    float(re.sub(r"[^\d]+([\d.]+).*", r"\1", december_df.iloc[42, 2])) * 1_000_000
)  # pull number out of R 16.5 million
# Programme targets for this department
section = Section(
    name="Programme targets for this department",
    section_type=SectionEnum.targets.name,
    metrics=[
        Metric(
            name="Total budget",
            metric_type=MetricTypeEnum.currency.name,
            value=budget_target,
            time=None,
            gender=None,
            age=None,
            province=None,
        ),
        Metric(
            name="Budget for social workers",
            metric_type=MetricTypeEnum.currency.name,
            value=social_workers_budget,
            time=None,
            gender=None,
            age=None,
            province=None,
        ),
        Metric(
            name="Budget for registration support officers",
            metric_type=MetricTypeEnum.currency.name,
            value=registration_support_budget,
            time=None,
            gender=None,
            age=None,
            province=None,
        ),
        Metric(
            name="Programme beneficiaries",
            metric_type=MetricTypeEnum.count.name,
            value=beneficiaries_target,
            time=None,
            gender=None,
            age=None,
            province=None,
        ),
    ],
)

department.sections.append(section)

# budget allocated to date

social_worker_spend_dec = int(
    float(december_df.iloc[44, 1].replace("R", "").replace(",", ""))
)
social_worker_spend_jan = january_df.iloc[44, 1]
social_worker_spends = [social_worker_spend_dec, social_worker_spend_jan]

months = [202012, 202101]
month_names = ["Dec '20", "Jan '21"]
programme_spends = [
    Metric(
        name="Budget for social workers",
        metric_type=MetricTypeEnum.currency.name,
        value=sum(social_worker_spends),
        value_target=social_workers_budget,
        time=TimeValues(
            name="Spend this month",
            values=[
                TimeValue(month=months[i], name=month_names[i], value=value)
                for i, value in enumerate(social_worker_spends)
            ],
        ),
        gender=None,
        age=None,
        province=None,
    ),
]

budget_allocation_section = Section(
    name="Budget allocated to date",
    section_type=SectionEnum.budget_allocated.name,
    metrics=programme_spends,
)

department.sections.append(budget_allocation_section)

# Jobs added

# TODO: no info yet

# Jobs retained

dec_soc_worker_jobs = december_df.iloc[19, 1]
jan_soc_worker_jobs = january_df.iloc[19, 1]
social_worker_jobs = [dec_soc_worker_jobs, jan_soc_worker_jobs]
social_worker_job_target = 1809
soc_worker_dec_df = december_df.iloc[10:19, :2].set_index(0)
soc_worker_jan_df = january_df.iloc[10:19, :2].set_index(0)
jobs_retained = [
    Metric(
        name="Social workers retained",
        metric_type=MetricTypeEnum.count.name,
        value=social_worker_jobs[-1],
        value_target=social_worker_job_target,
        gender=GenderValues(
            name="Jobs retained by gender",
            values=[
                GenderValue(gender=GenderEnum.Male.name, value=january_df.iloc[36, 1]),
                GenderValue(
                    gender=GenderEnum.Female.name, value=january_df.iloc[37, 1]
                ),
            ],
        ),
        age=AgeValues(
            name="Jobs retained by age",
            values=[AgeValue(age_category="18-35", value=january_df.iloc[38, 1])],
        ),
        time=TimeValues(
            name="Jobs retained over time",
            values=[
                TimeValue(month=months[i], name=month_names[i], value=value)
                for i, value in enumerate(social_worker_jobs)
            ],
        ),
        province=ProvinceValues(
            name="Jobs retained by province",
            values=[
                ProvinceValue(
                    province=province_to_abbrev[province],
                    value=int(soc_worker_jan_df.loc[province]),
                )
                for province in list(soc_worker_jan_df.index)
            ],
        ),
    )
]

jobs_retained_section = Section(
    name="Jobs retained",
    section_type=SectionEnum.jobs_retain.name,
    metrics=jobs_retained,
)

department.sections.append(jobs_retained_section)

# Livelihoods section

# TODO: no data yet

# save all the data

all_data.departments.append(department)
print(
    department.to_json(indent=2),
    file=open(output_dir + "/" + sheet_name + ".json", "w"),
)

# print(department.to_json(indent=2))

## DOH

In [945]:
department_name = "Health"
sheet_name = "DOH"

department = Department(
    month=202101,
    name=department_name,
    sheet_name=sheet_name,
    lead="Primary Health Care is at the frontline of the battle against Covid-19",
    paragraph="TBD",
    sections=[],
    target_lines=[39, 43],
    achievement_lines=[39, 42],
)

# as there are errors in the main sheets for DoH, draw this info from DOH Excel
health_df = pd.read_excel(
    doh_january_excel, sheet_name="Verified by Provinces", header=None
)

health_job_target = 5_531
health_budget_target = 39_3571 * 1000

# Overall targets
section = Section(
    name="Programme targets for this department",
    section_type=SectionEnum.targets.name,
    metrics=[
        Metric(
            name="Total budget",
            metric_type=MetricTypeEnum.currency.name,
            value=health_budget_target,
            time=None,
            gender=None,
            province=None,
            age=None,
        ),
        Metric(
            name="Job Opportunities",
            metric_type=MetricTypeEnum.count.name,
            value=health_job_target,
            time=None,
            gender=None,
            province=None,
            age=None,
        ),
    ],
)

department.sections.append(section)

months = [202010, 202011, 202012, 202101]
month_names = ["Oct '20", "Nov '20", "Dec '20", "Jan '21"]

# layout is Staff Nurse, Assistant Nurse x 4 (for October, November, December, January)
staff_nurse_spends = [health_df.iloc[47, col] for col in (1, 3, 5, 7)]
assistant_nurse_spends = [health_df.iloc[47, col] for col in (2, 4, 6, 8)]
staff_nurse_budget = sum(staff_nurse_spends)
assistant_nurse_budget = sum(assistant_nurse_spends)
section = Section(
    name="Budget allocated to date",
    section_type=MetricTypeEnum.currency.name,
    metrics=[
        Metric(
            name="Staff Nurse Budget",
            metric_type=MetricTypeEnum.count.name,
            value=staff_nurse_budget,
            time=TimeValues(
                name="Spend over time",
                values=[
                    TimeValue(month=months[i], name=month_names[i], value=value)
                    for i, value in enumerate(staff_nurse_spends)
                ],
            ),
            gender=None,
            age=None,
            province=None,
        ),
        Metric(
            name="Assistant Nurse Budget",
            metric_type=MetricTypeEnum.count.name,
            value=assistant_nurse_budget,
            time=TimeValues(
                name="Spend over time",
                values=[
                    TimeValue(month=months[i], name=month_names[i], value=value)
                    for i, value in enumerate(assistant_nurse_spends)
                ],
            ),
            gender=None,
            age=None,
            province=None,
        ),
    ],
)

department.sections.append(section)

# Job opportunties

staff_nurse_opportunities_df = pd.concat(
    [health_df.iloc[15:24, 0], health_df.iloc[15:24, 1:9:2].sum(axis=1)],
    axis=1,
)
staff_nurse_opportunities_df.columns = pd.Index([0, 1])
staff_nurse_opportunities_df = staff_nurse_opportunities_df.set_index(0)
assistant_nurse_opportunities_df = pd.concat(
    [health_df.iloc[15:24, 0], health_df.iloc[15:24, 2:9:2].sum(axis=1)], axis=1
)
assistant_nurse_opportunities_df.columns = pd.Index([0, 1])
assistant_nurse_opportunities_df = assistant_nurse_opportunities_df.set_index(0)
section = Section(
    name="Job opportunities created to date",
    section_type=SectionEnum.job_opportunities.name,
    metrics=[
        Metric(
            name="Enrolled Nurses",  # same as Staff Nurses
            metric_type=MetricTypeEnum.count.name,
            value=opportunity_achievements_df.iloc[40, 13],
            value_target=opportunity_targets_df.iloc[41, 2],
            province=ProvinceValues(
                name="Opportunities by Province",
                values=[
                    ProvinceValue(
                        province=province_to_abbrev[province],
                        value=staff_nurse_opportunities_df.loc[province],
                    )
                    for province in list(staff_nurse_opportunities_df.index)
                ],
            ),
            time=TimeValues(
                name="Opportunities over time",
                values=[
                    TimeValue(month=months[i], name=month_names[i], value=value)
                    for i, value in enumerate(
                        list(opportunity_achievements_df.iloc[40, 10:14])
                    )
                ],
            ),
            gender=None,
            age=None,
        ),
        Metric(
            name="Auxiliary Nurses",  # same as Assistant Nurses
            metric_type=MetricTypeEnum.count.name,
            value=opportunity_achievements_df.iloc[41, 13],
            value_target=opportunity_targets_df.iloc[42, 2],
            province=ProvinceValues(
                name="Opportunities by Province",
                values=[
                    ProvinceValue(
                        province=province_to_abbrev[province],
                        value=assistant_nurse_opportunities_df.loc[province],
                    )
                    for province in list(assistant_nurse_opportunities_df.index)
                ],
            ),
            time=TimeValues(
                name="Opportunities over time",
                values=[
                    TimeValue(month=months[i], name=month_names[i], value=value)
                    for i, value in enumerate(
                        list(opportunity_achievements_df.iloc[41, 10:14])
                    )
                ],
            ),
            gender=None,
            age=None,
        ),
    ],
)

department.sections.append(section)

# Jobs retained

# NO DATA

# Livelihoods

# NO DATA

all_data.departments.append(department)
print(
    department.to_json(indent=2),
    file=open(output_dir + "/" + sheet_name + ".json", "w"),
)

# print(department.to_json(indent=2))

## DALLR

In [946]:
department_name = "Agriculture, Land Reform and Rural Development"
sheet_name = "DALLRD"

department = Department(
    month=202101,
    name=department_name,
    sheet_name=sheet_name,
    lead="Expanding support to farmers and protecting food value chains",
    paragraph="""The pandemic has illustrated the vulnerability of our food production and distribution systems. Although exempt from the strictest lockdown regulations, the sector faced severe challenges with disruptions to production and marketing experienced by many small-scale farmers. """,
    sections=[],
    target_lines=[8, 14],
    achievement_lines=[9, 14],
)

january_df = pd.read_excel(january_excel, sheet_name=sheet_name, header=None)
dallrd_budget_target = 1_000_000_000
dallrd_livelihoods_target = opportunity_targets_df.iloc[8, 2]

graduate_employment_budget_target = january_df.iloc[58, 1]
# Overall targets
section = Section(
    name="Programme targets for this department",
    section_type=SectionEnum.targets.name,
    metrics=[
        Metric(
            name="Total budget",
            metric_type=MetricTypeEnum.currency.name,
            value=dallrd_budget_target,
            time=None,
            gender=None,
            province=None,
            age=None,
        ),
        Metric(
            name="Graduate Employment budget",
            metric_type=MetricTypeEnum.currency.name,
            value=graduate_employment_budget_target,
            time=None,
            gender=None,
            province=None,
            age=None,
        ),
        Metric(
            name="Livelihoods supported",
            metric_type=MetricTypeEnum.count.name,
            value=dallrd_livelihoods_target,
            time=None,
            gender=None,
            province=None,
            age=None,
        ),
    ],
)

department.sections.append(section)

# Budget allocation

# NO INFO AVAILABLE

months = [202012, 202101]
month_names = ["Dec '20", "Jan '21"]

# Job opportunties - for DALLR this is graduate employment

graduate_employment_by_province_df = january_df.iloc[43:52, :2].set_index(0)
graduate_employment_total = january_df.iloc[52, 1]
male_employment = january_df.iloc[54, 1] / 100
female_employment = january_df.iloc[55, 1] / 100
youth_employment = january_df.iloc[56, 1] / 100
section = Section(
    name="Job opportunities created to date",
    section_type=SectionEnum.job_opportunities.name,
    metrics=[
        Metric(
            name="Graduate Employment",
            metric_type=MetricTypeEnum.count.name,
            value=graduate_employment_total,
            province=ProvinceValues(
                name="Opportunites by Province",
                values=[
                    ProvinceValue(
                        province=province_to_abbrev[province],
                        value=graduate_employment_by_province_df.loc[province],
                    )
                    for province in list(graduate_employment_by_province_df.index)
                ],
            ),
            time=None,
            gender=GenderValues(
                name="Opportunities by Gender",
                values=[
                    GenderValue(gender=GenderEnum.Male.name, value=male_employment),
                    GenderValue(gender=GenderEnum.Female.name, value=female_employment),
                ],
            ),
            age=AgeValues(
                name="Opportunities by Age",
                values=[AgeValue(age_category="18-35", value=youth_employment)],
            ),
        )
    ],
)

# department.sections.append(section)

# # Jobs retained

# # NO DATA

# # Livelihoods

# there are 5 programmes in columns 1 through 5
livelihoods_by_province = january_df.iloc[11:20, :6].set_index(0)
livelihoods_by_province.columns = [
    name.strip() for name in list(january_df.iloc[10, 1:6])
]
livelihoods_totals = list(january_df.iloc[20, 1:6])

livelihoods_programme_names = list(livelihoods_by_province.columns)

livelihoods_achievements_df = pd.concat(
    [
        opportunity_achievements_df.iloc[9:14, 1],
        opportunity_achievements_df.iloc[9:14, 12:14],
    ],
    axis=1,
).set_index(1)
livelihoods_achievements_df.index = pd.Index(
    [name.strip() for name in list(livelihoods_achievements_df.index)]
)

livelihood_programme_metrics = [
    Metric(
        name=programme_name,
        metric_type=MetricTypeEnum.count.name,
        value=livelihoods_totals[i],
        province=ProvinceValues(
            name="Livelihoods by province",
            values=[
                ProvinceValue(
                    province=province_to_abbrev[province],
                    value=livelihoods_by_province.loc[province, programme_name],
                )
                for j, province in enumerate(list(livelihoods_by_province.index))
            ],
        ),
        gender=GenderValues(
            name="Livelihoods by gender",
            values=[
                GenderValue(
                    gender=GenderEnum.Male.name, value=january_df.iloc[24].iloc[1 + i]
                ),
                GenderValue(
                    gender=GenderEnum.Female.name, value=january_df.iloc[25].iloc[1 + i]
                ),
            ],
        ),
        time=TimeValues(
            name="Livelihoods over time",
            values=[
                TimeValue(
                    month=month,
                    name=month_names[month_index],
                    value=livelihoods_achievements_df.loc[programme_name].iloc[
                        month_index
                    ],
                )
                for month_index, month in enumerate(months)
            ],
        ),
        age=AgeValues(
            name="Livelihood by age",
            values=[
                AgeValue(age_category="18-35", value=january_df.iloc[26].iloc[1 + i])
            ],
        ),
    )
    for i, programme_name in enumerate(livelihoods_programme_names)
]

section = Section(
    name="Livelihoods supported",
    section_type=SectionEnum.livelihoods.name,
    metrics=livelihood_programme_metrics,
)

all_data.departments.append(department)
print(
    department.to_json(indent=2),
    file=open(output_dir + "/" + sheet_name + ".json", "w"),
)

# print(department.to_json(indent=2))

## DSI

In [947]:
department_name = "Science and Innovation"
sheet_name = "DSI"

department = Department(
    month=202101,
    name=department_name,
    sheet_name=sheet_name,
    lead="Supporting new graduates entering a hostile labour market",
    paragraph="Given a constrained labour market, fewer opportunities will be available to graduates leaving institutions of higher learning in 2021.\n\nThe Department of Science and Innovation will deliver four programmes through its entities designed to minimise this impact, which will together offer 1,900 unemployed graduates an opportunity to earn an income while gaining meaningful work experience.",
    sections=[],
    target_lines=[43, 47],
    achievement_lines=[42, 46],
)


dsi_budget_target = 45_000_000
dsi_jobs_target = opportunity_targets_df.iloc[43:47, 2].sum()  # sum the 4 programmes

dsi_programme_targets_df = opportunity_targets_df.iloc[43:47, 1:3].set_index(1)
programme_target_metrics = [
    Metric(
        name=programme_name,
        metric_type=MetricTypeEnum.count.name,
        value=dsi_programme_targets_df.loc[programme_name].iloc[0],
        time=None,
        gender=None,
        age=None,
        province=None,
    )
    for programme_name in list(dsi_programme_targets_df.index)
]

# Overall targets
section = Section(
    name="Programme targets for this department",
    section_type=SectionEnum.targets.name,
    metrics=[
        Metric(
            name="Total budget",
            metric_type=MetricTypeEnum.currency.name,
            value=dsi_budget_target,
            time=None,
            gender=None,
            province=None,
            age=None,
        ),
        Metric(
            name="Overall jobs created",
            metric_type=MetricTypeEnum.count.name,
            value=dsi_jobs_target,
            time=None,
            gender=None,
            province=None,
            age=None,
        ),
    ],
)
section.metrics.extend(programme_target_metrics)

department.sections.append(section)

# Job creation results

# NO DATA

all_data.departments.append(department)
print(
    department.to_json(indent=2),
    file=open(output_dir + "/" + sheet_name + ".json", "w"),
)

# print(department.to_json(indent=2))

## DSAC

In [948]:
department_name = "Sports, Arts and Culture"
sheet_name = "DSAC"

department = Department(
    month=202101,
    name=department_name,
    sheet_name=sheet_name,
    lead="To get artists, cultural workers and the sporting sector on the road to recovery",
    paragraph="Under lockdown, there has been no loud applause in jazz venues, no curtain calls for the dancers, no tourists in craft markets – and no victory laps for our sports people. No segment of the creative, cultural and sporting sectors have been untouched",
    sections=[],
    target_lines=[25, 37],
    achievement_lines=[25, 37],
)


dsac_budget_target = 525_000_000
dsac_support_target = opportunity_targets_df.iloc[25:37, 2].sum()

dsac_programme_targets_df = opportunity_targets_df.iloc[25:37, 1:3].set_index(1)
programme_target_metrics = [
    Metric(
        name=programme_name,
        metric_type=MetricTypeEnum.count.name,
        value=dsac_programme_targets_df.loc[programme_name].iloc[0],
        time=None,
        gender=None,
        age=None,
        province=None,
    )
    for programme_name in list(dsac_programme_targets_df.index)
]

# Overall targets
section = Section(
    name="Programme targets for this department",
    section_type=SectionEnum.targets.name,
    metrics=[
        Metric(
            name="Total budget",
            metric_type=MetricTypeEnum.currency.name,
            value=dsac_budget_target,
            time=None,
            gender=None,
            province=None,
            age=None,
        ),
        Metric(
            name="Overall opportunities created",
            metric_type=MetricTypeEnum.count.name,
            value=dsac_support_target,
            time=None,
            gender=None,
            province=None,
            age=None,
        ),
    ],
)
section.metrics.extend(programme_target_metrics)

department.sections.append(section)

# Job creation results

months = [202011, 202012, 202101]
month_names = ["Nov '20", "Dec '20", "Jan '21"]

programme_achievements_df = (
    pd.concat(
        [
            opportunity_achievements_df.iloc[25:37, 1],
            opportunity_achievements_df.iloc[25:37, 11:14],
        ],
        axis=1,
    )
    .set_index(1)
    .drop(
        "Community Arts Centres (Provincial Public Entities) "  # this does not exist in targets
    )
)

programme_targets_df = opportunity_targets_df.iloc[25:37, 1:3].set_index(1)

programme_achievement_metrics_jobs = [
    Metric(
        name=programme_name,
        metric_type=MetricTypeEnum.count.name,
        value=programme_achievements_df.loc[programme_name].iloc[-1],
        time=TimeValues(
            name="Jobs created over time",
            values=[
                TimeValue(month=months[i], name=month_names[i], value=value)
                for i, value in enumerate(
                    list(programme_achievements_df.loc[programme_name])
                )
                if value != 0
            ],
        )
        if programme_achievements_df.loc[programme_name].sum() > 0
        else None,
        gender=None,
        age=None,
        province=None,
    )
    for programme_name in list(programme_achievements_df.index)
    if opportunity_type_df.loc[programme_name].iloc[0] == "CRE"
]

for section_type, name, filter_by in [
    (SectionEnum.job_opportunities.name, "Job opportunities created", "CRE"),
    (SectionEnum.jobs_retain.name, "Jobs retained", "RET"),
    (SectionEnum.livelihoods.name, "Livelihoods supported", "LIV"),
]:
    programme_achievement_metrics = [
        Metric(
            name=programme_name,
            metric_type=MetricTypeEnum.count.name,
            value=programme_achievements_df.loc[programme_name].iloc[-1],
            time=TimeValues(
                name="Jobs created over time",
                values=[
                    TimeValue(month=months[i], name=month_names[i], value=value)
                    for i, value in enumerate(
                        list(programme_achievements_df.loc[programme_name])
                    )
                    if value != 0
                ],
            )
            if programme_achievements_df.loc[programme_name].sum() > 0
            else None,
            gender=None,
            age=None,
            province=None,
            value_target=programme_targets_df.loc[programme_name].iloc[0],
        )
        for programme_name in list(programme_achievements_df.index)
        if opportunity_type_df.loc[programme_name].iloc[0] == "CRE"
    ]
    section = Section(
        name=name, section_type=section_type, metrics=programme_achievement_metrics
    )
    department.sections.append(section)

all_data.departments.append(department)
print(
    department.to_json(indent=2),
    file=open(output_dir + "/" + sheet_name + ".json", "w"),
)

# print(department.to_json(indent=2))

## DoT

In [949]:
department_name = "Transport"
sheet_name = "DoT"

department = Department(
    month=202101,
    name=department_name,
    sheet_name=sheet_name,
    lead="Improving access to services and opportunities for people in rural areas",
    paragraph="Rural roads play a vital role in connecting rural communities to services such as health and education, as well as providing access to markets and economic opportunities. However, rural roads infrastructure remains poor in many areas of South Africa",
    sections=[],
    target_lines=[24, 25],
    achievement_lines=[24, 25],
)

january_df = pd.read_excel(january_excel, sheet_name=sheet_name, header=None)

dot_budget_target = 630_000_000
dot_support_target = opportunity_targets_df.iloc[24, 2]

# Overall targets
section = Section(
    name="Programme targets for this department",
    section_type=SectionEnum.targets.name,
    metrics=[
        Metric(
            name="Total budget",
            metric_type=MetricTypeEnum.currency.name,
            value=dot_budget_target,
            time=None,
            gender=None,
            province=None,
            age=None,
        ),
        Metric(
            name="Overall opportunities created",
            metric_type=MetricTypeEnum.count.name,
            value=dot_support_target,
            time=None,
            gender=None,
            province=None,
            age=None,
        ),
    ],
)

department.sections.append(section)

# Spending targets

# TODO

# Job creation results

rural_road_total = opportunity_achievements_df.iloc[24, 13]
prov_rural_road_df = january_df.iloc[15:24, :2].fillna(0).set_index(0)
section = Section(
    name="Job opportunities created",
    section_type=SectionEnum.job_opportunities.name,
    metrics=[
        Metric(
            name="Rural roads maintenance",
            metric_type=MetricTypeEnum.count.name,
            value=rural_road_total,
            value_target=dot_support_target,
            time=None,
            gender=None,
            age=None,
            province=ProvinceValues(
                "Opportunities by province",
                values=[
                    ProvinceValue(
                        province=province_to_abbrev[province],
                        value=int(prov_rural_road_df.loc[province].iloc[0]),
                    )
                    for province in list(prov_rural_road_df.index)
                ],
            ),
        )
    ],
)

# Jobs retained

# NO DATA

# Livelihoods

# NO DATA

all_data.departments.append(department)
print(
    department.to_json(indent=2),
    file=open(output_dir + "/" + sheet_name + ".json", "w"),
)

# print(department.to_json(indent=2))

## DPWI

In [950]:
department_name = "Public Works and Infrastructure"
sheet_name = "DPWI"

department = Department(
    month=202101,
    name=department_name,
    sheet_name=sheet_name,
    lead="Graduate placements in the professional services",
    paragraph="In addition to structural skills shortages that were experienced prior to the pandemic, the management of facilities and completion of infrastructure projects has been further impacted by restrictions on the movement of people and limitations placed on completing infrastructure projects during the lockdown. As the economy re-opens, additional capacity is required to address the backlog so that service provision can be restored",
    sections=[],
    target_lines=[47, 56],
    achievement_lines=[46, 54],
)

january_df = pd.read_excel(january_excel, sheet_name=sheet_name, header=None)

dpwi_budget_target = 159_000_000
dpwi_support_target = opportunity_targets_df.iloc[47, 2]

# Overall targets
section = Section(
    name="Programme targets for this department",
    section_type=SectionEnum.targets.name,
    metrics=[
        Metric(
            name="Total budget",
            metric_type=MetricTypeEnum.currency.name,
            value=dot_budget_target,
            time=None,
            gender=None,
            province=None,
            age=None,
        ),
        Metric(
            name="Overall opportunities created",
            metric_type=MetricTypeEnum.count.name,
            value=dot_support_target,
            time=None,
            gender=None,
            province=None,
            age=None,
        ),
    ],
)

department.sections.append(section)

# Spending targets

# TODO

# Job creation results

dpwi_job_achievements_df = pd.concat(
    [
        opportunity_achievements_df.iloc[46:54, 1],
        opportunity_achievements_df.iloc[46:54, 11:14],
    ],
    axis=1,
).set_index(1)

programme_names = list(january_df.iloc[8, 1:9])

months = [202011, 202012, 202101]
month_names = ["Nov '20", "Dec '20", "Jan '21"]
dpwi_jobs_by_province_df = january_df.iloc[9:18, :10].fillna(0).set_index(0)
section = Section(
    name="Job opportunities created",
    section_type=SectionEnum.job_opportunities.name,
    metrics=[
        Metric(
            name=programme_name,
            metric_type=MetricTypeEnum.count.name,
            value=dpwi_job_achievements_df.loc[programme_name].iloc[
                -1
            ],  # we don't know per programme targets
            time=TimeValues(
                name="Job opportunities over time",
                values=[
                    TimeValue(month=months[i], name=month_names[i], value=int(value))
                    for i, value in enumerate(
                        dpwi_job_achievements_df.loc[programme_name]
                    )
                    if value != 0
                ],
            ),
            gender=GenderValues(
                name="Job opportunies by gender",
                values=[
                    GenderValue(
                        gender=GenderEnum.Male.name,
                        value=january_df.iloc[35, 1:9].fillna(0).iloc[programme_index],
                    ),
                    GenderValue(
                        gender=GenderEnum.Female.name,
                        value=january_df.iloc[36, 1:9].fillna(0).iloc[programme_index],
                    ),
                ],
            ),
            age=AgeValues(
                name="Job opportunities by age",
                values=[
                    AgeValue(
                        age_category="18-35",
                        value=january_df.iloc[37, 1:9].fillna(0).iloc[programme_index],
                    )
                ],
            ),
            province=ProvinceValues(
                "Opportunities by province",
                values=[
                    ProvinceValue(
                        province=province_to_abbrev[province],
                        value=int(
                            dpwi_jobs_by_province_df.loc[province].iloc[programme_index]
                        ),
                    )
                    for province in list(dpwi_jobs_by_province_df.index)
                    if dpwi_jobs_by_province_df.loc[province].iloc[programme_index] != 0
                ],
            ),
        )
        for programme_index, programme_name in enumerate(programme_names)
    ],
)

department.sections.append(section)
# Jobs retained

# NO DATA

# Livelihoods

# NO DATA

all_data.departments.append(department)
print(
    department.to_json(indent=2),
    file=open(output_dir + "/" + sheet_name + ".json", "w"),
)

# print(department.to_json(indent=2))

## DEFF

In [951]:
department_name = "Environment, Forestry and Fisheries"
sheet_name = "DEFF"

department = Department(
    month=202101,
    name=department_name,
    sheet_name=sheet_name,
    lead="Investing in the environment we live in",
    paragraph="The work undertaken in environmental, forestry and fishery programmes will touch the length and breadth of the country, from coast to coast, including bushveld, grassland, fynbos, wetlands, mountains, water bodies, catchment areas  – and urban areas, too. The work undertaken affects the air we breathe, the water we drink, the energy we use and the food we eat, supporting a wealth of biodiversity resources and ecological systems essential to life on earth and to the future of the planet.",
    sections=[],
    target_lines=[14, 24],
    achievement_lines=[14, 24],
)

deff_budget_target = 1_983_000 * 1000
deff_support_target = opportunity_targets_df.iloc[14:24, 2:3].sum()

# Overall targets
section = Section(
    name="Programme targets for this department",
    section_type=SectionEnum.targets.name,
    metrics=[
        Metric(
            name="Total budget",
            metric_type=MetricTypeEnum.currency.name,
            value=deff_budget_target,
            time=None,
            gender=None,
            province=None,
            age=None,
        ),
        Metric(
            name="Overall opportunities created",
            metric_type=MetricTypeEnum.count.name,
            value=deff_support_target,
            time=None,
            gender=None,
            province=None,
            age=None,
        ),
    ],
)

department.sections.append(section)

# Spending targets

# TODO

# Job creation results

deff_programme_targets = opportunity_targets_df.iloc[14:24, 1:3].set_index(1)

deff_job_achievements_df = pd.concat(
    [
        opportunity_achievements_df.iloc[14:24, 1],
        opportunity_achievements_df.iloc[14:24, 11:14],
    ],
    axis=1,
).set_index(1)

programme_names = list(deff_job_achievements_df.index)

months = [202011, 202012, 202101]
month_names = ["Nov '20", "Dec '20", "Jan '21"]
dpwi_jobs_by_province_df = january_df.iloc[9:18, :10].fillna(0).set_index(0)
section = Section(
    name="Job opportunities created",
    section_type=SectionEnum.job_opportunities.name,
    metrics=[
        Metric(
            name=programme_name,
            metric_type=MetricTypeEnum.count.name,
            value=deff_job_achievements_df.loc[programme_name].max(),
            value_target=deff_programme_targets.loc[programme_name],
            time=TimeValues(
                name="Opportunities over time",
                values=[
                    TimeValue(month=months[i], name=month_names[i], value=int(value))
                    for i, value in enumerate(
                        deff_job_achievements_df.loc[programme_name]
                    )
                    if value != 0
                ],
            ),
            gender=None,
            province=None,
            age=None,
        )
        for programme_index, programme_name in enumerate(programme_names)
    ],
)

department.sections.append(section)
# Jobs retained

# NO DATA

# Livelihoods

# NO DATA

all_data.departments.append(department)
print(
    department.to_json(indent=2),
    file=open(output_dir + "/" + sheet_name + ".json", "w"),
)

# print(department.to_json(indent=2))

## COGTA

In [952]:
department_name = "Co-operative government"
sheet_name = "DCOGTA"

department = Department(
    month=202101,
    name=department_name,
    sheet_name=sheet_name,
    lead="Mainstreaming and improving labour-intensity in infrastructure delivery",
    paragraph="Prioritising infrastructure maintenance Mainstreaming and improving labour-intensity in infrastructure deliveryCommunity access to water and sanitation is all the more important in the context of the crisisTOTAL BUDGETR50MJOB OPPORTUNITIES25,000 Before the crisis, many municipalities were already facing critical funding shortfalls and challenges in the sustainable delivery of basic services and the maintenance of infrastructure. The pandemic has compounded these problems by cancelling or stalling implementation of all non-critical infrastructure projects",
    sections=[],
    target_lines=[37, 38],
    achievement_lines=[37, 38],
)

dcogta_budget_target = 50_000_000
dcogta_support_target = opportunity_targets_df.iloc[37, 2]

# Overall targets
section = Section(
    name="Programme targets for this department",
    section_type=SectionEnum.targets.name,
    metrics=[
        Metric(
            name="Total budget",
            metric_type=MetricTypeEnum.currency.name,
            value=dcogta_budget_target,
            time=None,
            gender=None,
            province=None,
            age=None,
        ),
        Metric(
            name="Overall opportunities created",
            metric_type=MetricTypeEnum.count.name,
            value=dcogta_support_target,
            time=None,
            gender=None,
            province=None,
            age=None,
        ),
    ],
)

department.sections.append(section)

# Spending targets

# TODO

# Job creation results

# NO DATA

# Jobs retained

# NO DATA

# Livelihoods

# NO DATA

all_data.departments.append(department)
print(
    department.to_json(indent=2),
    file=open(output_dir + "/" + sheet_name + ".json", "w"),
)

# print(department.to_json(indent=2))

## Overview picture

In [953]:
# not aligned: the target and achievement sections do not align
# DALLRD: target is entirely 'Subsistence relief fund': LIV
# DOH: target CHW and Outreach Team Leaders is collapsed into 1 in achievements: CRE
# DPWI: target is entirely 'Graduate programmes': CRE
not_aligned = ["DALLRD", "DOH", "DPWI"]

all_data.overview.overview_sections = []

dallrd_dept = [dept for dept in all_data.departments if dept.sheet_name == "DALLRD"][0]
doh_dept = [dept for dept in all_data.departments if dept.sheet_name == "DOH"][0]
dpwi_dept = [dept for dept in all_data.departments if dept.sheet_name == "DPWI"][0]

total_target = 0
total_achievement = 0
section_details = dict(
    LIV=dict(name="Livelihoods", section_type=SectionEnum.livelihoods.name),
    CRE=dict(name="Jobs created", section_type=SectionEnum.job_opportunities.name),
    RET=dict(
        name="Vulnerable jobs retained", section_type=SectionEnum.jobs_retain.name
    ),
)
for opportunity_type in ("LIV", "CRE", "RET"):
    metrics = []
    for dept in all_data.departments:
        if dept.sheet_name in not_aligned:
            continue
        if dept.sheet_name == "DEFF":
            # a horrible hack because DEFF has data, sometimes, for November 2020, but not for January 2021
            opportunity_achievements_df.iloc[
                dept.achievement_lines[0] : dept.achievement_lines[1], 13
            ][
                opportunity_achievements_df.iloc[
                    dept.achievement_lines[0] : dept.achievement_lines[1], 13
                ]
                == 0
            ] = opportunity_achievements_df.iloc[
                dept.achievement_lines[0] : dept.achievement_lines[1], 11
            ][
                opportunity_achievements_df.iloc[
                    dept.achievement_lines[0] : dept.achievement_lines[1], 13
                ]
                == 0
            ]
        filter_list = list(
            opportunity_targets_df.iloc[dept.target_lines[0] : dept.target_lines[1], 4]
            == opportunity_type
        )
        in_section = functools.reduce(lambda x, y: x or y, filter_list, False)
        if not in_section:
            continue
        target = int(
            opportunity_targets_df.iloc[dept.target_lines[0] : dept.target_lines[1], 2][
                filter_list
            ].sum()
        )
        total_target += target
        achievement = int(
            opportunity_achievements_df.iloc[
                dept.achievement_lines[0] : dept.achievement_lines[1], 13
            ][filter_list].sum()
        )
        total_achievement += achievement
        metrics.append(
            OverviewMetric(
                name=dept.name,
                metric_type=MetricTypeEnum.count.name,
                value=achievement,
                value_target=target,
                time=None,
            )
        )
    # for the unaligned sections we can't use the "filter list approach"
    # - luckily for now all of these are for departments with single opportunity type
    if opportunity_type == "LIV":
        for dept in [dallrd_dept]:
            achievement = int(
                opportunity_achievements_df.iloc[
                    dept.achievement_lines[0] : dept.achievement_lines[1], 13
                ].sum()
            )
            target = int(
                opportunity_targets_df.iloc[
                    dept.target_lines[0] : dept.target_lines[1], 2
                ].sum()
            )
            total_achievement += achievement
            total_target += target
            metrics.append(
                OverviewMetric(
                    name=dept.name,
                    metric_type=MetricTypeEnum.count.name,
                    value=achievement,
                    value_target=target,
                    time=None,
                )
            )
    elif opportunity_type == "CRE":
        for dept in [doh_dept, dpwi_dept]:
            achievement = int(
                opportunity_achievements_df.iloc[
                    dept.achievement_lines[0] : dept.achievement_lines[1], 13
                ].sum()
            )
            target = int(
                opportunity_targets_df.iloc[
                    dept.target_lines[0] : dept.target_lines[1], 2
                ].sum()
            )
            total_achievement += achievement
            total_target += target
            metrics.append(
                OverviewMetric(
                    name=dept.name,
                    metric_type=MetricTypeEnum.count.name,
                    value=achievement,
                    value_target=target,
                    time=None,
                )
            )
    section = OverviewSection(
        name=section_details[opportunity_type]["name"],
        section_type=section_details[opportunity_type]["section_type"],
        metrics=metrics,
    )
    all_data.overview.overview_sections.append(section)

# Save final data

In [954]:
output_filename = output_dir + "/all_data.json"
open(output_filename, "w").write(all_data.to_json(indent=2))
# print(all_data.to_json(indent=2))

84529