In [1]:
%load_ext lab_black

In [2]:
%load_ext autoreload

%autoreload 2

In [3]:
from datetime import datetime
import functools
import json
import os
import re
import sys

import altair as alt
import numpy as np
import pandas as pd

sys.path.append("../python-src")
from presidential_employment import *

IndentationError: unexpected indent (presidential_employment.py, line 119)

In [None]:
# Debug aid: print argv[0] of the running process (under Jupyter this is
# typically the kernel launcher script rather than this notebook).
print(sys.argv[0])

In [None]:
# Directory that receives one "<sheet_name>.json" file per department.
# The default is the original author's checkout; set the PES_OUTPUT_DIR
# environment variable to redirect the output on another machine without
# editing the notebook. Kept as a plain string because later cells build
# file names with `output_dir + "/" + ...`.
output_dir = os.environ.get(
    "PES_OUTPUT_DIR",
    "/home/pvh/Documents/code/pvh-forks/presidential-employment-stimulus/data",
)

### Data structure

Each department has a total budget and total opportunities target. 

The overall programme has three outcome targets:
1. Jobs created
2. Jobs retained
3. Livelihoods supported

Each department has a "blurb" describing their programme.

Within each department there are multiple programmes that can contribute to each of these targets.

Each programme has a demographic split of outcomes, with gender and youth percentages.

Each programme has a per-province split of outcomes.

Files:

`Consolidated data (Dec) - Presidential Employment Stimulus.xlsx` - December sheet

`Consolidated Presidential Employment Stimulus Reporting Template.xlsx` - January sheet

In [None]:
# Source workbooks. Paths are relative to the notebook's working directory.
december_excel = "Consolidated data (Dec) - Presidential Employment Stimulus.xlsx"
january_excel = "Consolidated Presidential Employment Stimulus Reporting Template.xlsx"
doh_january_excel = (
    "DoH_Consolidated Appointment of Nurses - Verified by Provinces.xlsx"
)
march_excel = (
    "Consolidated Presidential Employment Stimulus Reporting Template 200321.xlsx"
)

jan_opportunities_excel = (
    "Dashboard input_PES targets and opportunities per month 170321 categories.xlsx"
)
mar_opportunities_excel = (
    "Dashboard input_PES targets and opportunities per month 230321.xlsx"
)

# Open the dashboard workbook once and parse all three sheets from the same
# handle; the context manager closes the underlying file afterwards.
with pd.ExcelFile(mar_opportunities_excel) as opportunities_workbook:
    # header=None keeps every sheet row addressable by position (iloc);
    # empty cells are replaced by 0 so later sums and int() casts do not see NaN.
    opportunity_targets_df = pd.read_excel(
        opportunities_workbook, sheet_name="Targets", header=None
    ).fillna(0)
    opportunity_achievements_df = pd.read_excel(
        opportunities_workbook, sheet_name="Trends", header=None
    ).fillna(0)

    # This sheet keeps its header row (after skipping two lines); only the
    # first four columns are read.
    implementation_status_df = pd.read_excel(
        opportunities_workbook,
        sheet_name="Implementation status",
        skiprows=2,
        usecols=range(4),
    )

# Columns 1 and 4 of Targets rows 2-55, indexed by the values of column 1.
opportunity_type_df = pd.concat(
    [opportunity_targets_df.iloc[2:56, 1], opportunity_targets_df.iloc[2:56, 4]], axis=1
).set_index(1)

# Sheet names only; close each workbook as soon as they have been listed.
with pd.ExcelFile(december_excel) as december_workbook:
    december_sheets = december_workbook.sheet_names
with pd.ExcelFile(january_excel) as january_workbook:
    january_sheets = january_workbook.sheet_names

```
interface DepartmentMonth {
  month: number // 202101
  name: string // Basic Education
  lead: string // Strengthening the learning environment in schools
  paragraph: string
  sections: Array<{
    name: string // Budget allocated to date
    metrics: Array<{
      name: string // Educational and general assistants
      type: 'currency' | 'count'
      value: number
      valueTarget?: number
      time?: {
        name: string // spend
        values: Array<{
          month: number // 202101
          value: number
        }>
      }
      gender?: {
        name: string // opportunities
        values: Array<{
          gender: 'female' | 'male'
          value: number
        }>
      }
      age?: {
        name: string // opportunities
        values: Array<{
          age: string // 18-35
          value: number
        }>
      }
      province?: {
        name: string // opportunities
        values: Array<{
          province: 'EC' | 'FS' | 'GP' | 'KZN' | 'LP' | 'MP' | 'NC' | 'NW' | 'WC'
          value: number
        }>
      }
    }>
  }>
}
```

## 

# Top level structure

In [None]:
# Root object of the export: a programme-level overview plus the list of
# departments, which the department cells below append to one by one.
# `leads` and `paragraphs` are not defined in this notebook; they come from
# the star import of presidential_employment.
all_data = Everything(
    overview=Overview(
        month=202103,
        name="Programme overview",
        lead=leads["overview"],
        paragraph=paragraphs["overview"],
        sections=[],
    ),
    departments=[],
)

## DTIC

In [None]:
department_name = "Trade, Industry and Competition"
sheet_name = "DTIC"

# target_lines / achievement_lines are used below as half-open iloc row
# ranges into the "Targets" and "Trends" sheets; for DTIC each range is the
# single row 38.
department = Department(
    month=202012,
    name=department_name,
    sheet_name=sheet_name,
    lead=leads[sheet_name],
    paragraph=paragraphs[sheet_name],
    sections=[],
    target_lines=[38, 39],
    achievement_lines=[38, 39],
    implementation_details=[],
)

budget_target = 120_000 * 1000
# Reduce the row slice with .sum() before casting: int() applied directly to
# a Series is deprecated in pandas and fails for anything but one element.
# For the one-row DTIC range the sum is simply that row's value, and this
# matches how the other department cells compute their totals.
opportunities_target = int(
    opportunity_targets_df.iloc[
        department.target_lines[0] : department.target_lines[1], 2
    ].sum()
)
opportunities_achieved = int(
    opportunity_achievements_df.iloc[
        department.achievement_lines[0] : department.achievement_lines[1], 13
    ].sum()
)

# Programme targets for this department
# Two headline metrics: overall budget (currency) and overall opportunities
# (count); neither carries a time, gender, age or province breakdown.
section = Section(
    name=section_titles[SectionEnum.targets.name],
    section_type=SectionEnum.targets.name,
    metrics=[
        Metric(
            name=metric_titles[SectionEnum.targets.name][MetricTypeEnum.currency.name],
            metric_type=MetricTypeEnum.currency.name,
            # NOTE(review): -1 looks like a "spend not reported" placeholder — confirm
            value=-1,
            value_target=budget_target,
            time=None,
            gender=None,
            age=None,
            province=None,
        ),
        Metric(
            name=metric_titles[SectionEnum.targets.name][MetricTypeEnum.count.name],
            metric_type=MetricTypeEnum.count.name,
            value=opportunities_achieved,
            value_target=opportunities_target,
            time=None,
            gender=None,
            age=None,
            province=None,
        ),
    ],
)

department.sections.append(section)

# DTIC sheet of the December workbook, read without a header row so cells
# are addressed by position.
december_df = pd.read_excel(december_excel, sheet_name=sheet_name, header=None)

# Spend figures from rows 48 and 49, column 1. In the visible part of the
# notebook they are only referenced by the commented-out budget section.
oct_nov_spend = december_df.iloc[48, 1]
dec_spend = december_df.iloc[49, 1]

# Used as the metric name and to filter implementation_status_df.
programme_name = "Global Business Services Sector"

# budget allocated to date

# removed this section for now
# section = Section(
#     name="Budget allocated to date",
#     section_type=SectionEnum.budget_allocated.name,
#     metrics=[
#         Metric(
#             name=programme_name,
#             metric_type=MetricTypeEnum.currency.name,
#             value=dec_spend,
#             value_target=budget_target,
#             time=TimeValues(
#                 name="Spent over time",
#                 values=[
#                     TimeValue(month=202011, name="Nov '20", value=oct_nov_spend),
#                     TimeValue(month=202012, name="Dec '20", value=dec_spend),
#                 ],
#             ),
#             gender=None,
#             age=None,
#             province=None,
#         )
#     ],
# )

# department.sections.append(section)

# Job opportunities created to date

# NOTE(review): the total sums rows 28-36 (nine rows) while the per-province
# table below starts at row 29 (eight rows) — confirm which row range holds
# the provinces in the December DTIC sheet.
total_programme_jobs = december_df.iloc[28:37, 1].sum()
percentage_male = december_df.iloc[41, 1]
percentage_female = december_df.iloc[42, 1]
percentage_youth = december_df.iloc[43, 1]


# Province name (column 0) -> job count (column 1).
by_province_df = december_df.iloc[29:37, :2].set_index(0)

province_values = ProvinceValues(
    name=metric_titles[SectionEnum.job_opportunities.name][
        MetricTypeEnum.count.name + "_province"
    ],
    values=[],
)
for province in list(by_province_df.index):
    pv = ProvinceValue(
        province=province_to_abbrev[province],
        # .loc[province] returns the one-column row as a Series; take its
        # single cell explicitly instead of calling int() on the Series,
        # which pandas has deprecated.
        value=int(by_province_df.loc[province].iloc[0]),
    )
    province_values.values.append(pv)

# Columns 10-13 of the department's row in the Trends sheet: four values.
opportunities_achieved_by_time = opportunity_achievements_df.iloc[
    department.achievement_lines[0], 10:14
].tolist()

# The whole Trends row as a list (the trailing comma makes the index a
# one-element tuple, which selects the row).
# NOTE(review): not read anywhere in the visible part of the notebook.
jobs_implemented = list(
    opportunity_achievements_df.iloc[
        department.achievement_lines[0],
    ]
)


# Rows of the "Implementation status" sheet whose Programme column equals
# programme_name.
# NOTE(review): not read anywhere in the visible part of the notebook.
implementation_info = implementation_status_df[
    implementation_status_df.Programme == programme_name
]

# Job-opportunities section with a single metric for the DTIC programme,
# carrying time, gender, age and province breakdowns.
section = Section(
    name=section_titles[SectionEnum.job_opportunities.name],
    section_type=SectionEnum.job_opportunities.name,
    metrics=[
        Metric(
            name=programme_name,
            metric_type=MetricTypeEnum.count.name,
            value=total_programme_jobs,
            # The i-th trend value is labelled with the i-th entry of the
            # months / month_names lists from the star import.
            time=TimeValues(
                name=metric_titles[SectionEnum.job_opportunities.name][
                    MetricTypeEnum.count.name + "_time"
                ],
                values=[
                    TimeValue(month=months[i], name=month_names[i], value=value)
                    for i, value in enumerate(opportunities_achieved_by_time)
                ],
            ),
            # Gender and age values are the percentage_* cells read from the
            # December sheet, passed through unchanged.
            gender=GenderValues(
                name=metric_titles[SectionEnum.job_opportunities.name][
                    MetricTypeEnum.count.name + "_gender"
                ],
                values=[
                    GenderValue(gender=GenderEnum.Male.name, value=percentage_male),
                    GenderValue(gender=GenderEnum.Female.name, value=percentage_female),
                ],
            ),
            age=AgeValues(
                name=metric_titles[SectionEnum.job_opportunities.name][
                    MetricTypeEnum.count.name + "_age"
                ],
                values=[AgeValue(age_category="18-35", value=percentage_youth)],
            ),
            province=province_values,
        )
    ],
)

department.sections.append(section)

# NOTE: re-running this cell appends the department to all_data again.
all_data.departments.append(department)

# Serialise once and reuse the text for both the file and the cell output.
# The with-block closes the handle, so the JSON is flushed to disk before
# the cell finishes.
department_json = department.to_json(indent=2)
with open(output_dir + "/" + sheet_name + ".json", "w") as json_file:
    print(department_json, file=json_file)
print(department_json)

## DBE

In [None]:
department_name = "Basic Education"
sheet_name = "DBE"

# target_lines is the half-open row range of this department in the
# "Targets" sheet (used as slice bounds below); achievement_lines holds the
# corresponding range for the "Trends" sheet.
department = Department(
    month=202103,
    name=department_name,
    sheet_name=sheet_name,
    lead=leads[sheet_name],
    paragraph=paragraphs[sheet_name],
    sections=[],
    target_lines=[2, 5],
    achievement_lines=[3, 6],
    implementation_details=[],
)

budget_target = 7_000_000 * 1000
# opportunities_target = 344_933
# Department target: column 2 of the Targets sheet summed over its rows.
opportunities_target = opportunity_targets_df.iloc[
    department.target_lines[0] : department.target_lines[1], 2
].sum()

# [december_df.iloc[19, 1], january_df.iloc[19, 1]]
# NOTE: this is replaced with the calculation below from the other spreadsheets
# opportunities_achieved = opportunity_achievements_df.iloc[
#     department.achievement_lines[0] : department.achievement_lines[1], 13
# ].sum()

# The DBE sheet from each reporting workbook, read without a header row.
december_df = pd.read_excel(
    december_excel,
    sheet_name=sheet_name,
    header=None,
)
january_df = pd.read_excel(
    january_excel,
    sheet_name=sheet_name,
    header=None,
)
march_df = pd.read_excel(march_excel, sheet_name=sheet_name, header=None)

# assistants_budget = january_df.iloc[42, 1]
# post_saving_budget = january_df.iloc[42, 2]

# Column 13 of Trends rows 3-5, i.e. the rows covered by achievement_lines;
# column 13 is the last of the columns 10:14 used for the time series below.
posts_saved_achieved = opportunity_achievements_df.iloc[5, 13]

ed_assistants_achieved = opportunity_achievements_df.iloc[3, 13]
gen_assistants_achieved = opportunity_achievements_df.iloc[4, 13]

opportunities_achieved = sum(
    [posts_saved_achieved, ed_assistants_achieved, gen_assistants_achieved]
)
# Programme targets for this department
# Two headline metrics: overall budget (currency) and overall opportunities
# (count). The per-programme budget metrics are currently commented out.
section = Section(
    name=section_titles[SectionEnum.targets.name],
    section_type=SectionEnum.targets.name,
    metrics=[
        Metric(
            name=metric_titles[SectionEnum.targets.name][MetricTypeEnum.currency.name],
            metric_type=MetricTypeEnum.currency.name,
            # NOTE(review): -1 looks like a "spend not reported" placeholder — confirm
            value=-1,
            value_target=budget_target,
            time=None,
            gender=None,
            age=None,
            province=None,
        ),
        #         Metric(
        #             name="Budget for Education and General Assistant Posts",
        #             metric_type=MetricTypeEnum.currency.name,
        #             value=assistants_budget,
        #             time=None,
        #             gender=None,
        #             age=None,
        #             province=None,
        #         ),
        #         Metric(
        #             name="Budget for Saving Vulnerable Posts",
        #             metric_type=MetricTypeEnum.currency.name,
        #             value=post_saving_budget,
        #             time=None,
        #             gender=None,
        #             age=None,
        #             province=None,
        #         ),
        Metric(
            name=metric_titles[SectionEnum.targets.name][MetricTypeEnum.count.name],
            metric_type=MetricTypeEnum.count.name,
            value=opportunities_achieved,
            value_target=opportunities_target,
            time=None,
            gender=None,
            age=None,
            province=None,
        ),
    ],
)

department.sections.append(section)

# NOTE: removed programme spends for now
# programme_spends = [
#     dict(
#         name="Education and general assistants",
#         amounts=[december_df.iloc[44, 1], january_df.iloc[44, 1]],
#     ),
#     dict(
#         name="Vulnerable  posts saved",
#         amounts=[december_df.iloc[44, 2], january_df.iloc[44, 2]],
#     ),
# ]


# for programme in programme_spends:
#     values = []
#     total_value = 0
#     for i, value in enumerate(programme["amounts"]):
#         values.append(TimeValue(month=months[i], name=month_names[i], value=value))
#         total_value += value
#     metric = Metric(
#         name=programme["name"],
#         metric_type=MetricTypeEnum.currency.name,
#         value=total_value,
#         value_target=budget_target,
#         time=TimeValues(name="Spent over time", values=values),
#         gender=None,
#         age=None,
#         province=None,
#     )
#     metrics.append(metric)

# programme_name = "Global Business Services Sector expansion"

# # budget allocated to date
# section = Section(
#     name="Budget allocated to date",
#     section_type=SectionEnum.budget_allocated.name,
#     metrics=metrics,
# )

# department.sections.append(section)

# Job opportunities created to date

# Rows 10-18 of the March sheet, indexed by column 0 (the keys are looked up
# in province_to_abbrev below): column 1 for education assistants, column 2
# for general assistants.
ed_assistants_by_prov_df = march_df.iloc[10:19, :2].set_index(0)
gen_assistants_by_prov_df = march_df.iloc[10:19, :3].drop(1, axis=1).set_index(0)

ed_assistants_target = opportunity_targets_df.iloc[2, 2]
gen_assistants_target = opportunity_targets_df.iloc[3, 2]

# One plain dict per programme; each is turned into a Metric further down.
#   name     - metric label
#   totals   - columns 10:14 of the programme's Trends row (four values)
#   province - per-province breakdown built from the March sheet
#   target   - the programme's row in the Targets sheet, column 2
job_opportunity_programmes = [
    dict(
        name="Education Assistants",
        totals=list(opportunity_achievements_df.iloc[3, 10:14]),
        province=ProvinceValues(
            name=metric_titles[SectionEnum.job_opportunities.name][
                MetricTypeEnum.count.name + "_province"
            ],
            values=[
                ProvinceValue(
                    province=province_to_abbrev[province],
                    value=int(
                        str(ed_assistants_by_prov_df.loc[province].iloc[0]).replace(
                            " ", ""
                        )
                    ),  # all of this is because of a space in a value
                )
                for province in list(ed_assistants_by_prov_df.index)
            ],
        ),
        target=ed_assistants_target,
    ),
    dict(
        name="General Assistants",
        totals=list(opportunity_achievements_df.iloc[4, 10:14]),
        province=ProvinceValues(
            name=metric_titles[SectionEnum.job_opportunities.name][
                MetricTypeEnum.count.name + "_province"
            ],
            values=[
                ProvinceValue(
                    province=province_to_abbrev[province],
                    value=int(gen_assistants_by_prov_df.loc[province].iloc[0]),
                )
                for province in list(gen_assistants_by_prov_df.index)
            ],
        ),
        target=gen_assistants_target,
    ),
]

# Turn each programme dict into a count Metric. The metric's headline value
# is the last entry of its time series (totals[-1]).
metrics = []
for programme in job_opportunity_programmes:
    metric = Metric(
        name=programme["name"],
        metric_type=MetricTypeEnum.count.name,
        time=TimeValues(
            name=metric_titles[SectionEnum.job_opportunities.name][
                MetricTypeEnum.count.name + "_time"
            ],
            # The i-th total is labelled with the i-th months / month_names entry.
            values=[
                TimeValue(month=months[i], name=month_names[i], value=value)
                for i, value in enumerate(programme["totals"])
            ],
        ),
        gender=None,
        age=None,
        province=programme["province"],
        value=programme["totals"][-1],
        value_target=programme["target"],
    )
    metrics.append(metric)

section = Section(
    name=section_titles[SectionEnum.job_opportunities.name],
    section_type=SectionEnum.job_opportunities.name,
    metrics=metrics,
)

department.sections.append(section)

# Jobs retained

# Despite its name this frame is a per-province table: column 3 of rows
# 10-18 of the March sheet, indexed by column 0.
jobs_retained_over_time_df = march_df.iloc[10:19, :4].drop([1, 2], axis=1).set_index(0)

# Columns 10:14 of Trends row 5 give the time series; Targets row 4 the target.
jobs_retained_over_time = opportunity_achievements_df.iloc[5, 10:14].tolist()
posts_saved_target = opportunity_targets_df.iloc[4, 2]

section = Section(
    name=section_titles[SectionEnum.jobs_retain.name],
    section_type=SectionEnum.jobs_retain.name,
    metrics=[
        Metric(
            name="Job retention at fee paying schools",
            metric_type=MetricTypeEnum.count.name,
            time=TimeValues(
                name=metric_titles[SectionEnum.jobs_retain.name][
                    MetricTypeEnum.count.name + "_time"
                ],
                values=[
                    TimeValue(
                        month=months[i],
                        name=month_names[i],
                        value=value,
                    )
                    for i, value in enumerate(jobs_retained_over_time)
                ],
            ),
            value=posts_saved_achieved,
            value_target=posts_saved_target,
            gender=None,
            age=None,
            province=ProvinceValues(
                name=metric_titles[SectionEnum.jobs_retain.name][
                    MetricTypeEnum.count.name + "_province"
                ],
                values=[
                    ProvinceValue(
                        province=province_to_abbrev[province],
                        # .loc[province] is a one-element Series; extract the
                        # cell and cast it, as the General Assistants
                        # breakdown does, so a plain int is stored.
                        value=int(jobs_retained_over_time_df.loc[province].iloc[0]),
                    )
                    # Iterate the same frame the values are read from.
                    for province in list(jobs_retained_over_time_df.index)
                ],
            ),
        )
    ],
)

department.sections.append(section)

# NOTE: re-running this cell appends the department to all_data again.
all_data.departments.append(department)

# Serialise once and reuse the text for both the file and the cell output.
# The with-block closes the handle, so the JSON is flushed to disk before
# the cell finishes.
department_json = department.to_json(indent=2)
with open(output_dir + "/" + sheet_name + ".json", "w") as json_file:
    print(department_json, file=json_file)
print(department_json)

## DSD

In [None]:
department_name = "Social Development"
sheet_name = "DSD"

# target_lines / achievement_lines are half-open row ranges into the
# "Targets" and "Trends" sheets; both are used as iloc slice bounds below.
department = Department(
    month=202103,
    name=department_name,
    sheet_name=sheet_name,
    lead=leads[sheet_name],
    paragraph=paragraphs[sheet_name],
    sections=[],
    target_lines=[5, 8],
    achievement_lines=[6, 9],
    implementation_details=[],
)


budget_target = 588_728 * 1000
# Department target: column 2 of the Targets sheet summed over its rows.
beneficiaries_target = opportunity_targets_df.iloc[
    department.target_lines[0] : department.target_lines[1], 2
].sum()

# The DSD sheet from each reporting workbook, read without a header row.
december_df = pd.read_excel(
    december_excel,
    sheet_name=sheet_name,
    header=None,
)
january_df = pd.read_excel(
    january_excel,
    sheet_name=sheet_name,
    header=None,
)
march_df = pd.read_excel(march_excel, sheet_name=sheet_name, header=None)

# Department achievement: column 13 of the Trends sheet summed over its rows.
beneficiaries_achieved = opportunity_achievements_df.iloc[
    department.achievement_lines[0] : department.achievement_lines[1], 13
].sum()

social_workers_budget = march_df.iloc[42, 1]
# The December cell holds text; the regex keeps the first run of digits and
# dots that follows some non-digit prefix, and the result is scaled by one
# million.
registration_support_budget = int(
    float(re.sub(r"[^\d]+([\d.]+).*", r"\1", december_df.iloc[42, 2])) * 1_000_000
)  # pull number out of R 16.5 million
# Programme targets for this department
# Two headline metrics: overall budget (currency) and overall beneficiaries
# (count). The per-programme budget metrics are currently commented out.
section = Section(
    name=section_titles[SectionEnum.targets.name],
    section_type=SectionEnum.targets.name,
    metrics=[
        Metric(
            name=metric_titles[SectionEnum.targets.name][MetricTypeEnum.currency.name],
            metric_type=MetricTypeEnum.currency.name,
            # NOTE(review): -1 looks like a "spend not reported" placeholder — confirm
            value=-1,
            value_target=budget_target,
            time=None,
            gender=None,
            age=None,
            province=None,
        ),
        #         Metric(
        #             name="Budget for social workers",
        #             metric_type=MetricTypeEnum.currency.name,
        #             value=social_workers_budget,
        #             time=None,
        #             gender=None,
        #             age=None,
        #             province=None,
        #         ),
        #         Metric(
        #             name="Budget for registration support officers",
        #             metric_type=MetricTypeEnum.currency.name,
        #             value=registration_support_budget,
        #             time=None,
        #             gender=None,
        #             age=None,
        #             province=None,
        #         ),
        Metric(
            name=metric_titles[SectionEnum.targets.name][MetricTypeEnum.count.name],
            metric_type=MetricTypeEnum.count.name,
            value=beneficiaries_achieved,
            value_target=beneficiaries_target,
            time=None,
            gender=None,
            age=None,
            province=None,
        ),
    ],
)

department.sections.append(section)

# budget allocated to date

# The December cell is text: strip the "R" and the thousands separators
# before converting. The January cell is used as read.
social_worker_spend_dec = int(
    float(december_df.iloc[44, 1].replace("R", "").replace(",", ""))
)
social_worker_spend_jan = january_df.iloc[44, 1]
social_worker_spends = [social_worker_spend_dec, social_worker_spend_jan]

# NOTE(review): programme_spends is only consumed by the commented-out
# budget section that follows, so in the visible part of the notebook it
# does not reach the output.
# NOTE(review): the December and January spends are labelled with months[0]
# and months[1] — confirm those entries are December and January.
programme_spends = [
    Metric(
        name="Budget for social workers",
        metric_type=MetricTypeEnum.currency.name,
        value=sum(social_worker_spends),
        value_target=social_workers_budget,
        time=TimeValues(
            name="Spend this month",
            values=[
                TimeValue(month=months[i], name=month_names[i], value=value)
                for i, value in enumerate(social_worker_spends)
            ],
        ),
        gender=None,
        age=None,
        province=None,
    ),
]

# budget_allocation_section = Section(
#     name=section_titles[SectionEnum.budget_allocated.name],
#     section_type=SectionEnum.budget_allocated.name,
#     metrics=programme_spends,
# )

# department.sections.append(budget_allocation_section)

# Jobs added

# TODO: update this when data on trends becomes available

# Single count metric without breakdowns: value from Trends row 7 (column
# 13), target from Targets row 6 (column 2).
department.sections.append(
    Section(
        name=section_titles[SectionEnum.job_opportunities.name],
        section_type=SectionEnum.job_opportunities.name,
        metrics=[
            Metric(
                name="Registration support officers",
                metric_type=MetricTypeEnum.count.name,
                value=int(opportunity_achievements_df.iloc[7, 13]),
                value_target=int(opportunity_targets_df.iloc[6, 2]),
                time=None,
                age=None,
                gender=None,
                province=None,
            )
        ],
    )
)
# Jobs retained

# Social workers are the last DSD row in both sheets: Trends row 8 and
# Targets row 7. The Targets rows for this department run one row above the
# Trends rows (target_lines=[5, 8] vs achievement_lines=[6, 9]); Targets
# row 8 already belongs to the next department.
social_worker_jobs = opportunity_achievements_df.iloc[8, 10:14].tolist()
social_worker_job_target = int(opportunity_targets_df.iloc[7, 2])
# soc_worker_dec_df = december_df.iloc[10:19, :2].set_index(0)
# soc_worker_jan_df = january_df.iloc[10:19, :2].set_index(0)
# Rows 10-18 of the March sheet: column 1 indexed by column 0.
soc_worker_province_breakdown_df = march_df.iloc[10:19, :2].set_index(0)

jobs_retained = [
    Metric(
        name="Retention of social workers",
        metric_type=MetricTypeEnum.count.name,
        # Headline value is the last entry of the time series.
        value=social_worker_jobs[-1],
        value_target=social_worker_job_target,
        gender=GenderValues(
            name=metric_titles[SectionEnum.jobs_retain.name][
                MetricTypeEnum.count.name + "_gender"
            ],
            values=[
                GenderValue(gender=GenderEnum.Male.name, value=march_df.iloc[36, 1]),
                GenderValue(gender=GenderEnum.Female.name, value=march_df.iloc[37, 1]),
            ],
        ),
        age=AgeValues(
            name=metric_titles[SectionEnum.jobs_retain.name][
                MetricTypeEnum.count.name + "_age"
            ],
            values=[AgeValue(age_category="18-35", value=march_df.iloc[38, 1])],
        ),
        time=TimeValues(
            name=metric_titles[SectionEnum.jobs_retain.name][
                MetricTypeEnum.count.name + "_time"
            ],
            values=[
                TimeValue(month=months[i], name=month_names[i], value=value)
                for i, value in enumerate(social_worker_jobs)
            ],
        ),
        province=ProvinceValues(
            name=metric_titles[SectionEnum.jobs_retain.name][
                MetricTypeEnum.count.name + "_province"
            ],
            values=[
                ProvinceValue(
                    province=province_to_abbrev[province],
                    # .loc[province] is a one-element Series; take the cell
                    # explicitly rather than calling int() on the Series,
                    # which pandas has deprecated.
                    value=int(soc_worker_province_breakdown_df.loc[province].iloc[0]),
                )
                for province in list(soc_worker_province_breakdown_df.index)
            ],
        ),
    )
]

jobs_retained_section = Section(
    name=section_titles[SectionEnum.jobs_retain.name],
    section_type=SectionEnum.jobs_retain.name,
    metrics=jobs_retained,
)

department.sections.append(jobs_retained_section)

# Livelihoods section

# TODO: update when trend data becomes available

# NOTE(review): this block is headed "Livelihoods" but the section is built
# with the job_opportunities title and type, so the export gets a second
# job-opportunities section — confirm whether a livelihoods section type
# was intended.
# Value from Trends row 6 (column 13), target from Targets row 5 (column 2).
department.sections.append(
    Section(
        name=section_titles[SectionEnum.job_opportunities.name],
        section_type=SectionEnum.job_opportunities.name,
        metrics=[
            Metric(
                name="ECD income and compliance support",
                metric_type=MetricTypeEnum.count.name,
                value=int(opportunity_achievements_df.iloc[6, 13]),
                value_target=int(opportunity_targets_df.iloc[5, 2]),
                time=None,
                age=None,
                gender=None,
                province=None,
            )
        ],
    )
)
# save all the data

# NOTE: re-running this cell appends the department to all_data again.
all_data.departments.append(department)

# Serialise once and reuse the text for both the file and the cell output.
# The with-block closes the handle, so the JSON is flushed to disk before
# the cell finishes.
department_json = department.to_json(indent=2)
with open(output_dir + "/" + sheet_name + ".json", "w") as json_file:
    print(department_json, file=json_file)

print(department_json)

## DOH

In [None]:
department_name = "Health"
sheet_name = "DoH"

# The workbook sheet is named "DoH", but leads / paragraphs are looked up
# with the upper-cased key.
# NOTE(review): achievement_lines=[39, 42] is used as a half-open range
# below, so the achieved total covers Trends rows 39-41 only, while the
# target total (target_lines=[39, 43]) includes row 42. Row 42 is read later
# in this cell as the auxiliary-nurse figures for both sheets — confirm
# whether the achieved total should include it.
department = Department(
    month=202101,
    name=department_name,
    sheet_name=sheet_name,
    lead=leads[sheet_name.upper()],
    paragraph=paragraphs[sheet_name.upper()],
    sections=[],
    target_lines=[39, 43],
    achievement_lines=[39, 42],
    implementation_details=[],
)

# as there are errors in the main sheets for DoH, draw this info from DOH Excel
# health_df = pd.read_excel(
#     doh_january_excel, sheet_name="Verified by Provinces", header=None
# )

# DoH sheet of the March workbook, read without a header row.
march_df = pd.read_excel(march_excel, sheet_name=sheet_name, header=None)

# Department totals: Targets column 2 and Trends column 13, each summed over
# the department's row range.
health_job_target = opportunity_targets_df.iloc[
    department.target_lines[0] : department.target_lines[1], 2
].sum()
health_job_achievements = opportunity_achievements_df.iloc[
    department.achievement_lines[0] : department.achievement_lines[1], 13
].sum()

# Same value as before (393 571 × 1000); the underscore now sits on the
# thousands boundary so the literal reads the way it evaluates.
health_budget_target = 393_571 * 1000

# Overall targets
# Two headline metrics: overall budget (currency) and overall job
# opportunities (count); neither carries a breakdown.
section = Section(
    name=section_titles[SectionEnum.targets.name],
    section_type=SectionEnum.targets.name,
    metrics=[
        Metric(
            name=metric_titles[SectionEnum.targets.name][MetricTypeEnum.currency.name],
            metric_type=MetricTypeEnum.currency.name,
            # NOTE(review): -1 looks like a "spend not reported" placeholder — confirm
            value=-1,
            value_target=health_budget_target,
            time=None,
            gender=None,
            province=None,
            age=None,
        ),
        Metric(
            name=metric_titles[SectionEnum.targets.name][MetricTypeEnum.count.name],
            metric_type=MetricTypeEnum.count.name,
            value=health_job_achievements,
            value_target=health_job_target,
            time=None,
            gender=None,
            province=None,
            age=None,
        ),
    ],
)

department.sections.append(section)

# # layout is Staff Nurse, Assistant Nurse x 4 (for October, November, December, January)
# # TODO: fix the strangely precise Staff Nurse Budget
# staff_nurse_spends = [health_df.iloc[47, col] for col in (1, 3, 5, 7)]
# assistant_nurse_spends = [health_df.iloc[47, col] for col in (2, 4, 6, 8)]
# staff_nurse_budget = sum(staff_nurse_spends)
# assistant_nurse_budget = sum(assistant_nurse_spends)
# section = Section(
#     name="Budget allocated to date",
#     section_type=MetricTypeEnum.currency.name,
#     metrics=[
#         Metric(
#             name="Staff Nurse Budget",
#             metric_type=MetricTypeEnum.count.name,
#             value=staff_nurse_budget,
#             time=TimeValues(
#                 name="Spend over time",
#                 values=[
#                     TimeValue(month=months[i], name=month_names[i], value=value)
#                     for i, value in enumerate(staff_nurse_spends)
#                 ],
#             ),
#             gender=None,
#             age=None,
#             province=None,
#         ),
#         Metric(
#             name="Assistant Nurse Budget",
#             metric_type=MetricTypeEnum.count.name,
#             value=assistant_nurse_budget,
#             time=TimeValues(
#                 name="Spend over time",
#                 values=[
#                     TimeValue(month=months[i], name=month_names[i], value=value)
#                     for i, value in enumerate(assistant_nurse_spends)
#                 ],
#             ),
#             gender=None,
#             age=None,
#             province=None,
#         ),
#     ],
# )

# department.sections.append(section)

# Job opportunities

# Per-province tables from rows 15-23 of the March sheet, indexed by column
# 0: column 3 for enrolled nurses, column 4 for auxiliary nurses.
enrolled_nurse_df = pd.concat(
    [march_df.iloc[15:24, 0], march_df.iloc[15:24, 3]],
    axis=1,
).set_index(0)

# Row 41 of both the Targets and Trends sheets.
enrolled_nurses_target = int(opportunity_targets_df.iloc[41, 2])
enrolled_nurses = opportunity_achievements_df.iloc[41, 10:14].tolist()

auxiliary_nurse_df = pd.concat(
    [march_df.iloc[15:24, 0], march_df.iloc[15:24, 4]], axis=1
).set_index(0)

# Row 42 of both the Targets and Trends sheets.
auxiliary_nurses_target = int(opportunity_targets_df.iloc[42, 2])
auxiliary_nurses = opportunity_achievements_df.iloc[42, 10:14].tolist()

section = Section(
    name=section_titles[SectionEnum.job_opportunities.name],
    section_type=SectionEnum.job_opportunities.name,
    metrics=[
        Metric(
            name="Enrolled nurses",  # same as Staff Nurses
            metric_type=MetricTypeEnum.count.name,
            # Headline value is the last entry of the time series.
            value=enrolled_nurses[-1],
            value_target=enrolled_nurses_target,
            province=ProvinceValues(
                name=metric_titles[SectionEnum.job_opportunities.name][
                    MetricTypeEnum.count.name + "_province"
                ],
                values=[
                    ProvinceValue(
                        province=province_to_abbrev[province],
                        # .loc[province] is a one-element Series; take the
                        # cell explicitly before casting (int() on a Series
                        # is deprecated in pandas).
                        value=int(enrolled_nurse_df.loc[province].iloc[0]),
                    )
                    for province in list(enrolled_nurse_df.index)
                ],
            ),
            time=TimeValues(
                name=metric_titles[SectionEnum.job_opportunities.name][
                    MetricTypeEnum.count.name + "_time"
                ],
                values=[
                    TimeValue(month=months[i], name=month_names[i], value=value)
                    for i, value in enumerate(enrolled_nurses)
                ],
            ),
            gender=None,
            age=None,
        ),
        Metric(
            name="Auxiliary nurses",  # same as Assistant Nurses
            metric_type=MetricTypeEnum.count.name,
            value=auxiliary_nurses[-1],
            value_target=auxiliary_nurses_target,
            province=ProvinceValues(
                name=metric_titles[SectionEnum.job_opportunities.name][
                    MetricTypeEnum.count.name + "_province"
                ],
                values=[
                    ProvinceValue(
                        province=province_to_abbrev[province],
                        # Store a plain int, exactly as for enrolled nurses,
                        # rather than the one-element Series itself.
                        value=int(auxiliary_nurse_df.loc[province].iloc[0]),
                    )
                    for province in list(auxiliary_nurse_df.index)
                ],
            ),
            time=TimeValues(
                name=metric_titles[SectionEnum.job_opportunities.name][
                    MetricTypeEnum.count.name + "_time"
                ],
                values=[
                    TimeValue(month=months[i], name=month_names[i], value=value)
                    for i, value in enumerate(auxiliary_nurses)
                ],
            ),
            gender=None,
            age=None,
        ),
    ],
)

# NOTE(review): chw_achievements is not read anywhere in the visible part of
# the notebook.
chw_achievements = opportunity_achievements_df.iloc[39:40, 13]
# One count Metric for each of the first two rows of the department's target
# range (the range minus its last two rows, which are the nurse rows 41 and
# 42). Columns 1 and 2 of the Targets sheet supply the name and the target;
# the achieved value is read from the same row position of the Trends sheet.
chw_programmes = [
    Metric(
        name=target_row.iloc[0],
        metric_type=MetricTypeEnum.count.name,
        # opportunity_achievements_df had its NaNs replaced by 0 when it was
        # loaded, so no further fillna() copy of the frame is needed per row.
        value=int(opportunity_achievements_df.iloc[row_label, 13]),
        value_target=target_row.iloc[1],
        age=None,
        gender=None,
        province=None,
        time=None,
    )
    for row_label, target_row in opportunity_targets_df.iloc[
        department.target_lines[0] : department.target_lines[1] - 2, 1:3
    ].iterrows()
]

section.metrics.extend(chw_programmes)

department.sections.append(section)

# Jobs retained

# NO DATA

# Livelihoods

# NO DATA

all_data.departments.append(department)
print(
    department.to_json(indent=2),
    file=open(output_dir + "/" + sheet_name + ".json", "w"),
)

print(department.to_json(indent=2))

## DALLRD

In [None]:
sheet_name = "DALLRD"
department_name = "Agriculture, Land Reform and Rural Development"

# Container for this department; sections are appended to it further down.
department = Department(
    name=department_name,
    sheet_name=sheet_name,
    month=202101,
    lead=leads[sheet_name],
    paragraph=paragraphs[sheet_name],
    target_lines=[8, 14],
    achievement_lines=[9, 14],
    sections=[],
    implementation_details=[],
)

# This department's sheet from the January and the March reporting workbooks.
sheet_read_options = dict(sheet_name=sheet_name, header=None)
january_df = pd.read_excel(january_excel, **sheet_read_options)
march_df = pd.read_excel(march_excel, **sheet_read_options)

dallrd_budget_target = 1_000_000_000
dallrd_livelihoods_target = opportunity_targets_df.iloc[8, 2]
# Achievements are summed over column 13 of the department's rows on the Trends sheet.
achievement_rows = slice(*department.achievement_lines)
dallrd_livelihoods_achieved = opportunity_achievements_df.iloc[
    achievement_rows, 13
].sum()

# Only referenced by the disabled "Graduate Employment budget" metric below.
graduate_employment_budget_target = january_df.iloc[58, 1]

# Overall targets: one currency metric (budget) and one count metric (livelihoods).
targets_titles = metric_titles[SectionEnum.targets.name]

dallrd_budget_metric = Metric(
    metric_type=MetricTypeEnum.currency.name,
    name=targets_titles[MetricTypeEnum.currency.name],
    value=-1,
    value_target=dallrd_budget_target,
    age=None,
    gender=None,
    province=None,
    time=None,
)
# Disabled: a "Graduate Employment budget" currency metric with
# value=graduate_employment_budget_target used to sit between these two metrics.
dallrd_livelihoods_metric = Metric(
    metric_type=MetricTypeEnum.count.name,
    name=targets_titles[MetricTypeEnum.count.name],
    value=dallrd_livelihoods_achieved,
    value_target=dallrd_livelihoods_target,
    age=None,
    gender=None,
    province=None,
    time=None,
)

section = Section(
    section_type=SectionEnum.targets.name,
    name=section_titles[SectionEnum.targets.name],
    metrics=[dallrd_budget_metric, dallrd_livelihoods_metric],
)

department.sections.append(section)

# Budget allocation

# NO INFO AVAILABLE

# Job opportunities - for DALLRD this is graduate employment

# TODO: verify this. Removed for now because no matching programme description
# graduate_employment_by_province_df = march_df.iloc[43:52, :2].set_index(0)
# graduate_employment_total = march_df.iloc[52, 1]
# male_employment = march_df.iloc[54, 1] / 100
# female_employment = march_df.iloc[55, 1] / 100
# youth_employment = march_df.iloc[56, 1] / 100
# section = Section(
#     name=section_titles[SectionEnum.job_opportunities.name],
#     section_type=SectionEnum.job_opportunities.name,
#     metrics=[
#         Metric(
#             name="Graduate Employment",
#             metric_type=MetricTypeEnum.count.name,
#             value=graduate_employment_total,
#             province=ProvinceValues(
#                 name=metric_titles[SectionEnum.job_opportunities.name][
#                     MetricTypeEnum.count.name + "_province"
#                 ],
#                 values=[
#                     ProvinceValue(
#                         province=province_to_abbrev[province],
#                         value=graduate_employment_by_province_df.loc[province],
#                     )
#                     for province in list(graduate_employment_by_province_df.index)
#                 ],
#             ),
#             time=None,
#             gender=GenderValues(
#                 name=metric_titles[SectionEnum.job_opportunities.name][
#                     MetricTypeEnum.count.name + "_gender"
#                 ],
#                 values=[
#                     GenderValue(gender=GenderEnum.Male.name, value=male_employment),
#                     GenderValue(gender=GenderEnum.Female.name, value=female_employment),
#                 ],
#             ),
#             age=AgeValues(
#                 name=metric_titles[SectionEnum.job_opportunities.name][
#                     MetricTypeEnum.count.name + "_age"
#                 ],
#                 values=[AgeValue(age_category="18-35", value=youth_employment)],
#             ),
#         )
#     ],
# )

# department.sections.append(section)

# department.sections.append(section)

# # Jobs retained

# # NO DATA

# # Livelihoods

# There are 5 programmes in columns 1 through 5 of the department sheet.
# Row 10 carries their names; rows 11-19 are indexed by column 0.
livelihoods_by_province = march_df.iloc[11:20, :6].set_index(0)
livelihoods_by_province.columns = [
    name.strip() for name in list(march_df.iloc[10, 1:6])
]
livelihoods_totals = list(march_df.iloc[20, 1:6])

livelihoods_programme_names = list(livelihoods_by_province.columns)

# Trend table for this department: programme name (column 1) as the index and
# trend columns 10-13 as the values. The row range comes from
# department.achievement_lines instead of a second hard-coded copy of it.
achievement_first_row, achievement_stop_row = department.achievement_lines
livelihoods_achievements_df = pd.concat(
    [
        opportunity_achievements_df.iloc[achievement_first_row:achievement_stop_row, 1],
        opportunity_achievements_df.iloc[
            achievement_first_row:achievement_stop_row, 10:14
        ],
    ],
    axis=1,
).set_index(1)
# Strip the names so they match the stripped sheet headers used as lookup keys.
livelihoods_achievements_df.index = pd.Index(
    [name.strip() for name in list(livelihoods_achievements_df.index)]
)

# NOTE(review): livelihood_achievements_df (singular) is not read anywhere in
# this cell; it is kept only in case another cell depends on it.
livelihood_achievements_df = pd.concat(
    [
        opportunity_achievements_df.iloc[achievement_first_row:achievement_stop_row, 1],
        opportunity_achievements_df.iloc[
            achievement_first_row:achievement_stop_row, 12:14
        ],
    ],
    axis=1,
).set_index(1)

# One count metric per programme. The headline value is the last trend column,
# cast to int like the per-month values. Province, gender and age breakdowns
# are deliberately None: the earlier code for them was disabled (the age one
# "because of how the data is presented").
livelihood_programme_metrics = [
    Metric(
        name=programme_name,
        metric_type=MetricTypeEnum.count.name,
        value=int(livelihoods_achievements_df.loc[programme_name].iloc[-1]),
        province=None,
        gender=None,
        time=TimeValues(
            name=metric_titles[SectionEnum.livelihoods.name][
                MetricTypeEnum.count.name + "_time"
            ],
            values=[
                TimeValue(
                    month=month,
                    name=month_names[month_index],
                    value=int(
                        livelihoods_achievements_df.loc[programme_name].iloc[
                            month_index
                        ]
                    ),
                )
                for month_index, month in enumerate(months)
            ],
        ),
        age=None,
    )
    for programme_name in livelihoods_programme_names
]

section = Section(
    name=section_titles[SectionEnum.livelihoods.name],
    section_type=SectionEnum.livelihoods.name,
    metrics=livelihood_programme_metrics,
)

department.sections.append(section)

all_data.departments.append(department)

# Serialise once, write through a context manager so the file handle is
# closed deterministically, then echo the same JSON in the cell output.
department_json = department.to_json(indent=2)
with open(output_dir + "/" + sheet_name + ".json", "w") as json_file:
    print(department_json, file=json_file)

print(department_json)

## DSI

In [None]:
sheet_name = "DSI"
department_name = "Science and Innovation"

# Container for this department; sections are appended to it further down.
department = Department(
    name=department_name,
    sheet_name=sheet_name,
    month=202101,
    lead=leads[sheet_name],
    paragraph=paragraphs[sheet_name],
    target_lines=[43, 47],
    achievement_lines=[43, 47],
    sections=[],
    implementation_details=[],
)


dsi_budget_target = 45_000_000
# Column 2 of the Targets sheet, summed over the department's 4 programme rows.
target_rows = slice(*department.target_lines)
dsi_jobs_target = opportunity_targets_df.iloc[target_rows, 2].sum()
# Column 13 of the Trends sheet, summed over the department's rows.
achievement_rows = slice(*department.achievement_lines)
dsi_jobs_achieved = opportunity_achievements_df.iloc[achievement_rows, 13].sum()

# Per-programme targets indexed by programme name (column 1 of the Targets sheet).
dsi_programme_targets_df = opportunity_targets_df.iloc[43:47, 1:3].set_index(1)
# Built but not attached: the extend() call after the targets section is disabled.
programme_target_metrics = [
    Metric(
        metric_type=MetricTypeEnum.count.name,
        name=target_name,
        value=dsi_programme_targets_df.loc[target_name].iloc[0],
        age=None,
        gender=None,
        province=None,
        time=None,
    )
    for target_name in list(dsi_programme_targets_df.index)
]

# Overall targets: budget (currency) and overall jobs created (count).
dsi_budget_metric = Metric(
    metric_type=MetricTypeEnum.currency.name,
    name=metric_titles[SectionEnum.targets.name][MetricTypeEnum.currency.name],
    value=-1,
    value_target=dsi_budget_target,
    age=None,
    gender=None,
    province=None,
    time=None,
)
dsi_jobs_metric = Metric(
    metric_type=MetricTypeEnum.count.name,
    name="Overall jobs created",
    value=dsi_jobs_achieved,
    value_target=dsi_jobs_target,
    age=None,
    gender=None,
    province=None,
    time=None,
)

section = Section(
    section_type=SectionEnum.targets.name,
    name=section_titles[SectionEnum.targets.name],
    metrics=[dsi_budget_metric, dsi_jobs_metric],
)
# section.metrics.extend(programme_target_metrics)

department.sections.append(section)

# Job creation results

# Per-programme targets, indexed by the programme name in column 1 of the
# Targets sheet.
dsi_targets_df = opportunity_targets_df.iloc[
    department.target_lines[0] : department.target_lines[1], 1:3
].set_index(1)

# Per-programme trend values (columns 10-13 of the Trends sheet), indexed by
# the programme name in column 1 of that sheet.
dsi_achievements_df = pd.concat(
    [
        opportunity_achievements_df.iloc[
            department.achievement_lines[0] : department.achievement_lines[1], 1
        ],
        opportunity_achievements_df.iloc[
            department.achievement_lines[0] : department.achievement_lines[1], 10:14
        ],
    ],
    axis=1,
).set_index(1)

# The two sheets are paired row by row, so a Targets-sheet name can be mapped
# to the name used for the same row on the Trends sheet.
target_to_achievement_programme_name = dict(
    zip(dsi_targets_df.index, dsi_achievements_df.index)
)
t_2_a = target_to_achievement_programme_name

department.sections.append(
    Section(
        # Fixed: this used to be `name=programme_name`, but that expression
        # sits outside the comprehension below, so it resolved to whatever an
        # earlier cell left in `programme_name` (or raised NameError on a
        # fresh kernel). Use the standard job-opportunities section title.
        name=section_titles[SectionEnum.job_opportunities.name],
        section_type=SectionEnum.job_opportunities.name,
        metrics=[
            Metric(
                name=programme_name.strip(),
                metric_type=MetricTypeEnum.count.name,
                value=dsi_achievements_df.loc[t_2_a[programme_name]].iloc[-1],
                value_target=dsi_targets_df.loc[programme_name].iloc[-1],
                # The time series is only attached when the latest value is positive.
                time=TimeValues(
                    name=metric_titles[SectionEnum.job_opportunities.name][
                        MetricTypeEnum.count.name + "_time"
                    ],
                    values=[
                        TimeValue(month=months[i], name=month_names[i], value=value)
                        for i, value in enumerate(
                            dsi_achievements_df.loc[t_2_a[programme_name]]
                        )
                    ],
                )
                if dsi_achievements_df.loc[t_2_a[programme_name]].iloc[-1] > 0
                else None,
                gender=None,
                age=None,
                province=None,
            )
            for programme_name in dsi_targets_df.index
        ],
    )
)


all_data.departments.append(department)

# Serialise once, write through a context manager so the file handle is
# closed deterministically, then echo the same JSON in the cell output.
department_json = department.to_json(indent=2)
with open(output_dir + "/" + sheet_name + ".json", "w") as json_file:
    print(department_json, file=json_file)

print(department_json)

## DSAC

In [None]:
sheet_name = "DSAC"
department_name = "Sports, Arts and Culture"

# Container for this department; sections are appended to it further down.
department = Department(
    name=department_name,
    sheet_name=sheet_name,
    month=202101,
    lead=leads[sheet_name],
    paragraph=paragraphs[sheet_name],
    target_lines=[25, 37],
    achievement_lines=[25, 37],
    sections=[],
    implementation_details=[],
)

# This department's sheet from the March reporting workbook.
march_df = pd.read_excel(march_excel, header=None, sheet_name=sheet_name)

dsac_budget_target = 525_000_000
# Column 2 of the Targets sheet, summed over the department's rows.
target_rows = slice(*department.target_lines)
dsac_support_target = opportunity_targets_df.iloc[target_rows, 2].sum()
# Column 13 of the Trends sheet, summed over the department's rows.
achievement_rows = slice(*department.achievement_lines)
dsac_support_achievement = opportunity_achievements_df.iloc[
    achievement_rows, 13
].sum()

# dsac_programme_targets_df = opportunity_targets_df.iloc[department.target_lines[0]:department.target_lines[1], 1:3].set_index(1)

# programme_target_metrics = [
#     Metric(
#         name=programme_name,
#         metric_type=MetricTypeEnum.count.name,
#         value=dsac_programme_targets_df.loc[programme_name].iloc[0],
#         time=None,
#         gender=None,
#         age=None,
#         province=None,
#     )
#     for programme_name in list(dsac_programme_targets_df.index)
# ]

# Overall targets: budget (currency) and supported opportunities (count).
targets_titles = metric_titles[SectionEnum.targets.name]

dsac_budget_metric = Metric(
    metric_type=MetricTypeEnum.currency.name,
    name=targets_titles[MetricTypeEnum.currency.name],
    value=-1,
    value_target=dsac_budget_target,
    age=None,
    gender=None,
    province=None,
    time=None,
)
dsac_support_metric = Metric(
    metric_type=MetricTypeEnum.count.name,
    name=targets_titles[MetricTypeEnum.count.name],
    value=dsac_support_achievement,
    value_target=dsac_support_target,
    age=None,
    gender=None,
    province=None,
    time=None,
)

section = Section(
    section_type=SectionEnum.targets.name,
    name=section_titles[SectionEnum.targets.name],
    metrics=[dsac_budget_metric, dsac_support_metric],
)
# section.metrics.extend(programme_target_metrics)

department.sections.append(section)

# Job creation results

# Per-programme trend values (columns 10-13 of the Trends sheet), indexed by
# the programme name in column 1.
programme_achievements_df = pd.concat(
    [
        opportunity_achievements_df.iloc[
            department.achievement_lines[0] : department.achievement_lines[1], 1
        ],
        opportunity_achievements_df.iloc[
            department.achievement_lines[0] : department.achievement_lines[1], 10:14
        ],
    ],
    axis=1,
).set_index(1)


# Per-programme targets (column 2 of the Targets sheet), indexed by programme name.
programme_targets_df = opportunity_targets_df.iloc[
    department.target_lines[0] : department.target_lines[1], 1:3
].set_index(1)

# The department sheet holds three groups of tables, named cre/ret/liv here to
# match the "CRE"/"RET"/"LIV" opportunity types used later in this cell. Each
# table is indexed by its column 0 and takes its column labels from a header
# row elsewhere on the sheet. The three groups are then joined side by side
# so a programme name can be used as a column key.
# (The bare `ret_provincial_breakdown`-style expression statements that used
# to sit between these assignments were no-ops mid-cell and have been removed.)

# Provincial breakdowns.
cre_provincial_breakdown = march_df.iloc[10:19, :8].fillna(0).set_index(0)
cre_provincial_breakdown.columns = march_df.iloc[9, 1:8]

ret_provincial_breakdown = march_df.iloc[55:64, :4].fillna(0).set_index(0)
ret_provincial_breakdown.columns = march_df.iloc[54, 1:4]

liv_provincial_breakdown = march_df.iloc[86:95, :3].fillna(0).set_index(0)
liv_provincial_breakdown.columns = march_df.iloc[85, 1:3]

provincial_breakdown = pd.concat(
    [cre_provincial_breakdown, ret_provincial_breakdown, liv_provincial_breakdown],
    axis=1,
).fillna(0)

# Gender breakdowns (two rows per table).
cre_gender_breakdown = march_df.fillna(0).iloc[36:38, :8].set_index(0)
cre_gender_breakdown.columns = march_df.iloc[35, 1:8]

ret_gender_breakdown = march_df.fillna(0).iloc[69:71, :4].set_index(0)
ret_gender_breakdown.columns = march_df.iloc[68, 1:4]

liv_gender_breakdown = march_df.iloc[100:102, :3].fillna(0).set_index(0)
liv_gender_breakdown.columns = march_df.iloc[99, 1:3]

gender_breakdown = pd.concat(
    [cre_gender_breakdown, ret_gender_breakdown, liv_gender_breakdown], axis=1
).fillna(0)

# Age breakdowns (one row per table, directly below the gender rows).
# NOTE(review): the cre table takes its labels from row 9 (the provincial
# header) while ret/liv reuse their gender header rows (68, 99); row 35 would
# be the matching choice here. Left unchanged - confirm both rows carry the
# same programme names.
cre_age_breakdown = march_df.iloc[38:39, :8].set_index(0)
cre_age_breakdown.columns = march_df.iloc[9, 1:8]

ret_age_breakdown = march_df.iloc[71:72, :4].set_index(0)
ret_age_breakdown.columns = march_df.iloc[68, 1:4]

liv_age_breakdown = march_df.iloc[102:103, :3].fillna(0).set_index(0)
liv_age_breakdown.columns = march_df.iloc[99, 1:3]

age_breakdown = pd.concat(
    [cre_age_breakdown, ret_age_breakdown, liv_age_breakdown], axis=1
).fillna(0)


# programme_achievement_metrics_jobs = [
#     Metric(
#         name=programme_name,
#         metric_type=MetricTypeEnum.count.name,
#         value=programme_achievements_df.loc[programme_name].iloc[-1],
#         time=TimeValues(
#             name=metric_titles[SectionEnum.job_opportunities.name][
#                 MetricTypeEnum.count.name + "_time"
#             ],
#             values=[
#                 TimeValue(month=months[i], name=month_names[i], value=value)
#                 for i, value in enumerate(
#                     list(programme_achievements_df.loc[programme_name])
#                 )
#             ],
#         )
#         if programme_achievements_df.loc[programme_name].sum() > 0
#         else None,
#         gender=None,
#         age=None,
#         province=None,
#     )
#     for programme_name in list(programme_achievements_df.index)
#     if opportunity_type_df.loc[programme_name].iloc[0] == "CRE"
# ]

def _dsac_programme_metric(programme_name, section_type):
    """Build the count Metric for one DSAC programme.

    Args:
        programme_name: Row label in programme_achievements_df and
            programme_targets_df, and column label in gender_breakdown,
            age_breakdown and provincial_breakdown.
        section_type: SectionEnum member name; selects the entry of
            metric_titles used to title every breakdown of this metric.

    Returns:
        A Metric whose time, gender, age and province breakdowns are each
        left as None when the check on their source values below fails.
    """
    trend = programme_achievements_df.loc[programme_name]
    gender_column = gender_breakdown.loc[:, programme_name]
    age_value = float(age_breakdown.loc[:, programme_name].iloc[0])
    province_column = provincial_breakdown.loc[:, programme_name]

    time_values = None
    if trend.sum() > 0:
        time_values = TimeValues(
            # Fixed: the title used to be read from the job_opportunities
            # entry for every section; it now follows section_type like the
            # gender, age and province titles.
            name=metric_titles[section_type][MetricTypeEnum.count.name + "_time"],
            values=[
                TimeValue(month=months[i], name=month_names[i], value=value)
                for i, value in enumerate(list(trend))
            ],
        )

    gender_values = None
    # Force to numeric because of the text in some of the incoming data.
    if pd.to_numeric(gender_column).sum() > 0.0:
        gender_values = GenderValues(
            name=metric_titles[section_type][MetricTypeEnum.count.name + "_gender"],
            values=[
                GenderValue(
                    gender=gender.strip(),
                    value=float(gender_breakdown.loc[gender, programme_name]),
                )
                for gender in gender_breakdown.index
            ],
        )

    age_values = None
    if age_value != 0:
        age_values = AgeValues(
            name=metric_titles[section_type][MetricTypeEnum.count.name + "_age"],
            values=[
                AgeValue(
                    age_category=age_breakdown.index[0].strip(),
                    value=age_value,
                )
            ],
        )

    province_values = None
    if province_column.sum() != 0:
        province_values = ProvinceValues(
            name=metric_titles[section_type][
                MetricTypeEnum.count.name + "_province"
            ],
            values=[
                ProvinceValue(
                    province=province_to_abbrev[province],
                    value=int(provincial_breakdown.loc[province, programme_name]),
                )
                for province in provincial_breakdown.index
            ],
        )

    return Metric(
        name=programme_name.strip(),
        metric_type=MetricTypeEnum.count.name,
        value=trend.iloc[-1],
        time=time_values,
        gender=gender_values,
        age=age_values,
        province=province_values,
        value_target=programme_targets_df.loc[programme_name].iloc[0],
    )


# One section per opportunity type; a programme belongs to the section whose
# code matches its entry in opportunity_type_df.
for section_type, name, filter_by in [
    (
        SectionEnum.job_opportunities.name,
        section_titles[SectionEnum.job_opportunities.name],
        "CRE",
    ),
    (SectionEnum.jobs_retain.name, section_titles[SectionEnum.jobs_retain.name], "RET"),
    (SectionEnum.livelihoods.name, section_titles[SectionEnum.livelihoods.name], "LIV"),
]:
    programme_achievement_metrics = [
        _dsac_programme_metric(programme_name, section_type)
        for programme_name in list(programme_achievements_df.index)
        if opportunity_type_df.loc[programme_name].iloc[0] == filter_by
    ]
    section = Section(
        name=name, section_type=section_type, metrics=programme_achievement_metrics
    )
    department.sections.append(section)

all_data.departments.append(department)

# Write through a context manager so the file handle is closed deterministically.
with open(output_dir + "/" + sheet_name + ".json", "w") as json_file:
    print(department.to_json(indent=2), file=json_file)

# print(department.to_json(indent=2))

## DoT

In [None]:
sheet_name = "DoT"
department_name = "Transport"

# Container for this department; sections are appended to it further down.
department = Department(
    name=department_name,
    sheet_name=sheet_name,
    month=202101,
    lead=leads[sheet_name],
    paragraph=paragraphs[sheet_name],
    target_lines=[24, 25],
    achievement_lines=[24, 25],
    sections=[],
    implementation_details=[],
)

# This department's sheet from the March reporting workbook.
march_df = pd.read_excel(march_excel, header=None, sheet_name=sheet_name)

dot_budget_target = 630_000_000
# Column 2 of the Targets sheet, summed over the department's rows.
target_rows = slice(*department.target_lines)
dot_support_target = opportunity_targets_df.iloc[target_rows, 2].sum()
# Column 13 of the Trends sheet, summed over the department's rows.
achievement_rows = slice(*department.achievement_lines)
dot_support_achieved = opportunity_achievements_df.iloc[achievement_rows, 13].sum()

# Overall targets: budget (currency) and supported opportunities (count).
targets_titles = metric_titles[SectionEnum.targets.name]

dot_budget_metric = Metric(
    metric_type=MetricTypeEnum.currency.name,
    name=targets_titles[MetricTypeEnum.currency.name],
    value=-1,
    value_target=dot_budget_target,
    age=None,
    gender=None,
    province=None,
    time=None,
)
dot_support_metric = Metric(
    metric_type=MetricTypeEnum.count.name,
    name=targets_titles[MetricTypeEnum.count.name],
    value=dot_support_achieved,
    value_target=dot_support_target,
    age=None,
    gender=None,
    province=None,
    time=None,
)

section = Section(
    section_type=SectionEnum.targets.name,
    name="Programme targets for this department",
    metrics=[dot_budget_metric, dot_support_metric],
)

department.sections.append(section)

# Spending targets

# TODO

# Job creation results

# Trend values (columns 10-13 of the Trends sheet) for the department's single
# programme row. The row comes from department.achievement_lines so it cannot
# drift from the Department definition (it was a second hard-coded 24).
rural_road_achievement_df = opportunity_achievements_df.iloc[
    department.achievement_lines[0], 10:14
]
# Rows 15-23 of the department sheet, indexed by column 0, which is used as
# the province name below; column 1 holds the value.
prov_rural_road_df = march_df.iloc[15:24, :2].fillna(0).set_index(0)
section = Section(
    name=section_titles[SectionEnum.job_opportunities.name],
    section_type=SectionEnum.job_opportunities.name,
    metrics=[
        Metric(
            name="Rural roads maintenance",
            metric_type=MetricTypeEnum.count.name,
            value=rural_road_achievement_df.iloc[-1],
            # The department has one programme, so the department-level
            # target doubles as the programme target.
            value_target=dot_support_target,
            time=TimeValues(
                name=metric_titles[SectionEnum.job_opportunities.name][
                    MetricTypeEnum.count.name + "_time"
                ],
                values=[
                    TimeValue(month=months[i], name=month_names[i], value=int(amount))
                    for i, amount in enumerate(rural_road_achievement_df)
                ],
            ),
            gender=None,
            age=None,
            province=ProvinceValues(
                name=metric_titles[SectionEnum.job_opportunities.name][
                    MetricTypeEnum.count.name + "_province"
                ],
                values=[
                    ProvinceValue(
                        province=province_to_abbrev[province],
                        value=int(prov_rural_road_df.loc[province].iloc[0]),
                    )
                    for province in list(prov_rural_road_df.index)
                ],
            ),
        )
    ],
)

department.sections.append(section)

# Jobs retained

# NO DATA

# Livelihoods

# NO DATA

all_data.departments.append(department)

# Serialise once, write through a context manager so the file handle is
# closed deterministically, then echo the same JSON in the cell output.
department_json = department.to_json(indent=2)
with open(output_dir + "/" + sheet_name + ".json", "w") as json_file:
    print(department_json, file=json_file)

print(department_json)

## DPWI

In [None]:
department_name = "Public Works and Infrastructure"
sheet_name = "DPWI"

# Container for this department; sections are appended to it further down.
department = Department(
    month=202101,
    name=department_name,
    sheet_name=sheet_name,
    lead=leads[sheet_name],
    paragraph=paragraphs[sheet_name],
    sections=[],
    target_lines=[47, 56],
    achievement_lines=[46, 54],
    implementation_details=[],
)

# This department's sheet from the March reporting workbook.
march_df = pd.read_excel(march_excel, sheet_name=sheet_name, header=None)

dpwi_budget_target = 159_000_000
# A single cell of the Targets sheet is used as the department-wide target.
dpwi_support_target = opportunity_targets_df.iloc[47, 2]
# Column 13 of the Trends sheet, summed over the department's rows.
dpwi_support_achieved = opportunity_achievements_df.iloc[
    department.achievement_lines[0] : department.achievement_lines[1], 13
].sum()

# Overall targets.
# Fixed: both value_target fields used the Transport cell's variables
# (dot_budget_target / dot_support_target), so DPWI was published with
# Transport's budget and target - or the cell raised NameError when run on
# its own. They now use the DPWI values computed above.
section = Section(
    name=section_titles[SectionEnum.targets.name],
    section_type=SectionEnum.targets.name,
    metrics=[
        Metric(
            name=metric_titles[SectionEnum.targets.name][MetricTypeEnum.currency.name],
            metric_type=MetricTypeEnum.currency.name,
            value=-1,
            value_target=dpwi_budget_target,
            time=None,
            gender=None,
            province=None,
            age=None,
        ),
        Metric(
            name=metric_titles[SectionEnum.targets.name][MetricTypeEnum.count.name],
            metric_type=MetricTypeEnum.count.name,
            value=dpwi_support_achieved,
            value_target=dpwi_support_target,
            time=None,
            gender=None,
            province=None,
            age=None,
        ),
    ],
)

department.sections.append(section)

# Spending targets

# TODO

# Job creation results

# Per-programme trend values (columns 10-13 of the Trends sheet), indexed by
# the programme name in column 1.
dpwi_job_achievements_df = pd.concat(
    [
        opportunity_achievements_df.iloc[
            department.achievement_lines[0] : department.achievement_lines[1], 1
        ],
        opportunity_achievements_df.iloc[
            department.achievement_lines[0] : department.achievement_lines[1], 10:14
        ],
    ],
    axis=1,
).set_index(1)

# Programme names come from row 8 of the department sheet; their position in
# that row (programme_index) selects the matching column in the tables below.
programme_names = list(march_df.iloc[8, 1:8])

# Rows 9-17 of the department sheet, indexed by column 0, which is used as the
# province name below.
dpwi_jobs_by_province_df = march_df.iloc[9:18, :8].fillna(0).set_index(0)

# Rows used for the Male, Female and "18-35" values, one column per programme.
# Built once here instead of being re-sliced and re-filled for every programme.
dpwi_male_row = march_df.iloc[35, 1:8].fillna(0)
dpwi_female_row = march_df.iloc[36, 1:8].fillna(0)
dpwi_youth_row = march_df.iloc[37, 1:8].fillna(0)

section = Section(
    name=section_titles[SectionEnum.job_opportunities.name],
    section_type=SectionEnum.job_opportunities.name,
    metrics=[
        Metric(
            name=programme_name.strip(),
            metric_type=MetricTypeEnum.count.name,
            value=dpwi_job_achievements_df.loc[programme_name].iloc[
                -1
            ],  # we don't know per programme targets
            # The time series is only attached when the programme has any
            # recorded achievement.
            time=TimeValues(
                name=metric_titles[SectionEnum.job_opportunities.name][
                    MetricTypeEnum.count.name + "_time"
                ],
                values=[
                    TimeValue(month=months[i], name=month_names[i], value=int(value))
                    for i, value in enumerate(
                        dpwi_job_achievements_df.loc[programme_name]
                    )
                ],
            )
            if dpwi_job_achievements_df.loc[programme_name].sum() > 0
            else None,
            gender=GenderValues(
                name=metric_titles[SectionEnum.job_opportunities.name][
                    MetricTypeEnum.count.name + "_gender"
                ],
                values=[
                    GenderValue(
                        gender=GenderEnum.Male.name,
                        value=dpwi_male_row.iloc[programme_index],
                    ),
                    GenderValue(
                        gender=GenderEnum.Female.name,
                        value=dpwi_female_row.iloc[programme_index],
                    ),
                ],
            ),
            age=AgeValues(
                name=metric_titles[SectionEnum.job_opportunities.name][
                    MetricTypeEnum.count.name + "_age"
                ],
                values=[
                    AgeValue(
                        age_category="18-35",
                        value=dpwi_youth_row.iloc[programme_index],
                    )
                ],
            ),
            province=ProvinceValues(
                # Passed by keyword, like every other ProvinceValues call in
                # this notebook.
                name=metric_titles[SectionEnum.job_opportunities.name][
                    MetricTypeEnum.count.name + "_province"
                ],
                values=[
                    ProvinceValue(
                        province=province_to_abbrev[province],
                        value=int(
                            dpwi_jobs_by_province_df.loc[province].iloc[programme_index]
                        ),
                    )
                    for province in list(dpwi_jobs_by_province_df.index)
                ],
            ),
        )
        for programme_index, programme_name in enumerate(programme_names)
    ],
)

department.sections.append(section)
# Jobs retained

# NO DATA

# Livelihoods

# NO DATA

all_data.departments.append(department)

# Serialise once, write through a context manager so the file handle is
# closed deterministically, then echo the same JSON in the cell output.
department_json = department.to_json(indent=2)
with open(output_dir + "/" + sheet_name + ".json", "w") as json_file:
    print(department_json, file=json_file)

print(department_json)

## DEFF

In [None]:
sheet_name = "DEFF"
department_name = "Environment, Forestry and Fisheries"

# Container for this department; sections are appended to it further down.
department = Department(
    name=department_name,
    sheet_name=sheet_name,
    month=202101,
    lead=leads[sheet_name],
    paragraph=paragraphs[sheet_name],
    target_lines=[14, 24],
    achievement_lines=[14, 24],
    sections=[],
    implementation_details=[],
)

deff_budget_target = 1_983_000 * 1000
# Column 2 of the Targets sheet, summed over the department's rows.
target_rows = slice(*department.target_lines)
deff_support_target = opportunity_targets_df.iloc[target_rows, 2].sum()

# Column 13 of the Trends sheet, summed over the department's rows.
achievement_rows = slice(*department.achievement_lines)
deff_support_achievements = opportunity_achievements_df.iloc[
    achievement_rows, 13
].sum()

# Overall targets: budget (currency) and supported opportunities (count).
targets_titles = metric_titles[SectionEnum.targets.name]

deff_budget_metric = Metric(
    metric_type=MetricTypeEnum.currency.name,
    name=targets_titles[MetricTypeEnum.currency.name],
    value=-1,
    value_target=deff_budget_target,
    age=None,
    gender=None,
    province=None,
    time=None,
)
deff_support_metric = Metric(
    metric_type=MetricTypeEnum.count.name,
    name=targets_titles[MetricTypeEnum.count.name],
    value=deff_support_achievements,
    value_target=deff_support_target,
    age=None,
    gender=None,
    province=None,
    time=None,
)

section = Section(
    section_type=SectionEnum.targets.name,
    name=section_titles[SectionEnum.targets.name],
    metrics=[deff_budget_metric, deff_support_metric],
)

department.sections.append(section)

# Spending targets

# TODO

# Job creation results

# Per-programme targets for this department: column 1 of the targets sheet
# becomes the index, leaving column 2 as the only data column.
deff_programme_targets = opportunity_targets_df.iloc[
    department.target_lines[0] : department.target_lines[1], 1:3
].set_index(1)

# Per-programme achievements: column 1 becomes the index and columns 10-13 are
# kept; they are paired positionally with `months` / `month_names` below.
deff_job_achievements_df = (
    pd.concat(
        [
            opportunity_achievements_df.iloc[
                department.achievement_lines[0] : department.achievement_lines[1], 1
            ],
            opportunity_achievements_df.iloc[
                department.achievement_lines[0] : department.achievement_lines[1], 10:14
            ],
        ],
        axis=1,
    )
    .set_index(1)
    .fillna(0)
)

programme_names = list(deff_job_achievements_df.index)

# One count metric per programme. The headline value is the last achievement
# column; the full row is also emitted as a time series.
section = Section(
    name=section_titles[SectionEnum.job_opportunities.name],
    section_type=SectionEnum.job_opportunities.name,
    metrics=[
        Metric(
            name=programme_name.strip(),
            metric_type=MetricTypeEnum.count.name,
            value=int(deff_job_achievements_df.loc[programme_name].iloc[-1]),
            # `.loc[programme_name]` on the single-column targets frame yields a
            # one-element Series; take its only entry so the target is a plain
            # integer like `value`, not a Series.
            value_target=int(deff_programme_targets.loc[programme_name].iloc[0]),
            time=TimeValues(
                name=metric_titles[SectionEnum.job_opportunities.name][
                    MetricTypeEnum.count.name + "_time"
                ],
                values=[
                    TimeValue(month=months[i], name=month_names[i], value=int(value))
                    for i, value in enumerate(
                        deff_job_achievements_df.loc[programme_name]
                    )
                ],
            ),
            gender=None,
            province=None,
            age=None,
        )
        for programme_name in programme_names
    ],
)

department.sections.append(section)
# Jobs retained

# NO DATA

# Livelihoods

# NO DATA

# Register the department in the overall data set. The append is unconditional,
# so every execution of this statement adds another entry.
all_data.departments.append(department)

# Serialise once, write the JSON to <output_dir>/<sheet_name>.json, and close
# the file deterministically instead of leaving the handle to the garbage
# collector.
department_json = department.to_json(indent=2)
with open(output_dir + "/" + sheet_name + ".json", "w") as department_file:
    print(department_json, file=department_file)

# Echo the same JSON in the notebook output.
print(department_json)

## COGTA

In [None]:
department_name = "Co-operative government"
sheet_name = "DCOGTA"

# The department occupies a single row (index 37) in both the targets and
# the achievements sheets.
department = Department(
    name=department_name,
    sheet_name=sheet_name,
    month=202101,
    lead=leads[sheet_name],
    paragraph=paragraphs[sheet_name],
    target_lines=[37, 38],
    achievement_lines=[37, 38],
    sections=[],
    implementation_details=[],
)

dcogta_budget_target = 50_000_000

# Sum column 2 of the targets sheet and column 13 of the achievements sheet
# over the department's rows, treating missing cells as 0.
dcogta_target_rows = slice(department.target_lines[0], department.target_lines[1])
dcogta_achievement_rows = slice(
    department.achievement_lines[0], department.achievement_lines[1]
)
dcogta_support_target = (
    opportunity_targets_df.fillna(0).iloc[dcogta_target_rows, 2].sum()
)
dcogta_support_achievements = (
    opportunity_achievements_df.fillna(0).iloc[dcogta_achievement_rows, 13].sum()
)

# Overall targets: one currency metric (budget) and one count metric
# (opportunities achieved vs. target).
dcogta_targets_key = SectionEnum.targets.name

dcogta_budget_metric = Metric(
    name=metric_titles[dcogta_targets_key][MetricTypeEnum.currency.name],
    metric_type=MetricTypeEnum.currency.name,
    # NOTE(review): -1 looks like a "no spending figure yet" placeholder
    # (spending targets are still TODO) - confirm with the consumer.
    value=-1,
    value_target=dcogta_budget_target,
    time=None,
    gender=None,
    province=None,
    age=None,
)
dcogta_opportunities_metric = Metric(
    name=metric_titles[dcogta_targets_key][MetricTypeEnum.count.name],
    metric_type=MetricTypeEnum.count.name,
    value=dcogta_support_achievements,
    value_target=dcogta_support_target,
    time=None,
    gender=None,
    province=None,
    age=None,
)

section = Section(
    name=section_titles[dcogta_targets_key],
    section_type=dcogta_targets_key,
    metrics=[dcogta_budget_metric, dcogta_opportunities_metric],
)

department.sections.append(section)

# Spending targets

# TODO

# Job creation results

# NO DATA

# Jobs retained

# NO DATA

# Livelihoods

# NO DATA

# Register the department in the overall data set. The append is unconditional,
# so every execution of this statement adds another entry.
all_data.departments.append(department)

# Serialise once, write the JSON to <output_dir>/<sheet_name>.json, and close
# the file deterministically instead of leaving the handle to the garbage
# collector.
department_json = department.to_json(indent=2)
with open(output_dir + "/" + sheet_name + ".json", "w") as department_file:
    print(department_json, file=department_file)

# Echo the same JSON in the notebook output.
print(department_json)

## Overview picture

In [None]:
# not aligned: the target and achievement sections do not align row-for-row,
# so the per-row opportunity-type filter cannot be applied to these departments.
# DALLRD: target is entirely 'Subsistence relief fund': LIV
# DOH: target CHW and Outreach Team Leaders is collapsed into 1 in achievements: CRE
# DPWI: target is entirely 'Graduate programmes': CRE
not_aligned = ["DALLRD", "DOH", "DPWI"]

# Start from an empty list; one OverviewSection per opportunity type is
# appended below.
all_data.overview.sections = []


def _department_by_sheet(sheet_name):
    """Return the first loaded department whose upper-cased sheet name matches.

    Raises IndexError (as the plain ``[...][0]`` lookup would) when the
    department has not been added to ``all_data.departments``.
    """
    matches = [
        dept for dept in all_data.departments if dept.sheet_name.upper() == sheet_name
    ]
    if not matches:
        raise IndexError(
            f"department sheet {sheet_name!r} has not been loaded into all_data"
        )
    return matches[0]


def _sum_rows(frame, lines, column, row_filter=None):
    """Sum ``column`` of ``frame`` over rows ``lines[0]:lines[1]`` as an int.

    When ``row_filter`` (a list of booleans, one per row in the slice) is
    given, only the rows flagged True contribute to the sum.
    """
    rows = frame.iloc[lines[0] : lines[1], column]
    if row_filter is not None:
        rows = rows[row_filter]
    return int(rows.sum())


def _overview_metric(dept, achievement, target):
    """Build the per-department count metric shown in an overview section."""
    return Metric(
        name=dept.name,
        metric_type=MetricTypeEnum.count.name,
        value=achievement,
        value_target=target,
        time=None,
        gender=None,
        age=None,
        province=None,
    )


dallrd_dept = _department_by_sheet("DALLRD")
doh_dept = _department_by_sheet("DOH")
dpwi_dept = _department_by_sheet("DPWI")

# For the unaligned departments the "filter list" approach cannot be used;
# luckily, for now, each of them has a single opportunity type, so all of
# their rows are summed under that type.
unaligned_departments = {
    "LIV": [dallrd_dept],
    "CRE": [doh_dept, dpwi_dept],
}

# Running grand totals across all opportunity types (not read again in this
# cell; kept for interactive inspection).
total_target = 0
total_achievement = 0
section_details = dict(
    LIV=dict(
        name=section_titles[SectionEnum.livelihoods.name + "_overview"],
        section_type=SectionEnum.livelihoods.name,
    ),
    CRE=dict(
        name=section_titles[SectionEnum.job_opportunities.name + "_overview"],
        section_type=SectionEnum.job_opportunities.name,
    ),
    RET=dict(
        name=section_titles[SectionEnum.jobs_retain.name + "_overview"],
        section_type=SectionEnum.jobs_retain.name,
    ),
)
for opportunity_type in ("LIV", "CRE", "RET"):
    metrics = []
    for dept in all_data.departments:
        if dept.sheet_name.upper() in not_aligned:
            continue
        # A disabled workaround used to live here: for DEFF it copied column 11
        # of the achievements sheet into column 13 wherever column 13 was 0
        # (DEFF sometimes has data for November 2020 but not for January 2021).
        # Column 4 of the targets sheet carries the opportunity-type code of
        # each programme row.
        filter_list = list(
            opportunity_targets_df.iloc[dept.target_lines[0] : dept.target_lines[1], 4]
            == opportunity_type
        )
        in_section = any(filter_list)
        if not in_section:
            continue
        # The same row filter is applied to the achievements rows, which is
        # why this path requires targets and achievements to be aligned.
        target = _sum_rows(opportunity_targets_df, dept.target_lines, 2, filter_list)
        achievement = _sum_rows(
            opportunity_achievements_df, dept.achievement_lines, 13, filter_list
        )
        total_target += target
        total_achievement += achievement
        metrics.append(_overview_metric(dept, achievement, target))
    for dept in unaligned_departments.get(opportunity_type, []):
        achievement = _sum_rows(
            opportunity_achievements_df, dept.achievement_lines, 13
        )
        target = _sum_rows(opportunity_targets_df, dept.target_lines, 2)
        total_achievement += achievement
        total_target += target
        metrics.append(_overview_metric(dept, achievement, target))
    section = OverviewSection(
        name=section_details[opportunity_type]["name"],
        section_type=section_details[opportunity_type]["section_type"],
        metric_type=MetricTypeEnum.count.name,
        value=sum([metric.value for metric in metrics]),
        value_target=sum([metric.value_target for metric in metrics]),
        metrics=metrics,
    )
    all_data.overview.sections.append(section)


# print(all_data.overview.to_json(indent=2))
# check that our sums add up to the total in the target section of the spreadsheet
assert (
    sum([section.value_target for section in all_data.overview.sections])
    == opportunity_targets_df.iloc[56, 2]
)

In [None]:
# ad_set = set()
# for department in all_data.departments:
#     for section in department.sections:
#         if section.section_type == SectionEnum.targets.name:
#             continue
#         for metric in section.metrics:
#             ad_set.add(metric.name)
# #             if metric.name not in implementation_status_df.Programme.values:
# #                 print(metric.name)
# # pn = implementation_status_df.Programme.iloc[0]
# # pn in implementation_status_df.Programme.values

# ot_set = set(list(opportunity_targets_df.iloc[2:55, 1]))
# imp_set = set(list(implementation_status_df.iloc[:53, 1]))
# ot_set.difference(imp_set)
# imp_set.difference(ot_set)

# target_to_imp_programme_mapping = {
#     "Banking with art, connecting Lives - National Museum Bloemfontein": " Banking with art, connecting Lives - National Museum Bloemfontein",
#     "CSIR - Experiential Training Programme": "CSIR - Experiential Training Programme ",
#     "Community Health Workers": "Community health workers",
#     "Covid-19 Return-To-Play - National Sport Federations": "Covid-19 Return-To-Play - National Sport Federations                                                                                                                                    ",
#     "Digitisation of records - National Library of South Africa": "Digitisation of records - National Library of South Africa ",
#     "Facilities Management": "Facilities Management (PMTE) Employment: ",
#     "In-House Construction projects": "In-House Construction projects ",
#     "Job retention at fee paying schools": "Retain vulnerable teaching posts",
#     "Municipal infrastructure": "Mainstream labour intensive construction methods",
#     "Outreach Team Leaders": "Outreach team leaders",
#     "Oceans and Coast: Source to Sea": "Oceans and Coast: Source to Sea ",
#     "Provincial Roads Maintenance": "Rural roads maintenance",
#     "Real Estate": "Real Estate  (PMTE)",
#     "Services sector development incentives": "Global Business Services Sector",
#     "Subsistence relief fund": "Subsistence producer relief fund",
#     "Retention of social workers": "Social workers",
#     "Vegetables and Fruits": "Vegetables and Fruits ",
#     "WRC - Water Graduate Employment Programme": " WRC - Water Graduate Employment Programme ",
#     "Water and Energy Efficiency": "Water and Energy Efficiency (Green Economy)",
#     "Water and Sanitation Facilities Management": "Water and Sanitation Facilities Management (PMTE)",
#     "Welisizwe Rural Bridges Programme": "Welisizwe Rural Bridges Programme (PMTE) ",
# }

# target_to_achieve_programme_mapping = {
#     "Community Health Workers": "Community health workers",
#     "Graduate programmes (Property Management Trading Entity)": None,
#     "Job retention at fee paying schools": "Retain vulnerable teaching posts",
#     "Municipal infrastructure": "Mainstream labour intensive construction methods",
#     "Outreach Team Leaders": "Outreach team leaders",
#     "Provincial Roads Maintenance": "Rural roads maintenance",
#     "Real Estate ": "Real Estate",
#     "Retention of social workers": "Social workers",
#     "Services sector development incentives": "Global Business Services Sector",
#     "Subsistence relief fund": None,
#     "WRC - Water Graduate Employment Programme ": " WRC - Water Graduate Employment Programme ",
#     "Water and Energy Efficiency ": "Water and Energy Efficiency",
#     "Water and Sanitation Facilities Management ": "Water and Sanitation Facilities Management",
#     "Welisizwe Rural Bridges Programme": "Welisizwe Rural Bridges Programme ",
# }

# len(ot_set)
# len(imp_set)

# oa_set = set(list(opportunity_achievements_df.iloc[2:54, 1]))
# oa_set.difference(ot_set)

# len(opportunity_achievements_df.iloc[3:54, 1])
# ot_set.difference(oa_set)

# ot_set.difference(ad_set)

# ot_set = set([s.strip() for s in opportunity_targets_df.iloc[2:55, 1]])

# ot_set.difference(ad_set)

# # ad_set.difference(ot_set)
# ad_set.difference(imp_set)
# for diff in sorted(ot_set.difference(imp_set)):
#     if diff not in target_to_imp_programme_mapping:
#         print(diff)

# Attach an ImplementationDetail to each department for every programme metric
# (all sections except the overall "targets" one) that has a recognised status
# in implementation_status_df. `count` tallies the details added.
# NOTE(review): target_to_imp_programme_mapping, implementation_status_df and
# implementation_status_to_enum are expected to be defined earlier in the
# notebook - confirm they survive a fresh Restart & Run All.
count = 0
for department in all_data.departments:
    if department.implementation_details is None:
        department.implementation_details = []
    for section in department.sections:
        if section.section_type != SectionEnum.targets.name:
            for metric in section.metrics:
                # DALLRD Graduate Employment programme has no info
                if metric.name == "Graduate Employment":
                    continue
                # Use the metric name directly when the status sheet knows it,
                # otherwise translate it through the name mapping.
                if metric.name in implementation_status_df.Programme.values:
                    programme_key = metric.name
                else:
                    programme_key = target_to_imp_programme_mapping[metric.name]
                programme_rows = implementation_status_df[
                    implementation_status_df.Programme == programme_key
                ]
                status_text = programme_rows.Status.iloc[0]
                # Statuses without an enum equivalent are skipped.
                if status_text in implementation_status_to_enum:
                    department.implementation_details.append(
                        ImplementationDetail(
                            programme_name=metric.name,
                            status=implementation_status_to_enum[status_text],
                            detail=programme_rows.Detail.iloc[0],
                        )
                    )
                    count += 1
print(count)

# Save final data

In [None]:
output_filename = output_dir + "/all_data.json"
# Write through a context manager so the file is flushed and closed before
# "DONE" is reported, rather than leaving the handle open.
with open(output_filename, "w") as output_file:
    output_file.write(all_data.to_json(indent=2))
# print(all_data.to_json(indent=2))
print("DONE")

In [None]:
programme_status_df = pd.read_excel(
    mar_opportunities_excel, sheet_name="Implementation status", header=None
)
# NOTE(review): programme_status_df is loaded above but not referenced below;
# the comprehension reads implementation_status_df instead - confirm which
# frame is intended.


def to_camel_case(match):
    """Join a regex match's three groups, upper-casing the second one.

    Used as the replacement callback for ``(\\S*) (\\w)(.*)``: the single space
    between the first word and the next character is dropped and that
    character is capitalised (e.g. "In progress" -> "InProgress").
    """
    return match.group(1) + match.group(2).upper() + match.group(3)


# Display the distinct, non-null values of column 2 (from row 3 onwards) with
# the first space removed and the following letter capitalised.
[
    re.sub(r"(\S*) (\w)(.*)", to_camel_case, status)
    for status in implementation_status_df.iloc[3:, 2].dropna().unique()
]

In [None]:
# Print one tab-indented "'<name>': '<sheet_name>'," line per department.
for dept in all_data.departments:
    mapping_line = f"\t'{dept.name}': '{dept.sheet_name}',"
    print(mapping_line)