In [1]:
%load_ext lab_black

In [2]:
%load_ext autoreload

%autoreload 2

In [28]:
from datetime import datetime
import functools
import json
import operator
import re
import sys

import altair as alt
import numpy as np
import pandas as pd

sys.path.append("../python-src")
from presidential_employment import *

In [4]:
print(sys.argv[0])

/home/pvh/miniconda3/envs/altair/lib/python3.9/site-packages/ipykernel_launcher.py


In [5]:
# Directory that receives the generated "<sheet_name>.json" files written by
# the department cells below.
# NOTE(review): this is an absolute, machine-specific path; it must exist and
# be writable on whichever machine runs the notebook.
output_dir = "/home/pvh/Documents/code/pvh-forks/presidential-employment-stimulus/data"

### Data structure

Each department has a total budget and total opportunities target. 

The overall programme has outcome targets
1. Jobs created
2. Jobs retained
3. Livelihoods supported

Each department has a "blurb" describing their programme.

Within each department there are multiple programmes that can contribute to each of these targets.

Each programme has a demographic split of outcomes, with gender and youth percentages.

Each programme has a per-province split of outcomes.

Files:

`Consolidated data (Dec) - Presidential Employment Stimulus.xlsx` - December sheet

`Consolidated Presidential Employment Stimulus Reporting Template.xlsx` - January sheet

In [6]:
# File names of the consolidated reporting workbooks. They are given without a
# directory, so they are resolved against the notebook's working directory.
december_excel = "Consolidated data (Dec) - Presidential Employment Stimulus.xlsx"
january_excel = "Consolidated Presidential Employment Stimulus Reporting Template.xlsx"
doh_january_excel = (
    "DoH_Consolidated Appointment of Nurses - Verified by Provinces.xlsx"
)
march_excel = (
    "Consolidated Presidential Employment Stimulus Reporting Template 200321.xlsx"
)

apr_excel = (
    "Consolidated Presidential Employment Stimulus Reporting Template 120421.xlsx"
)

apr2_excel = (
    "Consolidated Presidential Employment Stimulus Reporting Template 180421.xlsx"
)

# The per-department cells read their sheets from this workbook.
latest_excel = apr2_excel

# File names of the "Dashboard input" workbooks (targets and monthly
# opportunities). The trailing digits look like a DDMMYY revision date —
# TODO confirm.
jan_opportunities_excel = (
    "Dashboard input_PES targets and opportunities per month 170321 categories.xlsx"
)
mar_opportunities_excel = (
    "Dashboard input_PES targets and opportunities per month 230321.xlsx"
)
apr_opportunities_excel = (
    "Dashboard input_PES targets and opportunities per month 020421.xlsx"
)

apr2_opportunities_excel = (
    "Dashboard input_PES targets and opportunities per month 120421.xlsx"
)

apr3_opportunities_excel = (
    "Dashboard input_PES targets and opportunities per month 130421.xlsx"
)

apr4_opportunities_excel = (
    "Dashboard input_PES targets and opportunities per month 180421.xlsx"
)

apr5_opportunities_excel = (
    "Dashboard input_PES targets and opportunities per month 190421.xlsx"
)

may_opportunities_excel = (
    "Dashboard input_PES targets and opportunities per month 4052021.xlsx"
)

# All target / trend / status / description sheets below are read from this
# workbook.
latest_opportunities_excel = may_opportunities_excel

# "Targets" and "Trends" are read without a header row, so their columns are
# labelled by integer position; missing cells are replaced with 0 so that later
# int() conversions and sums do not hit NaN.
opportunity_targets_df = pd.read_excel(
    latest_opportunities_excel, sheet_name="Targets", header=None
).fillna(0)
opportunity_achievements_df = pd.read_excel(
    latest_opportunities_excel, sheet_name="Trends", header=None
).fillna(0)

# Implementation status table: the first two spreadsheet rows are skipped and
# only the first four columns are kept. Later cells filter it on its
# "Programme" column.
implementation_status_df = pd.read_excel(
    latest_opportunities_excel,
    sheet_name="Implementation status",
    skiprows=2,
    usecols=range(4),
)

# Columns 1 and 4 of rows 2..55 of the Targets sheet, indexed by column 1.
# Column 1 is used as the programme name elsewhere in this notebook; column 4
# is presumably the opportunity type for that programme — TODO confirm.
opportunity_type_df = pd.concat(
    [opportunity_targets_df.iloc[2:56, 1], opportunity_targets_df.iloc[2:56, 4]], axis=1
).set_index(1)

# Per-department descriptive text, indexed by the sheet's first column; the
# next cell reads its "lead" and "paragraph" columns.
description_df = pd.read_excel(
    latest_opportunities_excel,
    sheet_name="Department Descriptions",
    index_col=0,
    usecols=range(3),
)

def _list_sheet_names(workbook_path):
    """Return the sheet names of the Excel workbook at ``workbook_path``.

    The workbook is opened in a ``with`` block so the underlying file handle
    is closed as soon as the names have been read.
    """
    with pd.ExcelFile(workbook_path) as workbook:
        return workbook.sheet_names


december_sheets = _list_sheet_names(december_excel)
january_sheets = _list_sheet_names(january_excel)
apr_sheets = _list_sheet_names(apr_excel)
# Derive the "latest" sheet list from the workbook that latest_excel actually
# points at, so the two cannot drift apart when latest_excel is bumped to a
# newer file.
latest_sheets = _list_sheet_names(latest_excel)

In [7]:
# Descriptive text from the "Department Descriptions" sheet as plain dicts
# keyed by the sheet's index column (e.g. "overview", "DTIC", "DBE").
leads = description_df["lead"].to_dict()
paragraphs = description_df["paragraph"].to_dict()

```
interface DepartmentMonth {
  month: number // 202101
  name: string // Basic Education
  lead: string // Strengthening the learning environment in schools
  paragraph: string
  sections: Array<{
    name: string // Budget allocated to date
    metrics: Array<{
      name: string // Educational and general assistants
      type: 'currency' | 'count'
      value: number
      valueTarget?: number
      time?: {
        name: string // spend
        values: Array<{
          month: number // 202101
          value: number
        }>
      }
      gender?: {
        name: string // opportunities
        values: Array<{
          gender: 'female' | 'male'
          value: number
        }>
      }
      age?: {
        name: string // opportunities
        values: Array<{
          age: string // 18-35
          value: number
        }>
      }
      province?: {
        name: string // opportunities
        values: Array<{
          province: 'EC' | 'FS' | 'GP' | 'KZN' | 'LP' | 'MP' | 'NC' | 'NW' | 'WC'
        }>
      }
    }>
  }>
}
```

## 

# Top level structure

In [8]:
# Top-level container for everything the notebook produces: an overview entry
# (text taken from the "overview" row of the description sheet) plus a list of
# departments that the per-department cells below fill in.
all_data = Everything(
    overview=Overview(
        month=202103,
        name="Programme overview",
        lead=leads["overview"],
        paragraph=paragraphs["overview"],
        sections=[],
    ),
    departments=[],
)


def add_or_replace(departments, department):
    """Insert ``department`` into ``departments``, keyed on ``sheet_name``.

    If an entry with the same ``sheet_name`` is already present, the first
    such entry is overwritten in place; otherwise ``department`` is appended.
    This keeps re-running a department cell from producing duplicates.

    The list is mutated in place and also returned.
    """
    position = next(
        (
            idx
            for idx, existing in enumerate(departments)
            if existing.sheet_name == department.sheet_name
        ),
        None,
    )
    if position is None:
        departments.append(department)
    else:
        departments[position] = department
    return departments

## DTIC

In [9]:
department_name = "Trade, Industry and Competition"
sheet_name = "DTIC"

# target_lines / achievement_lines are [start, stop) row positions of this
# department's programmes in the Targets and Trends sheets respectively.
department = Department(
    month=202012,
    name=department_name,
    sheet_name=sheet_name,
    lead=leads[sheet_name],
    paragraph=paragraphs[sheet_name],
    sections=[],
    target_lines=[38, 39],
    achievement_lines=[42, 43],
    implementation_details=[],
)

budget_target = 120_000 * 1000
# The row ranges above each span exactly one row, so read that single cell
# directly. Calling int() on a one-row Series slice relies on an implicit
# conversion that pandas has deprecated.
opportunities_target = int(
    opportunity_targets_df.iloc[department.target_lines[0], 2]
)
opportunities_achieved = int(
    opportunity_achievements_df.iloc[
        department.achievement_lines[0], total_achievement_column
    ]
)

# Programme targets for this department
section = Section(
    name=section_titles[SectionEnum.targets.name],
    section_type=SectionEnum.targets.name,
    metrics=[
        # Budget metric: only the target is populated; value=-1 appears to be a
        # "no figure available" placeholder — TODO confirm with the front end.
        Metric(
            name=metric_titles[SectionEnum.targets.name][MetricTypeEnum.currency.name],
            metric_type=MetricTypeEnum.currency.name,
            value=-1,
            value_target=budget_target,
            dimensions=[],
        ),
        # Opportunities metric: achieved so far versus target.
        Metric(
            name=metric_titles[SectionEnum.targets.name][MetricTypeEnum.count.name],
            metric_type=MetricTypeEnum.count.name,
            value=opportunities_achieved,
            value_target=opportunities_target,
            dimensions=[],
        ),
    ],
)

department.sections.append(section)

dtic_df = pd.read_excel(latest_excel, sheet_name=sheet_name, header=None)

# Job opportunities created to date

# Demographic values are read from fixed cells of the DTIC sheet.
percentage_male = dtic_df.iloc[41, 1]
percentage_female = dtic_df.iloc[42, 1]
percentage_youth = dtic_df.iloc[43, 1]


# Province name -> count, taken from the first two columns of the sheet.
# NOTE(review): this slice covers 8 rows, whereas the DBE and DSD cells read
# 9 province rows (10:19) — confirm no province is being dropped here.
by_province_df = dtic_df.iloc[29:37, :2].set_index(0)

# Monthly series for the department's single programme row.
opportunities_achieved_by_time = opportunity_achievements_df.iloc[
    department.achievement_lines[0], achievement_columns
].tolist()

# Same monthly figures as a frame indexed by programme name (column 1).
jobs_implemented_df = pd.concat(
    [
        opportunity_achievements_df.iloc[
            department.achievement_lines[0] : department.achievement_lines[1], 1
        ],
        opportunity_achievements_df.iloc[
            department.achievement_lines[0] : department.achievement_lines[1],
            achievement_columns,
        ],
    ],
    axis=1,
).set_index(1)


programme_name = str(opportunity_targets_df.iloc[department.target_lines[0], 1])

implementation_info = implementation_status_df[
    implementation_status_df.Programme == programme_name
]

section = Section(
    name=section_titles[SectionEnum.job_opportunities.name],
    section_type=SectionEnum.job_opportunities.name,
    metrics=[
        Metric(
            name=programme_name,
            metric_type=MetricTypeEnum.count.name,
            # Last monthly column is used as the current value.
            value=int(jobs_implemented_df.iloc[0, -1]),
            # Read the scalar target cell directly; int() on a one-row Series
            # slice is deprecated in pandas.
            value_target=int(
                opportunity_targets_df.iloc[department.target_lines[0], 2]
            ),
            dimensions=[
                Dimension(
                    viz=VizTypeEnum.line.name,
                    name=metric_titles[SectionEnum.job_opportunities.name][
                        MetricTypeEnum.count.name + "_time"
                    ],
                    lookup=LookupTypeEnum.time.name,
                    values=[
                        MetricValue(key=months[i], value=value)
                        for i, value in enumerate(opportunities_achieved_by_time)
                    ],
                ),
                Dimension(
                    viz=VizTypeEnum.two_value.name,
                    name=metric_titles[SectionEnum.job_opportunities.name][
                        MetricTypeEnum.count.name + "_gender"
                    ],
                    lookup=LookupTypeEnum.gender.name,
                    values=[
                        MetricValue(key=GenderEnum.Male.name, value=percentage_male),
                        MetricValue(
                            key=GenderEnum.Female.name, value=percentage_female
                        ),
                    ],
                ),
                Dimension(
                    viz=VizTypeEnum.percentile.name,
                    name=metric_titles[SectionEnum.job_opportunities.name][
                        MetricTypeEnum.count.name + "_age"
                    ],
                    lookup=LookupTypeEnum.age.name,
                    values=[MetricValue(key="18-35", value=percentage_youth)],
                ),
                Dimension(
                    name=metric_titles[SectionEnum.job_opportunities.name][
                        MetricTypeEnum.count.name + "_province"
                    ],
                    viz=VizTypeEnum.bar.name,
                    lookup=LookupTypeEnum.province.name,
                    values=[
                        MetricValue(
                            key=province_to_abbrev[province],
                            # .loc[province] yields a one-element row; take the
                            # scalar explicitly instead of int(Series).
                            value=int(by_province_df.loc[province].iloc[0]),
                        )
                        for province in by_province_df.index
                    ],
                ),
            ],
        )
    ],
)

department.sections.append(section)


# Register the department (replacing any earlier version from a previous run
# of this cell) and write it out as "<sheet_name>.json".
all_data.departments = add_or_replace(all_data.departments, department)
# Use a context manager so the file is flushed and closed deterministically
# rather than whenever the anonymous file object is garbage-collected.
with open(output_dir + "/" + sheet_name + ".json", "w") as json_file:
    print(department.to_json(indent=2), file=json_file)
# print(department.to_json(indent=2))

## DBE

In [10]:
department_name = "Basic Education"
sheet_name = "DBE"

# target_lines / achievement_lines are [start, stop) row positions of this
# department's three programmes in the Targets and Trends sheets.
department = Department(
    month=202103,
    name=department_name,
    sheet_name=sheet_name,
    lead=leads[sheet_name],
    paragraph=paragraphs[sheet_name],
    sections=[],
    target_lines=[2, 5],
    achievement_lines=[6, 9],
    implementation_details=[],
)

budget_target = 7_000_000 * 1000
# Earlier hard-coded figure, superseded by the sum read from the sheet below:
# opportunities_target = 344_933
# Department-wide target = sum of column 2 over all programme rows.
opportunities_target = opportunity_targets_df.iloc[
    department.target_lines[0] : department.target_lines[1], 2
].sum()

dbe_df = pd.read_excel(latest_excel, sheet_name=sheet_name, header=None)

# Totals achieved per programme. The row order (education assistants, general
# assistants, posts saved) is implied by the variable names — verify against
# the Trends sheet if its layout changes.
ed_assistants_achieved = opportunity_achievements_df.iloc[
    department.achievement_lines[0], total_achievement_column
]
gen_assistants_achieved = opportunity_achievements_df.iloc[
    department.achievement_lines[0] + 1, total_achievement_column
]
posts_saved_achieved = opportunity_achievements_df.iloc[
    department.achievement_lines[0] + 2, total_achievement_column
]

opportunities_achieved = sum(
    [posts_saved_achieved, ed_assistants_achieved, gen_assistants_achieved]
)

# Programme targets for this department
section = Section(
    name=section_titles[SectionEnum.targets.name],
    section_type=SectionEnum.targets.name,
    metrics=[
        # Budget metric: only the target is populated; value=-1 appears to be a
        # "no figure available" placeholder — TODO confirm.
        Metric(
            name=metric_titles[SectionEnum.targets.name][MetricTypeEnum.currency.name],
            metric_type=MetricTypeEnum.currency.name,
            value=-1,
            value_target=budget_target,
            dimensions=[],
        ),
        # Opportunities metric: achieved so far versus target.
        Metric(
            name=metric_titles[SectionEnum.targets.name][MetricTypeEnum.count.name],
            metric_type=MetricTypeEnum.count.name,
            value=opportunities_achieved,
            value_target=opportunities_target,
            dimensions=[],
        ),
    ],
)

department.sections.append(section)

# Job opportunities created to date

# Column positions of the two job-creating programmes in the DBE sheet.
ed_assistants_column = 1
gen_assistants_column = 2

# Province name (column 0) -> count, one frame per programme.
ed_assistants_by_prov_df = dbe_df.iloc[10:19, :2].set_index(0)
gen_assistants_by_prov_df = dbe_df.iloc[10:19, :3].drop(1, axis=1).set_index(0)

ed_assistants_target = opportunity_targets_df.iloc[department.target_lines[0], 2]
gen_assistants_target = opportunity_targets_df.iloc[department.target_lines[0] + 1, 2]

# Sheet rows holding the demographic values: male at gender_row, female on the
# following row, youth at age_row.
gender_row = 36
age_row = 38
job_opportunity_programmes = [
    dict(
        name=str(opportunity_targets_df.iloc[department.target_lines[0], 1]),
        totals=list(
            opportunity_achievements_df.iloc[
                department.achievement_lines[0], achievement_columns
            ]
        ),
        province=ed_assistants_by_prov_df,
        gender={
            GenderEnum.Male.name: float(dbe_df.iloc[gender_row, ed_assistants_column]),
            GenderEnum.Female.name: float(
                dbe_df.iloc[gender_row + 1, ed_assistants_column]
            ),
        },
        age=float(dbe_df.iloc[age_row, ed_assistants_column]),
        target=ed_assistants_target,
    ),
    dict(
        name=str(opportunity_targets_df.iloc[department.target_lines[0] + 1, 1]),
        totals=list(
            opportunity_achievements_df.iloc[
                department.achievement_lines[0] + 1, achievement_columns
            ]
        ),
        province=gen_assistants_by_prov_df,
        gender={
            GenderEnum.Male.name: float(dbe_df.iloc[gender_row, gen_assistants_column]),
            GenderEnum.Female.name: float(
                dbe_df.iloc[gender_row + 1, gen_assistants_column]
            ),
        },
        age=float(dbe_df.iloc[age_row, gen_assistants_column]),
        target=gen_assistants_target,
    ),
]


# Build one Metric per programme with gender / age / province / time breakdowns.
metrics = []
for programme in job_opportunity_programmes:
    metric = Metric(
        name=programme["name"],
        metric_type=MetricTypeEnum.count.name,
        dimensions=[
            Dimension(
                name=metric_titles[SectionEnum.job_opportunities.name][
                    MetricTypeEnum.count.name + "_gender"
                ],
                viz=VizTypeEnum.two_value.name,
                lookup=LookupTypeEnum.gender.name,
                values=[
                    MetricValue(key=gender, value=value)
                    for gender, value in programme["gender"].items()
                ],
            ),
            Dimension(
                name=metric_titles[SectionEnum.job_opportunities.name][
                    MetricTypeEnum.count.name + "_age"
                ],
                viz=VizTypeEnum.percentile.name,
                lookup=LookupTypeEnum.age.name,
                values=[MetricValue(key="18-35", value=programme["age"])],
            ),
            Dimension(
                name=metric_titles[SectionEnum.job_opportunities.name][
                    MetricTypeEnum.count.name + "_province"
                ],
                viz=VizTypeEnum.bar.name,
                lookup=LookupTypeEnum.province.name,
                values=[
                    MetricValue(
                        key=province_to_abbrev[province],
                        # Cells may hold strings with spaces as thousands
                        # separators; strip them before converting to int.
                        value=int(
                            str(programme["province"].loc[province].iloc[0]).replace(
                                " ", ""
                            )
                        ),
                    )
                    for province in programme["province"].index
                ],
            ),
            Dimension(
                name=metric_titles[SectionEnum.job_opportunities.name][
                    MetricTypeEnum.count.name + "_time"
                ],
                # Use the enum like every other dimension instead of a bare
                # "line" literal, so a typo or enum rename cannot silently
                # diverge.
                viz=VizTypeEnum.line.name,
                lookup=LookupTypeEnum.time.name,
                values=[
                    MetricValue(key=months[i], value=value)
                    for i, value in enumerate(programme["totals"])
                ],
            ),
        ],
        # Last monthly column is used as the current value.
        value=programme["totals"][-1],
        value_target=programme["target"],
    )
    metrics.append(metric)

section = Section(
    name=section_titles[SectionEnum.job_opportunities.name],
    section_type=SectionEnum.job_opportunities.name,
    metrics=metrics,
)

department.sections.append(section)

# Jobs retained

# Province name (column 0) -> posts saved (column 3). Despite its name, this
# frame is a per-province breakdown, not a time series.
jobs_retained_over_time_df = dbe_df.iloc[10:19, :4].drop([1, 2], axis=1).set_index(0)

# Monthly series for the third programme row (posts saved).
jobs_retained_over_time = opportunity_achievements_df.iloc[
    department.achievement_lines[0] + 2, achievement_columns
].tolist()
posts_saved_target = opportunity_targets_df.iloc[department.target_lines[0] + 2, 2]

section = Section(
    name=section_titles[SectionEnum.jobs_retain.name],
    section_type=SectionEnum.jobs_retain.name,
    metrics=[
        Metric(
            name=str(opportunity_targets_df.iloc[department.target_lines[0] + 2, 1]),
            metric_type=MetricTypeEnum.count.name,
            dimensions=[
                Dimension(
                    name=metric_titles[SectionEnum.jobs_retain.name][
                        MetricTypeEnum.count.name + "_time"
                    ],
                    viz=VizTypeEnum.line.name,
                    lookup=LookupTypeEnum.time.name,
                    values=[
                        MetricValue(key=months[i], value=value)
                        for i, value in enumerate(jobs_retained_over_time)
                    ],
                ),
                Dimension(
                    name=metric_titles[SectionEnum.jobs_retain.name][
                        MetricTypeEnum.count.name + "_province"
                    ],
                    viz=VizTypeEnum.bar.name,
                    lookup=LookupTypeEnum.province.name,
                    values=[
                        MetricValue(
                            key=province_to_abbrev[province],
                            # Take the scalar explicitly; int() on a
                            # one-element Series is deprecated in pandas.
                            value=int(
                                jobs_retained_over_time_df.loc[province].iloc[0]
                            ),
                        )
                        # Iterate the index of the frame being read, not that
                        # of an unrelated programme's frame.
                        for province in jobs_retained_over_time_df.index
                    ],
                ),
            ],
            value=posts_saved_achieved,
            value_target=posts_saved_target,
        ),
    ],
)

department.sections.append(section)

# Register the department (replacing any earlier version from a previous run
# of this cell) and write it out as "<sheet_name>.json".
all_data.departments = add_or_replace(all_data.departments, department)
# Use a context manager so the file is flushed and closed deterministically
# rather than whenever the anonymous file object is garbage-collected.
with open(output_dir + "/" + sheet_name + ".json", "w") as json_file:
    print(department.to_json(indent=2), file=json_file)
# print(department.to_json(indent=2))

## DSD

In [11]:
department_name = "Social Development"
sheet_name = "DSD"

# target_lines / achievement_lines are [start, stop) row positions of this
# department's three programmes in the Targets and Trends sheets.
department = Department(
    month=202103,
    name=department_name,
    sheet_name=sheet_name,
    lead=leads[sheet_name],
    paragraph=paragraphs[sheet_name],
    sections=[],
    target_lines=[5, 8],
    achievement_lines=[9, 12],
    implementation_details=[],
)


budget_target = 588_728 * 1000
# Department-wide target = sum of column 2 over all programme rows.
beneficiaries_target = opportunity_targets_df.iloc[
    department.target_lines[0] : department.target_lines[1], 2
].sum()

dsd_df = pd.read_excel(latest_excel, sheet_name=sheet_name, header=None)

# Department-wide achievement = sum of the total column over all programme rows.
beneficiaries_achieved = opportunity_achievements_df.iloc[
    department.achievement_lines[0] : department.achievement_lines[1],
    total_achievement_column,
].sum()

# Programme targets for this department
section = Section(
    name=section_titles[SectionEnum.targets.name],
    section_type=SectionEnum.targets.name,
    metrics=[
        # Budget metric: only the target is populated; value=-1 appears to be a
        # "no figure available" placeholder — TODO confirm.
        Metric(
            name=metric_titles[SectionEnum.targets.name][MetricTypeEnum.currency.name],
            metric_type=MetricTypeEnum.currency.name,
            value=-1,
            value_target=budget_target,
            dimensions=[],
        ),
        # Beneficiaries metric: achieved so far versus target.
        Metric(
            name=metric_titles[SectionEnum.targets.name][MetricTypeEnum.count.name],
            metric_type=MetricTypeEnum.count.name,
            value=beneficiaries_achieved,
            value_target=beneficiaries_target,
            dimensions=[],
        ),
    ],
)

department.sections.append(section)

# Jobs added

# TODO: update this when data on trends becomes available

# Job-opportunities section with a single metric and no breakdown dimensions.
# Its figures come from the second programme row (offset +1) of this
# department's target and achievement ranges.
department.sections.append(
    Section(
        name=section_titles[SectionEnum.job_opportunities.name],
        section_type=SectionEnum.job_opportunities.name,
        metrics=[
            Metric(
                name="Registration support officers",
                metric_type=MetricTypeEnum.count.name,
                value=int(
                    opportunity_achievements_df.iloc[
                        department.achievement_lines[0] + 1, total_achievement_column
                    ]
                ),
                value_target=int(
                    opportunity_targets_df.iloc[department.target_lines[0] + 1, 2]
                ),
                dimensions=[],
            )
        ],
    )
)

# Jobs retained

# Monthly series and target for the third programme row (offset +2).
social_worker_jobs = opportunity_achievements_df.iloc[
    department.achievement_lines[0] + 2, achievement_columns
].tolist()
social_worker_job_target = int(
    opportunity_targets_df.iloc[department.target_lines[0] + 2, 2]
)
# Province name (column 0) -> count, from the DSD sheet.
soc_worker_province_breakdown_df = dsd_df.iloc[10:19, :2].set_index(0)

jobs_retained = [
    Metric(
        name="Retention of social workers",
        metric_type=MetricTypeEnum.count.name,
        # Last monthly column is used as the current value.
        value=social_worker_jobs[-1],
        value_target=social_worker_job_target,
        dimensions=[
            Dimension(
                name=metric_titles[SectionEnum.jobs_retain.name][
                    MetricTypeEnum.count.name + "_gender"
                ],
                viz=VizTypeEnum.two_value.name,
                lookup=LookupTypeEnum.gender.name,
                values=[
                    MetricValue(key=GenderEnum.Male.name, value=dsd_df.iloc[36, 1]),
                    MetricValue(key=GenderEnum.Female.name, value=dsd_df.iloc[37, 1]),
                ],
            ),
            Dimension(
                name=metric_titles[SectionEnum.jobs_retain.name][
                    MetricTypeEnum.count.name + "_age"
                ],
                viz=VizTypeEnum.percentile.name,
                lookup=LookupTypeEnum.age.name,
                values=[MetricValue(key="18-35", value=dsd_df.iloc[38, 1])],
            ),
            Dimension(
                name=metric_titles[SectionEnum.jobs_retain.name][
                    MetricTypeEnum.count.name + "_time"
                ],
                viz=VizTypeEnum.line.name,
                lookup=LookupTypeEnum.time.name,
                values=[
                    MetricValue(key=months[i], value=value)
                    for i, value in enumerate(social_worker_jobs)
                ],
            ),
            Dimension(
                name=metric_titles[SectionEnum.jobs_retain.name][
                    MetricTypeEnum.count.name + "_province"
                ],
                viz=VizTypeEnum.bar.name,
                lookup=LookupTypeEnum.province.name,
                values=[
                    MetricValue(
                        key=province_to_abbrev[province],
                        # Take the scalar explicitly; int() on a one-element
                        # Series is deprecated in pandas.
                        value=int(
                            soc_worker_province_breakdown_df.loc[province].iloc[0]
                        ),
                    )
                    for province in soc_worker_province_breakdown_df.index
                ],
            ),
        ],
    )
]

jobs_retained_section = Section(
    name=section_titles[SectionEnum.jobs_retain.name],
    section_type=SectionEnum.jobs_retain.name,
    metrics=jobs_retained,
)

department.sections.append(jobs_retained_section)

# Livelihoods section

# TODO: update when trend data becomes available

# Livelihoods section with a single metric and no breakdown dimensions. Its
# figures come from the first programme row of this department's target and
# achievement ranges.
department.sections.append(
    Section(
        # Title must match the section type; this previously reused the
        # job_opportunities title, so the livelihoods section was mislabelled.
        name=section_titles[SectionEnum.livelihoods.name],
        section_type=SectionEnum.livelihoods.name,
        metrics=[
            Metric(
                name="ECD income and compliance support",
                metric_type=MetricTypeEnum.count.name,
                value=int(
                    opportunity_achievements_df.iloc[
                        department.achievement_lines[0], total_achievement_column
                    ]
                ),
                value_target=int(
                    opportunity_targets_df.iloc[department.target_lines[0], 2]
                ),
                dimensions=[],
            )
        ],
    )
)
# save all the data

# Register the department (replacing any earlier version from a previous run
# of this cell) and write it out as "<sheet_name>.json".
all_data.departments = add_or_replace(all_data.departments, department)
# Use a context manager so the file is flushed and closed deterministically
# rather than whenever the anonymous file object is garbage-collected.
with open(output_dir + "/" + sheet_name + ".json", "w") as json_file:
    print(department.to_json(indent=2), file=json_file)

# print(department.to_json(indent=2))

## DOH

In [12]:
department_name = "Health"
sheet_name = "DoH"

department = Department(
    month=202101,
    name=department_name,
    sheet_name=sheet_name,
    lead=leads[sheet_name.upper()],
    paragraph=paragraphs[sheet_name.upper()],
    sections=[],
    target_lines=[39, 43],
    achievement_lines=[43, 47],
    implementation_details=[],
)

doh_df = pd.read_excel(latest_excel, sheet_name=sheet_name, header=None)

health_job_target = opportunity_targets_df.iloc[
    department.target_lines[0] : department.target_lines[1], 2
].sum()
health_job_achievements = opportunity_achievements_df.iloc[
    department.achievement_lines[0] : department.achievement_lines[1],
    total_achievement_column,
].sum()

health_budget_target = 180_000_000

# Overall targets
section = Section(
    name=section_titles[SectionEnum.targets.name],
    section_type=SectionEnum.targets.name,
    metrics=[
        Metric(
            name=metric_titles[SectionEnum.targets.name][MetricTypeEnum.currency.name],
            metric_type=MetricTypeEnum.currency.name,
            value=-1,
            value_target=health_budget_target,
            dimensions=[],
        ),
        Metric(
            name=metric_titles[SectionEnum.targets.name][MetricTypeEnum.count.name],
            metric_type=MetricTypeEnum.count.name,
            value=health_job_achievements,
            value_target=health_job_target,
            dimensions=[],
        ),
    ],
)

department.sections.append(section)


# Job opportunties

dash_to_cons = {
    "Community Health Workers": "Community health workers",
    "Outreach Team Leaders": "Outreach team leaders",
    "Enrolled nurses": "Enrolled nurses",
    "Auxiliary nurses": "Auxiliary nurses",
}

job_op_achievements_df = pd.concat(
    [
        opportunity_achievements_df.iloc[
            department.achievement_lines[0] : department.achievement_lines[1], 1
        ],
        opportunity_achievements_df.iloc[
            department.achievement_lines[0] : department.achievement_lines[1],
            achievement_columns,
        ],
    ],
    axis=1,
).set_index(1)

job_op_targets_df = pd.concat(
    [
        opportunity_targets_df.iloc[
            department.target_lines[0] : department.target_lines[1], 1
        ],
        opportunity_targets_df.iloc[
            department.target_lines[0] : department.target_lines[1], 2
        ],
    ],
    axis=1,
).set_index(1)

prov_job_op_df = doh_df.iloc[14:24, 1:5].transpose()
prov_job_op_df.columns = doh_df.iloc[14:24, 0]
prov_job_op_df = prov_job_op_df.set_index(prov_job_op_df.columns[0])

# Build one Metric per DoH programme. A breakdown dimension is attached only
# when the corresponding data actually contains non-zero values.
job_op_metrics = []
for programme_name in job_op_achievements_df.index:
    dimensions = []

    # Monthly achievements, keyed by the dashboard workbook's programme name.
    achievements_by_month = job_op_achievements_df.loc[programme_name]
    if achievements_by_month.sum() > 0:
        dimensions.append(
            Dimension(
                name=metric_titles[SectionEnum.job_opportunities.name][
                    MetricTypeEnum.count.name + "_time"
                ],
                viz=VizTypeEnum.line.name,
                lookup=LookupTypeEnum.time.name,
                values=[
                    MetricValue(key=months[i], value=value)
                    for i, value in enumerate(achievements_by_month)
                ],
            )
        )

    # The per-province frame is indexed by the consolidated workbook's
    # spelling of the programme name, so always translate through dash_to_cons.
    # The province-key lookup previously used the untranslated name and raised
    # KeyError for programmes whose spelling differs between the two workbooks.
    province_counts = prov_job_op_df.loc[dash_to_cons[programme_name]]
    if province_counts.sum() > 0:
        dimensions.append(
            Dimension(
                name=metric_titles[SectionEnum.job_opportunities.name][
                    MetricTypeEnum.count.name + "_province"
                ],
                viz=VizTypeEnum.bar.name,
                lookup=LookupTypeEnum.province.name,
                values=[
                    MetricValue(key=province_to_abbrev[province], value=value)
                    for province, value in province_counts.items()
                ],
            )
        )

    job_op_metrics.append(
        Metric(
            name=programme_name,
            metric_type=MetricTypeEnum.count.name,
            # Last monthly column is used as the current value.
            value=int(achievements_by_month.iloc[-1]),
            value_target=int(job_op_targets_df.loc[programme_name].iloc[0]),
            dimensions=dimensions,
        )
    )

section = Section(
    name=section_titles[SectionEnum.job_opportunities.name],
    section_type=SectionEnum.job_opportunities.name,
    metrics=job_op_metrics,
)

# enrolled_nurses_target = int(
#     opportunity_targets_df.iloc[department.target_lines[0] + 2, 2]
# )
# enrolled_nurses = opportunity_achievements_df.iloc[
#     department.achievement_lines[0] + 2, achievement_columns
# ].tolist()

# auxiliary_nurse_df = pd.concat(
#     [doh_df.iloc[15:24, 0], doh_df.iloc[15:24, 4]], axis=1
# ).set_index(0)

# auxiliary_nurses_target = int(
#     opportunity_targets_df.iloc[department.target_lines[0] + 3, 2]
# )
# auxiliary_nurses = opportunity_achievements_df.iloc[
#     department.achievement_lines[0] + 3, achievement_columns
# ].tolist()


# section = Section(
#     name=section_titles[SectionEnum.job_opportunities.name],
#     section_type=SectionEnum.job_opportunities.name,
#     metrics=[
#         Metric(
#             name="Enrolled nurses",  # same as Staff Nurses
#             metric_type=MetricTypeEnum.count.name,
#             value=enrolled_nurses[-1],
#             value_target=enrolled_nurses_target,
#             dimensions=[
#                 Dimension(
#                     name=metric_titles[SectionEnum.job_opportunities.name][
#                         MetricTypeEnum.count.name + "_province"
#                     ],
#                     viz=VizTypeEnum.bar.name,
#                     lookup=LookupTypeEnum.province.name,
#                     values=[
#                         MetricValue(
#                             key=province_to_abbrev[province],
#                             value=int(enrolled_nurse_df.loc[province]),
#                         )
#                         for province in enrolled_nurse_df.index
#                     ],
#                 ),
#                 Dimension(
#                     name=metric_titles[SectionEnum.job_opportunities.name][
#                         MetricTypeEnum.count.name + "_time"
#                     ],
#                     viz=VizTypeEnum.line.name,
#                     lookup=LookupTypeEnum.time.name,
#                     values=[
#                         MetricValue(key=months[i], value=value)
#                         for i, value in enumerate(enrolled_nurses)
#                     ],
#                 ),
#             ],
#         ),
#         Metric(
#             name="Auxiliary nurses",  # same as Assistant Nurses
#             metric_type=MetricTypeEnum.count.name,
#             value=auxiliary_nurses[-1],
#             value_target=auxiliary_nurses_target,
#             dimensions=[
#                 Dimension(
#                     name=metric_titles[SectionEnum.job_opportunities.name][
#                         MetricTypeEnum.count.name + "_province"
#                     ],
#                     viz=VizTypeEnum.bar.name,
#                     lookup=LookupTypeEnum.province.name,
#                     values=[
#                         MetricValue(
#                             key=province_to_abbrev[province],
#                             value=int(auxiliary_nurse_df.loc[province]),
#                         )
#                         for province in auxiliary_nurse_df.index
#                     ],
#                 ),
#                 Dimension(
#                     name=metric_titles[SectionEnum.job_opportunities.name][
#                         MetricTypeEnum.count.name + "_time"
#                     ],
#                     viz=VizTypeEnum.line.name,
#                     lookup=LookupTypeEnum.time.name,
#                     values=[
#                         MetricValue(key=months[i], value=value)
#                         for i, value in enumerate(auxiliary_nurses)
#                     ],
#                 ),
#             ],
#         ),
#     ],
# )

# chw_programmes = [
#     Metric(
#         name=opportunity_targets_df.iloc[department.target_lines[0], 1],
#         metric_type=MetricTypeEnum.count.name,
#         value=int(
#             opportunity_achievements_df.fillna(0).iloc[
#                 department.achievement_lines[0], total_achievement_column
#             ]
#         ),
#         value_target=int(
#             opportunity_targets_df.iloc[department.achievement_lines[0], 2]
#         ),
#         dimensions=[],
#     ) for
# ]

# section.metrics.extend(chw_programmes)

department.sections.append(section)

# Jobs retained

# NO DATA

# Livelihoods

# NO DATA

all_data.departments = add_or_replace(all_data.departments, department)
print(
    department.to_json(indent=2),
    file=open(output_dir + "/" + sheet_name + ".json", "w"),
)

# print(department.to_json(indent=2))

## DALRRD

In [13]:
# DALRRD: department record, consolidated sheet and headline targets.
sheet_name = "DALRRD"
department_name = "Agriculture, Land Reform and Rural Development"

# target_lines / achievement_lines are row positions used as iloc bounds into
# opportunity_targets_df and opportunity_achievements_df below.
department = Department(
    month=202101,
    name=department_name,
    sheet_name=sheet_name,
    lead=leads[sheet_name],
    paragraph=paragraphs[sheet_name],
    sections=[],
    target_lines=[8, 14],
    achievement_lines=[12, 17],
    implementation_details=[],
)

# header=None: no row is consumed as a header, so every iloc lookup into
# dallrd_df is a raw position in the worksheet grid.
dallrd_df = pd.read_excel(latest_excel, sheet_name=sheet_name, header=None)

dallrd_budget_target = 1_000_000_000

# Target: a single cell (first target row, column 2).
dallrd_first_target_row = department.target_lines[0]
dallrd_livelihoods_target = opportunity_targets_df.iloc[dallrd_first_target_row, 2]

# Achieved: the total-achievement column summed over the achievement rows.
dallrd_achievement_rows = slice(
    department.achievement_lines[0], department.achievement_lines[1]
)
dallrd_livelihoods_achieved = opportunity_achievements_df.iloc[
    dallrd_achievement_rows, total_achievement_column
].sum()

# Overall targets: one currency metric (budget) and one count metric.
dallrd_targets_key = SectionEnum.targets.name
dallrd_budget_metric = Metric(
    name=metric_titles[dallrd_targets_key][MetricTypeEnum.currency.name],
    metric_type=MetricTypeEnum.currency.name,
    # NOTE(review): -1 looks like a "no spend figure" placeholder; confirm.
    value=-1,
    value_target=dallrd_budget_target,
    dimensions=[],
)
dallrd_count_metric = Metric(
    name=metric_titles[dallrd_targets_key][MetricTypeEnum.count.name],
    metric_type=MetricTypeEnum.count.name,
    value=dallrd_livelihoods_achieved,
    value_target=dallrd_livelihoods_target,
    dimensions=[],
)
section = Section(
    name=section_titles[dallrd_targets_key],
    section_type=dallrd_targets_key,
    metrics=[dallrd_budget_metric, dallrd_count_metric],
)

department.sections.append(section)

# Budget allocation

# NO INFO AVAILABLE

# Job opportunities - for DALRRD this is graduate employment

# Consolidated sheet positions read below: rows 31-39 hold province (column 0)
# and count (column 1); row 40 is read as the total, rows 42-44 as the male,
# female and youth shares.
graduate_employment_by_province_df = dallrd_df.iloc[31:40, :2].set_index(0)
graduate_employment_over_time = opportunity_achievements_df.iloc[
    department.achievement_lines[0] + 5, achievement_columns
]
graduate_employment_total = int(dallrd_df.iloc[40, 1])
male_employment = float(dallrd_df.iloc[42, 1])
female_employment = float(dallrd_df.iloc[43, 1])
youth_employment = float(dallrd_df.iloc[44, 1])
section = Section(
    name=section_titles[SectionEnum.job_opportunities.name],
    section_type=SectionEnum.job_opportunities.name,
    metrics=[
        Metric(
            name="Graduate Employment",
            metric_type=MetricTypeEnum.count.name,
            value=graduate_employment_total,
            dimensions=[
                Dimension(
                    name=metric_titles[SectionEnum.job_opportunities.name][
                        MetricTypeEnum.count.name + "_time"
                    ],
                    viz=VizTypeEnum.line.name,
                    lookup=LookupTypeEnum.time.name,
                    values=[
                        MetricValue(key=months[i], value=value)
                        for (i, value) in enumerate(graduate_employment_over_time)
                    ],
                ),
                Dimension(
                    name=metric_titles[SectionEnum.job_opportunities.name][
                        MetricTypeEnum.count.name + "_province"
                    ],
                    viz=VizTypeEnum.bar.name,
                    lookup=LookupTypeEnum.province.name,
                    values=[
                        MetricValue(
                            key=province_to_abbrev[province],
                            # .loc[province] is a one-element Series (the single
                            # remaining column); take the scalar explicitly, as
                            # int() on a Series is deprecated in recent pandas.
                            value=int(
                                graduate_employment_by_province_df.loc[province].iloc[
                                    0
                                ]
                            ),
                        )
                        for province in graduate_employment_by_province_df.index
                    ],
                ),
                Dimension(
                    name=metric_titles[SectionEnum.job_opportunities.name][
                        MetricTypeEnum.count.name + "_gender"
                    ],
                    viz=VizTypeEnum.two_value.name,
                    lookup=LookupTypeEnum.gender.name,
                    values=[
                        MetricValue(key=GenderEnum.Male.name, value=male_employment),
                        MetricValue(
                            key=GenderEnum.Female.name, value=female_employment
                        ),
                    ],
                ),
                Dimension(
                    name=metric_titles[SectionEnum.job_opportunities.name][
                        MetricTypeEnum.count.name + "_age"
                    ],
                    viz=VizTypeEnum.percentile.name,
                    lookup=LookupTypeEnum.age.name,
                    values=[MetricValue(key="18-35", value=youth_employment)],
                ),
            ],
        )
    ],
)

department.sections.append(section)

# # Jobs retained

# # NO DATA

# # Livelihoods

# Livelihoods inputs for DALRRD.
# The consolidated sheet has 5 programmes in columns 1 through 5: row 10 is
# the programme header, rows 11-19 are indexed by column 0 (used as province
# keys downstream) and row 20 is read as the totals.
dallrd_programme_columns = slice(1, 6)

livelihoods_by_province = dallrd_df.iloc[11:20, :6].set_index(0)
livelihoods_programme_names = [
    header.strip() for header in dallrd_df.iloc[10, dallrd_programme_columns]
]
livelihoods_by_province.columns = livelihoods_programme_names
livelihoods_totals = list(dallrd_df.iloc[20, dallrd_programme_columns])

# Monthly achievements per programme: programme name (column 1) alongside the
# monthly columns, indexed by the whitespace-stripped programme name.
dallrd_livelihood_rows = slice(
    department.achievement_lines[0], department.achievement_lines[1]
)
dallrd_livelihood_names = opportunity_achievements_df.iloc[dallrd_livelihood_rows, 1]
dallrd_livelihood_months = opportunity_achievements_df.iloc[
    dallrd_livelihood_rows, achievement_columns
]
livelihoods_achievements_df = pd.concat(
    [dallrd_livelihood_names, dallrd_livelihood_months], axis=1
).set_index(1)
livelihoods_achievements_df.index = pd.Index(
    [label.strip() for label in livelihoods_achievements_df.index]
)

# One Metric per DALRRD livelihoods programme.
# `i` is the programme's position in livelihoods_programme_names. The
# demographic figures are read from rows 24-28 of the consolidated sheet, one
# column per programme starting at column 1, hence the `1 + i` offsets.
livelihood_programme_metrics = [
    Metric(
        name=programme_name,
        metric_type=MetricTypeEnum.count.name,
        # Latest value = last monthly column for this programme.
        value=livelihoods_achievements_df.loc[programme_name].iloc[-1],
        dimensions=[
            Dimension(
                name=metric_titles[SectionEnum.livelihoods.name][
                    MetricTypeEnum.count.name + "_province"
                ],
                viz=VizTypeEnum.bar.name,
                lookup=LookupTypeEnum.province.name,
                values=[
                    MetricValue(
                        key=province_to_abbrev[province],
                        value=livelihoods_by_province.loc[province, programme_name],
                    )
                    for province in livelihoods_by_province.index
                ],
            ),
            # Gender split: row 24 is read as the male figure, row 25 as female.
            Dimension(
                name=metric_titles[SectionEnum.livelihoods.name][
                    MetricTypeEnum.count.name + "_gender"
                ],
                viz=VizTypeEnum.two_value.name,
                lookup=LookupTypeEnum.gender.name,
                values=[
                    MetricValue(
                        key=GenderEnum.Male.name,
                        value=float(dallrd_df.iloc[24].iloc[1 + i]),
                    ),
                    MetricValue(
                        key=GenderEnum.Female.name,
                        value=float(dallrd_df.iloc[25].iloc[1 + i]),
                    ),
                ],
            ),
            # Monthly series; uses its own index so the programme index `i`
            # is not shadowed.
            Dimension(
                name=metric_titles[SectionEnum.livelihoods.name][
                    MetricTypeEnum.count.name + "_time"
                ],
                viz=VizTypeEnum.line.name,
                lookup=LookupTypeEnum.time.name,
                values=[
                    MetricValue(
                        key=months[month_index],
                        value=int(
                            livelihoods_achievements_df.loc[programme_name].iloc[
                                month_index
                            ]
                        ),
                    )
                    for month_index in range(len(months))
                ],
            ),
            # Youth share: row 26.
            Dimension(
                name=metric_titles[SectionEnum.livelihoods.name][
                    MetricTypeEnum.count.name + "_age"
                ],
                viz=VizTypeEnum.percentile.name,
                lookup=LookupTypeEnum.age.name,
                values=[
                    MetricValue(
                        key="18-35", value=float(dallrd_df.iloc[26].iloc[1 + i])
                    )
                ],
            ),
            # Row 27 feeds the "vets" dimension.
            Dimension(
                name=metric_titles[SectionEnum.livelihoods.name][
                    MetricTypeEnum.count.name + "_vets"
                ],
                viz=VizTypeEnum.count.name,
                lookup=LookupTypeEnum.vets.name,
                values=[
                    MetricValue(key="vets", value=int(dallrd_df.iloc[27].iloc[1 + i]))
                ],
            ),
            # Row 28 feeds the "disabled" dimension; its value is keyed
            # "disabled" to match the dimension's lookup.
            # NOTE(review): confirm the consumer's lookup table uses this key.
            Dimension(
                name=metric_titles[SectionEnum.livelihoods.name][
                    MetricTypeEnum.count.name + "_disabled"
                ],
                viz=VizTypeEnum.count.name,
                lookup=LookupTypeEnum.disabled.name,
                values=[
                    MetricValue(
                        key="disabled", value=int(dallrd_df.iloc[28].iloc[1 + i])
                    )
                ],
            ),
        ],
    )
    for i, programme_name in enumerate(livelihoods_programme_names)
]

section = Section(
    name=section_titles[SectionEnum.livelihoods.name],
    section_type=SectionEnum.livelihoods.name,
    metrics=livelihood_programme_metrics,
)

department.sections.append(section)

# Implementation status for one DALRRD programme: translate the name used in
# the targets sheet to the name used in the implementation-status table.
target_programme_name = "Subsistence relief fund"
programme_name = target_to_imp_programme_mapping[target_programme_name]

# Rows of the status table for this programme; only the first match is used.
implementation_status = implementation_status_df.loc[
    implementation_status_df["Programme"] == programme_name
]
dallrd_status_label = implementation_status["Status"].iloc[0]
dallrd_implementation_detail = ImplementationDetail(
    programme_name=programme_name,
    status=implementation_status_to_enum[dallrd_status_label],
    detail=implementation_status["Detail"].iloc[0],
)
department.implementation_details.append(dallrd_implementation_detail)

# Store the updated department list back on all_data.departments.
# (add_or_replace is defined outside this cell; presumably it inserts the
# department or replaces an existing entry for it.)
all_data.departments = add_or_replace(all_data.departments, department)

# Serialise before opening the file so a failure in to_json() cannot leave a
# truncated JSON file behind, then write through a context manager so the
# handle is flushed and closed deterministically.
department_json = department.to_json(indent=2)
with open(output_dir + "/" + sheet_name + ".json", "w") as department_file:
    print(department_json, file=department_file)

# print(department.to_json(indent=2))

## DSI

In [14]:
# DSI: department record, headline figures and consolidated sheet.
sheet_name = "DSI"
department_name = "Science and Innovation"

# target_lines / achievement_lines are row positions used as iloc bounds into
# opportunity_targets_df and opportunity_achievements_df below.
department = Department(
    month=202101,
    name=department_name,
    sheet_name=sheet_name,
    lead=leads[sheet_name],
    paragraph=paragraphs[sheet_name],
    sections=[],
    target_lines=[43, 47],
    achievement_lines=[47, 51],
    implementation_details=[],
)


dsi_budget_target = 45_000_000

# Department target = sum of column 2 over the programme target rows
# (the 4 programmes).
dsi_target_rows = slice(department.target_lines[0], department.target_lines[1])
dsi_jobs_target = opportunity_targets_df.iloc[dsi_target_rows, 2].sum()

# Department achievement = total-achievement column summed over the same
# number of achievement rows.
dsi_achievement_rows = slice(
    department.achievement_lines[0], department.achievement_lines[1]
)
dsi_jobs_achieved = opportunity_achievements_df.iloc[
    dsi_achievement_rows, total_achievement_column
].sum()

# This read does not pass header=None, so pandas consumes the first sheet row
# as the column header; iloc positions on dsi_df therefore start at the
# sheet's second row.
dsi_df = pd.read_excel(latest_excel, sheet_name=department.sheet_name)

# dsi_programme_targets_df = opportunity_targets_df.iloc[
#     department.target_lines[0] : department.target_lines[1], 1:3
# ].set_index(1)
# programme_target_metrics = [
#     Metric(
#         name=programme_name,
#         metric_type=MetricTypeEnum.count.name,
#         value=dsi_programme_targets_df.loc[programme_name].iloc[0],
#         dimensions=[],
#     )
#     for programme_name in list(dsi_programme_targets_df.index)
# ]

# Overall targets: one currency metric (budget) and one count metric (jobs).
dsi_targets_key = SectionEnum.targets.name
dsi_budget_metric = Metric(
    name=metric_titles[dsi_targets_key][MetricTypeEnum.currency.name],
    metric_type=MetricTypeEnum.currency.name,
    # NOTE(review): -1 looks like a "no spend figure" placeholder; confirm.
    value=-1,
    value_target=dsi_budget_target,
    dimensions=[],
)
dsi_jobs_metric = Metric(
    name="Overall jobs created",
    metric_type=MetricTypeEnum.count.name,
    value=dsi_jobs_achieved,
    value_target=dsi_jobs_target,
    dimensions=[],
)
section = Section(
    name=section_titles[dsi_targets_key],
    section_type=dsi_targets_key,
    metrics=[dsi_budget_metric, dsi_jobs_metric],
)
# Per-programme target metrics are intentionally not appended here:
# section.metrics.extend(programme_target_metrics)

department.sections.append(section)

# Job creation results

# Per-programme targets, indexed by the programme name found in column 1.
dsi_target_rows = slice(department.target_lines[0], department.target_lines[1])
dsi_targets_df = opportunity_targets_df.iloc[dsi_target_rows, 1:3].set_index(1)

# Per-programme monthly achievements, indexed the same way.
dsi_achievement_rows = slice(
    department.achievement_lines[0], department.achievement_lines[1]
)
dsi_achievement_names = opportunity_achievements_df.iloc[dsi_achievement_rows, 1]
dsi_achievement_months = opportunity_achievements_df.iloc[
    dsi_achievement_rows, achievement_columns
]
dsi_achievements_df = pd.concat(
    [dsi_achievement_names, dsi_achievement_months], axis=1
).set_index(1)

# Programme headers as they appear in the consolidated DSI sheet (stray
# whitespace included) -> programme names used to index dsi_achievements_df.
programme_lookup = {
    "HSRC - Health Promotion Agents": "HSRC - Health Promotion Agents",
    "DUCT- Enviro-champs": "DUCT - Enviro-champs",
    "CSIR - Experiential Training Programme ": "CSIR - Experiential Training Programme",
    " WRC - Water Graduate Employment Programme ": "WRC - Water Graduate Employment Programme",
}

# Provincial table: transposed so each row is a programme (indexed by the
# header taken from row 7) and each column a province label from column 0.
provinces = list(dsi_df.iloc[8:17, 0])
by_province_df = dsi_df.iloc[7:17, 1:5].T.set_index(7).fillna(0)
by_province_df.columns = provinces
programmes = list(by_province_df.index)

# Filled by the per-programme loop: achievements-table name -> dimensions.
by_programme_dims = {}

# Rows of dsi_df read for the gender and age figures (the female figure is
# read from gender_row + 1).
gender_row = 34
age_row = 36

# Build the list of chart dimensions for each DSI programme. A dimension is
# only emitted when its underlying figure is greater than zero.
dsi_jobs_titles = metric_titles[SectionEnum.job_opportunities.name]
for i, programme_name in enumerate(programmes):
    dsi_achievement_key = programme_lookup[programme_name]
    dsi_monthly_counts = dsi_achievements_df.loc[dsi_achievement_key]
    dsi_sheet_column = i + 1  # programme i sits in column i + 1 of dsi_df
    dimensions = []

    # Over time: only when the latest monthly value is positive.
    if dsi_monthly_counts.iloc[-1] > 0:
        dimensions.append(
            Dimension(
                name=dsi_jobs_titles[MetricTypeEnum.count.name + "_time"],
                viz=VizTypeEnum.line.name,
                lookup=LookupTypeEnum.time.name,
                values=[
                    MetricValue(key=months[month_index], value=count)
                    for month_index, count in enumerate(dsi_monthly_counts)
                ],
            )
        )

    # By province: only when the provincial row has a positive sum.
    if by_province_df.loc[programme_name].sum() > 0:
        dimensions.append(
            Dimension(
                name=dsi_jobs_titles[MetricTypeEnum.count.name + "_province"],
                viz=VizTypeEnum.bar.name,
                lookup=LookupTypeEnum.province.name,
                values=[
                    MetricValue(
                        key=province_to_abbrev[province],
                        value=int(by_province_df.loc[programme_name, province]),
                    )
                    for province in provinces
                ],
            )
        )

    # Registered mid-loop: the dict stores this same list object, so the
    # gender and age dimensions appended below are included as well.
    by_programme_dims[dsi_achievement_key] = dimensions

    # Gender: male figure in gender_row, female figure in the row below.
    dsi_male_share = float(dsi_df.iloc[gender_row, dsi_sheet_column])
    if dsi_male_share > 0:
        dimensions.append(
            Dimension(
                name=dsi_jobs_titles[MetricTypeEnum.count.name + "_gender"],
                viz=VizTypeEnum.two_value.name,
                lookup=LookupTypeEnum.gender.name,
                values=[
                    MetricValue(key=GenderEnum.Male.name, value=dsi_male_share),
                    MetricValue(
                        key=GenderEnum.Female.name,
                        value=float(dsi_df.iloc[gender_row + 1, dsi_sheet_column]),
                    ),
                ],
            )
        )

    # Age: a single "18-35" figure from age_row.
    dsi_youth_share = float(dsi_df.iloc[age_row, dsi_sheet_column])
    if dsi_youth_share > 0:
        dimensions.append(
            Dimension(
                name=dsi_jobs_titles[MetricTypeEnum.count.name + "_age"],
                viz=VizTypeEnum.percentile.name,
                lookup=LookupTypeEnum.age.name,
                values=[MetricValue(key="18-35", value=dsi_youth_share)],
            )
        )

# One Metric per programme listed in the targets table. Programmes without an
# entry in by_programme_dims get an empty list of dimensions.
dsi_job_metrics = [
    Metric(
        name=dsi_programme.strip(),
        metric_type=MetricTypeEnum.count.name,
        value=dsi_achievements_df.loc[dsi_programme].iloc[-1],
        value_target=dsi_targets_df.loc[dsi_programme].iloc[-1],
        dimensions=by_programme_dims.get(dsi_programme, []),
    )
    for dsi_programme in dsi_targets_df.index
]

dsi_jobs_key = SectionEnum.job_opportunities.name
department.sections.append(
    Section(
        name=section_titles[dsi_jobs_key],
        section_type=dsi_jobs_key,
        metrics=dsi_job_metrics,
    )
)


# Store the updated department list back on all_data.departments.
# (add_or_replace is defined outside this cell; presumably it inserts the
# department or replaces an existing entry for it.)
all_data.departments = add_or_replace(all_data.departments, department)

# Serialise before opening the file so a failure in to_json() cannot leave a
# truncated JSON file behind, then write through a context manager so the
# handle is flushed and closed deterministically.
department_json = department.to_json(indent=2)
with open(output_dir + "/" + sheet_name + ".json", "w") as department_file:
    print(department_json, file=department_file)

# print(department.to_json(indent=2))

## DSAC

In [15]:
# DSAC: department record, consolidated sheet and headline targets.
sheet_name = "DSAC"
department_name = "Sports, Arts and Culture"

# target_lines / achievement_lines are row positions used as iloc bounds into
# opportunity_targets_df and opportunity_achievements_df below.
department = Department(
    month=202101,
    name=department_name,
    sheet_name=sheet_name,
    lead=leads[sheet_name],
    paragraph=paragraphs[sheet_name],
    sections=[],
    target_lines=[25, 37],
    achievement_lines=[29, 41],
    implementation_details=[],
)

# header=None: no row is consumed as a header, so every iloc lookup into
# dsac_df is a raw position in the worksheet grid.
dsac_df = pd.read_excel(latest_excel, sheet_name=sheet_name, header=None)

dsac_budget_target = 665_000_000

# Target = column 2 summed over the department's target rows.
dsac_target_rows = slice(department.target_lines[0], department.target_lines[1])
dsac_support_target = opportunity_targets_df.iloc[dsac_target_rows, 2].sum()

# Achievement = total-achievement column summed over the achievement rows.
dsac_achievement_rows = slice(
    department.achievement_lines[0], department.achievement_lines[1]
)
dsac_support_achievement = opportunity_achievements_df.iloc[
    dsac_achievement_rows, total_achievement_column
].sum()


# Overall targets: one currency metric (budget) and one count metric.
dsac_targets_key = SectionEnum.targets.name
dsac_budget_metric = Metric(
    name=metric_titles[dsac_targets_key][MetricTypeEnum.currency.name],
    metric_type=MetricTypeEnum.currency.name,
    # NOTE(review): -1 looks like a "no spend figure" placeholder; confirm.
    value=-1,
    value_target=dsac_budget_target,
    dimensions=[],
)
dsac_count_metric = Metric(
    name=metric_titles[dsac_targets_key][MetricTypeEnum.count.name],
    metric_type=MetricTypeEnum.count.name,
    value=dsac_support_achievement,
    value_target=dsac_support_target,
    dimensions=[],
)
section = Section(
    name=section_titles[dsac_targets_key],
    section_type=dsac_targets_key,
    metrics=[dsac_budget_metric, dsac_count_metric],
)

department.sections.append(section)

# Job creation results
#
# This block only prepares lookup tables for the per-programme loop further
# down in this cell. The cre_/ret_/liv_ prefixes mark three separate regions of
# the DSAC sheet; they are concatenated column-wise so that one table per
# breakdown (province, gender, age) can be indexed by programme name.
# strip_ws is defined outside this chunk (presumably it strips whitespace from
# each header value; confirm).

# Monthly achievements per programme: programme name (column 1) next to the
# monthly columns, indexed by programme name.
programme_achievements_df = pd.concat(
    [
        opportunity_achievements_df.iloc[
            department.achievement_lines[0] : department.achievement_lines[1], 1
        ],
        opportunity_achievements_df.iloc[
            department.achievement_lines[0] : department.achievement_lines[1],
            achievement_columns,
        ],
    ],
    axis=1,
).set_index(1)

# Targets per programme (columns 1-2 of the targets table), same index.
programme_targets_df = opportunity_targets_df.iloc[
    department.target_lines[0] : department.target_lines[1], 1:3
].set_index(1)

# Provincial tables: rows indexed by column 0, columns renamed from the header
# row directly above each region (rows 9, 54 and 86).
cre_provincial_breakdown = dsac_df.iloc[10:19, :8].fillna(0).set_index(0)
cre_provincial_breakdown.columns = strip_ws(dsac_df.iloc[9, 1:8])

ret_provincial_breakdown = dsac_df.iloc[55:64, :4].fillna(0).set_index(0)
ret_provincial_breakdown.columns = strip_ws(dsac_df.iloc[54, 1:4])
# NOTE(review): bare expression in the middle of a cell; it displays nothing
# (only a cell's last expression is rendered). Leftover from inspection.
ret_provincial_breakdown

liv_provincial_breakdown = dsac_df.iloc[87:96, :3].fillna(0).set_index(0)
liv_provincial_breakdown.columns = strip_ws(dsac_df.iloc[86, 1:3])
# NOTE(review): no-op bare expression, as above.
liv_provincial_breakdown

provincial_breakdown = pd.concat(
    [cre_provincial_breakdown, ret_provincial_breakdown, liv_provincial_breakdown],
    axis=1,
).fillna(0)

# Gender tables: two rows per region, headers from rows 35, 68 and 100.
# Note the first two call fillna(0) on the whole sheet before slicing, the
# third slices first; the order is kept as-is.
cre_gender_breakdown = dsac_df.fillna(0).iloc[36:38, :8].set_index(0)
cre_gender_breakdown.columns = strip_ws(dsac_df.iloc[35, 1:8])

ret_gender_breakdown = dsac_df.fillna(0).iloc[69:71, :4].set_index(0)
ret_gender_breakdown.columns = strip_ws(dsac_df.iloc[68, 1:4])

liv_gender_breakdown = dsac_df.iloc[101:103, :3].fillna(0).set_index(0)
liv_gender_breakdown.columns = strip_ws(dsac_df.iloc[100, 1:3])
# NOTE(review): no-op bare expression, as above.
liv_gender_breakdown

gender_breakdown = pd.concat(
    [cre_gender_breakdown, ret_gender_breakdown, liv_gender_breakdown], axis=1
).fillna(0)

# Age tables: a single row per region (rows 38, 71 and 103).
# NOTE(review): the cre_ age headers come from row 9 (the provincial header
# row) whereas the cre_ gender headers come from row 35; this only works if
# both rows list the same programmes in the same order. Confirm.
cre_age_breakdown = dsac_df.iloc[38:39, :8].set_index(0)
cre_age_breakdown.columns = strip_ws(dsac_df.iloc[9, 1:8])
# NOTE(review): no-op bare expression, as above.
cre_age_breakdown

ret_age_breakdown = dsac_df.iloc[71:72, :4].set_index(0)
ret_age_breakdown.columns = strip_ws(dsac_df.iloc[68, 1:4])

liv_age_breakdown = dsac_df.iloc[103:104, :3].fillna(0).set_index(0)
liv_age_breakdown.columns = strip_ws(dsac_df.iloc[100, 1:3])

# Missing values are only filled after concatenation for the first two tables.
age_breakdown = pd.concat(
    [cre_age_breakdown, ret_age_breakdown, liv_age_breakdown], axis=1
).fillna(0)


# programme_achievement_metrics_jobs = [
#     Metric(
#         name=programme_name,
#         metric_type=MetricTypeEnum.count.name,
#         value=programme_achievements_df.loc[programme_name].iloc[-1],
#         time=TimeValues(
#             name=metric_titles[SectionEnum.job_opportunities.name][
#                 MetricTypeEnum.count.name + "_time"
#             ],
#             values=[
#                 TimeValue(month=months[i], name=month_names[i], value=value)
#                 for i, value in enumerate(
#                     list(programme_achievements_df.loc[programme_name])
#                 )
#             ],
#         )
#         if programme_achievements_df.loc[programme_name].sum() > 0
#         else None,
#         gender=None,
#         age=None,
#         province=None,
#     )
#     for programme_name in list(programme_achievements_df.index)
#     if opportunity_type_df.loc[programme_name].iloc[0] == "CRE"
# ]

def adjust_percentage(x):
    """Normalise a share so that it is expressed as a fraction.

    Values greater than 1 are treated as percentages and divided by 100;
    values of 1 or less are returned unchanged (as ``float``).

    Args:
        x: Anything accepted by ``float()`` (number or numeric string).

    Returns:
        The share as a ``float``.

    Raises:
        ValueError / TypeError: if ``x`` cannot be converted by ``float()``.
    """
    share = float(x)
    # Exactly 1 is kept as-is, i.e. interpreted as 100% rather than 1%.
    return share / 100 if share > 1 else share
# Row labels of gender_breakdown, exactly as they appear in the DSAC sheet
# (note the trailing space on the male label), mapped to GenderEnum names.
gender_index_to_key = {
    "Percentage (%) male ": GenderEnum.Male.name,
    "Percentage (%) female": GenderEnum.Female.name,
}

# Build one Section per opportunity type. filter_by is the code compared
# against opportunity_type_df to decide which programmes belong to a section.
for section_type, name, filter_by in [
    (
        SectionEnum.job_opportunities.name,
        section_titles[SectionEnum.job_opportunities.name],
        "CRE",
    ),
    (SectionEnum.jobs_retain.name, section_titles[SectionEnum.jobs_retain.name], "RET"),
    (SectionEnum.livelihoods.name, section_titles[SectionEnum.livelihoods.name], "LIV"),
]:
    programme_achievement_metrics = []
    for programme_name in [pn.strip() for pn in programme_achievements_df.index]:
        if opportunity_type_df.loc[programme_name].iloc[0] != filter_by:
            continue
        dimensions = []
        # Over time: only when the programme has any recorded achievement.
        if programme_achievements_df.loc[programme_name].sum() > 0:
            dimensions.append(
                Dimension(
                    # NOTE(review): the time title is always taken from the
                    # job_opportunities titles, even for the other section
                    # types; confirm whether section_type was intended.
                    name=metric_titles[SectionEnum.job_opportunities.name][
                        MetricTypeEnum.count.name + "_time"
                    ],
                    viz=VizTypeEnum.line.name,
                    lookup=LookupTypeEnum.time.name,
                    values=[
                        MetricValue(key=months[i], value=value)
                        for i, value in enumerate(
                            list(programme_achievements_df.loc[programme_name])
                        )
                    ],
                )
            )
        # Gender: only when the two gender rows sum to more than zero.
        if pd.to_numeric(gender_breakdown.loc[:, programme_name]).sum() > 0.0:
            dimensions.append(
                Dimension(
                    name=metric_titles[section_type][
                        MetricTypeEnum.count.name + "_gender"
                    ],
                    viz=VizTypeEnum.two_value.name,
                    lookup=LookupTypeEnum.gender.name,
                    values=[
                        MetricValue(
                            key=gender_index_to_key[gender],
                            value=adjust_percentage(
                                gender_breakdown.loc[gender, programme_name]
                            ),
                        )
                        for gender in gender_breakdown.index
                    ],
                )
            )
        # Age: single-row table; emitted when the value is non-zero.
        if float(age_breakdown.loc[:, programme_name].iloc[0]) != 0:
            dimensions.append(
                Dimension(
                    # Titled with the "_age" entry so the age chart does not
                    # carry the provincial chart's title.
                    name=metric_titles[section_type][
                        MetricTypeEnum.count.name + "_age"
                    ],
                    viz=VizTypeEnum.percentile.name,
                    lookup=LookupTypeEnum.age.name,
                    values=[
                        MetricValue(
                            key=age_breakdown.index[0].strip(),
                            value=adjust_percentage(
                                age_breakdown.loc[:, programme_name].iloc[0]
                            ),
                        )
                    ],
                )
            )
        # Province: emitted when the provincial column has a non-zero sum.
        if provincial_breakdown.loc[:, programme_name].sum() != 0:
            dimensions.append(
                Dimension(
                    name=metric_titles[section_type][
                        MetricTypeEnum.count.name + "_province"
                    ],
                    viz=VizTypeEnum.bar.name,
                    lookup=LookupTypeEnum.province.name,
                    values=[
                        MetricValue(
                            key=province_to_abbrev[province],
                            value=int(
                                provincial_breakdown.loc[province, programme_name]
                            ),
                        )
                        for province in provincial_breakdown.index
                    ],
                )
            )

        programme_achievement_metrics.append(
            Metric(
                name=programme_name.strip(),
                metric_type=MetricTypeEnum.count.name,
                # Latest value = last monthly column for this programme.
                value=programme_achievements_df.loc[programme_name].iloc[-1],
                dimensions=dimensions,
                value_target=programme_targets_df.loc[programme_name].iloc[0],
            )
        )
    section = Section(
        name=name, section_type=section_type, metrics=programme_achievement_metrics
    )
    department.sections.append(section)

# Store the updated department list back on all_data.departments.
# (add_or_replace is defined outside this cell; presumably it inserts the
# department or replaces an existing entry for it.)
all_data.departments = add_or_replace(all_data.departments, department)

# Serialise before opening the file so a failure in to_json() cannot leave a
# truncated JSON file behind, then write through a context manager so the
# handle is flushed and closed deterministically.
department_json = department.to_json(indent=2)
with open(output_dir + "/" + sheet_name + ".json", "w") as department_file:
    print(department_json, file=department_file)

# print(department.to_json(indent=2))

## DoT

In [16]:
# DoT: department record, consolidated sheet and headline targets.
sheet_name = "DoT"
department_name = "Transport"

# target_lines / achievement_lines are row positions used as iloc bounds into
# opportunity_targets_df and opportunity_achievements_df below.
department = Department(
    month=202101,
    name=department_name,
    sheet_name=sheet_name,
    lead=leads[sheet_name],
    paragraph=paragraphs[sheet_name],
    sections=[],
    target_lines=[24, 25],
    achievement_lines=[28, 29],
    implementation_details=[],
)

# header=None: no row is consumed as a header, so every iloc lookup into
# dot_df is a raw position in the worksheet grid.
dot_df = pd.read_excel(latest_excel, sheet_name=sheet_name, header=None)

dot_budget_target = 630_000_000

# Both bounds span a single row here; the sums mirror the other departments.
dot_target_rows = slice(department.target_lines[0], department.target_lines[1])
dot_support_target = opportunity_targets_df.iloc[dot_target_rows, 2].sum()

dot_achievement_rows = slice(
    department.achievement_lines[0], department.achievement_lines[1]
)
dot_support_achieved = opportunity_achievements_df.iloc[
    dot_achievement_rows, total_achievement_column
].sum()

# Overall targets: one currency metric (budget) and one count metric.
dot_targets_key = SectionEnum.targets.name
dot_budget_metric = Metric(
    name=metric_titles[dot_targets_key][MetricTypeEnum.currency.name],
    metric_type=MetricTypeEnum.currency.name,
    # NOTE(review): -1 looks like a "no spend figure" placeholder; confirm.
    value=-1,
    value_target=dot_budget_target,
    dimensions=[],
)
dot_count_metric = Metric(
    name=metric_titles[dot_targets_key][MetricTypeEnum.count.name],
    metric_type=MetricTypeEnum.count.name,
    value=dot_support_achieved,
    value_target=dot_support_target,
    dimensions=[],
)
# NOTE(review): the section title is a literal here rather than a
# section_titles lookup as in the other departments; confirm this is intended.
section = Section(
    name="Programme targets for this department",
    section_type=dot_targets_key,
    metrics=[dot_budget_metric, dot_count_metric],
)

department.sections.append(section)

# Spending targets

# TODO

# Job creation results

# Monthly achievements for the department's single programme row.
rural_road_achievement_df = opportunity_achievements_df.iloc[
    department.achievement_lines[0], achievement_columns
]
# Provincial figures: rows 15-23 of the sheet, indexed by column 0, blanks as 0.
prov_rural_road_df = dot_df.iloc[15:24, :2].fillna(0).set_index(0)

dot_jobs_key = SectionEnum.job_opportunities.name
dot_jobs_titles = metric_titles[dot_jobs_key]

# Line chart of the monthly values.
dot_time_dimension = Dimension(
    name=dot_jobs_titles[MetricTypeEnum.count.name + "_time"],
    viz=VizTypeEnum.line.name,
    lookup=LookupTypeEnum.time.name,
    values=[
        MetricValue(key=months[month_index], value=int(monthly_total))
        for month_index, monthly_total in enumerate(rural_road_achievement_df)
    ],
)

# Bar chart of the provincial values (first remaining column per province).
dot_province_dimension = Dimension(
    name=dot_jobs_titles[MetricTypeEnum.count.name + "_province"],
    viz=VizTypeEnum.bar.name,
    lookup=LookupTypeEnum.province.name,
    values=[
        MetricValue(
            key=province_to_abbrev[province_label],
            value=int(prov_rural_road_df.loc[province_label].iloc[0]),
        )
        for province_label in prov_rural_road_df.index
    ],
)

dot_rural_roads_metric = Metric(
    name="Rural roads maintenance",
    metric_type=MetricTypeEnum.count.name,
    # Latest value = last monthly column.
    value=rural_road_achievement_df.iloc[-1],
    value_target=dot_support_target,
    dimensions=[dot_time_dimension, dot_province_dimension],
)

section = Section(
    name=section_titles[dot_jobs_key],
    section_type=dot_jobs_key,
    metrics=[dot_rural_roads_metric],
)

department.sections.append(section)

# Jobs retained

# NO DATA

# Livelihoods

# NO DATA

# Store the updated department list back on all_data.departments.
# (add_or_replace is defined outside this cell; presumably it inserts the
# department or replaces an existing entry for it.)
all_data.departments = add_or_replace(all_data.departments, department)

# Serialise before opening the file so a failure in to_json() cannot leave a
# truncated JSON file behind, then write through a context manager so the
# handle is flushed and closed deterministically.
department_json = department.to_json(indent=2)
with open(output_dir + "/" + sheet_name + ".json", "w") as department_file:
    print(department_json, file=department_file)

# print(department.to_json(indent=2))

## DPWI

In [17]:
# sheet_name is both the worksheet tab read below and the key into the
# leads / paragraphs lookups.
sheet_name = "DPWI"
department_name = "Public Works and Infrastructure"

department = Department(
    name=department_name,
    sheet_name=sheet_name,
    month=202101,
    lead=leads[sheet_name],
    paragraph=paragraphs[sheet_name],
    sections=[],
    # Row positions used to index opportunity_targets_df and
    # opportunity_achievements_df further down.
    target_lines=[47, 56],
    achievement_lines=[51, 59],
    implementation_details=[],
)

# Raw consolidated worksheet for this department (no header row parsing).
dpwi_df = pd.read_excel(io=latest_excel, sheet_name=sheet_name, header=None)

dpwi_budget_target = 159_000_000

# The opportunities target is a single cell (first target row, column 2),
# while achievements are summed over the whole achievement row range.
dpwi_support_target = opportunity_targets_df.iat[department.target_lines[0], 2]
dpwi_achievement_rows = slice(*department.achievement_lines)
dpwi_support_achieved = opportunity_achievements_df.iloc[
    dpwi_achievement_rows, total_achievement_column
].sum()

# Overall targets: one currency metric (budget) and one count metric
# (opportunities achieved vs target).
dpwi_target_titles = metric_titles[SectionEnum.targets.name]
section = Section(
    section_type=SectionEnum.targets.name,
    name=section_titles[SectionEnum.targets.name],
    metrics=[
        Metric(
            metric_type=MetricTypeEnum.currency.name,
            name=dpwi_target_titles[MetricTypeEnum.currency.name],
            value_target=dpwi_budget_target,
            value=-1,  # NOTE(review): -1 looks like a "no figure" sentinel; confirm
            dimensions=[],
        ),
        Metric(
            metric_type=MetricTypeEnum.count.name,
            name=dpwi_target_titles[MetricTypeEnum.count.name],
            value_target=dpwi_support_target,
            value=dpwi_support_achieved,
            dimensions=[],
        ),
    ],
)

department.sections.append(section)

# Job creation results

# Per-programme achievements for DPWI: the programme label (column 1, with
# surrounding whitespace removed) becomes the index, followed by the
# achievement columns for the same rows.
dpwi_programme_rows = slice(*department.achievement_lines)
dpwi_programme_labels = opportunity_achievements_df.iloc[dpwi_programme_rows, 1].apply(
    lambda label: label.strip()
)
dpwi_programme_counts = opportunity_achievements_df.iloc[
    dpwi_programme_rows, achievement_columns
]
dpwi_job_achievements_df = pd.concat(
    [dpwi_programme_labels, dpwi_programme_counts], axis="columns"
).set_index(1)

# Programme names as used as keys when indexing dpwi_job_achievements_df
# (dict keys) mapped to the names used in the header row of the consolidated
# DPWI sheet (dict values).
dash_to_cons = {
    "Water and Energy Efficiency": "Water and Energy Efficiency (Green Economy) Facilities Management (PMTE)",
    "Water and Sanitation Facilities Management": "Water and Sanitation Facilities Management (PMTE)",
    "Welisizwe Rural Bridges Programme": "Welisizwe Rural Bridges Programme (PMTE)",
    "Facilities Management": "Facilities Management (PMTE) Employment:",
    "Real Estate": "Real Estate  (PMTE)",
    "In-House Construction projects": "In-House Construction projects",
    "Public Private Collaborations": "Public Private Collaborations",
}

# Inverse lookup: consolidated-sheet name -> dashboard name.
cons_to_dash = {
    consolidated_name: dashboard_name
    for dashboard_name, consolidated_name in dash_to_cons.items()
}

# Programme names from row 8 (columns 1-7) of the consolidated DPWI sheet,
# stripped so they match the keys of cons_to_dash.
programme_names = [pn.strip() for pn in dpwi_df.iloc[8, 1:8]]

# Rows 9-17: one row per province (column 0 is the province name), one column
# per programme in the same order as programme_names.
dpwi_jobs_by_province_df = dpwi_df.iloc[9:18, :8].fillna(0).set_index(0)

# Demographic rows are the same for every loop iteration, so read them once
# instead of re-slicing the sheet for each programme.
dpwi_male_row = dpwi_df.iloc[35, 1:8].fillna(0)
dpwi_female_row = dpwi_df.iloc[36, 1:8].fillna(0)
dpwi_youth_row = dpwi_df.iloc[37, 1:8].fillna(0)

dpwi_job_titles = metric_titles[SectionEnum.job_opportunities.name]

metrics = []
for programme_index, programme_name in enumerate(programme_names):
    # Achievements are indexed by the dashboard name, not the sheet name.
    dashboard_name = cons_to_dash[programme_name]
    programme_achievements = dpwi_job_achievements_df.loc[dashboard_name]

    dimensions = []
    # Only emit a time series when the programme has reported something.
    if programme_achievements.sum() > 0:
        dimensions.append(
            Dimension(
                name=dpwi_job_titles[MetricTypeEnum.count.name + "_time"],
                viz=VizTypeEnum.line.name,
                lookup=LookupTypeEnum.time.name,
                values=[
                    MetricValue(key=months[i], value=int(value))
                    for i, value in enumerate(programme_achievements)
                ],
            )
        )
    dimensions.extend(
        [
            Dimension(
                name=dpwi_job_titles[MetricTypeEnum.count.name + "_gender"],
                viz=VizTypeEnum.two_value.name,
                lookup=LookupTypeEnum.gender.name,
                values=[
                    MetricValue(
                        key=GenderEnum.Male.name,
                        value=dpwi_male_row.iloc[programme_index],
                    ),
                    MetricValue(
                        key=GenderEnum.Female.name,
                        value=dpwi_female_row.iloc[programme_index],
                    ),
                ],
            ),
            Dimension(
                name=dpwi_job_titles[MetricTypeEnum.count.name + "_age"],
                viz=VizTypeEnum.percentile.name,
                lookup=LookupTypeEnum.age.name,
                values=[
                    MetricValue(
                        key="18-35",
                        value=dpwi_youth_row.iloc[programme_index],
                    )
                ],
            ),
            # name is passed by keyword here, like every other Dimension
            # in this notebook (it used to be positional).
            Dimension(
                name=dpwi_job_titles[MetricTypeEnum.count.name + "_province"],
                viz=VizTypeEnum.bar.name,
                lookup=LookupTypeEnum.province.name,
                values=[
                    MetricValue(
                        key=province_to_abbrev[province],
                        value=int(
                            dpwi_jobs_by_province_df.loc[province].iloc[programme_index]
                        ),
                    )
                    for province in list(dpwi_jobs_by_province_df.index)
                ],
            ),
        ]
    )
    metrics.append(
        Metric(
            name=dashboard_name,
            metric_type=MetricTypeEnum.count.name,
            # Latest reported value; we don't know per programme targets.
            value=programme_achievements.iloc[-1],
            dimensions=dimensions,
        )
    )

section = Section(
    name=section_titles[SectionEnum.job_opportunities.name],
    section_type=SectionEnum.job_opportunities.name,
    metrics=metrics,
)

department.sections.append(section)
# Jobs retained

# NO DATA

# Livelihoods

# NO DATA

# Register the department in the combined dataset.
# NOTE(review): add_or_replace is defined outside this notebook; presumably it
# replaces an earlier entry for the same department so the cell can be re-run.
all_data.departments = add_or_replace(all_data.departments, department)

# Write the per-department JSON file. The context manager closes the handle
# deterministically; print(..., file=open(...)) left it open.
with open(output_dir + "/" + sheet_name + ".json", "w") as department_file:
    print(department.to_json(indent=2), file=department_file)

# print(department.to_json(indent=2))

## DEFF

In [18]:
# sheet_name keys the leads / paragraphs lookups and names the output file.
sheet_name = "DEFF"
department_name = "Environment, Forestry and Fisheries"

department = Department(
    name=department_name,
    sheet_name=sheet_name,
    month=202101,
    lead=leads[sheet_name],
    paragraph=paragraphs[sheet_name],
    sections=[],
    # Row ranges used to slice opportunity_targets_df and
    # opportunity_achievements_df below.
    target_lines=[14, 24],
    achievement_lines=[18, 28],
    implementation_details=[],
)

deff_budget_target = 1_983_000 * 1000

# Department totals: sum column 2 of the target rows and the total-achievement
# column of the achievement rows.
deff_target_rows = slice(*department.target_lines)
deff_achievement_rows = slice(*department.achievement_lines)

deff_support_target = opportunity_targets_df.iloc[deff_target_rows, 2].sum()

deff_support_achievements = opportunity_achievements_df.iloc[
    deff_achievement_rows, total_achievement_column
].sum()

# Overall targets: budget (currency) and opportunities (count).
deff_target_titles = metric_titles[SectionEnum.targets.name]
section = Section(
    section_type=SectionEnum.targets.name,
    name=section_titles[SectionEnum.targets.name],
    metrics=[
        Metric(
            metric_type=MetricTypeEnum.currency.name,
            name=deff_target_titles[MetricTypeEnum.currency.name],
            value_target=deff_budget_target,
            value=-1,  # NOTE(review): -1 looks like a "no figure" sentinel; confirm
            dimensions=[],
        ),
        Metric(
            metric_type=MetricTypeEnum.count.name,
            name=deff_target_titles[MetricTypeEnum.count.name],
            value_target=deff_support_target,
            value=deff_support_achievements,
            dimensions=[],
        ),
    ],
)

department.sections.append(section)


# Job creation results

# Per-programme targets: programme name (column 1) as index, target value in
# the single remaining column (column 2).
deff_programme_targets = opportunity_targets_df.iloc[
    department.target_lines[0] : department.target_lines[1], 1:3
].set_index(1)

# Per-programme achievements indexed by programme name; missing cells are
# treated as zero so they can be cast to int below.
deff_job_achievements_df = (
    pd.concat(
        [
            opportunity_achievements_df.iloc[
                department.achievement_lines[0] : department.achievement_lines[1], 1
            ],
            opportunity_achievements_df.iloc[
                department.achievement_lines[0] : department.achievement_lines[1],
                achievement_columns,
            ],
        ],
        axis=1,
    )
    .set_index(1)
    .fillna(0)
)

# Names are kept unstripped because they are used as index labels into both
# frames; only the displayed Metric name is stripped.
programme_names = list(deff_job_achievements_df.index)

section = Section(
    name=section_titles[SectionEnum.job_opportunities.name],
    section_type=SectionEnum.job_opportunities.name,
    metrics=[
        Metric(
            name=programme_name.strip(),
            metric_type=MetricTypeEnum.count.name,
            # Last achievement column is the latest reported value.
            value=int(deff_job_achievements_df.loc[programme_name].iloc[-1]),
            # .loc[...] returns a one-element Series here; take the element
            # explicitly, since int() on a Series is deprecated in pandas.
            value_target=int(deff_programme_targets.loc[programme_name].iloc[0]),
            dimensions=[
                Dimension(
                    name=metric_titles[SectionEnum.job_opportunities.name][
                        MetricTypeEnum.count.name + "_time"
                    ],
                    viz=VizTypeEnum.line.name,
                    lookup=LookupTypeEnum.time.name,
                    values=[
                        MetricValue(key=months[i], value=int(value))
                        for i, value in enumerate(
                            deff_job_achievements_df.loc[programme_name]
                        )
                    ],
                )
            ],
        )
        for programme_name in programme_names
    ],
)

department.sections.append(section)
# Jobs retained

# NO DATA

# Livelihoods

# NO DATA

# Register the department in the combined dataset.
# NOTE(review): add_or_replace is defined outside this notebook; presumably it
# replaces an earlier entry for the same department so the cell can be re-run.
all_data.departments = add_or_replace(all_data.departments, department)

# Write the per-department JSON file. The context manager closes the handle
# deterministically; print(..., file=open(...)) left it open.
with open(output_dir + "/" + sheet_name + ".json", "w") as department_file:
    print(department.to_json(indent=2), file=department_file)

# print(department.to_json(indent=2))

## COGTA

In [19]:
# sheet_name keys the leads / paragraphs lookups and names the output file.
sheet_name = "DCOGTA"
department_name = "Co-operative government"

department = Department(
    name=department_name,
    sheet_name=sheet_name,
    month=202101,
    lead=leads[sheet_name],
    paragraph=paragraphs[sheet_name],
    sections=[],
    # Single-row ranges into the targets / achievements sheets.
    target_lines=[37, 38],
    achievement_lines=[41, 42],
    implementation_details=[],
)

dcogta_budget_target = 50_000_000

# Empty cells count as zero before summing the department's rows.
dcogta_target_rows = slice(*department.target_lines)
dcogta_achievement_rows = slice(*department.achievement_lines)
dcogta_support_target = (
    opportunity_targets_df.fillna(0).iloc[dcogta_target_rows, 2].sum()
)
dcogta_support_achievements = (
    opportunity_achievements_df.fillna(0)
    .iloc[dcogta_achievement_rows, total_achievement_column]
    .sum()
)

# Overall targets: budget (currency) and opportunities (count).
dcogta_target_titles = metric_titles[SectionEnum.targets.name]
section = Section(
    section_type=SectionEnum.targets.name,
    name=section_titles[SectionEnum.targets.name],
    metrics=[
        Metric(
            metric_type=MetricTypeEnum.currency.name,
            name=dcogta_target_titles[MetricTypeEnum.currency.name],
            value_target=dcogta_budget_target,
            value=-1,  # NOTE(review): -1 looks like a "no figure" sentinel; confirm
            dimensions=[],
        ),
        Metric(
            metric_type=MetricTypeEnum.count.name,
            name=dcogta_target_titles[MetricTypeEnum.count.name],
            value_target=dcogta_support_target,
            value=dcogta_support_achievements,
            dimensions=[],
        ),
    ],
)

department.sections.append(section)

# Job creation results: the department has a single programme, so its metric
# reuses the department-level achievement and target figures.

programme_name = "Mainstream labour intensive construction methods"

dcogta_jobs_section = Section(
    section_type=SectionEnum.job_opportunities.name,
    name=section_titles[SectionEnum.job_opportunities.name],
    metrics=[
        Metric(
            metric_type=MetricTypeEnum.count.name,
            name=programme_name,
            value_target=dcogta_support_target,
            value=dcogta_support_achievements,
            dimensions=[],
        )
    ],
)
department.sections.append(dcogta_jobs_section)

# Jobs retained

# NO DATA

# Livelihoods

# NO DATA

# Register the department in the combined dataset.
# NOTE(review): add_or_replace is defined outside this notebook; presumably it
# replaces an earlier entry for the same department so the cell can be re-run.
all_data.departments = add_or_replace(all_data.departments, department)

# Write the per-department JSON file. The context manager closes the handle
# deterministically; print(..., file=open(...)) left it open.
with open(output_dir + "/" + sheet_name + ".json", "w") as department_file:
    print(department.to_json(indent=2), file=department_file)

# print(department.to_json(indent=2))

## Overview picture

In [27]:
# programmes_by_type[section_type][sheet_name] -> {"value", "value_target"};
# a "Total" entry per section type is added as departments are processed.
programmes_by_type = {
    section_key: {}
    for section_key in (
        SectionEnum.job_opportunities.name,
        SectionEnum.livelihoods.name,
        SectionEnum.jobs_retain.name,
    )
}

# achievements_by_type_by_month[section_type][month] -> running total,
# initialised to zero for every month.
achievements_by_type_by_month = {}
for section_type in [
    e.name for e in SectionEnum if e.name not in ("targets", "budget_allocated")
]:
    achievements_by_type_by_month[section_type] = dict.fromkeys(months, 0)

achievements_df = opportunity_achievements_df.iloc[3:, 1:].set_index(1)

# (department name, section type) -> row of opportunity_targets_df whose
# column 2 replaces the summed per-metric targets for that section.
target_override_rows = {
    (
        "Agriculture, Land Reform and Rural Development",
        SectionEnum.livelihoods.name,
    ): 8,
    (
        "Public Works and Infrastructure",
        SectionEnum.job_opportunities.name,
    ): 47,
}

for department in all_data.departments:
    # Per-department accumulators; they are updated below but not read again
    # in this cell.
    section_value = 0
    section_target_value = 0
    for section in department.sections:
        if section.section_type == SectionEnum.targets.name:
            continue
        total_value = 0
        total_target_value = 0
        for metric in section.metrics:
            # Report (but still count) metrics that have no matching row in
            # the achievements sheet.
            if metric.name not in achievements_df.index:
                print(
                    "Metric not found in achievements_df", department.name, metric.name
                )
            total_value += metric.value
            # Non-positive targets are left out of the total.
            if metric.value_target > 0:
                total_target_value += metric.value_target
            for dimension in metric.dimensions:
                if dimension.lookup != LookupTypeEnum.time.name:
                    continue
                for metric_value in dimension.values:
                    month, value = metric_value.key, metric_value.value
                    achievements_by_type_by_month[section.section_type][
                        month
                    ] += value

        override_row = target_override_rows.get(
            (department.name, section.section_type)
        )
        if override_row is not None:
            total_target_value = int(opportunity_targets_df.iloc[override_row, 2])

        by_department = programmes_by_type[section.section_type]
        by_department[department.sheet_name] = {
            "value": total_value,
            "value_target": total_target_value,
        }
        type_total = by_department.setdefault("Total", dict(value=0, value_target=0))
        type_total["value"] += total_value
        type_total["value_target"] += total_target_value
        section_value += total_value
        section_target_value += total_target_value

# Cross-check each section type's summed target against the figure in
# column 7 of the targets sheet (rows 6, 7 and 8 respectively).
for checked_section, reported_row in (
    (SectionEnum.job_opportunities.name, 6),
    (SectionEnum.livelihoods.name, 7),
    (SectionEnum.jobs_retain.name, 8),
):
    computed_target = programmes_by_type[checked_section]["Total"]["value_target"]
    reported_target = opportunity_targets_df.iloc[reported_row, 7]
    assert (
        computed_target == reported_target
    ), f"{checked_section} total mismatch: {computed_target} vs {reported_target}"

# One summary metric per section type (job opportunities, livelihoods, jobs
# retained), each with a per-department bar dimension and a per-month line
# dimension.
#
# The list is built first so the Summary section's totals below are computed
# from these metrics. The totals used to iterate over the global `metrics`,
# which at this point is whatever list an earlier department cell left behind,
# so they did not describe the overview at all.
overview_summary_metrics = [
    Metric(
        name=section_titles[section_name],
        metric_type=section_name,
        value=programmes_by_type[section_name]["Total"]["value"],
        value_target=programmes_by_type[section_name]["Total"]["value_target"],
        dimensions=[
            Dimension(
                name="by department",
                viz=VizTypeEnum.bar.name,
                lookup=LookupTypeEnum.department.name,
                values=[
                    MetricValue(
                        key=department_name,
                        value=outputs["value"],
                        value_target=outputs["value_target"],
                    )
                    for department_name, outputs in department_info.items()
                    # Skip the aggregate entry; only real departments are shown.
                    if not (
                        department_name.startswith("value")
                        or department_name == "Total"
                    )
                ],
            ),
            Dimension(
                name="over time",
                viz=VizTypeEnum.line.name,
                lookup=LookupTypeEnum.time.name,
                values=[
                    MetricValue(key=key, value=value)
                    for key, value in achievements_by_type_by_month[
                        section_name
                    ].items()
                ],
            ),
        ],
    )
    for section_name, department_info in programmes_by_type.items()
    if not section_name.startswith("value")
]

# NOTE(review): month is 202102 here, while the saved all_data.json output
# further down this notebook shows 202103; confirm which is intended.
overview = Overview(
    month=202102,
    name="Programme overview",
    lead=leads["overview"],
    paragraph=paragraphs["overview"],
    sections=[
        Section(
            name="Summary",
            section_type=SectionEnum.overview.name,
            metrics=overview_summary_metrics,
            # Only positive values contribute to the section totals.
            value=sum(
                metric.value for metric in overview_summary_metrics if metric.value > 0
            ),
            value_target=sum(
                metric.value_target
                for metric in overview_summary_metrics
                if metric.value_target > 0
            ),
        )
    ],
)

# Sanity checks: the per-programme rows must add up to the total rows reported
# in the spreadsheets (row 59 for achievements, row 56 for targets).
reported_total_achieved = opportunity_achievements_df.iloc[
    59, total_achievement_column
]
reported_total_target = opportunity_targets_df.iloc[56, 2]

assert (
    opportunity_achievements_df.iloc[6:59, total_achievement_column].sum()
    == reported_total_achieved
), "Sum of achievements does not add up to reported total"
assert (
    opportunity_targets_df.iloc[2:56, 2].sum() == reported_total_target
), "Sum of targets does not add up to reported total"

# Headline section: budget, total opportunities, and opportunities in process.
overview_target_titles = metric_titles[SectionEnum.targets.name]
overview_targets_section = Section(
    section_type=SectionEnum.targets.name,
    name=section_titles[SectionEnum.targets.name + "_overview"],
    metrics=[
        Metric(
            metric_type=MetricTypeEnum.currency.name,
            name=overview_target_titles[MetricTypeEnum.currency.name],
            # Sheet figures are multiplied by 1000 before being stored.
            value_target=opportunity_targets_df.iloc[2, 6] * 1000,
            value=opportunity_targets_df.iloc[2, 7] * 1000,
            dimensions=[],
        ),
        Metric(
            metric_type=MetricTypeEnum.count.name,
            name=overview_target_titles[MetricTypeEnum.count.name],
            value_target=opportunity_targets_df.iloc[56, 2],
            value=opportunity_achievements_df.iloc[59, total_achievement_column],
            dimensions=[],
        ),
        Metric(
            metric_type=MetricTypeEnum.count.name,
            name="Opportunities in process",
            value_target=opportunity_achievements_df.iloc[2, 1],
            value=0,
            dimensions=[],
        ),
    ],
    value=None,
    value_target=None,
)

# The headline section goes first, ahead of the Summary section.
overview.sections.insert(0, overview_targets_section)
all_data.overview = overview
# print(overview.to_json(indent=2))

Metric not found in achievements_df Agriculture, Land Reform and Rural Development Graduate Employment


## Add implementation info

In [21]:
# ad_set = set()
# for department in all_data.departments:
#     for section in department.sections:
#         if section.section_type == SectionEnum.targets.name:
#             continue
#         for metric in section.metrics:
#             ad_set.add(metric.name)
# #             if metric.name not in implementation_status_df.Programme.values:
# #                 print(metric.name)
# # pn = implementation_status_df.Programme.iloc[0]
# # pn in implementation_status_df.Programme.values

# ot_set = set(list(opportunity_targets_df.iloc[2:55, 1]))
# imp_set = set(list(implementation_status_df.iloc[:53, 1]))
# ot_set.difference(imp_set)
# imp_set.difference(ot_set)

# target_to_imp_programme_mapping = {
#     "Banking with art, connecting Lives - National Museum Bloemfontein": " Banking with art, connecting Lives - National Museum Bloemfontein",
#     "CSIR - Experiential Training Programme": "CSIR - Experiential Training Programme ",
#     "Community Health Workers": "Community health workers",
#     "Covid-19 Return-To-Play - National Sport Federations": "Covid-19 Return-To-Play - National Sport Federations                                                                                                                                    ",
#     "Digitisation of records - National Library of South Africa": "Digitisation of records - National Library of South Africa ",
#     "Facilities Management": "Facilities Management (PMTE) Employment: ",
#     "In-House Construction projects": "In-House Construction projects ",
#     "Job retention at fee paying schools": "Retain vulnerable teaching posts",
#     "Municipal infrastructure": "Mainstream labour intensive construction methods",
#     "Outreach Team Leaders": "Outreach team leaders",
#     "Oceans and Coast: Source to Sea": "Oceans and Coast: Source to Sea ",
#     "Provincial Roads Maintenance": "Rural roads maintenance",
#     "Real Estate": "Real Estate  (PMTE)",
#     "Services sector development incentives": "Global Business Services Sector",
#     "Subsistence relief fund": "Subsistence producer relief fund",
#     "Retention of social workers": "Social workers",
#     "Vegetables and Fruits": "Vegetables and Fruits ",
#     "WRC - Water Graduate Employment Programme": " WRC - Water Graduate Employment Programme ",
#     "Water and Energy Efficiency": "Water and Energy Efficiency (Green Economy)",
#     "Water and Sanitation Facilities Management": "Water and Sanitation Facilities Management (PMTE)",
#     "Welisizwe Rural Bridges Programme": "Welisizwe Rural Bridges Programme (PMTE) ",
# }

# target_to_achieve_programme_mapping = {
#     "Community Health Workers": "Community health workers",
#     "Graduate programmes (Property Management Trading Entity)": None,
#     "Job retention at fee paying schools": "Retain vulnerable teaching posts",
#     "Municipal infrastructure": "Mainstream labour intensive construction methods",
#     "Outreach Team Leaders": "Outreach team leaders",
#     "Provincial Roads Maintenance": "Rural roads maintenance",
#     "Real Estate ": "Real Estate",
#     "Retention of social workers": "Social workers",
#     "Services sector development incentives": "Global Business Services Sector",
#     "Subsistence relief fund": None,
#     "WRC - Water Graduate Employment Programme ": " WRC - Water Graduate Employment Programme ",
#     "Water and Energy Efficiency ": "Water and Energy Efficiency",
#     "Water and Sanitation Facilities Management ": "Water and Sanitation Facilities Management",
#     "Welisizwe Rural Bridges Programme": "Welisizwe Rural Bridges Programme ",
# }

# len(ot_set)
# len(imp_set)

# oa_set = set(list(opportunity_achievements_df.iloc[2:54, 1]))
# oa_set.difference(ot_set)

# len(opportunity_achievements_df.iloc[3:54, 1])
# ot_set.difference(oa_set)

# ot_set.difference(ad_set)

# ot_set = set([s.strip() for s in opportunity_targets_df.iloc[2:55, 1]])

# ot_set.difference(ad_set)

# # ad_set.difference(ot_set)
# ad_set.difference(imp_set)
# for diff in sorted(ot_set.difference(imp_set)):
#     if diff not in target_to_imp_programme_mapping:
#         print(diff)

# Attach an ImplementationDetail to each programme metric, looked up by
# programme name in implementation_status_df, and print how many were attached.
#
# The metric objects are mutated in place, so no positional write-back into
# all_data is needed. The previous version kept manual dept/section/metric
# counters and wrote the metric back by index, but its `continue` paths skipped
# `metric_index += 1`. After one skipped metric, later write-backs could land
# on the wrong slot and overwrite a different metric.
count = 0
for department in all_data.departments:
    if department.implementation_details is None:
        department.implementation_details = []
    for section in department.sections:
        # The targets section holds department-level figures, not programmes.
        if section.section_type == SectionEnum.targets.name:
            continue
        for metric in section.metrics:
            if (
                metric.name == "Graduate Employment"
            ):  # DALLRD Graduate Employment programme has no implementation info
                continue
            # Use the metric name directly when the status sheet has it;
            # otherwise translate it through the explicit name mapping.
            key = (
                metric.name
                if metric.name in implementation_status_df.Programme.values
                else target_to_imp_programme_mapping[metric.name]
            )
            imp_info = implementation_status_df[
                implementation_status_df.Programme == key
            ]
            status_str = imp_info.Status.iloc[0]
            # Unrecognised status text: leave the metric without a detail.
            if status_str not in implementation_status_to_enum:
                continue
            status = implementation_status_to_enum[status_str]
            detail = imp_info.Detail.iloc[0]
            impl = ImplementationDetail(
                programme_name=metric.name, status=status, detail=detail
            )
            # NOTE(review): the department list printed later in this notebook
            # shows the sheet name as "DALRRD", so the "DALLRD" comparison
            # below never matches and that department's metrics do get a
            # detail. Left as is because changing it alters the emitted data;
            # confirm the intent.
            if department.sheet_name == "DALLRD" or department.sheet_name == "DPWI":
                # these two departments have per-department programmes and implementation status
                continue
            metric.implementation_detail = impl
            count += 1
print(count)

39


In [22]:
# Names of every programme metric emitted so far (targets sections excluded).
ad_set = {
    metric.name
    for department in all_data.departments
    for section in department.sections
    if section.section_type != SectionEnum.targets.name
    for metric in section.metrics
}

# Programme names from the targets sheet and the implementation status sheet.
ot_set = set(opportunity_targets_df.iloc[2:55, 1])
imp_set = set(implementation_status_df.iloc[:53, 1])

# Displayed result: target programmes that have no emitted metric.
ot_set - ad_set

{'Graduate programmes (Property Management Trading Entity)',
 'Subsistence producer relief fund'}

# Save final data

In [31]:
# Write the combined dataset, with departments ordered by sheet name so the
# output is stable between runs.
output_filename = output_dir + "/all_data.json"
all_data.departments.sort(key=operator.attrgetter("sheet_name"))
# The context manager flushes and closes the file; open(...).write(...) left
# the handle open until garbage collection.
with open(output_filename, "w") as all_data_file:
    all_data_file.write(all_data.to_json(indent=2))
# print(all_data.to_json(indent=2))
print("DONE")

{
  "overview": {
    "month": 202103,
    "name": "Programme overview",
    "lead": "Building a society that works",
    "paragraph": "The COVID-19 pandemic has had a devastating economic impact, threatening the jobs and livelihoods of many South Africans \u2013 especially the most vulnerable. The pandemic has exacerbated South Africa\u2019s pre-existing crises of poverty and unemployment. The Presidential Employment Stimulus recognises the urgency of this situation, using direct public investment to support public employment, job retention and livelihood support programmes, as part of the wider economic recovery process.  This provides direct support to participants and their families, but also acts as a stimulus in local economies, with spending mainly in township and rural economies. This provides a vital stimulus to small enterprise and the informal sector - trickling up into the wider economy from there.\nThis platform tracks the performance of the programmes that form part of th

In [24]:
# Raw "Implementation status" worksheet from the March opportunities workbook,
# read without treating any row as a header.
programme_status_df = pd.read_excel(
    io=mar_opportunities_excel,
    header=None,
    sheet_name="Implementation status",
)

def to_camel_case(match):
    """Replacement callback for re.sub: rejoin three groups, upper-casing the second.

    Expects a match with at least three groups. Returns group 1, the
    upper-cased group 2 and group 3 concatenated, which drops whatever
    separated group 1 from group 2 in the pattern.
    """
    head, initial, tail = match.group(1, 2, 3)
    return head + initial.upper() + tail

# Displayed result: each distinct status label found in column 2 (from row 3
# down) of implementation_status_df, rewritten with to_camel_case.
list(
    map(
        lambda status: re.sub(r"(\S*) (\w)(.*)", to_camel_case, status),
        implementation_status_df.iloc[3:, 2].dropna().unique(),
    )
)

['MinorChallenges', 'CriticalChallenges', 'OnTrack']

In [25]:
# Print one "'<name>': '<sheet name>'," line per department, tab-indented so
# the output can be pasted into a dict literal.
for dept in all_data.departments:
    print("\t'{}': '{}',".format(dept.name, dept.sheet_name))

	'Trade, Industry and Competition': 'DTIC',
	'Basic Education': 'DBE',
	'Social Development': 'DSD',
	'Health': 'DoH',
	'Agriculture, Land Reform and Rural Development': 'DALRRD',
	'Science and Innovation': 'DSI',
	'Sports, Arts and Culture': 'DSAC',
	'Transport': 'DoT',
	'Public Works and Infrastructure': 'DPWI',
	'Environment, Forestry and Fisheries': 'DEFF',
	'Co-operative government': 'DCOGTA',


In [26]:
# Write the metric title lookup next to the data files. The context manager
# closes the file, so the JSON is fully flushed before the cell finishes;
# json.dump(..., open(...)) left the handle open.
with open(output_dir + "/metric_titles.json", "w") as metric_titles_file:
    json.dump(metric_titles, metric_titles_file, indent=2)