In [14]:
# Load the lab_black IPython extension for this kernel session
# (presumably Black auto-formatting of cells — confirm against the extension's docs).
%load_ext lab_black

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


In [15]:
import json
import os
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Optional, List

import altair as alt
import numpy as np
import pandas as pd
from dataclasses_json import dataclass_json

In [16]:
# Directory that the per-department JSON files are written to.
# The path is machine-specific, so it can be overridden by setting the
# PES_OUTPUT_DIR environment variable before starting the kernel; when the
# variable is unset the original location is used unchanged.
output_dir = os.environ.get(
    "PES_OUTPUT_DIR",
    "/home/pvh/Documents/code/pvh-forks/presidential-employment-stimulus/data",
)

### Data structure

Each department has a total budget and total opportunities target. 

The overall programme has three outcome targets:
1. Jobs created
2. Jobs retained
3. Livelihoods supported

Each department has a "blurb" describing their programme.

Within each department there are multiple programmes that can contribute to each of these targets.

Each programme has a demographic split of outcomes, with gender and youth percentages.

Each programme has a per-province split of outcomes.

Files:

`Consolidated data (Dec) - Presidential Employment Stimulus.xlsx` - December sheet

`Consolidated Presidential Employment Stimulus Reporting Template.xlsx` - January sheet

In [17]:
# File names of the two source workbooks (resolved relative to the kernel's
# working directory).
december_excel = "Consolidated data (Dec) - Presidential Employment Stimulus.xlsx"
january_excel = "Consolidated Presidential Employment Stimulus Reporting Template.xlsx"

# List the sheet names of each workbook. The context managers close the
# underlying file handles once the names have been read.
with pd.ExcelFile(december_excel) as december_workbook:
    december_sheets = december_workbook.sheet_names
with pd.ExcelFile(january_excel) as january_workbook:
    january_sheets = january_workbook.sheet_names

# Full province names, in alphabetical order.
provinces = [
    "Eastern Cape",
    "Free State",
    "Gauteng",
    "KwaZulu-Natal",
    "Limpopo",
    "Mpumalanga",
    "North West",
    "Northern Cape",
    "Western Cape",
]
# Abbreviations positionally aligned with `provinces` (one entry per province).
# "MP" (Mpumalanga) was previously missing, leaving the two lists out of step.
province_abbreviations = ["EC", "FS", "GP", "KZN", "LP", "MP", "NW", "NC", "WC"]

```
interface DepartmentMonth {
  month: number // 202101
  name: string // Basic Education
  lead: string // Strengthening the learning environment in schools
  paragraph: string
  sections: Array<{
    name: string // Budget allocated to date
    metrics: Array<{
      name: string // Educational and general assistants
      type: 'currency' | 'count'
      value: number
      valueTarget?: number
      time?: {
        name: string // spend
        values: Array<{
          month: number // 202101
          value: number
        }>
      }
      gender?: {
        name: string // opportunities
        values: Array<{
          gender: 'female' | 'male'
          value: number
        }>
      }
      age?: {
        name: string // opportunities
        values: Array<{
          age: string // 18-35
          value: number
        }>
      }
      province?: {
        name: string // opportunities
        values: Array<{
          province: 'EC' | 'FS' | 'GP' | 'KZN' | 'LP' | 'MP' | 'NC' | 'NW' | 'WC'
          value: number
        }>
      }
    }>
  }>
}
```

## Data model

In [18]:
# Enumerations built with the functional Enum API; members are numbered from 1
# in the order listed. Only the member names are used when building records.
SectionEnum = Enum(
    "Section",
    [
        "targets",
        "budget_allocated",
        "job_opportunities",
        "jobs_retain",
        "livelihoods",
    ],
)

MetricTypeEnum = Enum("MetricType", ["currency", "count"])

ProvinceEnum = Enum(
    "Province", ["EC", "FS", "GP", "KZN", "LP", "MP", "NC", "NW", "WC"]
)

# Maps a full province name to its abbreviation.
# "Eastern Cape" was previously missing, so any lookup for that province
# raised KeyError even though the province list and ProvinceEnum include it.
province_to_abbrev = {
    "Eastern Cape": "EC",
    "Free State": "FS",
    "Gauteng": "GP",
    "KwaZulu-Natal": "KZN",
    "Limpopo": "LP",
    "Mpumalanga": "MP",
    "North West": "NW",
    "Northern Cape": "NC",
    "Western Cape": "WC",
}


@dataclass_json
@dataclass
class TimeValue:
    """One data point of a metric's time series: a value for a single month."""

    month: int  # encoding month as in 202101
    name: str  # human readable time period name
    value: int


@dataclass_json
@dataclass
class AgeValue:
    """One data point of a metric's age breakdown."""

    age_category: str  # 18-35 or youth?
    value: int


@dataclass_json
@dataclass
class GenderValue:
    """One data point of a metric's gender breakdown."""

    gender: str  # enum: 'female' or 'male'
    value: int


@dataclass_json
@dataclass
class ProvinceValue:
    """One data point of a metric's per-province breakdown."""

    province: str  # enum: 'EC' | 'FS' | 'GP' | 'KZN' | 'LP' | 'MP' | 'NC' | 'NW' | 'WC'
    value: int


@dataclass_json
@dataclass
class TimeValues:
    """A named time series: a label plus a list of TimeValue points."""

    name: str  # label for the series, e.g. "Spent over time"
    values: List["TimeValue"]


@dataclass_json
@dataclass
class ProvinceValues:
    """A named per-province breakdown: a label plus a list of ProvinceValue points."""

    name: str  # label for the breakdown, e.g. "Opportunities by Province"
    values: List["ProvinceValue"]


@dataclass_json
@dataclass
class AgeValues:
    """A named age breakdown: a label plus a list of AgeValue points."""

    name: str  # label for the breakdown
    values: List["AgeValue"]


@dataclass_json
@dataclass
class GenderValues:
    """A named gender breakdown: a label plus a list of GenderValue points."""

    name: str  # label for the breakdown, e.g. "Jobs by Gender"
    values: List["GenderValue"]


@dataclass_json
@dataclass
class Metric:
    """A single reported figure, with optional breakdowns.

    The four breakdown fields default to ``None`` so that callers only need
    to pass the breakdowns they actually have; existing calls that pass
    ``time=None`` etc. explicitly keep working unchanged.
    """

    name: str
    metric_type: str  # enum of 'currency', 'count'
    value: int
    time: Optional[TimeValues] = None  # values over time, if available
    gender: Optional[GenderValues] = None  # breakdown by gender, if available
    age: Optional[AgeValues] = None  # breakdown by age category, if available
    province: Optional[ProvinceValues] = None  # breakdown by province, if available
    value_target: int = -1  # -1 is the value used when no target is supplied


@dataclass_json
@dataclass
class Section:
    """A named group of metrics within a department's record."""

    name: str  # human readable section title
    section_type: str  # enum of 'targets', 'budget_allocated', 'job_opportunities', 'jobs_retain', 'livelihoods'
    metrics: List["Metric"]


@dataclass_json
@dataclass
class Department:
    """Top-level record for one department; serialized to JSON via to_json()."""

    month: int  # the month of latest data
    name: str  # full department name
    sheet_name: str  # name of the department's sheet in the source workbooks
    lead: str  # short headline for the department's programme
    paragraph: str  # longer descriptive text for the department
    sections: List["Section"]

## DTIC

In [19]:
# DTIC: build the Department record for Trade, Industry and Competition from
# the December workbook, write it to <output_dir>/<sheet_name>.json and echo it.
department_name = "Trade, Industry and Competition"
sheet_name = "DTIC"

department = Department(
    month=202012,
    name=department_name,
    sheet_name=sheet_name,
    lead="Piloting new models for re-shoring and expanding global business services",
    paragraph="The Global Business Services Sector has an impressive track record. Established in 2006/7 to provide offshore customer service delivery, the sector has built from a low base to achieve an average year-on-year export revenue growth of at least 20% since 2014.",
    sections=[],
)

budget_target = 120_000 * 1000
opportunities_target = 8_000

# --- Programme targets for this department ---
section = Section(
    name="Programme targets for this department",
    section_type=SectionEnum.targets.name,
    metrics=[
        Metric(
            name="Budget",
            metric_type=MetricTypeEnum.currency.name,
            value=budget_target,
            time=None,
            gender=None,
            age=None,
            province=None,
        ),
        Metric(
            name="Job opportunities",
            metric_type=MetricTypeEnum.count.name,
            value=opportunities_target,
            time=None,
            gender=None,
            age=None,
            province=None,
        ),
    ],
)
department.sections.append(section)

# The sheet is read without a header row so that cells can be addressed by
# absolute (row, column) position below.
december_df = pd.read_excel(december_excel, sheet_name=sheet_name, header=None)

oct_nov_spend = december_df.iloc[48, 1]
dec_spend = december_df.iloc[49, 1]

programme_name = "Global Business Services Sector expansion"

# --- Budget allocated to date ---
# NOTE(review): `value` is the December figure alone, not the sum of the two
# time points — confirm whether the December cell is cumulative.
section = Section(
    name="Budget allocated to date",
    section_type=SectionEnum.budget_allocated.name,
    metrics=[
        Metric(
            name=programme_name,
            metric_type=MetricTypeEnum.currency.name,
            value=dec_spend,
            value_target=budget_target,
            time=TimeValues(
                name="Spent over time",
                values=[
                    TimeValue(month=202011, name="Nov '20", value=oct_nov_spend),
                    TimeValue(month=202012, name="Dec '20", value=dec_spend),
                ],
            ),
            gender=None,
            age=None,
            province=None,
        )
    ],
)
department.sections.append(section)

# --- Job opportunities created to date ---
total_programme_jobs = december_df.iloc[28:37, 1].sum()
percentage_male = december_df.iloc[41, 1] * 100
percentage_female = december_df.iloc[42, 1] * 100
percentage_youth = december_df.iloc[43, 1] * 100

# Per-province totals: column 0 holds the province name, the remaining
# selected columns are summed across each row.
# NOTE(review): the total above sums rows 28:37 but this slice starts at row
# 29 — confirm that the first province row is not being dropped.
by_province_df = december_df.iloc[29:37, :5].set_index(0).sum(axis=1)

province_values = ProvinceValues(
    name="Opportunities by Province",
    values=[
        ProvinceValue(
            province=province_to_abbrev[province],
            value=int(by_province_df.loc[province]),
        )
        for province in by_province_df.index
    ],
)

# NOTE(review): the gender values below are percentages (and capitalised
# labels), whereas the age value is converted to a head count — confirm which
# form the consumer of this JSON expects.
section = Section(
    name="Job opportunities created to date",
    section_type=SectionEnum.job_opportunities.name,
    metrics=[
        Metric(
            name=programme_name,
            metric_type=MetricTypeEnum.count.name,
            value=total_programme_jobs,
            time=TimeValues(
                name="Employed over time",
                values=[
                    TimeValue(month=202012, name="Dec '20", value=total_programme_jobs)
                ],
            ),
            gender=GenderValues(
                name="Jobs by Gender",
                values=[
                    GenderValue(gender="Male", value=percentage_male),
                    GenderValue(gender="Female", value=percentage_female),
                ],
            ),
            age=AgeValues(
                name="Opportunities for 18-35 year olds",
                values=[
                    AgeValue(
                        age_category="18-35",
                        value=int(percentage_youth / 100 * total_programme_jobs),
                    )
                ],
            ),
            province=province_values,
        )
    ],
)
department.sections.append(section)

# Serialize once; the `with` block guarantees the output file is flushed and
# closed (the previous bare open() left the handle dangling).
department_json = department.to_json(indent=2)
with open(output_dir + "/" + sheet_name + ".json", "w") as json_file:
    print(department_json, file=json_file)
print(department_json)

{
  "month": 202012,
  "name": "Trade, Industry and Competition",
  "sheet_name": "DTIC",
  "lead": "Piloting new models for re-shoring and expanding global business services",
  "paragraph": "The Global Business Services Sector has an impressive track record. Established in 2006/7 to provide offshore customer service delivery, the sector has built from a low base to achieve an average year-on-year export revenue growth of at least 20% since 2014.",
  "sections": [
    {
      "name": "Programme targets for this department",
      "section_type": "targets",
      "metrics": [
        {
          "name": "Budget",
          "metric_type": "currency",
          "value": 120000000,
          "time": null,
          "gender": null,
          "age": null,
          "province": null,
          "value_target": -1
        },
        {
          "name": "Job opportunities",
          "metric_type": "count",
          "value": 8000,
          "time": null,
          "gender": null,
          "ag

In [20]:
# Per-province totals for the DTIC sheet: take rows 29-36 and the first five
# columns, index by column 0, and sum the remaining columns row-wise.
by_province_df = december_df.iloc[29:37, :5].set_index(0).sum(axis=1)

In [21]:
# Build the per-province breakdown in one expression and display it as a dict.
province_values = ProvinceValues(
    name="Opportunities by Province",
    values=[
        ProvinceValue(
            province=province_to_abbrev[province_label],
            value=int(by_province_df.loc[province_label]),
        )
        for province_label in list(by_province_df.index)
    ],
)
province_values.to_dict()

{'name': 'Opportunities by Province',
 'values': [{'province': 'FS', 'value': 0},
  {'province': 'GP', 'value': 872},
  {'province': 'KZN', 'value': 7166},
  {'province': 'LP', 'value': 0},
  {'province': 'MP', 'value': 0},
  {'province': 'NW', 'value': 0},
  {'province': 'NC', 'value': 0},
  {'province': 'WC', 'value': 7106}]}

## DBE

In [None]:
# DBE: build the Department record for Basic Education from the December and
# January workbooks and print it as JSON.
# The name assignment was previously commented out ("## department_name ..."),
# so the record silently inherited the name set by the preceding DTIC cell.
department_name = "Basic Education"
sheet_name = "DBE"

department = Department(
    month=202103,
    name=department_name,
    sheet_name=sheet_name,
    lead="Teachers assistants and other support for schools",
    paragraph="A key priority identified in the National Development Plan is the improvement of quality education, skills development, and innovation. One intervention that has seen some experimentation in South Africa, with significant potential to scale nationally, is the use of school assistants to strengthen the learning environment. An important rationale for school assistants is the need to support teachers in the classroom, freeing up time for teaching and providing additional support to learners to improve education outcomes.",
    sections=[],
)

budget_target = 7_000_000 * 1000
opportunities_target = 344_933

# Both sheets are read without a header row so that cells can be addressed by
# absolute (row, column) position. The workbook paths and sheet name come from
# the variables defined earlier rather than repeated literals.
december_df = pd.read_excel(december_excel, sheet_name=sheet_name, header=None)
january_df = pd.read_excel(january_excel, sheet_name=sheet_name, header=None)

assistants_budget = january_df.iloc[42, 1]
post_saving_budget = january_df.iloc[42, 2]

# TODO: abstract this
# --- Programme targets for this department ---
section = Section(
    name="Programme targets for this department",
    section_type=SectionEnum.targets.name,
    metrics=[
        Metric(
            name="Total budget",
            metric_type=MetricTypeEnum.currency.name,
            value=budget_target,
            time=None,
            gender=None,
            age=None,
            province=None,
        ),
        Metric(
            name="Budget for Education and General Assistant Posts",
            metric_type=MetricTypeEnum.currency.name,
            value=assistants_budget,
            time=None,
            gender=None,
            age=None,
            province=None,
        ),
        Metric(
            name="Budget for Saving Vulnerable Posts",
            metric_type=MetricTypeEnum.currency.name,
            value=post_saving_budget,
            time=None,
            gender=None,
            age=None,
            province=None,
        ),
        Metric(
            name="Job opportunities",
            metric_type=MetricTypeEnum.count.name,
            value=opportunities_target,
            time=None,
            gender=None,
            age=None,
            province=None,
        ),
    ],
)
department.sections.append(section)

# Spend per programme: one amount from each workbook (row 44, one column per
# programme).
# NOTE(review): the December and January cells may be in different units
# (e.g. 151913 vs 379998000 for column 2) — confirm before trusting the sums.
programme_spends = [
    dict(
        name="Education and general assistants",
        amounts=[december_df.iloc[44, 1], january_df.iloc[44, 1]],
    ),
    dict(
        name="Vulnerable  posts saved",
        amounts=[december_df.iloc[44, 2], january_df.iloc[44, 2]],
    ),
]

# NOTE(review): the amounts come from the December and January workbooks but
# are labelled Jan '21 / Mar '21 — confirm the intended reporting months.
months = [202101, 202103]
month_names = ["Jan '21", "Mar '21"]

# NOTE(review): every programme is measured against the whole-department
# budget_target; per-programme budgets (assistants_budget, post_saving_budget)
# are available above — confirm which target is intended.
metrics = [
    Metric(
        name=programme["name"],
        metric_type=MetricTypeEnum.currency.name,
        value=sum(programme["amounts"]),
        value_target=budget_target,
        # Same series label as the DTIC budget section; replaces the
        # placeholder "what_is_this_name_for" that leaked into the JSON.
        time=TimeValues(
            name="Spent over time",
            values=[
                TimeValue(month=month, name=month_name, value=amount)
                for month, month_name, amount in zip(
                    months, month_names, programme["amounts"]
                )
            ],
        ),
        gender=None,
        age=None,
        province=None,
    )
    for programme in programme_spends
]

# --- Budget allocated to date ---
section = Section(
    name="Budget allocated to date",
    section_type=SectionEnum.budget_allocated.name,
    metrics=metrics,
)
department.sections.append(section)

# TODO: Job opportunities created to date (not yet implemented for DBE)

print(department.to_json(indent=2))

In [62]:
# Scratch check: re-read the January sheet cell at row 42, column 2.
post_saving_budget = january_df.iloc[42, 2]

In [63]:
# Scratch check: display the sum of the two per-programme budget figures.
assistants_budget + post_saving_budget

9430615000

In [64]:
# Scratch check: display the department budget target currently in scope.
budget_target

7000000000

In [65]:
# Scratch check: December sheet, row 44, column 2.
december_df.iloc[44, 2]

151913

In [66]:
# Scratch check: January sheet, row 44, column 2 (same cell position as the December check).
january_df.iloc[44, 2]

379998000

In [67]:
# Scratch check: display the whole of row 44 of the January sheet, including its label in column 0.
january_df.iloc[44]

0    Total amount spent this month
1                       2828913000
2                        379998000
3                              NaN
Name: 44, dtype: object

In [68]:
# Scratch check: print row 19, columns 1 and 2 from the December sheet and then
# the January sheet (presumably education vs general assistant counts —
# confirm against the workbook). The names end up holding the January values.
for workbook_df in (december_df, january_df):
    ed_assistants, gen_assistants = workbook_df.iloc[19, 1], workbook_df.iloc[19, 2]
    print(ed_assistants, gen_assistants)

158437 120735
155341 128894
