In [18]:
from policyengine_uk import Microsimulation
from policyengine_uk.data.datasets import UKMOD_FRS_2018
from microdf import MicroDataFrame
import pandas as pd
from policyengine_uk.data.storage import STORAGE_FOLDER

# Load the UKMOD output and input tables (tab-separated text files).
ukmod_output = pd.read_csv(STORAGE_FOLDER / "uk_2018_std.txt", sep="\t")
ukmod_input = pd.read_csv(STORAGE_FOLDER / "uk_2018_a4.txt", sep="\t")

# Columns that exist only in the output table; shared columns (including
# the "idperson" join key) are taken from the input table instead.
output_columns = [
    name for name in ukmod_output.columns if name not in ukmod_input.columns
]

# Right join on the person identifier: every input row is kept and the
# output-only columns are attached to it.
ukmod = ukmod_output[[*output_columns, "idperson"]].merge(
    ukmod_input,
    how="right",
    on="idperson",
)

# Wrap in a MicroDataFrame that uses the "dwt" column as weights.
ukmod = MicroDataFrame(ukmod, weights="dwt")

# Generate the UKMOD FRS 2018 dataset before loading it by name below.
# NOTE(review): presumably generate() writes the data that the
# "ukmod_frs_2018" dataset name resolves to — confirm; it runs on every
# execution of this cell.
UKMOD_FRS_2018().generate()

# Microsimulation over the dataset named "ukmod_frs_2018".
sim = Microsimulation(dataset="ukmod_frs_2018")

# Person-level comparison of PolicyEngine NI variables against UKMOD columns.
# UKMOD columns are multiplied by 12 before comparison (presumably monthly
# amounts being annualised — TODO confirm against the UKMOD variable docs).
# NOTE(review): columns are combined positionally, so this assumes `ukmod`
# rows and the simulation's person arrays share the same order — confirm.
df = pd.DataFrame(
    {
        "household_id": sim.calculate("household_id", map_to="person").values,
        "tscse_s": ukmod.tscse_s.values * 12,
        "tscee_s": ukmod.tscee_s.values * 12,
        "ni_class_1_employee": sim.calculate("ni_class_1_employee").values,
        "ni_self_employed": sim.calculate("ni_self_employed").values,
        "ni_class_4_maximum": sim.calculate("ni_class_4_maximum").values,
        "self_employment_income": sim.calculate(
            "self_employment_income"
        ).values,
        # Pass the raw array like every other column: an indexed series here
        # would make the frame's index (and alignment) depend on the UKMOD
        # index instead of plain row position.
        "yse": ukmod.yse.values * 12,
        "employment_income": sim.calculate("employment_income").values,
    }
)
# Signed gap between the PolicyEngine and UKMOD self-employed NI columns,
# plus its magnitude.
df["Error"] = df["ni_self_employed"] - df["tscse_s"]
df["Absolute Error"] = df["Error"].abs()
# Display only the rows that disagree, largest discrepancy first.
df[df["Absolute Error"] > 0].sort_values("Absolute Error", ascending=False)

Unnamed: 0,household_id,tscse_s,tscee_s,ni_class_1_employee,ni_self_employed,ni_class_4_maximum,self_employment_income,yse,employment_income,Error,Absolute Error
2758,123600,2652.00,1176.60,1189.760132,65223.566406,3.091324e+06,3134196.0,3.134196e+06,18252.0,62571.566406,62571.566406
27504,1225400,3370.32,253.08,266.240051,10768.915039,3.685918e+05,410540.0,4.105400e+05,10556.0,7398.595039,7398.595039
29554,1317600,1234.92,2998.68,3011.839600,4540.563477,5.717425e+04,101868.0,1.018680e+05,33436.0,3305.643477,3305.643477
43046,1916000,868.68,4631.76,4587.093262,3611.538330,1.079857e+04,56992.0,5.699200e+04,49712.0,2742.858330,2742.858330
18885,841000,764.64,5329.56,5284.933105,3493.581543,5.598569e+03,51792.0,5.179200e+04,84604.0,2728.941543,2728.941543
...,...,...,...,...,...,...,...,...,...,...,...
41266,1836400,153.84,0.00,0.000000,152.880005,3.318090e+03,8216.0,8.216000e+03,0.0,-0.959995,0.959995
32611,1451600,153.84,0.00,0.000000,152.880005,3.318090e+03,8216.0,8.216000e+03,0.0,-0.959995,0.959995
7473,334800,153.84,0.00,0.000000,152.880005,3.318090e+03,8216.0,8.216000e+03,0.0,-0.959995,0.959995
3445,153900,153.84,0.00,0.000000,152.880005,3.318090e+03,8216.0,8.216000e+03,0.0,-0.959995,0.959995


In [13]:
# Display the full comparison frame, including rows with zero error.
df

Unnamed: 0,household_id,tscse_s,tscee_s,ni_class_1_employee,ni_self_employed,ni_class_4_maximum,self_employment_income,yse,employment_income,Error,Absolute Error
0,100,0.00,0.0,0.0,0.000000,941.170166,0.0,0.0,29380.0,0.000000,0.000000
1,200,153.84,0.0,0.0,148.199997,3318.090088,7176.0,7176.0,0.0,-5.640003,5.640003
2,200,0.00,0.0,0.0,0.000000,3466.290039,0.0,0.0,0.0,0.000000,0.000000
3,200,0.00,0.0,0.0,0.000000,3466.290039,0.0,0.0,0.0,0.000000,0.000000
4,300,0.00,0.0,0.0,0.000000,3466.290039,0.0,0.0,0.0,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
43082,1917300,0.00,0.0,0.0,0.000000,3466.290039,0.0,0.0,0.0,0.000000,0.000000
43083,1917300,0.00,0.0,0.0,0.000000,3466.290039,0.0,0.0,0.0,0.000000,0.000000
43084,1917400,0.00,0.0,0.0,0.000000,3466.290039,0.0,0.0,0.0,0.000000,0.000000
43085,1917500,0.00,0.0,0.0,0.000000,3466.290039,0.0,0.0,0.0,0.000000,0.000000


In [23]:
# Fraction of entries in `error` that are below 10.
# NOTE(review): `error` is not defined in any visible cell; this presumably
# relies on leftover kernel state and would fail on Restart & Run All —
# define it explicitly or remove this cell.
(error < 10).mean()

0.6419801796365493

In [19]:
# Share of rows whose absolute error is strictly below 10.
df["Absolute Error"].lt(10).mean()

0.9623320259010838

In [3]:
import plotly.express as px

# Scatter of the signed error against employment income; points are coloured
# by employment income and every column of `df` is shown on hover.
px.scatter(
    data_frame=df,
    y="Error",
    x="employment_income",
    hover_data=df.columns,
    color="employment_income",
    opacity=0.1,
)

In [4]:
# Total of the simulated ni_class_4 variable, divided by 1e9 (i.e. in billions).
sim.calculate("ni_class_4").sum() / 1e9

4.75112974983255

In [5]:
# Total of the UKMOD tscee_s column, in billions, multiplied by 12
# (presumably annualising a monthly amount — TODO confirm).
ukmod.tscee_s.sum() / 1e9 * 12

56.02758694559999

In [6]:
from policyengine_uk import Simulation

# Single-person situation with an axis sweeping employment income from 0 to
# 100,000 in 100 steps.
# NOTE: this rebinds `sim`, which an earlier cell bound to the Microsimulation;
# cells that expect the microsimulation must not be run after this one.
sim = Simulation(
    situation=dict(
        people=dict(person={}),
        axes=[
            [
                dict(
                    name="employment_income",
                    min=0,
                    max=100_000,
                    count=100,
                )
            ]
        ],
    )
)

# Plot ni_class_1_employee_primary against employment income along the sweep.
px.line(
    y=sim.calculate("ni_class_1_employee_primary"),
    x=sim.calculate("employment_income"),
)