# Imputing land values to the FRS

In [13]:
import plotly.express as px
from ubicenter import format_fig
import numpy as np
import pandas as pd
import microdf as mdf
import synthimpute as si
import sklearn

In [14]:
# Load the survey extract into `was`. Later cells read its `wealth`,
# `est_land`, `weight` and `gross_income` columns.
# NOTE(review): the path is relative to the user's home directory, so the file
# has to be placed there by hand — consider a configurable data directory.
was = pd.read_csv("~/was.csv")

In [15]:
# Gini index of the `wealth` column of `was`, passing `weight` as the weights
# column. Displayed as the cell output.
mdf.gini(was, "wealth", "weight")

0.6154105080884578

In [16]:
# Same Gini calculation for the `est_land` column, for comparison with the
# `wealth` figure in the previous cell.
mdf.gini(was, "est_land", "weight")

0.6158429149616598

In [17]:
# Hold out part of `was` so the imputation can be inspected on rows the model
# was not trained on. A fixed random_state keeps the split (and every result
# derived from it) identical across Restart & Run All.
train, test = sklearn.model_selection.train_test_split(was, random_state=0)

In [18]:
# Predictors for the land-value model. Every name listed here must exist as a
# column in BOTH the training frame (`was`) and the frame being imputed to
# (`frs`), because the same list is used to index both. "DVTotGIRW5" was
# removed: `frs` is built without such a column, so `frs[TRAIN_COLS]` would
# raise a KeyError.
TRAIN_COLS = [
    "gross_income",  # Household gross income.
]

# Target column(s) to impute.
IMPUTE_COLS = [
    "est_land",  # Estimated land value.
]

In [19]:
# Fit on the training split and predict land values for the held-out rows,
# then attach the predictions to `test` as `pred_land`.
holdout_pred_land = si.rf_impute(
    x_train=train[TRAIN_COLS],
    y_train=train[IMPUTE_COLS],
    x_new=test[TRAIN_COLS],
    sample_weight_train=train.weight,
)
test["pred_land"] = holdout_pred_land

In [20]:
# Show the predictors next to the imputed land value for the held-out rows.
test[[*TRAIN_COLS, "pred_land"]]

Unnamed: 0,gross_income,pred_land
16311,42900.0,112654.812657
16201,80400.0,895519.259239
5007,66300.0,158894.395150
3860,63000.0,352006.028309
8055,31100.0,139746.528842
...,...,...
13535,63100.0,188376.306759
2002,157000.0,759201.000637
11573,22100.0,211350.180626
16451,41600.0,202036.905561


In [21]:
from openfisca_uk import Microsimulation

# Build the microsimulation with `year=2020`; the next cell pulls
# household-level variables from it via `sim.df(...)`.
sim = Microsimulation(year=2020)

In [22]:
# Variables requested from the simulation, mapped to the household level.
FRS_VARIABLES = [
    "gross_income",
    "pension_income",
    "net_income",
    "num_adults",
    "num_children",
    "people",
    "savings_interest_income",
    "dividend_income",
    "employment_income",
    "self_employment_income",
]
frs = sim.df(FRS_VARIABLES, map_to="household")

# Investment income is the sum of savings interest and dividends.
frs["investment_income"] = (
    frs["savings_interest_income"] + frs["dividend_income"]
)

In [23]:
# Train on the full `was` sample (no hold-out this time) and predict a land
# value for every row of `frs`, stored as `pred_land`.
frs_pred_land = si.rf_impute(
    x_train=was[TRAIN_COLS],
    y_train=was[IMPUTE_COLS],
    x_new=frs[TRAIN_COLS],
    sample_weight_train=was.weight,
)
frs["pred_land"] = frs_pred_land

In [32]:
# Weighted land totals, both divided by 1e9: the target from `was`, and the
# total implied by the raw predictions on `frs`.
total_was_land = mdf.weighted_sum(was, "est_land", "weight") / 1e9
total_land = mdf.weighted_sum(frs, "pred_land", "weight") / 1e9

# Scale the predictions so their weighted total matches the `was` total.
# Both totals carry the same 1e9 divisor, so it cancels in the ratio.
land_scale = total_was_land / total_land
frs["pred_land"] = frs["pred_land"] * land_scale

In [34]:
# Reform: a flat land value tax (LVT) whose revenue is returned as an equal
# per-person dividend (UBI).
LVT_RATE = 0.01  # Tax rate applied to each household's imputed land value.

# Household tax liability, in the same units as `pred_land`.
frs["lvt"] = frs.pred_land * LVT_RATE

# Total revenue is summed from the household liabilities themselves. The
# previous formula used `total_was_land`, which has been divided by 1e9, so
# the dividend came out a billion times too small relative to `lvt` and the
# reform was not revenue-neutral.
lvt_revenue = mdf.weighted_sum(frs, "lvt", "weight")
pop = mdf.weighted_sum(frs, "people", "weight")
ubi = lvt_revenue / pop  # Dividend per person.

# Household dividend scales with the number of people in the household.
frs["ubi"] = ubi * frs.people
frs["new_net_income"] = frs.net_income - frs.lvt + frs.ubi

In [35]:
# NOTE(review): as the recorded output shows, this call fails with a TypeError
# because `poverty_rate` requires `income` and `threshold` arguments. `frs` has
# income columns (e.g. `net_income`, `new_net_income`) but no threshold column
# is visible in this notebook — one needs to be added before this cell can run.
frs.poverty_rate()

TypeError: poverty_rate() missing 2 required positional arguments: 'income' and 'threshold'

In [None]:
# Re-split `was` into train/test. The original call referenced `df`, which is
# not defined anywhere in the visible notebook and raises NameError on a fresh
# run. The split is seeded so it is reproducible.
# NOTE(review): `was` is assumed to be the intended frame — confirm, or delete
# this cell if it is a leftover.
train, test = sklearn.model_selection.train_test_split(was, random_state=0)