In [None]:
import sys

# Make the repository root importable so the `src` package resolves when this
# notebook is run from its own directory.
sys.path.append("../")

from src.d02_intermediate.simulation_preprocessing import SimulationPreprocessing

# `frl_key` is shared by the preprocessing object and every classifier below.
frl_key = "tk5"
# Preprocessing helper for period "1819"; all later cells operate on `sp`.
sp = SimulationPreprocessing(period="1819", frl_key=frl_key)

# Generate Individual Focal Students

The individual focal students for each block group are going to be generated at random. We are going to use the block groups defined by SFUSD. The available data allows us to get rough estimates of the following probabilities for each block group:

- Probability a student is AALPI: $P(AALPI)$
- Probability a student is FRL: $P(FRL)$
- Probability a student is both AALPI and FRL: $P(AALPI \cap FRL)$
- Probability a student is FRL conditional on the student being AALPI: $$P(FRL \mid AALPI) = \frac{P(AALPI \cap FRL)}{P(AALPI)}$$
- Probability a student is FRL conditional on the student not being AALPI: $$P(FRL \mid \overline{AALPI}) = \frac{P(FRL)-P(AALPI \cap FRL)}{1-P(AALPI)}$$

Since some blocks have a small number of students, we are going to use the posterior mean of the parameter of a Bernoulli distribution with a uniform prior as our probability estimate. Let $\theta$ be the parameter of the Bernoulli distribution, $m$ the count of interest, and $n$ the total count; we can then get our probability estimates as:

$$\hat{\theta} = \mathbb{E}\left[\theta\mid m,n\right]=\frac{m+1}{n+2}$$

For example, we can estimate the probability of a student being AALPI as

$$P(AALPI)=\frac{\textit{counts of AALPI}+1}{\textit{total counts}+2}$$

## Step 1: Compute new columns

In [None]:
from src.d04_modeling.ctip_classifier import CtipClassifier

# Register the 'ctip1' equity tiebreaker: a CtipClassifier with positive group
# 'nBoth', applied with params=0.04.
# NOTE(review): the variable name `fpr` suggests params is a false-positive
# rate — confirm against add_equity_tiebreaker.
tiebreaker = "ctip1"
fpr = 0.04

sp.add_frl_labels()
model = CtipClassifier(frl_key=frl_key, positive_group="nBoth")
sp.add_equity_tiebreaker(model, tiebreaker=tiebreaker, params=fpr)

# Print the classifier's ROC output for inspection.
print(model.get_roc())

In [None]:
from src.d04_modeling.naive_classifier import NaiveClassifier

# Register the 'special014' equity tiebreaker: a NaiveClassifier with positive
# group 'nAAFRL' and proportion=True, applied with params=0.14.
tiebreaker = "special014"
fpr = 0.14

sp.add_frl_labels()
model = NaiveClassifier(
    frl_key=frl_key,
    positive_group="nAAFRL",
    proportion=True,
)
sp.add_equity_tiebreaker(model, tiebreaker=tiebreaker, params=fpr)

In [None]:
from src.d04_modeling.naive_classifier import NaiveClassifier

# Register the 'naive004' equity tiebreaker: a NaiveClassifier with positive
# group 'nBoth' and proportion=True, applied with params=0.04.
tiebreaker = "naive004"
fpr = 0.04

sp.add_frl_labels()
model = NaiveClassifier(
    frl_key=frl_key,
    positive_group="nBoth",
    proportion=True,
)
sp.add_equity_tiebreaker(model, tiebreaker=tiebreaker, params=fpr)

In [None]:
from src.d04_modeling.naive_classifier import NaiveClassifier

# Register the 'naive016' equity tiebreaker: same NaiveClassifier setup as
# used for positive group 'nBoth', but applied with params=0.16.
tiebreaker = "naive016"
fpr = 0.16

sp.add_frl_labels()
model = NaiveClassifier(
    frl_key=frl_key,
    positive_group="nBoth",
    proportion=True,
)
sp.add_equity_tiebreaker(model, tiebreaker=tiebreaker, params=fpr)

In [None]:
from src.d04_modeling.knapsack_classifier import KnapsackClassifier

# Register the 'knapsack008' equity tiebreaker: a KnapsackClassifier with
# positive group 'nFocal', applied with params=0.08.
tiebreaker = "knapsack008"
fpr = 0.08
positive_group = "nFocal"

sp.add_frl_labels()
model = KnapsackClassifier(
    frl_key=frl_key,
    positive_group=positive_group,
    # load=True with a "<frl_key>_<positive_group>.pkl" run name; presumably
    # this reuses a previously saved run instead of re-solving — confirm.
    load=True,
    run_name="%s_%s.pkl" % (frl_key, positive_group),
)
sp.add_equity_tiebreaker(model, tiebreaker=tiebreaker, params=fpr)

In [None]:
from src.d04_modeling.knapsack_classifier import KnapsackClassifier

# Register the 'knapsack014' equity tiebreaker: a KnapsackClassifier with
# positive group 'nBoth', applied with params=0.14.
tiebreaker = "knapsack014"
fpr = 0.14
positive_group = "nBoth"

sp.add_frl_labels()
model = KnapsackClassifier(
    frl_key=frl_key,
    positive_group=positive_group,
    # load=True with a "<frl_key>_<positive_group>.pkl" run name; presumably
    # this reuses a previously saved run instead of re-solving — confirm.
    load=True,
    run_name="%s_%s.pkl" % (frl_key, positive_group),
)
sp.add_equity_tiebreaker(model, tiebreaker=tiebreaker, params=fpr)

In [None]:
from src.d04_modeling.propositional_classifier import andClassifier

# Register the 'pc1020_050' equity tiebreaker: an andClassifier over the
# "pctFocal" and "BG_pctFocal" features, one parameter per feature.
tiebreaker = "pc1020_050"
params = [0.2, 0.5]

sp.add_frl_labels()
pc1 = andClassifier(
    ["pctFocal", "BG_pctFocal"],
    frl_key=frl_key,
    group_criterion="nbhd",
)
sp.add_equity_tiebreaker(pc1, tiebreaker=tiebreaker, params=params)

In [None]:
from src.d04_modeling.propositional_classifier import andClassifier

# Register the 'pc2025_040' equity tiebreaker: an andClassifier over the
# "pctBoth" and "BG_pctBoth" features with positive group 'nBoth'.
tiebreaker = "pc2025_040"
params = [0.25, 0.4]
positive_group = "nBoth"

sp.add_frl_labels()
pc2 = andClassifier(
    ["pctBoth", "BG_pctBoth"],
    frl_key=frl_key,
    positive_group=positive_group,
    group_criterion="nbhd",
)
sp.add_equity_tiebreaker(pc2, tiebreaker=tiebreaker, params=params)

# Print the ROC output for this single parameter setting.
print(pc2.get_roc([params]))

In [None]:
from src.d04_modeling.propositional_classifier import andClassifier, orClassifier

# Register the 'pc3_035' equity tiebreaker: an andClassifier on "pctBoth",
# combined with an orClassifier over "Housing" / "Redline" passed as the
# eligibility classifier.
# NOTE(review): the label suffix "035" does not match params = 0.30, whereas
# every other cell's label matches its parameter. Confirm which one is intended
# before relying on this column.
tiebreaker = "pc3_035"
params = 0.30
positive_group = "nBoth"

sp.add_frl_labels()
eligibility_classifier = orClassifier(["Housing", "Redline"], binary_var=[0, 1])
pc3 = andClassifier(
    ["pctBoth"],
    frl_key=frl_key,
    positive_group=positive_group,
    eligibility_classifier=eligibility_classifier,
)
sp.add_equity_tiebreaker(pc3, tiebreaker=tiebreaker, params=params)

# Print the ROC output for this parameter, then draw the classifier's map.
print(pc3.get_roc([params]))
pc3.plot_map(params=params)

In [None]:
from src.d04_modeling.propositional_classifier import andClassifier, orClassifier

# Register the 'pc3_060' equity tiebreaker: an andClassifier on "pctBoth",
# combined with an orClassifier over "Housing" / "Redline" passed as the
# eligibility classifier, applied with params=0.60.
tiebreaker = "pc3_060"
params = 0.60
positive_group = "nBoth"

sp.add_frl_labels()
eligibility_classifier = orClassifier(["Housing", "Redline"], binary_var=[0, 1])
pc3 = andClassifier(
    ["pctBoth"],
    frl_key=frl_key,
    positive_group=positive_group,
    eligibility_classifier=eligibility_classifier,
)
sp.add_equity_tiebreaker(pc3, tiebreaker=tiebreaker, params=params)

# Print the ROC output for this parameter setting.
print(pc3.get_roc([params]))

## Step 2: Update student data

In [None]:
# Turn on the recalculate flag on `sp` before updating the student data.
# NOTE(review): presumably this forces a recompute instead of reusing cached
# results — confirm in SimulationPreprocessing.
sp.set_recalculate(True)

In [None]:
# `tiebreaker` holds the value assigned by whichever Step 1 cell ran last, so
# the result depends on cell execution order.
student_out = sp.update_student_data(tiebreaker)

## Step 3: Save student data

In [None]:
# Save the student data returned by update_student_data.
sp.save_student_data(student_out)