After submitting their applications, the students were awarded some amount of money, depending on the `University Rating` and `Chance of Admit`.

$$ \textrm{Award} = \textrm{Chance of Admit} * \textrm{multiplier}_{\textrm{University Rating}}$$

However, some information has been lost:
- `Chance of Admit` for each student
- Multipliers

Fortunately, we have information about the award amounts (`Award`) and the students' data (`GRE Score`, `TOEFL Score`, `University Rating`, `SOP`, `LOR`, `CGPA`, `Research`, `Chance of Admit`) from admissions.

Build a Bayesian model (possibly linear) for predicting `Chance of Admit`, use it inside a model that predicts `Award`, and train both simultaneously using `x_train` and `y_train_award`.

Evaluate the obtained models on:
- `x_test` and `y_test_award` with the stacked model
- `x_test` and `y_test_chance` with the "internal" model

In [None]:
%load_ext autoreload
%autoreload 2

import pickle
import pyro
from pyro.nn import PyroModule, PyroSample
from pyro.infer import SVI, Trace_ELBO, TraceGraph_ELBO
import pyro.distributions as dist
from torch import nn
import torch
from torch.distributions import constraints
from pyro.optim import SGD, Adam
from matplotlib import pyplot as plt

In [None]:
import sys
# Put the current working directory on the module search path; the `src.*`
# imports in the next cell are presumably resolved through it (this assumes
# the notebook is started from the project root -- TODO confirm).
sys.path.append('./')

In [None]:
from src.utils import run_svi, SviPredictive, summary
from src.visualization import plot_predictions

In [None]:
# Read the pickled dataset and expose each entry under its own name.
# NOTE: unpickling executes arbitrary code -- only load files you trust.
with open('./data/stacked-regression.pkl', 'rb') as f:
    data = pickle.load(f)

# Training split: features plus the two targets (award / chance of admit).
x_train, y_train_award, y_train_chance = (
    data[key] for key in ('x_train', 'y_train_award', 'y_train_chance')
)
# Test split, same layout as the training split.
x_test, y_test_award, y_test_chance = (
    data[key] for key in ('x_test', 'y_test_award', 'y_test_chance')
)

In [None]:
class BayesianLinearRegression(PyroModule):
    """Skeleton of the "internal" Bayesian model for `Chance of Admit`.

    Only the constructor does any work; `model` and `guide` are placeholders
    that return ``None`` until they are implemented.

    The evaluation code in this notebook asks `model`/`guide` for the sites
    ``'chance_off_admit-obs'`` and ``'_RETURN'`` and compares the predictions
    with the logit of the true chance, so an implementation is expected to
    expose an observation site with that name on the logit scale.
    """

    def __init__(self, n_input):
        """Store `n_input` (presumably the number of feature columns in `x`)."""
        super().__init__()
        self.n_input = n_input

    def model(self, x, y=None):
        """Placeholder for the generative model of `y` given `x`."""
        # TODO: not implemented yet.
        return None

    def forward(self, *args, **kwargs):
        """Forward every argument to `model` and hand back its result."""
        output = self.model(*args, **kwargs)
        return output

    def guide(self, x, y=None):
        """Placeholder for the variational guide matching `model`."""
        # TODO: not implemented yet. Make sure that guide returns a value.
        return None

    
class StackedModel(nn.Module):
    """Skeleton of the stacked `Award` model built on a linear regression.

    The constructor wires up the pieces; `model` and `guide` are placeholders
    that return ``None`` until they are implemented.  The evaluation code in
    this notebook asks `model`/`guide` for the sites ``'stacked-obs'`` and
    ``'_RETURN'``, so an implementation is expected to expose an observation
    site named ``'stacked-obs'``.
    """

    def __init__(self, n_input, initial_multipliers, university_rating_column, university_ratings):
        """Create the sub-modules and keep the configuration.

        Args:
            n_input: forwarded to `BayesianLinearRegression`.
            initial_multipliers: stored as given; the notebook passes a dict
                keyed by university rating.
            university_rating_column: stored as given; presumably the index
                of the `University Rating` column in `x` -- TODO confirm.
            university_ratings: stored as given; the notebook passes
                ``range(1, 6)``.
        """
        super().__init__()

        # Plain configuration values (not registered as sub-modules).
        self.university_ratings = university_ratings
        self.university_rating_column = university_rating_column
        self.initial_multipliers = initial_multipliers

        # NOTE: despite its name, `logit` is a sigmoid, i.e. the inverse of
        # the logit function.
        self.logit = nn.Sigmoid()
        self.linear_regression = BayesianLinearRegression(n_input)

    def model(self, x, y=None):
        """Placeholder for the stacked generative model."""
        # TODO: not implemented yet. Use `model` from `self.linear_regression`.
        return None

    def forward(self, *args, **kwargs):
        """Forward every argument to `model` and hand back its result."""
        output = self.model(*args, **kwargs)
        return output

    def guide(self, x, y=None):
        """Placeholder for the variational guide matching `model`."""
        # TODO: not implemented yet. Use `guide` from `self.linear_regression`.
        return None

In [None]:
def _show_curve(values, title):
    """Draw `values` as a line plot titled `title` and display the figure."""
    plt.plot(values)
    plt.title(title)
    plt.show()


# Stacked model with one starting multiplier per university rating (1..5).
# Column 2 is presumably `University Rating` in `x` -- TODO confirm.
m = StackedModel(
    n_input=7,
    initial_multipliers={1: 90, 2: 54, 3: 32, 4: 101, 5: 56},
    university_rating_column=2,
    university_ratings=range(1, 6),
)

# Fit on the award target only, using the model's own guide.
elbos, params = run_svi(
    m, m.guide, x_train, y_train_award,
    verbose=True,
    optimizer=Adam({"lr": 0.0001}),
    num_iters=5_000,
    loss=TraceGraph_ELBO(),
)

# Dump whatever ended up in Pyro's parameter store, then show the ELBO curve.
print(dict(pyro.get_param_store()))
_show_curve(elbos, 'ELBO')

# One plot per recorded parameter: each entry of `params` is indexed by the
# parameter name and flattened, so every component becomes its own line.
for param in params[0].keys():
    y = [snapshot[param].detach().reshape(-1).numpy() for snapshot in params]
    _show_curve(y, param)

# Model evaluation

In [None]:
# Passed as `num_samples` to both `SviPredictive` instances created below.
num_samples = 1_000

## Chance of Admit Model Evaluation

In [None]:
# Test features and the true chance of admit mapped to the logit scale, so
# that it is comparable with the linear model's output.
data = dict(
    x=x_test,
    y=torch.log(y_test_chance / (1 - y_test_chance)),  # inverse of nn.Sigmoid()
)

# Plot settings: column 0 on the x-axis, points grouped by column 6
# (presumably `GRE Score` and `Research` respectively -- TODO confirm).
properties = dict(
    x=0,
    x_label="GRE Score",
    y_label="Chance of Admit",
    category=6,
    category_labels={0: "No exp. in research", 1: "Exp. in research"},
)

# Predictive built from the "internal" model and guide only.
chance_predictive = SviPredictive(
    m.linear_regression.model,
    guide=m.linear_regression.guide,
    num_samples=num_samples,
    return_sites=('chance_off_admit-obs', '_RETURN'),
)
predictors = {'SVI - Chance of Admit': chance_predictive}

plot_predictions(data, predictors, properties, obs_site_name='chance_off_admit-obs')

## Award Model Evaluation

In [None]:
# Test features and the observed award amounts.
data = dict(x=x_test, y=y_test_award)

# Plot settings: column 0 on the x-axis, points grouped by column 6
# (presumably `GRE Score` and `Research` respectively -- TODO confirm).
properties = dict(
    x=0,
    x_label="GRE Score",
    y_label="Award",
    category=6,
    category_labels={0: "No exp. in research", 1: "Exp. in research"},
)

# Predictive built from the full stacked model and its guide.
award_predictive = SviPredictive(
    m.model,
    guide=m.guide,
    num_samples=num_samples,
    return_sites=('stacked-obs', '_RETURN'),
)
predictors = {'SVI - Award': award_predictive}

plot_predictions(data, predictors, properties, obs_site_name='stacked-obs')