# Inverse Probability Weighting Model (scikit-learn score adapted)
Inverse probability weighting is a basic model to obtain average effect estimation.

It calculates the probability of each sample to belong to its group,   
and use its inverse as the weight of that sample:
$$
w_i = \frac{1}{\Pr[A=a_i | X_i]}
$$

In [101]:
%matplotlib inline
from causallib.datasets import load_nhefs
from causallib.estimation import IPW
from causallib.evaluation import PropensityEvaluator
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.model_selection import cross_val_score,GridSearchCV
import pandas as pd


In order to use built-in scoring from scikit-learn, we need to either supply a "scoring" function or implement a "score" method. For now, we are doing it with a score method.

In [109]:
class ScoredIPW(IPW):
    def fit(self, dataX,Y):
        X = dataX.copy()
        a = X.pop("a")
        return super().fit(X,a)
        
    def score(self,dataX,Y,metrics=None):
        from sklearn import metrics
        if metrics is None:
            metrics = {"roc_auc": metrics.roc_auc_score,
           "avg_precision": metrics.average_precision_score,}
        
        X = dataX.copy()
        a = X.pop("a")
        w = self.compute_weights(X,a)
        yhat,oneminusyhat = self.learner.predict_proba(X).T
        #we'll use negative roc_auc because lower is better
        score = -metrics.roc_auc_score(a,yhat,sample_weight=w)    
        return score

    def get_params(self, deep=True):
        return {"use_stabilized": self.use_stabilized, "truncate_eps": self.truncate_eps, "learner":self.learner}

    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self
        

#### Data:
The effect of quitting to smoke on weight loss.  
Data example is taken from [Hernan and Robins Causal Inference Book](https://www.hsph.harvard.edu/miguel-hernan/causal-inference-book/)

In [7]:
data = load_nhefs()
data.X.join(data.a).join(data.y).head()

Unnamed: 0,age,race,sex,smokeintensity,smokeyrs,wt71,active_1,active_2,education_2,education_3,education_4,education_5,exercise_1,exercise_2,age^2,wt71^2,smokeintensity^2,smokeyrs^2,qsmk,wt82_71
0,42,1,0,30,29,79.04,0,0,0,0,0,0,0,1,1764,6247.3216,900,841,0,-10.09396
1,36,0,0,20,24,58.63,0,0,1,0,0,0,0,0,1296,3437.4769,400,576,0,2.60497
2,56,1,1,20,26,56.81,0,0,1,0,0,0,0,1,3136,3227.3761,400,676,0,9.414486
3,68,1,0,3,53,59.42,1,0,0,0,0,0,0,1,4624,3530.7364,9,2809,0,4.990117
4,40,0,0,20,19,87.09,1,0,1,0,0,0,1,0,1600,7584.6681,400,361,0,4.989251


To fit the sklearn api we need X and Y for fit. Our model uses X, Y and a. For this version we put a inside X. Now `cross_val_score` works out of the box.

In [110]:

ipw = ScoredIPW(LogisticRegression(solver="liblinear"))

dataX = data.X.copy()
dataX["a"] = data.a

ipw.fit(dataX,data.y)

cross_val_score(ipw,dataX,data.y,cv=5)


array([-0.48442185, -0.48038368, -0.50390762, -0.5555427 , -0.5734839 ])

At this point we can go for a long walk in the world of sklearn classifiers with `GridSearchCV`. Only those with a `predict_proba` method are usable. 

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

h = .02  # step size in the mesh

names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
         "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
         "Naive Bayes", "QDA"]

classifiers = [
    #KNeighborsClassifier(3),
    #SVC(kernel="linear", C=0.025,probability=True),
    #SVC(gamma=2, C=1,probability=True),
    GaussianProcessClassifier(1.0 * RBF(1.0)),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1, max_iter=1000),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis(),
    LogisticRegression(solver="liblinear"),
    LogisticRegression()
]
classifiers = [
    LogisticRegression(solver="liblinear"),
    LogisticRegression(),
    LogisticRegression(penalty="l1", C=0.01, max_iter=500, solver='liblinear'),
    MLPClassifier(alpha=1, max_iter=1000),
    #SVC(kernel="linear", C=0.025,probability=True),
    
]
gscv = GridSearchCV(ScoredIPW(learner=LogisticRegression(solver="liblinear")),param_grid={"learner":classifiers[:4],"use_stabilized":[True,False]})
search = gscv.fit(dataX,data.y)

In [117]:
pd.DataFrame(search.cv_results_,).set_index(["param_learner","param_use_stabilized"]).sort_values("rank_test_score")

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
param_learner,param_use_stabilized,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
"LogisticRegression(C=0.01, max_iter=500, penalty='l1', solver='liblinear')",True,0.027884,0.012213,0.006433,0.002875,"{'learner': LogisticRegression(C=0.01, max_ite...",-0.471578,-0.506176,-0.503257,-0.528899,-0.522056,-0.506393,0.019869,1
"LogisticRegression(C=0.01, max_iter=500, penalty='l1', solver='liblinear')",False,0.01966,0.000908,0.004275,0.00052,"{'learner': LogisticRegression(C=0.01, max_ite...",-0.47162,-0.50617,-0.50359,-0.528619,-0.522088,-0.506418,0.019786,2
LogisticRegression(),True,0.2064,0.05208,0.012949,0.000552,"{'learner': LogisticRegression(), 'use_stabili...",-0.466436,-0.495251,-0.532969,-0.529717,-0.53968,-0.512811,0.027832,3
LogisticRegression(),False,0.298289,0.088396,0.011026,0.000842,"{'learner': LogisticRegression(), 'use_stabili...",-0.466436,-0.495251,-0.532969,-0.529717,-0.53968,-0.512811,0.027832,3
LogisticRegression(solver='liblinear'),False,0.010276,0.000779,0.003908,0.000209,{'learner': LogisticRegression(solver='libline...,-0.484422,-0.480384,-0.503908,-0.555543,-0.573484,-0.519548,0.037992,5
LogisticRegression(solver='liblinear'),True,0.011518,0.000995,0.004789,0.000264,{'learner': LogisticRegression(solver='libline...,-0.484422,-0.480384,-0.503908,-0.555543,-0.573484,-0.519548,0.037992,6
"MLPClassifier(alpha=1, max_iter=1000)",True,1.398105,0.381477,0.017522,0.004077,"{'learner': MLPClassifier(alpha=1, max_iter=10...",-0.989326,-1.0,-1.0,-0.999977,-1.0,-0.997861,0.004267,7
"MLPClassifier(alpha=1, max_iter=1000)",False,1.635659,0.117899,0.020842,0.005405,"{'learner': MLPClassifier(alpha=1, max_iter=10...",-0.999998,-1.0,-1.0,,-0.99999,,,8
