In [13]:
from causallib.datasets import load_nhefs
from causallib.estimation import IPW
from causallib.evaluation import evaluate
from sklearn.linear_model import LogisticRegression
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [14]:
data = {
    'X': ['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C', 'C'],
    'Y': ['1', '1', '1', '2', '2', '2', '3', '3', '3'],
    'Ri': ['1', '0', '0', '1', '1', '1', '1', '1', '0'],
}

In [15]:
df = pd.DataFrame(data) 

In [16]:
df["X"] = df["X"].astype('category')
df["X"] = df["X"].cat.codes
df['Y'] = pd.to_numeric(df['Y'])
df['Ri'] = pd.to_numeric(df['Ri'])


In [17]:
Cov = ['X']

In [18]:
learner =  LogisticRegression(penalty="l2", C=1e10, max_iter=5000, solver='liblinear')
clip_min = 0.2
clip_max = 0.8
ipw = IPW(learner, clip_min=clip_min, clip_max=clip_max, use_stabilized=False)
ipw.fit(df[['X']], df['Ri'])

In [19]:
# Compute the weights
weights = ipw.compute_weights(df[['X']],df['Ri'])

# Add the weights to the dataframe
df['weights'] = weights

# Fit a linear model to the observed outcome measurements using weighted least squares regression with weights as they are.
model_ipw = sm.WLS(df[df['Ri'] == 1]['Y'], np.ones(sum(df['Ri'] == 1)), weights=df[df['Ri'] == 1]['weights'])

# Fit the model
results_ipw = model_ipw.fit()

# The only estimated coefficient is the intercept, which is the estimated mean response
mean_response_ipw = results_ipw.params[0]
print("Estimated mean response (l^IPW):", mean_response_ipw)



Estimated mean response (l^IPW): 2.052523213357489


In [20]:
results_ipw.summary()

  warn("omni_normtest is not valid with less than 8 observations; %i "


0,1,2,3
Dep. Variable:,Y,R-squared:,0.0
Model:,WLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,
Date:,"Fri, 21 Jul 2023",Prob (F-statistic):,
Time:,16:22:33,Log-Likelihood:,-6.5031
No. Observations:,6,AIC:,15.01
Df Residuals:,5,BIC:,14.8
Df Model:,0,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,2.0525,0.318,6.461,0.001,1.236,2.869

0,1,2,3
Omnibus:,,Durbin-Watson:,0.738
Prob(Omnibus):,,Jarque-Bera (JB):,0.347
Skew:,-0.506,Prob(JB):,0.841
Kurtosis:,2.397,Cond. No.,1.0


In [8]:
df

Unnamed: 0,X,Y,Ri,weights
0,0,1,1,2.030947
1,0,1,0,1.969982
2,0,1,0,1.969982
3,1,2,1,1.466481
4,1,2,1,1.466481
5,1,2,1,1.466481
6,2,3,1,1.25
7,2,3,1,1.25
8,2,3,0,5.0


In [9]:
# # Impute missing values
# model_ipw.predict(sm.add_constant(df.loc[df['Ri'] == 0, 'X']))

In [10]:
# Fit the model on observed data
# model = sm.WLS(df.loc[df['Ri'] == 1, 'Y'], sm.add_constant(df.loc[df['Ri'] == 1, 'X']), 
#                weights=df.loc[df['Ri'] == 1, 'weights']).fit()

# Predict the missing Y values
predicted_Y = model_ipw.predict(sm.add_constant(df.loc[df['Ri'] == 0, 'X']))

ValueError: shapes (6,1) and (3,2) not aligned: 1 (dim 1) != 3 (dim 0)

In [None]:
predicted_Y

In [None]:
df.loc[df['Ri'] == 1, 'Y']

In [None]:
sm.add_constant(df.loc[df['Ri'] == 1, 'X'])

In [11]:
sm.add_constant(df.loc[df['Ri'] == 0, 'X'])

Unnamed: 0,const,X
1,1.0,0
2,1.0,0
8,1.0,2


In [13]:
df[df['Ri'] == 1]['Y'] 

0    1
3    2
4    2
5    2
6    3
7    3
Name: Y, dtype: int64