In [None]:
import sys, os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import statsmodels.api as sm

from sklearn.linear_model import LogisticRegression

from sklearn.ensemble import RandomForestRegressor

In [None]:
# Seed NumPy's global random number generator so repeated runs draw the same numbers.
seed_value = 1
np.random.seed(seed=seed_value)

In [None]:
# Load IPython's autoreload extension and set it to mode 2, so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Change into the project checkout, then import the model and experiment classes
# from the `causal_inference` package.
# NOTE(review): presumably the chdir is what makes the package importable - confirm.
# The absolute path is machine-specific.
os.chdir('/home/adam/adam/causal_inference')
from causal_inference.model.ols import OLS
from causal_inference.model.propensity import PropensityScore
from causal_inference.model.blocking import Blocking
from causal_inference.experiments.run import Experiment

In [None]:
# Move into the dataset directory; the .npz archives below are opened relative to it.
os.chdir('/home/adam/adam/cfrnet/data/')

# Training split: pull the 'yf', 't' and 'x' arrays out of the archive.
train_data = np.load('bfpguerin_2_8.train.npz')
y_train = train_data['yf']
t_train = train_data['t']
X_train = train_data['x']

# Test split: the same three arrays.
test_data = np.load('bfpguerin_2_8.test.npz')
y_test = test_data['yf']
t_test = test_data['t']
X_test = test_data['x']

In [None]:
# Value passed as `n_of_experiments` when constructing the Experiment.
N_OF_EXPERIMENTS = 5

In [None]:
# Build the OLS model once and hand that same instance to the experiment.
# Previously `model` was created but never used, and a second OLS() was passed instead.
model = OLS()

# Run the experiment on the train/test arrays loaded earlier and keep what `run` returns.
ols = Experiment(model=model, n_of_experiments=N_OF_EXPERIMENTS).run(
    y_train=y_train,
    t_train=t_train,
    X_train=X_train,
    y_test=y_test,
    t_test=t_test,
    X_test=X_test,
)

In [None]:
# Show the `results` attribute of the value returned by Experiment(...).run(...).
ols.results

In [None]:
# Display the value returned by Experiment(...).run(...) itself.
ols

In [None]:
# Switch the working directory to the results folder; the relative CSV/PNG file names
# used in the following cells resolve against it. The last expression echoes the new cwd.
os.chdir('/home/adam/adam/data/results_fix/')
os.getcwd()


#### RAW

Check the raw difference on the training data.

In [None]:
# Run the experiment with method='raw' on the train/test arrays.
# Fix: the repetition count is defined as N_OF_EXPERIMENTS; the lower-case
# `n_of_experiments` is never assigned in the visible cells and raises NameError
# on a fresh kernel.
# NOTE(review): `run_experiment_ols` is not defined or imported in the visible cells -
# confirm where it comes from.
results_ols = run_experiment_ols(
    y_train=train_data['yf'], y_test=test_data['yf'],
    t_train=train_data['t'], t_test=test_data['t'],
    X_train=train_data['x'], X_test=test_data['x'],
    n_of_experiments=N_OF_EXPERIMENTS,
    method='raw',
)

In [None]:
# Display the value returned by the method='raw' run.
results_ols

In [None]:
# Display the summary of the method='raw' results.
# NOTE(review): `results_summary` is not defined in the visible cells - confirm its origin.
results_summary(results_ols)

#### OLS

In [None]:
# Run the experiment with method='ols' on the train/test arrays.
# Fix: the repetition count is defined as N_OF_EXPERIMENTS; the lower-case
# `n_of_experiments` is never assigned in the visible cells and raises NameError
# on a fresh kernel.
# NOTE(review): `run_experiment_ols` is not defined or imported in the visible cells -
# confirm where it comes from.
results_ols = run_experiment_ols(
    y_train=train_data['yf'], y_test=test_data['yf'],
    t_train=train_data['t'], t_test=test_data['t'],
    X_train=train_data['x'], X_test=test_data['x'],
    n_of_experiments=N_OF_EXPERIMENTS,
    method='ols',
)


In [None]:
# Write the OLS results to a comma-separated file, formatting values to two decimals.
np.savetxt("results_ols_pf_ratio_2h_8h_manual_outcome.csv", results_ols, delimiter=",", fmt='%1.2f')

In [None]:
# Read the saved OLS results back (the file has no header row) and label the columns.
results_ols = (
    pd.read_csv('results_ols_pf_ratio_2h_8h_manual_outcome.csv', header=None)
    .set_axis(['ate', 'rmse', 'r2'], axis='columns')
)

In [None]:
# Summarise the reloaded OLS results and print the summary.
# NOTE(review): `results_summary` is not defined in the visible cells - confirm its origin.
summary_ols = results_summary(results_ols)
print(summary_ols)
#np.savetxt("summary_ols_pf_ratio_2h_8h_manual_outcome.csv", summary_ols, delimiter=",", fmt='%1.2f')


#### IPW


In [None]:
# Run the experiment with method='ipw' on the train/test arrays.
# Fix: the repetition count is defined as N_OF_EXPERIMENTS; the lower-case
# `n_of_experiments` is never assigned in the visible cells and raises NameError
# on a fresh kernel.
# NOTE(review): `run_experiment_ols` is not defined or imported in the visible cells -
# confirm where it comes from.
results_ipw = run_experiment_ols(
    y_train=train_data['yf'], y_test=test_data['yf'],
    t_train=train_data['t'], t_test=test_data['t'],
    X_train=train_data['x'], X_test=test_data['x'],
    n_of_experiments=N_OF_EXPERIMENTS,
    method='ipw',
)

# NOTE(review): the save is commented out, so the read_csv that follows loads whatever
# file is already on disk rather than the results computed here - confirm this is intended.
#np.savetxt("results_ipw_pf_ratio_2h_8h_manual_outcome.csv", results_ipw, delimiter=",", fmt='%1.2f')

In [None]:
# Read the saved IPW results (the file has no header row) and label the columns.
results_ipw = (
    pd.read_csv('results_ipw_pf_ratio_2h_8h_manual_outcome.csv', header=None)
    .set_axis(['ate', 'rmse', 'r2'], axis='columns')
)

In [None]:
# Summarise the IPW results loaded from CSV and print the summary.
# NOTE(review): `results_summary` is not defined in the visible cells - confirm its origin.
summary_ipw = results_summary(results_ipw)
print(summary_ipw)
#np.savetxt("summary_ipw_pf_ratio_2h_8h_manual_outcome.csv", summary_ipw, delimiter=",", fmt='%1.2f')

#### Stratification

In [None]:
# Run the experiment with method='stratify' on the train/test arrays.
# Fix: the repetition count is defined as N_OF_EXPERIMENTS; the lower-case
# `n_of_experiments` is never assigned in the visible cells and raises NameError
# on a fresh kernel.
# NOTE(review): `run_experiment_ols` is not defined or imported in the visible cells -
# confirm where it comes from.
results_stratify = run_experiment_ols(
    y_train=train_data['yf'], y_test=test_data['yf'],
    t_train=train_data['t'], t_test=test_data['t'],
    X_train=train_data['x'], X_test=test_data['x'],
    n_of_experiments=N_OF_EXPERIMENTS,
    method='stratify',
)

# NOTE(review): the save is commented out, so the read_csv that follows loads whatever
# file is already on disk rather than the results computed here - confirm this is intended.
#np.savetxt("results_stratify_pf_ratio_2h_8h_manual_outcome.csv", results_stratify, delimiter=",", fmt='%1.2f')

In [None]:
# Read the saved stratification results (the file has no header row) and label the columns.
results_stratify = (
    pd.read_csv('results_stratify_pf_ratio_2h_8h_manual_outcome.csv', header=None)
    .set_axis(['ate', 'rmse', 'r2'], axis='columns')
)

In [None]:
# Summarise the stratification results loaded from CSV and print the summary.
# NOTE(review): `results_summary` is not defined in the visible cells - confirm its origin.
summary_stratify = results_summary(results_stratify)
print(summary_stratify)
#np.savetxt("summary_stratify_pf_ratio_2h_8h_manual_outcome.csv", summary_stratify, delimiter=",", fmt='%1.2f')


#### 2-OLS

In [None]:
# Run the experiment with method='2-ols' on the train/test arrays.
# Fix: the repetition count is defined as N_OF_EXPERIMENTS; the lower-case
# `n_of_experiments` is never assigned in the visible cells and raises NameError
# on a fresh kernel.
# NOTE(review): `run_experiment_ols` is not defined or imported in the visible cells -
# confirm where it comes from.
results_2ols = run_experiment_ols(
    y_train=train_data['yf'], y_test=test_data['yf'],
    t_train=train_data['t'], t_test=test_data['t'],
    X_train=train_data['x'], X_test=test_data['x'],
    n_of_experiments=N_OF_EXPERIMENTS,
    method='2-ols',
)

# Persist the results as comma-separated values formatted to two decimals.
np.savetxt("results_2ols_pf_ratio_2h_8h_manual_outcome.csv", results_2ols, delimiter=",", fmt='%1.2f')

In [None]:
# Summarise the 2-OLS results, print the summary, and write it to CSV (two decimals).
# NOTE(review): `results_summary` is not defined in the visible cells - confirm its origin.
summary_2ols = results_summary(results_2ols)
print(summary_2ols)
np.savetxt("summary_2ols_pf_ratio_2h_8h_manual_outcome.csv", summary_2ols, delimiter=",", fmt='%1.2f')

#### RF

In [None]:
# Run the experiment with method='rf' on the train/test arrays.
# Fix: the repetition count is defined as N_OF_EXPERIMENTS; the lower-case
# `n_of_experiments` is never assigned in the visible cells and raises NameError
# on a fresh kernel.
# NOTE(review): `run_experiment_ols` is not defined or imported in the visible cells -
# confirm where it comes from.
results_rf = run_experiment_ols(
    y_train=train_data['yf'], y_test=test_data['yf'],
    t_train=train_data['t'], t_test=test_data['t'],
    X_train=train_data['x'], X_test=test_data['x'],
    n_of_experiments=N_OF_EXPERIMENTS,
    method='rf',
)

# Persist the results as comma-separated values formatted to two decimals.
np.savetxt("results_rf_pf_ratio_2h_8h_manual_outcome.csv", results_rf, delimiter=",", fmt='%1.2f')

In [None]:
# Summarise the RF results, print the summary, and write it to CSV (two decimals).
# NOTE(review): `results_summary` is not defined in the visible cells - confirm its origin.
summary_rf = results_summary(results_rf)
print(summary_rf)
np.savetxt("summary_rf_pf_ratio_2h_8h_manual_outcome.csv", summary_rf, delimiter=",", fmt='%1.2f')

## Saving the propensity score plot

In [None]:
# Target file name handed to save_propensity_plot (relative, so it resolves against the cwd).
path = 'pscore_2_8_outcome.png'
# NOTE(review): `save_propensity_plot` is not defined or imported in the visible cells -
# confirm where it comes from.
save_propensity_plot(t=train_data['t'], X=train_data['x'], path=path)

## Additional

In [None]:
import numpy as np

from scipy.stats import uniform, randint

from sklearn.datasets import load_breast_cancer, load_diabetes, load_wine
from sklearn.metrics import auc, accuracy_score, confusion_matrix, mean_squared_error
from sklearn.model_selection import cross_val_score, GridSearchCV, KFold, RandomizedSearchCV, train_test_split

import xgboost as xgb

import numpy as np
from sklearn.model_selection import train_test_split

In [None]:
# Fit an XGBoost regressor on the training split and predict on the test split.
xgb_model = xgb.XGBRegressor(objective="reg:squarederror", random_state=42)
xgb_model.fit(X_train, y_train)
y_pred = xgb_model.predict(X_test)

# RMSE computed as the square root of the MSE. The `squared=False` keyword of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6, so
# this form works on both old and new versions.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(rmse)

In [None]:
from sklearn.metrics import r2_score

# R^2 of the XGBoost predictions against the test outcomes.
# NOTE(review): the variable name is misspelled ("dermination"); it is left unchanged
# in case other cells reference it.
coefficient_of_dermination = r2_score(y_true=y_test, y_pred=y_pred)
print(coefficient_of_dermination)


### Saving results for CfR

In [None]:
# Switch the working directory to the 'results' folder (a different folder from the
# 'results_fix' one used earlier); the CSV file names read next resolve against it.
# The last expression echoes the new cwd.
os.chdir('/home/adam/adam/data/results/')
os.getcwd()

In [None]:
# Load the stored TARNet and CFR result files; neither has a header row.
ate_tarnet = pd.read_csv('results_tarnet_pf_ratio_2h_8h_manual_outcome.csv', header=None)
ate_cfr = pd.read_csv('results_cfr_pf_ratio_2h_8h_manual_outcome.csv', header=None)

In [None]:
# Give both result frames the same column labels (CFR first, then TARNet).
ate_cfr.columns = ate_tarnet.columns = ['ate', 'rmse', 'r2']

In [None]:
# Display the summary of the CFR results.
# NOTE(review): `results_summary` is not defined in the visible cells - confirm its origin.
results_summary(ate_cfr)

In [None]:
# Display the summary of the TARNet results.
# NOTE(review): `results_summary` is not defined in the visible cells - confirm its origin.
results_summary(ate_tarnet)