In [None]:
%reset

In [None]:
import pandas as pd
import numpy as np

import sys, os

import seaborn as sns
import matplotlib.pyplot as plt

from causalinference import CausalModel

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
from importlib import reload

from scipy.stats import wasserstein_distance
from scipy import stats

In [None]:
from numpy import vstack
from pandas import read_csv
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torch import Tensor
from torch.nn import Linear
from torch.nn import ReLU
from torch.nn import Sigmoid
from torch.nn import Module
from torch.optim import SGD
from torch.nn import BCELoss
from torch.nn.init import kaiming_uniform_
from torch.nn.init import xavier_uniform_

In [None]:
# Enable the autoreload extension; mode 2 re-imports edited modules before each cell runs,
# so changes to the project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Switch to the project checkout before importing from it
# (presumably so that the `causal_inference` package is importable — confirm).
# NOTE(review): absolute, machine-specific path.
os.chdir('/home/adam/adam/causal_inference')

# Data-handling helpers used by the pandas-based cells below.
from causal_inference.causal_data_handler.get_data import (
    process_data,
    get_training_indices,
    get_data,
    get_covariate_names,
    get_bootstrapped_experiments,
)

# Dataset wrapper used to build the PyTorch data loaders.
from causal_inference.model.cfr import UseCase

In [None]:
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
os.chdir('/home/adam/adam/data/19012021/')
# load the dataset
# NOTE(review): the outcome name passed here ('pf_ratio_2h_8h_outcome') differs from the
# OUTCOME constant used by the pandas cells further down ('pf_ratio_2h_8h_manual_outcome')
# — confirm which column is intended.
dataset = UseCase('data_guerin_rct.csv',
                  'pf_ratio_2h_8h_outcome',
                  'treated',
                  seed=1234)
# calculate split
train, test = dataset.get_splits()
# prepare data loaders
# batch_size=1 yields one row per step; only the training loader is shuffled.
train_dl = DataLoader(train, batch_size=1, shuffle=True)
test_dl = DataLoader(test, batch_size=1, shuffle=False)


In [None]:
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
os.chdir('/home/adam/adam/data/19012021/')
# Read the same CSV file name that is handed to UseCase, this time as a plain DataFrame.
df = pd.read_csv('data_guerin_rct.csv')
# Per-column dtype / non-null summary; max_cols=200 keeps the detailed listing
# as long as the frame has no more than 200 columns.
df.info(max_cols=200)

In [None]:
# Name of the outcome column used by the pandas-based cells below.
OUTCOME = 'pf_ratio_2h_8h_manual_outcome'
# NOTE(review): `df` is rebound to the processed frame, so re-running this cell passes
# already-processed data to process_data again — confirm that is harmless.
df = process_data(df=df, outcome=OUTCOME)
# Summary of the processed frame.
df.info()

In [None]:
# Selector for the training rows; its negation (~idx) selects the test rows.
idx = get_training_indices(df)

# Column roles shared by every helper call in this cell.
column_roles = dict(treatment_col='treated', outcome_col=OUTCOME)

# Outcome, treatment and covariate arrays for the training rows ...
y_train, t_train, X_train = get_data(df=df.loc[idx, :], **column_roles)

# ... and for the held-out rows.
y_test, t_test, X_test = get_data(df=df.loc[(~idx), :], **column_roles)

# Covariate names taken from the full frame.
covariates = get_covariate_names(df=df, **column_roles)

In [None]:
#t_train = t_train[..., np.newaxis]
#y_train = y_train[..., np.newaxis]
#X_train = X_train[..., np.newaxis]
#
#t_test = t_test[..., np.newaxis]
#y_test = y_test[..., np.newaxis]
#X_test = X_test[..., np.newaxis]

In [None]:
# Print the shape of every split array: treatment, outcome, covariates — train first, then test.
for split_array in (t_train, y_train, X_train, t_test, y_test, X_test):
    print(split_array.shape)

In [None]:
# Draw bootstrapped experiments for each split (n_of_experiments=1000); the results are
# indexed as [0], [1], [2] when they are saved below.
# NOTE(review): `train` / `test` were previously bound to the splits returned by
# dataset.get_splits(); they are rebound here to a different kind of object.
train = get_bootstrapped_experiments(y_train, t_train, X_train, n_of_experiments=1000, method='train')
test = get_bootstrapped_experiments(y_test, t_test, X_test, n_of_experiments=1000, method='test')

In [None]:
# NOTE(review): absolute, machine-specific output directory.
os.chdir('/home/adam/adam/cfrnet/data')

# Persist elements 0, 1 and 2 of each bootstrapped result under the keys yf, t and x.
np.savez('bguerin_2_8.train.npz', yf=train[0], t=train[1], x=train[2])
np.savez('bguerin_2_8.test.npz', yf=test[0], t=test[1], x=test[2])

In [None]:
# Shape of element 0 of the bootstrapped test result (the array saved as `yf`).
test[0].shape

In [None]:
# Shape of the (non-bootstrapped) training treatment array, for comparison.
t_train.shape

In [None]:
## Standard libraries
import os
import json
import math
import numpy as np

## Imports for plotting
import matplotlib.pyplot as plt
# Render matplotlib figures inline in the notebook.
%matplotlib inline
# NOTE(review): IPython.display.set_matplotlib_formats is, to my knowledge, deprecated in
# recent IPython releases in favour of the matplotlib_inline package — confirm before upgrading.
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('svg', 'pdf') # For export
import seaborn as sns
# Apply seaborn's default theme to subsequent plots.
sns.set()

## Progress bar
#from tqdm.notebook import tqdm

## PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import torch.optim as optim

In [None]:
class CfR(nn.Module):
    """Shared-representation network with separate treated / control outcome heads.

    Column 0 of every input row is read as the treatment indicator and is not
    fed to the network. The remaining ``input_dim - 1`` columns go through three
    ``Linear`` + ELU representation layers; the representation of each row is
    then passed through the head that matches its indicator.

    Args:
        input_dim: number of input columns *including* the treatment column.
    """

    def __init__(self, input_dim):
        super(CfR, self).__init__()
        # The treatment column is stripped in forward(), so layers are sized without it.
        input_dim = input_dim - 1
        self.representation_1 = nn.Linear(input_dim, input_dim)
        self.representation_2 = nn.Linear(input_dim, input_dim)
        self.representation_3 = nn.Linear(input_dim, input_dim)

        self.treated_1 = nn.Linear(input_dim, input_dim)
        self.treated_2 = nn.Linear(input_dim, input_dim)
        self.treated_3 = nn.Linear(input_dim, 1)

        self.control_1 = nn.Linear(input_dim, input_dim)
        self.control_2 = nn.Linear(input_dim, input_dim)
        self.control_3 = nn.Linear(input_dim, 1)

    def forward(self, x):
        """Predict the outcome of every row using the head selected by its treatment flag.

        Args:
            x: 2-D tensor whose first column holds the treatment indicator (0 or 1)
                and whose remaining columns hold the covariates.

        Returns:
            Tensor of shape ``(rows, 1)`` with one prediction per input row.

        Raises:
            ValueError: if any treatment indicator is neither 0 nor 1. (Previously
                such a row silently returned the raw representation instead of a
                prediction.)
        """
        t = x[:, 0]
        x = x[:, 1:]

        # Representation Network
        x = F.elu(self.representation_1(x))
        x = F.elu(self.representation_2(x))
        x = F.elu(self.representation_3(x))

        # Choosing the arm, row by row. The previous `if t == 1:` only worked for
        # single-row batches, because the truth value of a multi-element tensor is
        # ambiguous.
        treated_rows = t == 1
        control_rows = t == 0
        if not bool(torch.all(treated_rows | control_rows)):
            raise ValueError("treatment indicator (first input column) must be 0 or 1")

        out = x.new_empty((x.shape[0], 1))

        # Each head is evaluated only on its own rows, so a head that is not used in
        # a batch receives no gradient at all (as in the single-row implementation).
        if bool(treated_rows.any()):
            h = F.elu(self.treated_1(x[treated_rows]))
            h = F.elu(self.treated_2(h))
            out[treated_rows] = self.treated_3(h)

        if bool(control_rows.any()):
            h = F.elu(self.control_1(x[control_rows]))
            h = F.elu(self.control_2(h))
            out[control_rows] = self.control_3(h)

        return out

In [None]:
# Put the treatment indicator in column 0 of the design matrix, then convert the
# features and the outcomes (as a single column) to float tensors.
# NOTE(review): X_train / y_train are rebound in place, so re-running this cell
# operates on the already-converted tensors.
n_rows = len(t_train)
treatment_column = t_train.reshape(n_rows, 1)
features_with_treatment = np.hstack((treatment_column, X_train))
X_train = torch.tensor(features_with_treatment, dtype=torch.float)
y_train = torch.tensor(y_train.reshape(n_rows, 1), dtype=torch.float)

In [None]:
# Shapes of the feature and outcome tensors built in the previous cell.
for training_tensor in (X_train, y_train):
    print(training_tensor.shape)

In [None]:
# Full-batch training on the tensors built above, recording the loss of every step.
#
# This cell used `model`, `criterion` and `optimizer` without any earlier cell defining
# them, so it failed on a fresh kernel. They are created here with the same loss and
# optimiser settings that `train_model` hard-codes (MSE, SGD lr=0.01, momentum=0.9).
# NOTE(review): the whole training set is passed as one batch, so CfR.forward must
# accept multi-row input.
model = CfR(X_train.shape[1])  # column 0 of X_train is the treatment; CfR strips it itself
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

losses1 = []

for step in range(20):
    y_pred = model(X_train)

    loss = criterion(y_pred, y_train)
    print(step, loss.item())
    losses1.append(loss.item())

    # Stop as soon as training diverges; further steps would only propagate NaNs.
    if torch.isnan(loss):
        break

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

In [None]:
def customized_loss(X, y):
    """Ratio of label-associated similarity to the remaining similarity.

    With ``S = similarity_matrix(X)`` and ``A = convert_y(y)`` the result is
    ``sum(S * A) / (sum(S) - sum(S * A))``.

    Args:
        X: input handed to ``similarity_matrix``.
        y: input handed to ``convert_y``.

    Returns:
        The scalar loss tensor. No guard is applied for a zero denominator.
    """
    # NOTE(review): `Variable`, `similarity_matrix` and `convert_y` are not imported or
    # defined in the visible notebook cells. `torch.autograd.Variable` is a deprecated
    # wrapper; confirm whether re-wrapping the tensors here is intended, since it may
    # cut them off from whatever graph produced them.
    X_similarity = Variable(similarity_matrix(X), requires_grad = True)
    association = Variable(convert_y(y), requires_grad = True)

    # The element-wise product was previously computed twice (once into an unused
    # local); it is now computed once.
    loss_num = torch.sum(torch.mul(X_similarity, association))
    loss_all = torch.sum(X_similarity)
    loss_denum = loss_all - loss_num
    return loss_num / loss_denum

In [None]:
def train_model(train_dl, model, epochs=20, lr=0.01, momentum=0.9):
    """Train ``model`` in place with SGD on a mean-squared-error objective.

    Args:
        train_dl: iterable yielding ``(inputs, targets)`` tensor pairs, e.g. a DataLoader.
        model: the module to optimise; its parameters are updated in place.
        epochs: number of passes over ``train_dl`` (default 20, the former fixed value).
        lr: SGD learning rate (default 0.01, the former fixed value).
        momentum: SGD momentum (default 0.9, the former fixed value).

    Returns:
        A list with the mean batch loss of every epoch (``nan`` for an epoch in
        which ``train_dl`` yielded no batches). Callers that ignored the previous
        ``None`` return value are unaffected.
    """
    # define the optimization
    criterion = torch.nn.MSELoss()
    optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)

    epoch_losses = []
    # enumerate epochs
    for _ in range(epochs):
        running_loss = 0.0
        n_batches = 0
        # enumerate mini batches
        for inputs, targets in train_dl:
            # clear the gradients
            optimizer.zero_grad()
            # compute the model output
            yhat = model(inputs.float())
            # calculate loss
            loss = criterion(yhat, targets.float())
            # credit assignment
            loss.backward()
            # update model weights
            optimizer.step()

            running_loss += loss.item()
            n_batches += 1
        epoch_losses.append(running_loss / n_batches if n_batches else float('nan'))

    return epoch_losses

In [None]:
# 27 is the number of input columns including the treatment indicator; CfR.__init__
# subtracts one to size its layers.
# NOTE(review): hard-coded — presumably the column count of the UseCase dataset; confirm.
model = CfR(27)

In [None]:
# Fit the model in place on the batches produced by train_dl.
train_model(train_dl, model)

In [None]:
# Collect one prediction tensor per test batch. This is inference only, so autograd
# tracking is switched off; otherwise every stored output would keep its graph alive.
with torch.no_grad():
    prediction = [model(inputs.float()) for inputs, _ in test_dl]

In [None]:
# Display the list of per-batch prediction tensors collected above.
prediction

In [None]:
def set_seed(seed):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: integer seed applied to all three generators.
    """
    import random  # local import: only needed here

    # Python's own generator was previously left unseeded.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

set_seed(1234)

In [None]:
## Standard libraries
import os
import json
import math
import numpy as np

In [None]:
## Imports for plotting
import matplotlib.pyplot as plt
# Render matplotlib figures inline in the notebook.
%matplotlib inline
# NOTE(review): IPython.display.set_matplotlib_formats is, to my knowledge, deprecated in
# recent IPython releases in favour of the matplotlib_inline package — confirm before upgrading.
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('svg', 'pdf') # For export
from matplotlib.colors import to_rgb
import matplotlib
# Thicker default line width for all plots.
matplotlib.rcParams['lines.linewidth'] = 2.0
import seaborn as sns
# NOTE(review): reset_orig() restores matplotlib rc settings before the seaborn theme is
# applied — check that the line-width override above survives these two calls.
sns.reset_orig()
sns.set()

In [None]:
## Progress bar
from tqdm.notebook import tqdm

## PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import torch.optim as optim
# Torchvision
import torchvision
from torchvision.datasets import CIFAR10
from torchvision import transforms

In [None]:
# PyTorch Lightning
try:
    import pytorch_lightning as pl
except ModuleNotFoundError: # Google Colab does not have PyTorch Lightning installed by default. Hence, we do it here if necessary
    !pip install pytorch-lightning==1.0.3
    import pytorch_lightning as pl
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint

In [None]:
# Tensorboard extension (for visualization purposes later)
from torch.utils.tensorboard import SummaryWriter
%load_ext tensorboard

# Path to the folder where the datasets are/should be downloaded (e.g. CIFAR10)
DATASET_PATH = "../data"
# Path to the folder where the pretrained models are saved
CHECKPOINT_PATH = "../saved_models/tutorial9"

# Setting the seed
# NOTE(review): an earlier cell already calls set_seed(1234); this applies a different
# seed (42) afterwards — confirm which seed the experiments are meant to use.
pl.seed_everything(42)

# Ensure that all operations are deterministic on GPU (if used) for reproducibility
# (the attribute was previously misspelled `determinstic`, which only created an
# unused attribute and left the real flag untouched)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
# Scratch check: display a 3x2 array of ones.
np.ones((3,2))

In [None]:
# Empty 1-D array used by the np.append experiment in the next cell.
new = np.array([])

In [None]:
# NOTE(review): this call cannot succeed as written — it passes the function object
# `np.ones` rather than an array, and `new` is 1-D so axis=2 does not exist.
# Intent is unclear from the notebook; fix or remove before sharing.
np.append(new, np.ones, axis=2)