In [387]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import  TensorDataset, DataLoader # for dealing with data#
import torch.optim as optim # Optimization package
from sklearn.model_selection import train_test_split

In [347]:
# Read in dataset 
nswre74_control = pd.read_excel('/Users/mawuliagamah/gitprojects/causal_inference/causal_inference/datasets/nswre74_control.xls')
nswre74_treated = pd.read_excel('/Users/mawuliagamah/gitprojects/causal_inference/causal_inference/datasets/nswre74_treated.xlsx')

#Combine treatment and control as pandas dataframe
nswre74_pd_df = pd.concat([nswre74_control,nswre74_treated])
#Drop the outcome variable -> as to not bias results
nswre74_pd_df.drop('re78', axis=1, inplace=True)
nswre74_pd_df['idx'] = range(1, len(nswre74_pd_df) + 1)

In [412]:
## Data pre-processing

# Z-score normalisation 
def z_score_standardization(series):
    return (series - series.mean()) / series.std()

for col in nswre74_pd_df[['age','education','re74','re75']]:
    nswre74_pd_df[col] =  z_score_standardization(nswre74_pd_df[col])

#convert to pytorch tensor 
nswre74_tensor = torch.from_numpy(np.array(nswre74_pd_df))
nswre74_array = np.array(nswre74_pd_df)

#Train test split
#X_train, X_test, Y_train, Y_test = train_test_split(features, target, test_size=0.20, random_state=42)

features = nswre74_array[:,1:]     # select the rest 
target = nswre74_array[::,0]  

### Custom class for datasets

In [405]:
class CustomDataset:
  def __init__(self,features,targets):
    self.features = features
    self.targets = targets

  def __len__(self):
    return self.features.shape[0]

  def __getitem__(self,idx):
    current_sample = self.features[idx,:]
    current_target = self.targets[idx]
    return {
      "features": torch.tensor(current_sample,dtype=torch.float),
      "target": torch.tensor(current_target,dtype=torch.long),
      }

In [414]:
batch_size = 4

train_data, test_data, train_targets , test_targets = train_test_split(features,target,stratify=target)

nsw_74_train = CustomDataset(features = train_data,targets = train_targets)
nsw_74_test = CustomDataset(features = test_data,targets = test_targets)

train_loader = DataLoader(dataset=nsw_74_train, batch_size=batch_size, num_workers=2)
test_loader = DataLoader(dataset=nsw_74_train, batch_size=batch_size, num_workers=2)

In [322]:
# set Hyperparameters 

# Number of input units, must match number of input features
n_input = 8  
# Number of hidden units    
n_hidden = 15  
 # Number of output units            
n_output = 356               
num_classes = 1

number_of_epochs = n_input*3
learning_rate = 1e-5
max_iterations = 200


In [327]:
# Define neural netowrk class 
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(n_input,n_hidden),# Neural net input layer ,  8 input layers, to 15 hidden neurons
            nn.ReLU(), # ReLu activation function for hidden layer 1
            nn.Linear(n_hidden, num_classes), # Hidden layer 2
            nn.ReLU(),# ReLu activation function for hidden layer 2 
            nn.Softmax(dim=0), #Output activation  
        )
    def forward(self, x):
        x = self.flatten(x)
        out = self.linear_relu_stack(x)
        print(x.size())
        return out


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} device")

model = NeuralNetwork().to(device)                     #Initialise model



Using cpu device


##     Training the model

In [339]:
#Training loop


# Binary cross entropy loss
criterion = nn.BCELoss() 
# Stochastic gradient descent optimizer 
optimizer = optim.SGD(model.parameters(), lr=learning_rate) # Algorithm find min/max of loss functon 

n_total_steps = len(train_loader)
for epoch in range(number_of_epochs):
    running_loss = 0
    for i, data in enumerate(train_loader): 
        inputs, labels = data
        # Zero gradients for batch 
        optimizer.zero_grad()
        # Every data instance is an input + label pair
        inputs =  X_train
        labels = Y_train
        # Forward pass and loss calculation
        model_output = model(inputs)
        model_output = model_output[:,0] 
        loss = criterion(model_output.float(), labels.float())
        #Backwards pass , optimise
        loss.backward()
        optimizer.step()
        # Gather data and report
        running_loss += loss.item()
        if i % 1 == 0:
            print (f'Epoch [{epoch+1}/{number_of_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')
            running_loss = 0.0
            
print('Finished training')

torch.Size([356, 8])
Epoch [1/24], Step [1/12], Loss: 2.3752
torch.Size([356, 8])
Epoch [1/24], Step [2/12], Loss: 2.3752
torch.Size([356, 8])
Epoch [1/24], Step [3/12], Loss: 2.3752
torch.Size([356, 8])
Epoch [1/24], Step [4/12], Loss: 2.3752
torch.Size([356, 8])
Epoch [1/24], Step [5/12], Loss: 2.3752
torch.Size([356, 8])
Epoch [1/24], Step [6/12], Loss: 2.3752
torch.Size([356, 8])
Epoch [1/24], Step [7/12], Loss: 2.3752
torch.Size([356, 8])
Epoch [1/24], Step [8/12], Loss: 2.3752
torch.Size([356, 8])
Epoch [1/24], Step [9/12], Loss: 2.3752
torch.Size([356, 8])
Epoch [1/24], Step [10/12], Loss: 2.3752
torch.Size([356, 8])
Epoch [1/24], Step [11/12], Loss: 2.3752
torch.Size([356, 8])
Epoch [1/24], Step [12/12], Loss: 2.3752
torch.Size([356, 8])
Epoch [2/24], Step [1/12], Loss: 2.3752
torch.Size([356, 8])
Epoch [2/24], Step [2/12], Loss: 2.3752
torch.Size([356, 8])
Epoch [2/24], Step [3/12], Loss: 2.3752
torch.Size([356, 8])
Epoch [2/24], Step [4/12], Loss: 2.3752
torch.Size([356, 8])


In [336]:
# Check accuracy 
import sklearn
from sklearn.metrics import accuracy_score


predictions = []
y_pred = model(X_test)
predictions.append(y_pred.argmax())


#score = sklearn.metrics.accuracy_score(Y_test, predictions, normalize=True, sample_weight=None)

#print(score)


torch.Size([89, 8])


<p> cps1 match </p>

In [340]:
y_pred = model(X_test)
y_pred

torch.Size([89, 8])


tensor([[0.0096],
        [0.0100],
        [0.0103],
        [0.0135],
        [0.0094],
        [0.0094],
        [0.0094],
        [0.0098],
        [0.0100],
        [0.0094],
        [0.0094],
        [0.0096],
        [0.0103],
        [0.0106],
        [0.0104],
        [0.0127],
        [0.0094],
        [0.0164],
        [0.0116],
        [0.0147],
        [0.0094],
        [0.0105],
        [0.0097],
        [0.0099],
        [0.0107],
        [0.0102],
        [0.0130],
        [0.0104],
        [0.0129],
        [0.0106],
        [0.0104],
        [0.0233],
        [0.0094],
        [0.0138],
        [0.0097],
        [0.0123],
        [0.0121],
        [0.0114],
        [0.0094],
        [0.0149],
        [0.0103],
        [0.0168],
        [0.0139],
        [0.0173],
        [0.0142],
        [0.0094],
        [0.0098],
        [0.0128],
        [0.0097],
        [0.0094],
        [0.0099],
        [0.0129],
        [0.0099],
        [0.0094],
        [0.0110],
        [0

In [320]:
Y_test

tensor([1., 1., 0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 1., 1., 1., 1., 0., 0.,
        1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 1., 0., 1.,
        1., 0., 1., 0., 0., 1., 0., 1., 0., 1., 0., 1., 0., 0., 0., 1., 1., 1.,
        0., 1., 0., 1., 0., 0., 0., 1., 1., 0., 0., 1., 1., 0., 1., 1., 0., 1.,
        0., 1., 0., 0., 1., 0., 1., 1., 1., 0., 0., 1., 1., 1., 1., 0., 1.])

In [274]:
for i,data in enumerate(X_test):
  print(i)
  print(data)

0
tensor([-1.0381, -0.1091,  1.0000,  0.0000,  0.0000,  1.0000, -0.3920, -0.4371])
1
tensor([-0.0522, -0.1091,  1.0000,  0.0000,  0.0000,  1.0000, -0.3920, -0.4371])
2
tensor([-0.0522,  1.0069,  1.0000,  0.0000,  1.0000,  0.0000, -0.3920, -0.4371])
3
tensor([-0.8973, -1.2251,  1.0000,  0.0000,  0.0000,  1.0000,  0.0996,  0.4951])
4
tensor([-0.1931,  1.0069,  1.0000,  0.0000,  0.0000,  0.0000, -0.3920, -0.4371])
5
tensor([ 0.7928,  1.0069,  1.0000,  0.0000,  0.0000,  0.0000, -0.3920, -0.4371])
6
tensor([-0.7564,  1.0069,  1.0000,  0.0000,  0.0000,  0.0000, -0.3920, -0.2492])
7
tensor([ 1.2153,  0.4489,  1.0000,  0.0000,  0.0000,  1.0000, -0.3920, -0.4371])
8
tensor([ 0.0886, -0.1091,  1.0000,  0.0000,  0.0000,  1.0000, -0.3920, -0.4371])
9
tensor([-0.4747,  1.0069,  1.0000,  0.0000,  0.0000,  0.0000, -0.3920, -0.4371])
10
tensor([-0.1931,  1.0069,  0.0000,  0.0000,  0.0000,  0.0000, -0.3920, -0.4371])
11
tensor([-1.1789, -0.1091,  1.0000,  0.0000,  0.0000,  1.0000, -0.3920, -0.4371])
12