## Imports

In [1]:
%matplotlib inline
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torch import autograd
from torch.autograd import Variable
from torchvision.utils import make_grid
import matplotlib.pyplot as plt
import math

In [2]:
# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Report the environment so the run can be reproduced later.
print('torch version:',torch.__version__)
print('device:', device)

torch version: 2.0.0
device: cpu


## Initialization

In [3]:
# Dimensions of the toy problem and the number of simulated observations.
X_size, Y_size = 1, 1
num_Samples = 1000

# Simulate X ~ N(0, 1) and Y = X^2 + Gaussian noise scaled by 1/100.
data_X = np.random.randn(num_Samples, 1)
data_Y = data_X * data_X + np.random.randn(num_Samples, 1) / 100

# NumPy draws float64; down-cast to float32 (or PyTorch cannot use it).
data_X = data_X.astype(np.float32)
data_Y = data_Y.astype(np.float32)



In [4]:
def CGAN_train(data_X,data_Y,generator_layer_size=[10,10,10],discriminator_layer_size=[10,10,10],
              z_dim=2,learning_rate=1e-3,epochs=1000,batch_size=32):
    """Train a conditional GAN on (X, Y) pairs and build a real-vs-fake dataset.

    The generator maps (noise, X) to a synthetic Y and the discriminator scores
    (X, Y) pairs. During the final epoch the generator's output for every real
    X batch is collected and stacked with the corresponding real rows.

    The models and all tensors are placed on the module-level ``device``.

    Parameters
    ----------
    data_X, data_Y : numpy.ndarray
        2-D arrays with one row per sample and the same number of rows. They
        are handed to ``torch.from_numpy`` without conversion, so they should
        already be float32.
    generator_layer_size, discriminator_layer_size : sequence of int
        Widths of the three hidden layers of each network (read, never mutated).
    z_dim : int
        Dimension of the noise vector fed to the generator.
    learning_rate : float
        Learning rate of both Adam optimizers.
    epochs : int
        Number of passes over the data; must be at least 1.
    batch_size : int
        Rows per training step. Only full batches are used, so the trailing
        ``n % batch_size`` rows are neither trained on nor returned.

    Returns
    -------
    dataset_for_classification : numpy.ndarray
        Shape ``(2 * m, X_dim + Y_dim)`` with ``m = (n // batch_size) * batch_size``.
        The first ``m`` rows pair real X with generated Y, the last ``m`` rows
        are the real (X, Y) pairs.
    labels : torch.Tensor
        float32 tensor of length ``2 * m``: 0 for generated rows, 1 for real rows.

    Raises
    ------
    ValueError
        If the inputs have different row counts, ``epochs < 1``, or there are
        fewer rows than ``batch_size`` (no prediction could be produced).
    """
    n_samples = data_X.shape[0]
    if data_Y.shape[0] != n_samples:
        raise ValueError('data_X and data_Y must have the same number of rows')
    n_batches = n_samples // batch_size
    if epochs < 1 or n_batches < 1:
        raise ValueError('need epochs >= 1 and at least batch_size rows of data')

    X_dim=data_X.shape[1]
    Y_dim=data_Y.shape[1]

    class Generator(nn.Module):
        """Maps a noise vector z and a condition X to a synthetic Y."""
        def __init__(self, generator_layer_size,z_dim,X_dim,Y_dim):
            super().__init__()
            self.z_dim = z_dim
            self.X_dim = X_dim
            self.Y_dim = Y_dim
            # NOTE(review): the final Tanh bounds generated Y to (-1, 1) — confirm
            # this matches the range of data_Y.
            self.model = nn.Sequential(
                nn.Linear(self.z_dim + self.X_dim, generator_layer_size[0]),
                nn.LeakyReLU(0.2, inplace=True),
                nn.Linear(generator_layer_size[0], generator_layer_size[1]),
                nn.LeakyReLU(0.2, inplace=True),
                nn.Linear(generator_layer_size[1], generator_layer_size[2]),
                nn.LeakyReLU(0.2, inplace=True),
                nn.Linear(generator_layer_size[2], self.Y_dim),
                nn.Tanh()
            )

        def forward(self, z, X):
            # Infer the batch dimension from the input instead of a captured constant.
            z_reshape = z.view(-1, self.z_dim)
            combined = torch.cat([z_reshape, X], 1)
            out = self.model(combined)
            return out.view(-1, self.Y_dim)

    class Discriminator(nn.Module):
        """Scores an (X, Y) pair with the probability that it is real."""
        def __init__(self, discriminator_layer_size, X_dim,Y_dim):
            super().__init__()
            self.X_dim = X_dim
            self.Y_dim = Y_dim
            self.model = nn.Sequential(
                nn.Linear(self.Y_dim+self.X_dim, discriminator_layer_size[0]),
                nn.LeakyReLU(0.2, inplace=True),
                nn.Linear(discriminator_layer_size[0], discriminator_layer_size[1]),
                nn.LeakyReLU(0.2, inplace=True),
                nn.Linear(discriminator_layer_size[1], discriminator_layer_size[2]),
                nn.LeakyReLU(0.2, inplace=True),
                nn.Linear(discriminator_layer_size[2], 1),
                nn.Sigmoid()
            )

        def forward(self, X, Y):
            Y = Y.view(-1, self.Y_dim)
            combined = torch.cat([X,Y], 1)
            out = self.model(combined)
            # Flatten (batch, 1) -> (batch,) so it matches the 1-D BCE targets,
            # including when the batch holds a single row.
            return out.view(-1)

    ## Define generator, discriminator, loss function and optimizers
    generator = Generator(generator_layer_size,z_dim,X_dim,Y_dim).to(device)
    discriminator = Discriminator(discriminator_layer_size,X_dim,Y_dim).to(device)
    criterion = nn.BCELoss()
    g_optimizer = torch.optim.Adam(generator.parameters(), lr=learning_rate)
    d_optimizer = torch.optim.Adam(discriminator.parameters(), lr=learning_rate)

    def generator_train_step(batch_size, discriminator, generator, g_optimizer, criterion):
        """One generator update: try to make the discriminator call fakes real."""
        g_optimizer.zero_grad()
        z = torch.randn(batch_size, z_dim, device=device)
        # NOTE(review): the conditioning X of fake pairs is drawn from N(0, 1)
        # rather than taken from data_X — confirm this is intended.
        fake_X = torch.randn(batch_size, X_dim, device=device)
        fake_Y = generator(z, fake_X)
        validity = discriminator(fake_X, fake_Y)
        g_loss = criterion(validity, torch.ones(batch_size, device=device))
        g_loss.backward()
        g_optimizer.step()
        return g_loss.detach()

    def discriminator_train_step(batch_size, discriminator, generator, d_optimizer, criterion, X,Y):
        """One discriminator update on a real batch plus a freshly generated fake batch."""
        d_optimizer.zero_grad()
        real_validity = discriminator(X,Y)
        real_loss = criterion(real_validity, torch.ones(batch_size, device=device))
        z = torch.randn(batch_size, z_dim, device=device)
        fake_X = torch.randn(batch_size, X_dim, device=device)
        # The generator is not updated in this step, so skip building its graph.
        with torch.no_grad():
            fake_Y = generator(z, fake_X)
        fake_validity = discriminator(fake_X,fake_Y)
        fake_loss = criterion(fake_validity, torch.zeros(batch_size, device=device))
        d_loss = real_loss + fake_loss ## Total loss = real part + fake part
        d_loss.backward()
        d_optimizer.step()
        return d_loss.detach()

    ## Train the model
    collected = []
    for epoch in range(epochs):
        print('Starting epoch {}...'.format(epoch+1))
        generator.train()
        final_epoch = (epoch == epochs-1)
        for i in range(n_batches):
            start, stop = i*batch_size, (i+1)*batch_size
            ## Real batch, moved to the same device as the models
            real_X = torch.from_numpy(data_X[start:stop].copy()).to(device)
            real_Y = torch.from_numpy(data_Y[start:stop].copy()).to(device)
            d_loss = discriminator_train_step(batch_size, discriminator, generator, d_optimizer, criterion, real_X,real_Y)
            g_loss = generator_train_step(batch_size, discriminator, generator, g_optimizer, criterion)
            if final_epoch:
                ## Only the last epoch's samples are returned, so only generate them then
                with torch.no_grad():
                    z = torch.randn(batch_size, z_dim, device=device)
                    sample = generator(z, real_X)
                collected.append(sample.cpu().numpy().reshape(batch_size,Y_dim))
        generator.eval()
        print('g_loss: {}, d_loss: {}'.format(g_loss, d_loss))
    prediction = np.concatenate(collected)

    ## Now we generate data to put into the classifier in the next step
    n_used = prediction.shape[0]
    fake_data_combine=np.concatenate((data_X[:n_used],prediction),axis=1)
    print(fake_data_combine.shape)
    real_data_combine=np.concatenate((data_X[:n_used],data_Y[:n_used]),axis=1)
    dataset_for_classification=np.concatenate((fake_data_combine,real_data_combine),axis=0)
    ## 0 marks generated rows (first half), 1 marks real rows (second half)
    labels=torch.cat((torch.zeros(n_used, dtype=torch.float32),
                      torch.ones(n_used, dtype=torch.float32)))
    return(dataset_for_classification,labels)

## Classification Network

In [5]:
def classification_network_fit(dataset,labels,dataset_v,labels_v,neural_size=[10,10,10],epochs=10,batch_size=1):
    """Fit a small binary classifier and score a validation set.

    The network is ``Linear -> LeakyReLU -> Linear -> Sigmoid`` and is trained
    with SGD (lr=0.01) on a binary cross-entropy loss.

    Parameters
    ----------
    dataset : numpy.ndarray
        2-D float32 training features, one row per sample.
    labels : torch.Tensor
        1-D float tensor of 0/1 targets, one per row of ``dataset``.
    dataset_v : numpy.ndarray
        2-D float32 validation features with the same number of columns.
    labels_v : torch.Tensor
        Validation targets; returned unchanged for the caller's convenience.
    neural_size : sequence of int
        Hidden-layer widths. Only the first entry is used by the current
        single-hidden-layer architecture.
    epochs : int
        Number of training epochs.
    batch_size : int
        Controls the number of optimizer steps per epoch
        (``len(dataset) // batch_size``).
        NOTE(review): every step is computed on the full training set, not on
        a mini-batch of this size — confirm this is intended.

    Returns
    -------
    prediction : torch.Tensor
        1-D tensor of predicted probabilities for ``dataset_v`` (no autograd graph).
    labels_v : torch.Tensor
        The validation targets that were passed in.
    """
    input_dim=dataset.shape[1]

    class NeuralNets(nn.Module):
        """Single-hidden-layer probability classifier."""
        def __init__(self, discriminator_layer_size, input_dim):
            super().__init__()
            self.input_dim=input_dim
            self.model = nn.Sequential(
                nn.Linear(input_dim, discriminator_layer_size[0]),
                nn.LeakyReLU(0.2, inplace=True),
                nn.Linear(discriminator_layer_size[0],1),
                nn.Sigmoid()
            )

        def forward(self, X):
            # Drop only the trailing unit axis so a single-row input stays 1-D.
            return self.model(X).squeeze(-1)

    model_c=NeuralNets(neural_size,input_dim)
    loss_for_classification=torch.nn.BCELoss()
    optimizer_for_classification=torch.optim.SGD(model_c.parameters(), lr=0.01)

    # Build the training tensor once; derive the step count from the data that
    # was passed in rather than from any notebook-level variable.
    data=torch.from_numpy(dataset)
    steps_per_epoch=dataset.shape[0]//batch_size
    for epoch in range(epochs):
        print('Starting epoch {}...'.format(epoch+1))
        for _ in range(steps_per_epoch):
            optimizer_for_classification.zero_grad()
            outputs=model_c(data)
            loss_c=loss_for_classification(outputs,labels)
            loss_c.backward()
            optimizer_for_classification.step()

    # Inference only: no gradient bookkeeping needed for the validation scores.
    with torch.no_grad():
        prediction=model_c(torch.from_numpy(dataset_v))
    return prediction,labels_v
    
        

In [6]:
import random
def adversarial_causal_test(x,y,epochs=1000):
    """Train a CGAN for y given x, then test how well a classifier separates real from generated y.

    Returns a pair ``(result, n_validation)`` where ``result`` is a list with
    one entry per validation row: 1 when the classifier's thresholded output
    (above/below 0.5) agrees with the real/fake label, 0 otherwise.
    """
    gan_data, gan_labels = CGAN_train(x, y, epochs=epochs)

    # The leading columns hold x; the classifier only ever sees the y columns.
    n_x_cols = x.shape[1]
    y_only = np.delete(gan_data, range(n_x_cols), 1)

    # Random 80/20 split of the row indices into training and validation.
    n_rows = y_only.shape[0]
    training_index = list(random.sample(range(0, n_rows), int(n_rows*0.8)))
    validation_index = list(set(range(0, n_rows)) - set(training_index))

    prediction_v, labels_v = classification_network_fit(
        y_only[training_index, :],
        gan_labels[training_index],
        y_only[validation_index, :],
        gan_labels[validation_index],
    )

    # Score each validation row; a prediction of exactly 0.5 counts as a miss.
    n_validation = prediction_v.shape[0]
    result = [
        1 if (((prediction_v[k] > 0.5) and (labels_v[k] == 1))
              or ((prediction_v[k] < 0.5) and (labels_v[k] == 0))) else 0
        for k in range(0, n_validation)
    ]
    return result, n_validation
    ## Adversarial Causal Tests
    



In [7]:
import scipy
import csv
import random

def get_list(mydata,index):
    """Return the elements of ``mydata`` selected by ``index``, in the order given."""
    return [mydata[position] for position in index]

def get_test_statistic(data_X,data_Y):
    """Compare classifier accuracy for the X->Y and Y->X adversarial tests.

    Runs ``adversarial_causal_test`` in both directions and returns the tuple
    ``(TCXtoY, TCYtoX, TCE, TCE_b, sigmasquared, sigmasquared_b)``:
    the two accuracies, the squared accuracy gap divided by the formula-based
    variance and by the bootstrap variance, and the two variance estimates.
    """
    XtoY, _ = adversarial_causal_test(data_X, data_Y)
    YtoX, _ = adversarial_causal_test(data_Y, data_X)

    n_xy, n_yx = len(XtoY), len(YtoX)
    hits_xy = np.asarray(XtoY)
    hits_yx = np.asarray(YtoX)

    # Fraction of validation rows classified correctly in each direction.
    TCXtoY = sum(hits_xy)/n_xy
    TCYtoX = sum(hits_yx)/n_yx

    ## Theoretical variance
    cross_terms = (hits_xy-TCXtoY)*(hits_yx-TCYtoX)/(n_xy-1)
    sigmasquared = 2*(1/(4*n_xy)-sum(cross_terms))

    ## Bootstrap variance: resample row positions with replacement, 100 draws
    difference_set = []
    for _ in range(0,100):
        index = random.choices(list(range(0,n_xy)),k=n_xy)
        resampled_gap = sum(hits_xy[index])/n_xy - sum(hits_yx[index])/n_yx
        difference_set.append(resampled_gap)
    sigmasquared_b = np.var(difference_set)

    squared_gap = (TCXtoY-TCYtoX)**2
    TCE = squared_gap/sigmasquared
    TCE_b = squared_gap/sigmasquared_b

    return TCXtoY, TCYtoX, TCE, TCE_b, sigmasquared, sigmasquared_b

#A=get_test_statistic(data_X,data_Y)


    


# Repeat the simulation 100 times and store one row of statistics per run.
with open('result3.csv','w',newline='') as csvfile:
    writer = csv.writer(csvfile, delimiter=' ', quotechar='|', quoting=csv.QUOTE_MINIMAL)
    for trial in range(100):
        # Draw in float64 and build Y = X^3 + noise/10 before down-casting both.
        data_X = np.random.randn(num_Samples,1)
        data_Y = np.power(data_X,3) + np.random.randn(num_Samples,1)/10
        data_X, data_Y = np.float32(data_X), np.float32(data_Y)
        result = get_test_statistic(data_X,data_Y)
        writer.writerow(result)


[207,
 128,
 353,
 166,
 384,
 198,
 285,
 265,
 165,
 394,
 89,
 292,
 364,
 107,
 370,
 37,
 207,
 170,
 13,
 130,
 86,
 113,
 159,
 178,
 29,
 314,
 185,
 383,
 238,
 138,
 290,
 195,
 373,
 325,
 217,
 85,
 392,
 242,
 244,
 201,
 182,
 227,
 200,
 77,
 268,
 6,
 49,
 303,
 309,
 225,
 0,
 356,
 208,
 148,
 249,
 376,
 385,
 373,
 358,
 363,
 377,
 306,
 362,
 221,
 17,
 287,
 31,
 182,
 117,
 222,
 159,
 323,
 284,
 79,
 275,
 16,
 0,
 55,
 225,
 108,
 158,
 322,
 124,
 7,
 209,
 258,
 158,
 221,
 187,
 172,
 308,
 337,
 105,
 211,
 234,
 108,
 378,
 323,
 90,
 380,
 28,
 324,
 307,
 66,
 189,
 303,
 170,
 297,
 89,
 340,
 199,
 68,
 138,
 188,
 143,
 271,
 302,
 171,
 342,
 304,
 294,
 60,
 119,
 199,
 234,
 368,
 331,
 101,
 299,
 153,
 376,
 72,
 295,
 27,
 87,
 285,
 202,
 365,
 76,
 67,
 96,
 240,
 301,
 59,
 136,
 176,
 127,
 35,
 340,
 343,
 55,
 187,
 17,
 383,
 29,
 62,
 301,
 51,
 117,
 67,
 137,
 236,
 368,
 15,
 363,
 112,
 313,
 197,
 256,
 353,
 395,
 362,
 12,
 2,
 

In [13]:
import os
from glob import glob
import csv
import pandas as pd
# Root folder that is searched recursively, and the filename pattern to match.
PATH = "/Users/taoxu/Desktop/conditionalGAN/conditional-GAN-main/default"
EXT = "*.csv"

# Glob every directory under PATH and flatten the matches into one list.
all_csv_files = [
    match
    for directory, _subdirs, _filenames in os.walk(PATH)
    for match in glob(os.path.join(directory, EXT))
]

# Load and display the first CSV that was found.
results = pd.read_csv(all_csv_files[0])
results
    

Unnamed: 0,1.0 0.9319899244332494 3.672544080604536 32.34881698290707 0.0012594458438287153 0.00014298422044426394
0,1.0 0.9017632241813602 7.6624685138538995 49.5...
1,1.0 0.9596977329974811 1.2896725440806072 14.8...
2,1.0 0.9093198992443325 6.5289672544080535 45.4...
3,1.0 0.929471032745592 3.94962216624685 36.3057...
4,1.0 0.9319899244332494 3.672544080604536 29.91...
...,...
94,1.0 0.8942065491183879 8.886649874055413 45.26...
95,1.0 0.8942065491183879 8.886649874055413 50.16...
96,1.0 0.9370277078085643 3.1486146095717853 20.6...
97,1.0 0.924433249370277 4.53400503778338 27.7695...


In [3]:
# Display the kernel's current working directory.
# NOTE(review): in the visible notebook `os` is only imported in the CSV-loading cell — confirm cell order.
os.getcwd()

'/Users/taoxu/Desktop/conditionalGAN/conditional-GAN-main'

In [115]:
# NOTE(review): leftover debugging cell. In the visible notebook `XtoY` is only assigned as a
# local inside get_test_statistic, so this name may not resolve on a fresh kernel — consider removing.
XtoY

TypeError: unsupported operand type(s) for -: 'list' and 'list'

In [116]:
# NOTE(review): leftover debugging cell. In the visible notebook `YtoX` is only assigned as a
# local inside get_test_statistic, so this name may not resolve on a fresh kernel — consider removing.
YtoX

[0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
