In [31]:
import torch

In [59]:
def TransformIntervalIndex(data, nIntervals):
    """Map values in [0, 1] to 1-based indices of equal-width intervals.

    [0, 1] is cut into ``nIntervals`` intervals of equal width. The first
    interval is closed, [0, b_1]; every later one is half-open, (b_{k-1}, b_k].

    Args:
        data: 1-D or 2-D torch tensor whose entries are expected to lie in
            [0, 1]. Each entry is binned independently.
        nIntervals: number of intervals (positive integer).

    Returns:
        An integer tensor with the same shape as ``data`` holding the interval
        index of every entry, or ``None`` when ``data`` is neither 1-D nor 2-D.
    """
    if data.dim() not in (1, 2):
        return None  # only 1-D and 2-D tensors are supported

    # Create the breakpoints on the input's device; otherwise bucketize fails
    # for tensors that do not live on the default (CPU) device.
    breaks = torch.linspace(
        start=0, end=1, steps=nIntervals + 1, device=data.device
    )
    # right=False yields (lower, upper] bins, so an exact 0 falls into bin 0.
    dataIndices = torch.bucketize(data, breaks, right=False)
    # Fold bin 0 into bin 1 so that the first interval becomes [0, b_1].
    return torch.clamp(dataIndices, min=1)


In [77]:
# Toy sample: 6 rows x 4 columns, every value inside [0, 1].
data = torch.tensor(
[
[0.19901582, 0.29330425, 0.08031318, 0.27744206],
[0.38371595, 0.07725842, 0.58872328, 0.60947456],
[0.78765378, 0.18596928, 0.20049580, 0.04321161],
[0.60499579, 0.38050702, 0.26301983, 0.58410214],
[0.42093993, 0.42060113, 0.89575178, 0.57233768],
[0.01160462, 0.55440856, 0.50919182, 0.80756614]
]
)



# 1. test of TransformIntervalIndex

In [88]:
# Bin every entry of `data` into 4 equal-width intervals and display the indices.
index_1 = TransformIntervalIndex(data, 4)
index_1

tensor([[1, 2, 1, 2],
        [2, 1, 3, 3],
        [4, 1, 1, 1],
        [3, 2, 2, 3],
        [2, 2, 4, 3],
        [1, 3, 3, 4]])

In [84]:
# Same binning with 5 equal-width intervals.
index_2 = TransformIntervalIndex(data, 5)
index_2

tensor([[1, 2, 1, 2],
        [2, 1, 3, 4],
        [4, 1, 2, 1],
        [4, 2, 2, 3],
        [3, 3, 5, 3],
        [1, 3, 3, 5]])

In [85]:
# Same binning with 6 equal-width intervals.
index_3 = TransformIntervalIndex(data, 6)
index_3

tensor([[2, 2, 1, 2],
        [3, 1, 4, 4],
        [5, 2, 2, 1],
        [4, 3, 2, 4],
        [3, 3, 6, 4],
        [1, 4, 4, 5]])

In [113]:
# Inspect the tensor type of the interval indices.
index_3.type()

'torch.LongTensor'

In [122]:
def TransformMultivariate(dataInterval, nBin):
    """Collapse per-coordinate interval indices into one 1-based cell label.

    Each row of ``dataInterval`` is read as a base-``nBin`` number whose digits
    are the (index - 1) values, most significant digit first; 1 is added so the
    labels start at 1.

    Args:
        dataInterval: 2-D integer tensor of 1-based interval indices, one row
            per observation and one column per coordinate.
        nBin: number of intervals per coordinate (the base of the encoding).

    Returns:
        1-D tensor with one cell label per row of ``dataInterval``.
    """
    dim = dataInterval.size(dim=1)
    # Exponents dim-1, ..., 1, 0, created on the input's device.
    exponents = torch.arange(
        dim - 1, -1, -1, dtype=torch.long, device=dataInterval.device
    )
    weights = torch.pow(nBin, exponents)
    # Multiply-and-sum instead of matmul: identical for integer inputs, and it
    # does not rely on an integer matmul kernel being available on the device.
    return 1 + ((dataInterval - 1) * weights).sum(dim=1)

# 2.1 Test of TransformMultivariate

In [125]:
# Turn the 4-interval index rows into single cell labels and display them.
multi_1 = TransformMultivariate(index_1, 4);
multi_1

tensor([ 18,  75, 193, 151,  95,  44])

In [126]:
# Same encoding for the 5-interval indices.
multi_2 = TransformMultivariate(index_2, 5)
multi_2

tensor([ 27, 139, 381, 408, 323,  65])

In [128]:
# Same encoding for the 6-interval indices, then inspect the tensor type.
multi_3 = TransformMultivariate(index_3, 6)
multi_3
multi_3.type()

'torch.LongTensor'

# 3. implementation of TransformOnehot

In [137]:
def TransformOnehot(dataMultivariate, newdim):
    """One-hot encode 1-based cell labels.

    Args:
        dataMultivariate: integer tensor of labels that start at 1.
        newdim: number of classes, i.e. the length of each one-hot vector.

    Returns:
        Tensor with one extra trailing dimension of size ``newdim``.
    """
    # one_hot expects 0-based class ids, so shift the labels down by one.
    zero_based = dataMultivariate - 1
    encoded = torch.nn.functional.one_hot(zero_based, num_classes=newdim)
    return encoded


In [139]:
# One-hot encode the cell labels over 4**4 classes and show the first row.
onehot_1 = TransformOnehot(multi_1, 4**4)
onehot_1[0,:]

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [133]:
# The class count that was passed to TransformOnehot for onehot_1.
4**4

256

In [18]:
# Breakpoints of 5 equal-width intervals on [0, 1].
breaks = torch.linspace(start = 0, end = 1, steps = 5 + 1)
breaks


tensor([0.0000, 0.2000, 0.4000, 0.6000, 0.8000, 1.0000])

In [20]:
# Scratch tensors for experimenting with bucketize-style boundaries.
# The comma between 0.1 and 0.25 was missing, which is a SyntaxError.
boundaries = torch.tensor([0.1, 0.25, 0.6, 0.8, 0.9])
boundaries
v = torch.tensor([[1, 6, 9], [3, 6, 9]])


In [29]:
# Display v.
v


tensor([[1, 6, 9],
        [3, 6, 9]])

In [30]:
# Adding the boolean mask increments exactly the entries equal to 1.
v + (v==1)


tensor([[2, 6, 9],
        [3, 6, 9]])

In [25]:
# Rescale the data by sqrt(number of columns) and, in the private case, add
# random noise to every entry.
#
# Arguments:
#   data            numeric matrix; assumed to be discrete by nature or already
#                   discretized (columns: kappa^d for continuous data, d for
#                   discrete data).
#   alpha           privacy parameter; Inf means no noise is added.
#   discrete.noise  if TRUE use noise.discrete(), otherwise noise.conti().
#
# Returns the rescaled (and possibly perturbed) data.
PrivatizeTwoSample <-
  function(data,
           alpha = Inf,
           discrete.noise = FALSE) {
    n.rows <- nrow(data)
    n.cols <- ncol(data)
    rescaled <- sqrt(n.cols) * data

    # Non-private case: nothing to add.
    if (alpha == Inf) {
      return(rescaled)
    }

    # Private case: one noise draw per matrix entry.
    perturbation <- if (discrete.noise) {
      noise.discrete(n = n.rows, dim = n.cols, alpha = alpha)
    } else {
      noise.conti(n = n.rows, dim = n.cols, alpha = alpha)
    }
    rescaled + perturbation
  }


      
# Draw n * dim continuous noise values: the difference of two independent
# Exp(sqrt(2)) draws, multiplied by (sqrt(8) / alpha) * sqrt(dim).
# dim = kappa^d for continuous data, d for discrete data.
noise.conti <- function(n, dim, alpha) {
  laplace.scale <- (sqrt(8) / alpha) * sqrt(dim)
  n.draws <- n * dim
  rate <- sqrt(2)
  # Keep the two draws in this order so the random stream is consumed as before.
  first.draw <- rexp(n.draws, rate)
  second.draw <- rexp(n.draws, rate)
  laplace.scale * (first.draw - second.draw)
}

# Draw n * dim discrete noise values: the difference of two independent
# geometric draws with success probability 1 - exp(-alpha / (2 * sqrt(dim))).
# dim = kappa^d for continuous data, d for discrete data.
noise.discrete <- function(n, dim, alpha) {
  success.prob <- 1 - exp(-alpha / (2 * sqrt(dim)))
  n.draws <- n * dim
  # Keep the two draws in this order so the random stream is consumed as before.
  first.draw <- rgeom(n.draws, success.prob)
  second.draw <- rgeom(n.draws, success.prob)
  first.draw - second.draw
}

# Closed-form value 8 * dim / alpha^2, the theoretical variance associated
# with the continuous noise.
noise.vairance.theoretic.conti <- function(dim, alpha) {
  numerator <- 8 * dim
  numerator / (alpha^2)
}

# Closed-form value 2 * p / (1 - p)^2 with p = exp(-alpha / (2 * sqrt(dim))),
# the theoretical variance associated with the discrete noise.
noise.vairance.theoretic.discrete <- function(dim, alpha) {
  failure.prob <- exp(-alpha / (2 * sqrt(dim)))
  denominator <- (1 - failure.prob)^2
  2 * failure.prob / denominator
}


# Two-sample U-statistic built from row inner products.
#
# Arguments:
#   data  numeric matrix; the first n.1 rows are sample x, the rest sample y.
#   n.1   number of rows belonging to sample x.
#
# Returns (mean of x_i . x_j over i != j) + (mean of y_i . y_j over i != j)
#         - 2 * (mean of x_i . y_j over all pairs).
UstatTwoSample <- function(data, n.1) {
  n.total <- nrow(data)
  n.2 <- n.total - n.1

  rows.x <- data[1:n.1, ]
  rows.y <- data[(n.1 + 1):n.total, ]

  # Average of the off-diagonal entries of the Gram matrix of one sample.
  WithinSampleMean <- function(rows, n.rows) {
    gram <- rows %*% t(rows)
    diag(gram) <- 0
    sum(gram) / (n.rows * (n.rows - 1))
  }

  within.x <- WithinSampleMean(rows.x, n.1)
  within.y <- WithinSampleMean(rows.y, n.2)

  # Cross term: twice the average inner product between an x row and a y row.
  between <- sum(rows.x %*% t(rows.y)) * (2 / (n.1 * n.2))

  within.x + within.y - between
}

# Permutation two-sample test on binned, privatized data.
#
# Arguments:
#   B               number of random permutations (non-negative integer).
#   data.x, data.y  the two samples, one observation per row.
#   kappa           passed to Bin() for both samples.
#   alpha           privacy parameter forwarded to PrivatizeTwoSample().
#   gamma           level the permutation p-value is compared with.
#   discrete        currently not used by this function.
#                   NOTE(review): it is not forwarded to PrivatizeTwoSample()'s
#                   discrete.noise argument; confirm whether that is intended.
#
# Returns TRUE (reject the null) when the permutation p-value is below gamma,
# FALSE (retain the null) otherwise.
PrivatePermutationTwoSampleTest <-
  function(B, data.x, data.y, kappa, alpha, gamma, discrete = FALSE) {
    n.1 <- nrow(data.x)
    n.2 <- nrow(data.y)
    n.total <- n.1 + n.2

    data.x.binned <- Bin(data.x, kappa)
    data.y.binned <- Bin(data.y, kappa)

    # Privatize once; every permutation reuses the same privatized rows.
    data.combined <- rbind(data.x.binned, data.y.binned)
    data.privatized <- PrivatizeTwoSample(data.combined, alpha)
    ustat.original <- UstatTwoSample(data.privatized, n.1)

    # Permutation procedure. seq_len() makes B = 0 a no-op, whereas 1:B would
    # iterate over c(1, 0).
    perm.stats <- numeric(B)
    for (b in seq_len(B)) {
      perm <- sample.int(n.total)
      perm.stats[b] <- UstatTwoSample(data.privatized[perm, ], n.1)
    }
    p.value.proxy <- (1 + sum(ustat.original < perm.stats)) / (B + 1)

    # Test result: TRUE = 1 = reject the null, FALSE = 0 = retain the null.
    p.value.proxy < gamma
  }

# Evaluates Beta(2a - 1, 2b - 1) / Beta(a, b)^2 - 1 for a = shape.1 and
# b = shape.2 (named as the L2 distance between a Beta and the uniform law).
L2distBetaUnif <- function(shape.1, shape.2) {
  normaliser <- Beta(shape.1, shape.2)
  squared.norm <- Beta(2 * shape.1 - 1, 2 * shape.2 - 1) / normaliser^2
  squared.norm - 1
}

# Evaluates ||f1||^2 + ||f2||^2 - 2 <f1, f2> from closed-form Beta-function
# ratios, where f1 has shapes (shape.1.1, shape.1.2) and f2 has shapes
# (shape.2.1, shape.2.2).
L2distBetaBeta <- function(shape.1.1, shape.1.2, shape.2.1, shape.2.2) {
  # Squared-norm term of a single density with shapes (a, b).
  SquaredNorm <- function(a, b) {
    Beta(2 * a - 1, 2 * b - 1) / (Beta(a, b))^2
  }

  norm.first <- SquaredNorm(shape.1.1, shape.1.2)
  norm.second <- SquaredNorm(shape.2.1, shape.2.2)

  normalisers <- Beta(shape.1.1, shape.1.2) * Beta(shape.2.1, shape.2.2)
  inner.product <-
    Beta(shape.1.1 + shape.2.1 - 1, shape.1.2 + shape.2.2 - 1) / normalisers

  norm.first + norm.second - 2 * inner.product
}

# Beta function B(shape.1, shape.2).
#
# For strictly positive arguments this delegates to base R's beta(), which
# stays finite where gamma(a) * gamma(b) / gamma(a + b) overflows to NaN
# (gamma() returns Inf for arguments above roughly 171). For any other input
# the original gamma-product formula is kept so earlier results are unchanged.
Beta <- function(shape.1, shape.2) {
  both.positive <- shape.1 > 0 & shape.2 > 0
  if (isTRUE(all(both.positive))) {
    return(beta(shape.1, shape.2))
  }
  gamma(shape.1) * gamma(shape.2) / gamma(shape.1 + shape.2)
}

tensor([[1, 6, 9],
        [3, 6, 9]])