In [1]:
import scipy.stats
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd 
import plotly.plotly as py
import plotly.graph_objs as go
py.sign_in('AriaRen','AtiibfXGar8FRpQvhnzD')

In [2]:
def PoissonPP(rt, Dx, Dy = None, Dz = None, seed = None):
    '''
    rt = rate or Poisson distribution
    Dx, Dy, Dz = the dimension of 3D array. 
    seed = seed variable for random_state in .rvs arguments (default = None)
    
    POISSONPP determines the number of events 'N' for a rectangular region,
    given the rate 'rt', the dimensions, 'Dx', 'Dy', 'Dz' and seed variable.
    Returns a <Nx3> NumPy array.
    
    '''
    
    if Dy == None:
        Dy = Dx
    if Dz == None:
        Dz = Dx
    if seed == None: 
        N = scipy.stats.poisson(rt*Dx*Dy*Dz).rvs()
        x = scipy.stats.uniform.rvs(loc = 0, scale = Dx, size = ((N, 1)))
        y = scipy.stats.uniform.rvs(loc = 0, scale = Dy, size = ((N, 1)))
        z = scipy.stats.uniform.rvs(loc = 0, scale = Dz, size = ((N, 1)))
    else:
        N = scipy.stats.poisson(rt*Dx*Dy*Dz).rvs(random_state=seed)
        x = scipy.stats.uniform.rvs(loc = 0, scale = Dx, size = ((N, 1)), random_state=seed)
        y = scipy.stats.uniform.rvs(loc = 0, scale = Dy, size = ((N, 1)), random_state=seed + 1)
        z = scipy.stats.uniform.rvs(loc = 0, scale = Dz, size = ((N, 1)), random_state=seed + 2)
        '''
        print('Dx = {}'.format(Dx))
        print('Dy = {}'.format(Dy))
        print('N = {}'.format(N))
        '''
    P = np.hstack((x, y, z))
    return(P)


In [3]:
def ThomasPP(rt, Dx, Dy, Dz, sigma, mu, seed = None):
    '''
    rt = rate or Poisson distribution
    Dx, Dy, Dz = the dimension of 3D array
    sigma = the standard deviation of Gaussian distribution surrounding parent points
    mu = generate the count for each Gaussian distribution following Poisson distribution
    seed = seed variable for random_state in .rvs arguments (default = None)
    
    THOMASPP generates multiple Gaussian distribution surrounding given parents points, 
    which are created by PoissonPP(). The sample size of Gaussian distribution is determined by 
    Poisson distribution 'mu', where the variance is determined by 'Sigma'.
    Returns a <Nx3> NumPy array.
    '''

    # Create a set of parent points form a Poisson(kappa)
    # distribution on the square region [0, Dx] * [0, Dx]
    
    if seed == None:
        parents = PoissonPP(rt, Dx, Dy, Dz, seed = None)
    else:    
        parents = PoissonPP(rt, Dx, Dy, Dz, seed = seed)
    print(seed)
    print('seed')
    # M is the number of parents
    M = parents.shape[0]
    # an empty list for the Thomas process points
    x = []
    y = []
    z = []
    # for each parent point
    for i in range(M):
        # determine a number of children according to a Poisson(mu) distribution
        parent_x = parents[i][0]
        parent_y = parents[i][1]
        parent_z = parents[i][2]
        pdf_x = scipy.stats.norm(loc = parent_x, scale = sigma)
        pdf_y = scipy.stats.norm(loc = parent_y, scale = sigma)
        pdf_z = scipy.stats.norm(loc = parent_z, scale = sigma)
        print(parent_x)
        print(pdf_x)
        
        # check if the seed arg exists.
        if seed == None:
            N = scipy.stats.poisson(mu).rvs()
            children_x = list(pdf_x.rvs(N))
            children_y = list(pdf_y.rvs(N))
            children_z = list(pdf_z.rvs(N))
        else:
            N = sum(scipy.stats.poisson(mu).rvs(size = 32, random_state = seed))
            children_x = list(pdf_x.rvs(N, random_state = (seed + i + 1)))
            children_y = list(pdf_y.rvs(N, random_state = (seed + i + 2)))
            children_z = list(pdf_z.rvs(N, random_state = (seed + i + 3)))
            print('N')
            print(N)
            print(children_x)
            
        
        # concate x y coordinates
        x = x + children_x
        y = y + children_y
        z = z + children_z
        
        
    x = np.array([x]).T
    y = np.array([y]).T
    z = np.array([z]).T

    P = np.hstack((x, y, z))
    return P

In [4]:
def xyzroi(xyzarray, xmin, xmax, ymin, ymax, zmin, zmax):
    '''
    xyzarray: A <Nx3> NumPy array with xyz coordinates.
    xmin, xman: the range in x-axis
    ymin, ymax: the range in y-axis
    zmin, zmax: the range in z-axis
    
    XYROI crop the dataset by given ranges in x and y axis ('xmin', 'xmax', 
    'ymin', 'max'), then return a <Nx2> NumPy array. 
    '''

    temp_xyz = xyzarray[(xyzarray[:,0] > xmin) & (xyzarray[:,0] < xmax) & (xyzarray[:,1] > ymin) & (xyzarray[:,1] < ymax)\
                       & (xyzarray[:,2] > zmin) & (xyzarray[:,2] < zmax)]
    return temp_xyz


In [5]:
def xyzroi_idx(xyzarray, xmin, xmax, ymin, ymax, zmin, zmax):
    '''
    xyzarray: A <Nx3> NumPy array with xy coordinates.
    xmin, xman: the range in x-axis
    ymin, ymax: the range in y-axis
    zmin, zmax: the range in z-axis
    
    XYZROI crop the dataset by given ranges in x, y and z axis ('xmin', 'xmax', 
    'ymin', 'ymax', 'zmin', 'zmax'), then return a <Nx3> NumPy array. 
    
    The last column is index. 
    '''

    idx = xyzarray.shape[0]
    idxarray = np.array([range(idx)]).T
    xyzarray = np.hstack((xyzarray, idxarray))
    temp_xyz = xyzarray[(xyzarray[:,0] > xmin) & (xyzarray[:,0] < xmax) & (xyzarray[:,1] > ymin) & (xyzarray[:,1] < ymax)\
                      & (xyzarray[:,2] > zmin) & (xyzarray[:,2] < zmax)]
    return temp_xyz

In [17]:
a = ThomasPP(20, 2, 2, 2, 5, 0.6, seed = 3)

3
seed
1.101595805149151
<scipy.stats._distn_infrastructure.rv_frozen object at 0x1168aea20>
N
13
[1.3544043408638489, 3.601352471338296, -3.8779488503851742, 4.569588346605709, -0.9899117949853995, -6.82129037041147, -2.1369380304601013, 4.094471674986037, 2.762845968198973, -4.635787359578249, 4.194944250283558, 0.6616611634477738, 3.2269577875842583]
1.4162956452362097
<scipy.stats._distn_infrastructure.rv_frozen object at 0x1168f7550>
N
13
[3.6224330796614166, -0.23805511423422843, 13.570151580275109, 0.15583499722082506, 1.9643448531271237, 9.328701230544027, -3.129866379045, -1.5418876444152323, 2.354311774421387, -0.23305414366058663, -4.547527416872821, 0.3919130922982732, -0.377849089770006]
0.5818094778258887
<scipy.stats._distn_infrastructure.rv_frozen object at 0x1168d4c88>
N
13
[-0.9771088896116944, 4.22682909588841, 1.6709134182058987, -3.913649504909806, -11.852093780313435, 5.14806708400533, 6.217128107685051, -6.98865666532519, 8.778264892514343, -1.5676585372058514, 1

0.20020868916288936
<scipy.stats._distn_infrastructure.rv_frozen object at 0x1168e2470>
N
13
[4.446773119347882, -5.413527396045915, 5.915632152467595, 3.5750886174096017, -2.1239249803824776, 5.509068595022253, 7.349900744596826, -3.126708997150182, -4.7223108831168386, -4.99807634506067, -2.6909500197593808, 0.6725668862047862, 5.788085786350729]
0.25858772921770834
<scipy.stats._distn_infrastructure.rv_frozen object at 0x1168d4d68>
N
13
[-0.3994732806602398, -2.0436475853413207, 3.5733980498444264, -6.3896534975790455, -2.4546301744381167, -3.405769896961862, -4.9079628519088985, -0.8840069956072059, 0.04977994694960225, -9.063211989047137, 2.7240837959647872, 10.767900430902003, -5.58161102817761]
1.106555463615307
<scipy.stats._distn_infrastructure.rv_frozen object at 0x1168d48d0>
N
13
[1.9908090690096198, 12.995326088465593, 6.100219748883977, -5.444198521283911, 2.0075050694620717, -7.7172146332239295, -6.109073880169528, -5.88987232308502, 5.208325459881448, 7.583052062488714, 

In [20]:
a

array([[ 1.35440434,  4.14019711, -1.11493203],
       [ 3.60135247,  0.27970892,  4.08900596],
       [-3.87794885, 14.08791561,  1.53309028],
       ...,
       [-3.55426255,  4.54369854, -1.38294838],
       [ 4.52314056, -1.30994448,  6.11828067],
       [ 6.64581438,  6.9589386 ,  7.49333646]])

In [22]:
import pandas as pd
a = pd.DataFrame(a)
a.to_csv("newdata.csv",index = False)

In [80]:
x,y,z = a.transpose()
trace1 = go.Scatter3d(
    x=x,
    y=y,
    z=z,
    mode='markers',
    marker=dict(
        size=2,
        color=z,                # set color to an array/list of desired values
        colorscale='Viridis',   # choose a colorscale
        opacity=0.8
    )
)

data = [trace1]
layout = go.Layout(
    margin=dict(
        l=1,
        r=0,
        b=0,
        t=0
    )
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='3d-scatter-colorscale')