-
Notifications
You must be signed in to change notification settings - Fork 0
/
model.py
154 lines (124 loc) · 5.88 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import numpy as np
from types import SimpleNamespace
import functions
import copy
import time
import pickle
# NECESSARY VARIABLES TO RUN THE GIBBS SAMPLER
PARAMETERS = {
# General Knowns
'nInduX': 30, # Number of x inducing points
'nInduY': 30, # Number of y grid points (total inducing points is nInduY*nInduX)
'nFineX': 100, # Fine grid points in x
'nFineY': 100, # Fine grid points in y (total fine points is nFineY*nFineX)
'nIndu': 0, # Number of inducing points after trimming
# Knowns to be evaluated
'dataCoordinates': None, # All points of each trajectory except final location
'sampleCoordinates': None, # All points of each trajectory except initial location
'induCoordinates': None, # Coordinates of the inducing points
'fineCoordinates': None, # Coordinates of fine grid points
'cInduIndu': None, # Covariance matrix between inducing points
'cInduData': None, # Covariance matrix between inducing points and data points
'cInduFine': None, # Covariance matrix between inducing points and fine grid points
'cInduInduChol': None, # Cholesky decomposition of cInduIndu
'cInduInduInv': None, # Inverse of cInduIndu
'cDataIndu': None, # Product of cInduData*cInduInduInv
# Variables
'P': float, # Probability of each sample
'dIndu': None, # Diffusion coefficient sample map at inducing points
'dData': None, # Interpolated diffusion from induCoordinates to dataCoordinates
# Priors
'covLambda': None, # Coefficient of covariance square exponential kernel (1 only used if hyper parameters on specified)
'covL': None, # Lengths parameter of covariance square exponential kernel (20 only used if hyper parameters on specified)
'mle': None, # Prior on Inducing point MAP (set to MLE in init)
'priorMean': None, # Flat surface at MLE to be used as mean of GPP
# Sampler parameters
'epsilon': 1e-2, # Perturbation parameter to keep matrix decomposition numerically stable and sample magnitude
'temperature': 10, # Temperature for simulated annealing will decay to one over time
}
# THIS FUNCTION RUNS THE GIBBS SAMPLER
def analyze(data, nIter=1000, **kwargs):
"""
Run the Gibbs sampler.
Args:
data (Data): Object containing trajectory data.
nIter (int, optional): Number of iterations. Defaults to 1000.
**kwargs: Additional keyword arguments.
Returns:
SimpleNamespace: Object containing the results.
numpy.ndarray: Diffusion coefficient sample map at inducing points.
numpy.ndarray: Probability of each sample.
"""
# Progress Marker
print('Initialization Started')
startTime = time.time()
# Initialize data and variables and time them
parameters = {**copy.deepcopy(PARAMETERS), **kwargs}
variables = functions.initialization(parameters, data)
endTime = time.time()
# Progress Marker
print("Initialization Successful: " + str(endTime - startTime))
print("The flat MLE is: " + str(variables.mle))
# Get number of iterations
burnIter = 2 * nIter
stabilizeIter = 200
# Redefine perturbation magnitude for samples
variables.epsilon = np.ones(np.shape(variables.dIndu))
startTime = time.time()
# Initial temp and cooling rate for targeted annealing
initTemp = 7.5
coolRate = np.log(initTemp) / burnIter
## BURN IN 1 ##
# Burn in super aggressively but do not save the samples,
# updating proposal size to maintain healthy acceptance rate
accVect = np.zeros(np.shape(variables.dIndu))
for i in range(burnIter):
variables.temperature = functions.expCooling(i, initTemp, coolRate)
print(f"Annealing Iteration {i+1}/{burnIter}", end=" ")
t = time.time()
variables, accCount = functions.diffusionPointSampler(variables, data)
# Update proposal size for next iteration
accVect += accCount
accVectNorm = 100 * accVect / (i + 1)
variables.epsilon = np.where(
accVectNorm > 40, variables.epsilon * 1.5,
np.where(accVectNorm < 20, variables.epsilon * 0.5, variables.epsilon)
)
print(f"({time.time()-t:.3f}s)")
## Stabilize proposal Magnitude ##
# Set temperature back to 1 and acceptance to 0, and iterate
# a few times to find ideal magnitude of proposal to maintain healthy acceptance
variables.temperature = 1
accVect = np.zeros(np.shape(variables.dIndu))
for i in range(stabilizeIter):
print(f"Stabilization Iteration {i+1}/{stabilizeIter}", end=" ")
t = time.time()
variables, accCount = functions.diffusionPointSampler(variables, data)
# Update proposal size for next iteration
accVect += accCount
if i > 10:
accVectNorm = 100 * accVect / (i + 1)
variables.epsilon = np.where(accVectNorm > 27.5, variables.epsilon * 1.25, np.where(accVectNorm < 22.5, variables.epsilon * 0.75, variables.epsilon))
print(f"({time.time()-t:.3f}s)")
## MCMC SAMPLING ##
# Initialize outputs
dVect = np.zeros((nIter, variables.nIndu))
pVect = np.zeros(nIter)
map_variables = copy.deepcopy(variables)
map_variables.P = -np.inf
# Gibbs Sampler
for i in range(nIter):
print(f"MCMC Iteration {i+1}/{nIter}", end=" ")
t = time.time()
# Sample diffusion map
variables, accCount = functions.diffusionPointSampler(variables, data)
accVect += accCount
# Save the sample
dVect[i] = variables.dIndu
pVect[i] = variables.P
if variables.P >= map_variables.P:
map_variables = copy.deepcopy(variables)
print(f"({time.time()-t:.3f}s)")
endTime = time.time()
print(str(nIter) + " iterations in " + str(endTime-startTime) + " seconds." )
return map_variables, dVect, pVect