@@ -56,7 +56,7 @@ def Face2StepModel(pointsN, eyeSize, latentSize, embeddingsSize):
56
56
# we need to combine them with each other and with encodedP
57
57
combined = encodedP # start with the face features
58
58
for i , EFeat in enumerate (encodedEFList ):
59
- combined = CResidualMultiplicativeLayer (name = 'F2S/ResMul-%d' % i )([
59
+ combined = CFusingBlock (name = 'F2S/ResMul-%d' % i )([
60
60
combined ,
61
61
sMLP (sizes = [latentSize ] * 1 , activation = 'relu' , name = 'F2S/MLP-%d' % i )(
62
62
L .Concatenate (- 1 )([combined , encodedP , EFeat , embeddings ])
@@ -94,7 +94,7 @@ def Step2LatentModel(latentSize, embeddingsSize):
94
94
temporal = sMLP (sizes = [latentSize ] * 1 , activation = 'relu' )(
95
95
L .Concatenate (- 1 )([stepsData , encodedT , embeddings ])
96
96
)
97
- temporal = CResidualMultiplicativeLayer ()([stepsData , temporal ])
97
+ temporal = CFusingBlock ()([stepsData , temporal ])
98
98
intermediate ['S2L/enc0' ] = temporal
99
99
# # # # # # # # # # # # # # # # # # # # # # # # # # # # #
100
100
for blockId in range (3 ):
@@ -104,14 +104,14 @@ def Step2LatentModel(latentSize, embeddingsSize):
104
104
temp = sMLP (sizes = [latentSize ] * 1 , activation = 'relu' )(
105
105
L .Concatenate (- 1 )([temporal , temp ])
106
106
)
107
- temporal = CResidualMultiplicativeLayer ()([temporal , temp ])
107
+ temporal = CFusingBlock ()([temporal , temp ])
108
108
intermediate ['S2L/ResLSTM-%d' % blockId ] = temporal
109
109
continue
110
110
# # # # # # # # # # # # # # # # # # # # # # # # # # # # #
111
111
latent = sMLP (sizes = [latentSize ] * 1 , activation = 'relu' )(
112
112
L .Concatenate (- 1 )([stepsData , temporal , encodedT , encodedT ])
113
113
)
114
- latent = CResidualMultiplicativeLayer ()([stepsData , latent ])
114
+ latent = CFusingBlock ()([stepsData , latent ])
115
115
return tf .keras .Model (
116
116
inputs = {
117
117
'latent' : latents ,
@@ -185,6 +185,35 @@ def Face2LatentModel(
185
185
IP = lambda x : IntermediatePredictor ()(x ) # own IntermediatePredictor for each output
186
186
res ['intermediate' ] = {k : IP (x ) for k , x in intermediate .items ()}
187
187
res ['result' ] = IP (res ['latent' ])
188
+ ###################################
189
+ # TODO: figure out whether this is helpful or not
190
+ # branch for global coordinates transformation
191
+ # predict shift, rotation, scale
192
+ emb = L .Concatenate (- 1 )([userIdEmb , placeIdEmb , screenIdEmb ])
193
+ emb = sMLP (sizes = [64 , 64 , 64 , 64 , 32 ], activation = 'relu' )(emb [:, 0 ])
194
+ shift = L .Dense (2 , name = 'GlobalShift' )(emb )[:, None ]
195
+ rotation = L .Dense (1 , name = 'GlobalRotation' , activation = 'sigmoid' )(emb )[:, None ] * np .pi
196
+ scale = L .Dense (2 , name = 'GlobalScale' )(emb )[:, None ]
197
+
198
+ shifted = res ['result' ] + shift - 0.5 # [0.5, 0.5] -> [0, 0]
199
+ # Rotation matrix components
200
+ cos_rotation = L .Lambda (lambda x : tf .cos (x ))(rotation )
201
+ sin_rotation = L .Lambda (lambda x : tf .sin (x ))(rotation )
202
+ rotation_matrix = L .Lambda (lambda x : tf .stack ([x [0 ], x [1 ]], axis = - 1 ))([cos_rotation , sin_rotation ])
203
+
204
+ # Apply rotation
205
+ rotated = L .Lambda (
206
+ lambda x : tf .einsum ('isj,iomj->isj' , x [0 ], x [1 ])
207
+ )([shifted , rotation_matrix ]) + 0.5 # [0, 0] -> [0.5, 0.5] back
208
+
209
+ # Apply scale
210
+ scaled = rotated * scale
211
+ def clipWithGradient (x ):
212
+ res = tf .clip_by_value (x , 0.0 , 1.0 )
213
+ return x + tf .stop_gradient (res - x )
214
+
215
+ res ['result' ] = L .Lambda (clipWithGradient )(scaled )
216
+ ###################################
188
217
189
218
main = tf .keras .Model (inputs = inputs , outputs = res )
190
219
return {
@@ -195,13 +224,6 @@ def Face2LatentModel(
195
224
}
196
225
197
226
if __name__ == '__main__' :
198
- # autoencoder = FaceAutoencoderModel(latentSize=64, means={
199
- # 'points': np.zeros((478, 2), np.float32),
200
- # 'left eye': np.zeros((32, 32), np.float32),
201
- # 'right eye': np.zeros((32, 32), np.float32),
202
- # })['main']
203
- # autoencoder.summary(expand_nested=True)
204
-
205
227
X = Face2LatentModel (steps = 5 , latentSize = 64 ,
206
228
embeddings = {
207
229
'userId' : 1 , 'placeId' : 1 , 'screenId' : 1 , 'size' : 64
0 commit comments