In [1]:
%tensorflow_version 1.x

TensorFlow 1.x selected.


In [2]:
import time
import tensorflow as tf
tf.__version__

'1.15.2'

In [3]:
def create_my_generator():
    """Build the generator: a 100-dimensional input mapped to 3 outputs.

    The network is four Dense hidden layers (128, 64, 32, 16 units), each
    followed by LeakyReLU(alpha=0.2), and a final linear Dense(3) layer.
    The model is returned uncompiled.
    """
    layers = tf.keras.layers
    hidden_widths = (128, 64, 32, 16)

    # Only the first Dense layer carries the input dimension.
    stack = [layers.Dense(units=hidden_widths[0], input_dim=100),
             layers.LeakyReLU(alpha=0.2)]
    for width in hidden_widths[1:]:
        stack.append(layers.Dense(units=width))
        stack.append(layers.LeakyReLU(alpha=0.2))
    stack.append(layers.Dense(units=3, activation='linear'))

    model = tf.keras.Sequential()
    for layer in stack:
        model.add(layer)
    return model

# Instantiate the generator and print its layer/parameter summary.
G = create_my_generator()
G.summary()

def create_my_discriminator(output_activation='sigmoid'):
    """Build the discriminator: 3 input features mapped to a single score.

    The network is Dense(16) -> Dense(8) -> Dense(3), each followed by
    LeakyReLU(alpha=0.2), and a final Dense(1) output layer. The model is
    returned uncompiled.

    Args:
        output_activation: activation of the final Dense(1) layer. The
            default ``'sigmoid'`` keeps the original behaviour and bounds the
            score to (0, 1). A Wasserstein critic is normally left unbounded;
            pass ``None`` or ``'linear'`` for that use.

    Returns:
        A ``tf.keras.Sequential`` model with output shape ``(None, 1)``.
    """
    layers = tf.keras.layers

    D = tf.keras.Sequential()
    D.add(layers.Dense(units=16, input_dim=3))
    D.add(layers.LeakyReLU(alpha=0.2))
    for units in (8, 3):
        D.add(layers.Dense(units=units))
        D.add(layers.LeakyReLU(alpha=0.2))
    D.add(layers.Dense(units=1, activation=output_activation))
    return D

# Instantiate the discriminator and print its layer/parameter summary.
D = create_my_discriminator()
D.summary()

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 128)               12928     
_________________________________________________________________
leaky_re_lu (LeakyReLU)      (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 64)                8256      
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 32)                2080      
_________________________________________________________________
leaky_re_lu_2 (LeakyReLU)    (None, 32)                0         
_________________________________________

In [5]:
import pandas as pd
import numpy as np

# NOTE(review): unpickling can execute arbitrary code; only load pickle
# files from a trusted source.
df = pd.read_pickle("original_79.pkl")
# Convert the timestamp to float seconds: cast to an explicit 64-bit integer
# (plain `int` is 32-bit on some platforms and cannot hold the value), then
# divide by 1e9. Assumes the datetime column has nanosecond resolution.
df["time-f"] = pd.to_datetime(df['time']).astype('int64') / 10**9
# Keep only the three numeric features the GAN is trained on.
df = df[['lat','lon','time-f']]
df.head()

Unnamed: 0,lat,lon,time-f
0,39.975753,116.330313,1274704000.0
1,39.975652,116.329943,1274704000.0
2,39.975628,116.329563,1274704000.0
3,39.975635,116.32946,1274704000.0
4,39.975656,116.329468,1274704000.0


In [6]:
# Summary statistics of the lat / lon / time-f columns before scaling.
df.describe()

Unnamed: 0,lat,lon,time-f
count,11243.0,11243.0,11243.0
mean,39.919573,116.373894,1275613000.0
std,0.033148,0.039916,624455.9
min,39.866934,116.318329,1274704000.0
25%,39.895702,116.349771,1275010000.0
50%,39.909312,116.357713,1275537000.0
75%,39.941451,116.383709,1276168000.0
max,39.989226,116.634624,1276687000.0


In [7]:
from sklearn.preprocessing import StandardScaler
# Standardise every column of `df`. The fitted `scaler` is kept so that
# generated samples can later be mapped back with `inverse_transform`.
scaler = StandardScaler()
df2 = scaler.fit(df).transform(df)
df2

array([[ 1.69486893, -1.09187489, -1.45582494],
       [ 1.69182189, -1.10114484, -1.45581694],
       [ 1.69109784, -1.11066533, -1.45580893],
       ...,
       [-1.19538006,  0.42275995,  1.71946107],
       [-1.19531973,  0.42133187,  1.71946908],
       [-1.19405264,  0.41774916,  1.71947708]])

In [8]:

class WassersteinGAN(object):
    """Wasserstein GAN with gradient penalty on a TF 1.x graph.

    The constructor builds the losses and Adam update ops in the default
    graph and opens a private ``tf.Session``. Training and sampling both go
    through that session, because the trained weights live only there.

    Args:
        g_net: Keras model mapping ``(batch, z_dim)`` noise to samples.
        d_net: Keras model mapping ``(batch, x_dim)`` samples to a score.
        z_sampler: callable ``(batch_size, z_dim) -> array`` of latent noise.
        scale: weight of the gradient-penalty term in the critic loss.
    """

    def __init__(self, g_net, d_net, z_sampler, scale=10.0):
        self.g_net = g_net
        self.d_net = d_net
        self.z_sampler = z_sampler
        self.x_dim = self.d_net.input_shape[1]
        self.z_dim = self.g_net.input_shape[1]
        self.x = tf.placeholder(tf.float32, [None, self.x_dim], name='x')
        self.z = tf.placeholder(tf.float32, [None, self.z_dim], name='z')

        # Generated samples and critic scores for real / generated inputs.
        self.x_ = self.g_net(self.z)

        self.d = self.d_net(self.x)
        self.d_ = self.d_net(self.x_)

        # Sign convention used here: the critic minimises D(real) - D(fake),
        # the generator minimises D(fake).
        self.g_loss = tf.reduce_mean(self.d_)
        self.d_loss = tf.reduce_mean(self.d) - tf.reduce_mean(self.d_)

        # Gradient penalty: draw one interpolation coefficient per sample
        # (shape (batch, 1)) so each row of x_hat lies at its own random
        # point between a real and a generated sample.
        batch = tf.shape(self.x)[0]
        epsilon = tf.random_uniform(tf.stack([batch, 1]), 0.0, 1.0)
        x_hat = epsilon * self.x + (1 - epsilon) * self.x_
        d_hat = self.d_net(x_hat)

        ddx = tf.gradients(d_hat, x_hat)[0]
        # The small constant keeps the sqrt gradient finite when the norm is 0.
        ddx = tf.sqrt(tf.reduce_sum(tf.square(ddx), axis=1) + 1e-12)
        ddx = tf.reduce_mean(tf.square(ddx - 1.0) * scale)

        self.d_loss = self.d_loss + ddx

        self.d_adam, self.g_adam = None, None
        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            self.d_adam = tf.train.AdamOptimizer(learning_rate=2e-5, beta1=0.5, beta2=0.9)\
                .minimize(self.d_loss, var_list=self.d_net.trainable_variables)
            self.g_adam = tf.train.AdamOptimizer(learning_rate=2e-5, beta1=0.5, beta2=0.9)\
                .minimize(self.g_loss, var_list=self.g_net.trainable_variables)

        gpu_options = tf.GPUOptions(allow_growth=True)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    @staticmethod
    def _sample_batch(data, batch_size):
        """Draw `batch_size` rows from `data` uniformly with replacement."""
        rows = np.random.randint(low=0, high=data.shape[0], size=batch_size)
        return data[rows]

    def train(self, batch_size=64, num_batches=1000000, data=None, patience=1):
        """Train from scratch, alternating 5 critic steps and 1 generator step.

        Every call re-runs the global variable initialiser, so training
        always restarts from fresh weights. Losses are printed every 100
        iterations, and the generator loss on a fresh noise batch is used
        for early stopping.

        Args:
            batch_size: number of real rows and noise vectors per step.
            num_batches: maximum number of generator updates.
            data: 2-D array-like of real samples with ``x_dim`` columns.
                When omitted, the module-level ``df2`` array is used, as in
                the original notebook.
            patience: number of consecutive evaluations without improvement
                in the generator loss before stopping. The default of 1
                stops at the first non-improving evaluation. ``None``
                disables early stopping.
        """
        if data is None:
            data = df2  # legacy fallback: scaled training array from the notebook
        data = np.asarray(data)

        d_iters = 5
        stale_evals = 0
        self.eval_g_loss = np.inf
        self.sess.run(tf.global_variables_initializer())
        start_time = time.time()
        for t in range(num_batches):
            for _ in range(d_iters):
                bx = self._sample_batch(data, batch_size)
                bz = self.z_sampler(batch_size, self.z_dim)
                self.sess.run(self.d_adam, feed_dict={self.x: bx, self.z: bz})

            bz = self.z_sampler(batch_size, self.z_dim)
            self.sess.run(self.g_adam, feed_dict={self.z: bz, self.x: bx})

            if t % 100 != 0:
                continue

            # Progress report on a fresh real/noise batch.
            bx = self._sample_batch(data, batch_size)
            bz = self.z_sampler(batch_size, self.z_dim)
            d_loss, g_loss = self.sess.run(
                [self.d_loss, self.g_loss], feed_dict={self.x: bx, self.z: bz}
            )
            print('Iter [%8d] Time [%5.4f] d_loss [%.4f] g_loss [%.4f]' %
                  (t, time.time() - start_time, d_loss, g_loss))

            # Early stopping on the generator loss of another noise batch.
            bz = self.z_sampler(batch_size, self.z_dim)
            eval_g_loss = self.sess.run(self.g_loss, feed_dict={self.z: bz})
            if eval_g_loss <= self.eval_g_loss:
                self.eval_g_loss = eval_g_loss
                stale_evals = 0
                continue
            stale_evals += 1
            if patience is not None and stale_evals >= patience:
                print("Stopped training at %d because eval g_loss is not decreasing" % t)
                break

    def predict(self, shape=(64, 100)):
        """Generate samples with the trained generator.

        Runs the generator output tensor in ``self.sess``. Calling
        ``g_net.predict`` instead would execute in the Keras backend session,
        which does not hold the weights trained here. ``train`` must have
        been called first so the variables are initialised in ``self.sess``.

        Args:
            shape: ``(n_samples, z_dim)`` of the noise batch to draw.

        Returns:
            Array of generated samples, one row per noise vector.
        """
        z = self.z_sampler(shape[0], shape[1])
        return self.sess.run(self.x_, feed_dict={self.z: z})

In [9]:
class NoiseSampler(object):
    """Callable that draws latent vectors from a standard normal distribution."""

    def __call__(self, batch_size, z_dim):
        """Return a ``(batch_size, z_dim)`` array of N(0, 1) samples.

        Samples come from NumPy's global random state.
        """
        noise_shape = (batch_size, z_dim)
        return np.random.normal(loc=0.0, scale=1.0, size=noise_shape)

In [10]:
# Wire the generator G and discriminator D built earlier into the WGAN
# trainer, using normally distributed noise as the latent sampler.
zs = NoiseSampler()
wgan = WassersteinGAN(G, D, zs)

In [16]:
# Training budget: rows / batch_size batches per epoch, times num_epochs,
# gives the maximum number of training iterations passed to `train`.
batch_size = 1000
num_epochs = 10000
num_batches = int((df2.shape[0] / batch_size) * num_epochs)

In [17]:
# Run adversarial training; losses are printed every 100 iterations and the
# loop may stop early when the evaluated generator loss stops decreasing.
wgan.train(batch_size=batch_size, num_batches=num_batches)

Iter [       0] Time [0.0663] d_loss [8.9848] g_loss [0.4887]
Iter [     100] Time [5.5247] d_loss [8.8107] g_loss [0.4655]
Iter [     200] Time [10.9700] d_loss [8.9693] g_loss [0.4285]
Iter [     300] Time [16.4405] d_loss [8.8542] g_loss [0.3684]
Iter [     400] Time [21.8936] d_loss [8.6228] g_loss [0.2899]
Iter [     500] Time [27.3330] d_loss [8.8904] g_loss [0.2179]
Iter [     600] Time [32.8003] d_loss [8.3954] g_loss [0.1594]
Iter [     700] Time [38.2296] d_loss [9.2645] g_loss [0.1207]
Iter [     800] Time [43.6385] d_loss [9.3280] g_loss [0.0987]
Iter [     900] Time [49.0886] d_loss [8.5020] g_loss [0.0805]
Iter [    1000] Time [54.5369] d_loss [8.9202] g_loss [0.0801]
Stopped training at 1000 because eval g_loss is not decreasing


In [18]:
# Generate as many synthetic rows as there are real ones. The latent size is
# read from the model instead of repeating the generator's input_dim.
pred = wgan.predict(shape=(df2.shape[0], wgan.z_dim))

In [19]:
# Summary statistics of the generated samples (still in scaled units).
pd.DataFrame(pred).describe()

Unnamed: 0,0,1,2
count,11243.0,11243.0,11243.0
mean,0.104865,0.052507,0.098766
std,0.368905,0.28517,0.278311
min,-1.174524,-1.338235,-0.731285
25%,-0.151711,-0.113671,-0.09098
50%,0.085322,0.075373,0.067821
75%,0.34252,0.240238,0.257404
max,1.772286,0.999077,1.502056


In [20]:
# Same summary for the scaled real data, rounded to 6 decimals for comparison.
pd.DataFrame(df2).describe().round(6)

Unnamed: 0,0,1,2
count,11243.0,11243.0,11243.0
mean,-0.0,0.0,-0.0
std,1.000044,1.000044,1.000044
min,-1.588055,-1.392121,-1.455825
25%,-0.720178,-0.604388,-0.965329
50%,-0.309568,-0.405397,-0.121995
75%,0.660038,0.245904,0.888912
max,2.101331,6.532309,1.719477


In [29]:
# Wrap the generated samples in a DataFrame so the columns can be labelled.
pred = pd.DataFrame(pred)

In [30]:
# Label the three generated features; positional columns 0/1/2 follow the
# lat / lon / time-f order of the training frame.
column_names = {0: 'lat', 1: 'lon', 2: 'time'}
pred = pred.rename(columns=column_names)

In [27]:
# Map the generated samples back to the original units. `inverse_transform`
# returns a bare ndarray, so the result is re-wrapped with column labels;
# otherwise the later `pred['time']` lookup fails when cells run top to bottom.
# Works whether `pred` is currently an ndarray or a DataFrame.
# NOTE: not idempotent - re-running this cell un-scales the values twice.
pred = pd.DataFrame(
    scaler.inverse_transform(np.asarray(pred)),
    columns=['lat', 'lon', 'time'],
)

In [31]:
# Display the current contents of `pred`.
pred

Unnamed: 0,lat,lon,time
0,39.915817,116.380051,1.275500e+09
1,39.930439,116.393112,1.275574e+09
2,39.914707,116.377647,1.275598e+09
3,39.910946,116.363533,1.275469e+09
4,39.919975,116.364876,1.275815e+09
...,...,...,...
11238,39.912502,116.375351,1.275428e+09
11239,39.921432,116.390327,1.275566e+09
11240,39.929806,116.379112,1.275661e+09
11241,39.919586,116.373802,1.275613e+09


In [32]:
# Turn epoch seconds into timestamps, tag every row with the user id, and
# put the columns in the export order.
export_columns = ['user', 'time', 'lat', 'lon']
pred = pred.assign(
    time=lambda frame: pd.to_datetime(frame['time'], unit='s'),
    user=79,
)[export_columns]
pred

Unnamed: 0,user,time,lat,lon
0,79,2010-06-02 17:27:28,39.915817,116.380051
1,79,2010-06-03 14:11:12,39.930439,116.393112
2,79,2010-06-03 20:52:16,39.914707,116.377647
3,79,2010-06-02 08:55:28,39.910946,116.363533
4,79,2010-06-06 09:01:52,39.919975,116.364876
...,...,...,...,...
11238,79,2010-06-01 21:26:24,39.912502,116.375351
11239,79,2010-06-03 11:50:24,39.921432,116.390327
11240,79,2010-06-04 14:15:28,39.929806,116.379112
11241,79,2010-06-04 00:53:20,39.919586,116.373802


In [33]:
# Summary statistics of the generated data in original units.
pred.describe()

Unnamed: 0,user,lat,lon
count,11243.0,11243.0,11243.0
mean,79.0,39.92289,116.374992
std,0.0,0.012229,0.011426
min,79.0,39.880642,116.32048
25%,79.0,39.914543,116.369358
50%,79.0,39.922401,116.3769
75%,79.0,39.930925,116.383484
max,79.0,39.978317,116.413773


In [35]:
# Summary statistics of the real data, for comparison with the generated data.
df.describe()

Unnamed: 0,lat,lon,time-f
count,11243.0,11243.0,11243.0
mean,39.919573,116.373894,1275613000.0
std,0.033148,0.039916,624455.9
min,39.866934,116.318329,1274704000.0
25%,39.895702,116.349771,1275010000.0
50%,39.909312,116.357713,1275537000.0
75%,39.941451,116.383709,1276168000.0
max,39.989226,116.634624,1276687000.0


In [34]:
# Export the synthetic trajectory without index or header row.
# `header` is documented as bool or list of str, so pass False explicitly
# rather than relying on None being falsy.
pred.to_csv('dpgan_79.csv', index=False, header=False)