In [1]:
import keras.backend as K
import tensorflow as tf
import math
import numpy as np
import pandas as pd
from tensorflow.keras import regularizers
from tensorflow.keras import layers
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import sklearn.metrics as metrics
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from tensorflow.keras.layers import Input, Dense, Reshape, Concatenate, Layer, Dropout
from tensorflow.keras.layers import BatchNormalization, Activation, Embedding, Flatten,LeakyReLU,ReLU
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import RMSprop, Adam
from functools import partial
from gumbel_softmax import GumbelSoftmax

from IPython.core.interactiveshell import InteractiveShell
pd.options.display.max_rows = 2000

# Defining Class
## Implement  multi-head attention as a Keras layer

In [2]:
class MultiHeadSelfAttention(layers.Layer):
    """Multi-head scaled dot-product self-attention.

    The input is projected to queries, keys and values by three Dense layers
    of width ``embed_dim``. Each projection is split into ``num_heads`` heads
    of width ``embed_dim // num_heads``, attention is computed per head, and
    the re-joined heads go through a final Dense layer of width ``embed_dim``.

    :param embed_dim: Size of the last input dimension; must be divisible by
        ``num_heads``.
    :param num_heads: Number of attention heads.
    :raises ValueError: If ``embed_dim`` is not a multiple of ``num_heads``.
    """

    def __init__(self, embed_dim, num_heads):
        super().__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        head_width, remainder = divmod(embed_dim, num_heads)
        if remainder != 0:
            raise ValueError(
                f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}"
            )
        self.projection_dim = head_width
        # Sub-layers are created in a fixed order: query, key, value, output.
        self.query_dense = layers.Dense(embed_dim)
        self.key_dense = layers.Dense(embed_dim)
        self.value_dense = layers.Dense(embed_dim)
        self.combine_heads = layers.Dense(embed_dim)

    def attention(self, query, key, value):
        """Return ``(softmax(Q K^T / sqrt(d_k)) V, attention weights)``."""
        key_width = tf.cast(tf.shape(key)[-1], tf.float32)
        logits = tf.matmul(query, key, transpose_b=True) / tf.math.sqrt(key_width)
        weights = tf.nn.softmax(logits, axis=-1)
        return tf.matmul(weights, value), weights

    def separate_heads(self, x, batch_size):
        """Reshape ``x`` to (batch_size, num_heads, seq_len, projection_dim)."""
        per_head = tf.reshape(
            x, (batch_size, -1, self.num_heads, self.projection_dim)
        )
        return tf.transpose(per_head, perm=[0, 2, 1, 3])

    def call(self, inputs):
        """Apply self-attention to ``inputs`` of shape (batch, seq_len, embed_dim)."""
        batch_size = tf.shape(inputs)[0]

        # Project first; every projection is (batch_size, seq_len, embed_dim).
        projected_query = self.query_dense(inputs)
        projected_key = self.key_dense(inputs)
        projected_value = self.value_dense(inputs)

        # Split into heads: (batch_size, num_heads, seq_len, projection_dim).
        query = self.separate_heads(projected_query, batch_size)
        key = self.separate_heads(projected_key, batch_size)
        value = self.separate_heads(projected_value, batch_size)

        # The attention weights are computed but not returned by this layer.
        attended, _ = self.attention(query, key, value)

        # Back to (batch_size, seq_len, num_heads, projection_dim) ...
        attended = tf.transpose(attended, perm=[0, 2, 1, 3])
        # ... and merge the heads: (batch_size, seq_len, embed_dim).
        merged = tf.reshape(attended, (batch_size, -1, self.embed_dim))
        return self.combine_heads(merged)

## Implement a Transformer block as a Keras layer

In [3]:
"""
## Implement a Transformer block as a layer
"""

class TransformerBlock(layers.Layer):
    """Transformer encoder block: self-attention and a feed-forward network,
    each followed by dropout, a residual connection and layer normalisation.

    :param embed_dim: Width of the input/output feature dimension.
    :param num_heads: Number of heads of the self-attention sub-layer.
    :param ff_dim: Hidden width of the feed-forward sub-layer.
    :param rate: Dropout rate used after both sub-layers.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super().__init__()
        self.att = MultiHeadSelfAttention(embed_dim, num_heads)
        feed_forward_layers = [
            layers.Dense(ff_dim, activation="relu"),
            layers.Dense(embed_dim),
        ]
        self.ffn = tf.keras.Sequential(feed_forward_layers)
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=True):
        """Run the block on ``inputs``.

        NOTE(review): ``training`` defaults to True, so dropout stays active
        unless the caller (or Keras) passes ``training=False`` - confirm that
        inference paths do so.
        """
        # Attention sub-layer with residual connection.
        attended = self.dropout1(self.att(inputs), training=training)
        residual = self.layernorm1(inputs + attended)
        # Feed-forward sub-layer with residual connection.
        transformed = self.dropout2(self.ffn(residual), training=training)
        return self.layernorm2(residual + transformed)

## Defining Class 3. Implement 1D-Positional encoding and 2D-Locational encoding

In [4]:
class PositionalEncoding1D(nn.Module):
    """Sinusoidal positional encoding along one axis.

    For position ``p`` and frequency ``f_k`` the encoding is the concatenation
    ``[sin(p * f_0), ..., sin(p * f_m), cos(p * f_0), ..., cos(p * f_m)]``.
    """

    def __init__(self, channels):
        """
        :param channels: The last dimension of the tensor you want to apply pos emb to.
        """
        super(PositionalEncoding1D, self).__init__()
        # The encoding is built from sin/cos pairs, so the internal width has
        # to be even. An odd `channels` is rounded up here and the surplus
        # column is cut off again in forward(); previously an odd value made
        # forward() fail with a shape mismatch. Even values are unaffected.
        self.channels = channels + (channels % 2)
        inv_freq = 1. / (10000 ** (torch.arange(0, self.channels, 2).float() / self.channels))
        self.register_buffer('inv_freq', inv_freq)

    def forward(self, tensor):
        """
        :param tensor: A 3d tensor of size (batch_size, x, ch); only its shape,
            device and type are used, never its values.
        :return: Positional Encoding Matrix of size (1, x, ch). The leading
            dimension is 1 (not batch_size) so the result broadcasts over the batch.
        :raises RuntimeError: If ``tensor`` is not 3d.
        """
        if len(tensor.shape) != 3:
            raise RuntimeError("The input tensor has to be 3d!")
        _, x, orig_ch = tensor.shape
        pos_x = torch.arange(x, device=tensor.device).type(self.inv_freq.type())
        # Outer product position x frequency -> (x, channels / 2)
        sin_inp_x = torch.einsum("i,j->ij", pos_x, self.inv_freq)
        emb_x = torch.cat((sin_inp_x.sin(), sin_inp_x.cos()), dim=-1)
        emb = torch.zeros((x, self.channels), device=tensor.device).type(tensor.type())
        emb[:, :self.channels] = emb_x

        # Add the broadcast dimension and trim to the caller's channel count.
        return emb[None, :, :orig_ch]

class PositionalEncoding2D(nn.Module):
    """Sinusoidal positional encoding over a 2D grid.

    The first half of the channels encodes the index along the first grid
    axis, the second half the index along the second grid axis.
    """

    def __init__(self, channels):
        """
        :param channels: The last dimension of the tensor you want to apply pos emb to.
        """
        super(PositionalEncoding2D, self).__init__()
        # Each axis gets (at least) half of the channels ...
        channels = int(np.ceil(channels / 2))
        # ... rounded up to an even number, because every axis is encoded with
        # sin/cos pairs. Without this, inputs such as channels=10 (5 per axis)
        # failed in forward() with a shape mismatch. Values whose half is
        # already even are unaffected.
        channels += channels % 2
        self.channels = channels
        inv_freq = 1. / (10000 ** (torch.arange(0, channels, 2).float() / channels))
        self.register_buffer('inv_freq', inv_freq)

    def forward(self, tensor):
        """
        :param tensor: A 4d tensor of size (batch_size, x, y, ch); only its
            shape, device and type are used, never its values.
        :return: Positional Encoding Matrix of size (1, x, y, ch). The leading
            dimension is 1 (not batch_size) so the result broadcasts over the batch.
        :raises RuntimeError: If ``tensor`` is not 4d.
        """
        if len(tensor.shape) != 4:
            raise RuntimeError("The input tensor has to be 4d!")
        _, x, y, orig_ch = tensor.shape
        pos_x = torch.arange(x, device=tensor.device).type(self.inv_freq.type())
        pos_y = torch.arange(y, device=tensor.device).type(self.inv_freq.type())
        sin_inp_x = torch.einsum("i,j->ij", pos_x, self.inv_freq)
        sin_inp_y = torch.einsum("i,j->ij", pos_y, self.inv_freq)
        # emb_x is (x, 1, channels) so that it broadcasts along the y axis;
        # emb_y is (y, channels) and broadcasts along the x axis.
        emb_x = torch.cat((sin_inp_x.sin(), sin_inp_x.cos()), dim=-1).unsqueeze(1)
        emb_y = torch.cat((sin_inp_y.sin(), sin_inp_y.cos()), dim=-1)
        emb = torch.zeros((x, y, self.channels * 2), device=tensor.device).type(tensor.type())
        emb[:, :, :self.channels] = emb_x
        emb[:, :, self.channels:2 * self.channels] = emb_y
        # Add the broadcast dimension and trim to the caller's channel count.
        return emb[None, :, :, :orig_ch]

In [5]:
# Run TensorFlow in graph mode: the WGAN-GP gradient penalty defined later in
# this notebook is built with K.gradients.
tf.compat.v1.disable_eager_execution()

# Let TensorFlow allocate GPU memory on demand for the first visible GPU.
# The lookup is guarded so the notebook also starts on a CPU-only machine
# (indexing the empty device list used to raise IndexError); in that case
# `gpu` is None.
gpus = tf.config.experimental.get_visible_devices('GPU')
gpu = gpus[0] if gpus else None
if gpu is not None:
    tf.config.experimental.set_memory_growth(device=gpu, enable=True)

# Load the sample data
   * Large-scale incomplete data (Smart card data)
       * (Input) Trip-chain attributes (y_train_SC_seq)
       * (Input) General attributes (y_train_SC_nseq)
   * Small-scale complete data (Travel survey data)
       * (Input) Trip-chain attributes (y_train_seq)
       * (Input) General attributes (y_train_nseq)
       * (Output) Qualitative attributes (x_train_cond)

In [6]:
# --- Trip-chain (sequential) attributes, stored as .npy arrays --------------
# NOTE(review): allow_pickle=True can execute arbitrary code while loading;
# only use it on trusted files.
y_train_SC_seq = np.load('Data/y_train_SC_seq_LatLon.npy', allow_pickle=True)
y_test_SC_seq = np.load('Data/y_test_SC_seq_LatLon.npy', allow_pickle=True)
y_train_seq = np.load('Data/y_train_seq.npy', allow_pickle=True)
y_test_seq = np.load('Data/y_test_seq.npy', allow_pickle=True)

# --- General (non-sequential) attributes and the one-hot targets ------------
y_train_nseq = pd.read_csv('Data/y_train_nseq.csv')
y_test_nseq = pd.read_csv('Data/y_test_nseq.csv')
y_train_SC_nseq = pd.read_csv('Data/y_train_SC_nseq.csv')
y_test_SC_nseq = pd.read_csv('Data/y_test_SC_nseq.csv')
x_train_cond = pd.read_csv('Data/x_train_cond.csv')
x_test_cond = pd.read_csv('Data/x_test_cond.csv')

## Qualitative attributes in the raw small-scale complete data
x_train_cond_R = pd.read_csv('Data/train_complete_qualitative.csv')
# NOTE(review): the variable is named "test" but the file name says "train" - confirm.
y_test_cond_SC_R = pd.read_csv('Data/train_incomplete_tripChain_LatLon.csv')

## Split the extra columns off the smart-card arrays:
## columns 53-54 go to *_LatLon, column 55 to *_realStay
y_train_LatLon, y_train_realStay = y_train_SC_seq[:, :, 53:55], y_train_SC_seq[:, :, 55]
y_test_LatLon, y_test_realStay = y_test_SC_seq[:, :, 53:55], y_test_SC_seq[:, :, 55]

## Keep only the first 53 columns ...
y_train_SC_seq = y_train_SC_seq[:, :, :53]
y_test_SC_seq = y_test_SC_seq[:, :, :53]

## ... and insert three all-zero columns in front of original column 49.
## NOTE(review): presumably this aligns the smart-card layout with the
## travel-survey arrays - confirm.
zero_column_positions = [49, 49, 49]
y_train_SC_seq = np.insert(y_train_SC_seq, zero_column_positions, 0, axis=2)
y_test_SC_seq = np.insert(y_test_SC_seq, zero_column_positions, 0, axis=2)

In [7]:
# Number of model input features per trip. The last 4 columns of the raw
# trip-chain array are auxiliary: seq()/selo() read the 2D grid indices from
# columns num_features and num_features+1 and the sequence position from
# column num_features+3, then drop all 4 columns.
num_features = y_train_seq.shape[2]-4
# Maximum number of trips in a trip-chain
maxlen = 5
# Number of individuals in the training data (complete, travel survey)
num_data = y_train_seq.shape[0]
# Number of individuals in the training data (incomplete, smart card)
num_data_SC = y_train_SC_seq.shape[0]


# Model structure

## Defining function for 1D-Positional and 2D-Locational encoding

In [8]:
## 1D positional encoding (NumPy implementation)
def seq(data, data_len):
    """Add the 1D positional encoding to the trip-chain attributes.

    Column ``num_features + 3`` of ``data`` holds the 1-based position of each
    trip in its chain. Positions ``1 .. maxlen`` receive the corresponding row
    of ``PositionalEncoding1D``; every other value (e.g. 0) receives an
    all-zero encoding. The 4 auxiliary columns starting at ``num_features``
    are dropped before the encoding is added.

    Relies on the notebook globals ``num_features``, ``maxlen`` and
    ``PositionalEncoding1D``. The encoding table is float32.

    :param data: Array of shape (data_len, maxlen, num_features + 4).
    :param data_len: Number of individuals, i.e. ``len(data)``.
    :return: New array of shape (data_len, maxlen, num_features); ``data``
        itself is not modified.
    :raises ValueError: If the position column contains NaN.
    """
    # Lookup table: row 0 is the zero encoding, row k (1 <= k <= maxlen) is
    # the encoding of the k-th trip.
    pos_encoding = PositionalEncoding1D(num_features)
    pos_1d_emb = pos_encoding(torch.zeros((1, maxlen, num_features))).numpy()[0]
    lookup = np.concatenate(
        [np.zeros((1, num_features), dtype=pos_1d_emb.dtype), pos_1d_emb], axis=0
    )

    raw_positions = np.asarray(data[:data_len, :maxlen, num_features + 3], dtype=float)
    if np.isnan(raw_positions).any():
        raise ValueError("the sequence-position column contains NaN")
    positions = raw_positions.astype(int)  # truncates toward zero, like int()
    # Anything outside 1..maxlen is treated as "no trip" -> zero encoding.
    positions[(positions < 1) | (positions > maxlen)] = 0
    pos_seq_emb = lookup[positions]  # (data_len, maxlen, num_features)

    # Drop the 4 auxiliary columns (np.delete returns a copy).
    data = np.delete(data, np.s_[num_features:num_features + 4], axis=2)

    return data + pos_seq_emb

## 2D locational encoding (NumPy implementation)
def selo(data, data_len, grid_shape=(95, 40)):
    """Add the 1D positional and the 2D locational encoding to the trip-chain attributes.

    Columns ``num_features`` and ``num_features + 1`` of ``data`` are read as
    integer indices along the first and second axis of a location grid. Each
    trip receives the ``PositionalEncoding2D`` vector of its grid cell; cell
    (0, 0) is given an all-zero encoding (presumably the cell used for padding
    trips - confirm). The positional part is delegated to ``seq`` so that both
    functions share one implementation.

    Relies on the notebook globals ``num_features``, ``maxlen``,
    ``PositionalEncoding2D`` and ``seq``.

    :param data: Array of shape (data_len, maxlen, num_features + 4).
    :param data_len: Number of individuals, i.e. ``len(data)``.
    :param grid_shape: Size of the location grid as (cells along the first
        axis, cells along the second axis). Defaults to the 95 x 40 grid this
        notebook was written for.
    :return: New array of shape (data_len, maxlen, num_features).
    :raises ValueError: If a grid-index column contains NaN.
    :raises IndexError: If a grid index lies outside ``grid_shape``.
    """
    grid_x, grid_y = grid_shape
    p_enc_2d = PositionalEncoding2D(num_features)
    pos_2d_emb = p_enc_2d(torch.zeros((1, grid_x, grid_y, num_features))).numpy()[0]
    pos_2d_emb[0, 0, :] = 0  # grid cell (0, 0) carries no locational signal

    cells = np.asarray(data[:data_len, :maxlen, num_features:num_features + 2], dtype=float)
    if np.isnan(cells).any():
        raise ValueError("the grid-index columns contain NaN")
    cells = cells.astype(int)  # truncates toward zero, like int()
    # One lookup for all trips: (data_len, maxlen, num_features)
    pos_loc_emb = pos_2d_emb[cells[..., 0], cells[..., 1]]

    # seq() strips the 4 auxiliary columns and adds the positional encoding.
    return seq(data, data_len) + pos_loc_emb

 * Applying the positional and locational encoding to the trip-chain attributes

In [9]:
# Complete (travel survey) trip-chain attributes with 1D positional encoding
y_train_seq_1d = seq(y_train_seq,len(y_train_seq))
# Complete (travel survey) trip-chain attributes with 1D positional and 2D locational encoding
y_train_seq_2d = selo(y_train_seq,len(y_train_seq))
# Incomplete (smart card) trip-chain attributes with 1D positional encoding
y_train_SC_seq_1d = seq(y_train_SC_seq,len(y_train_SC_seq))
# Incomplete (smart card) trip-chain attributes with 1D positional and 2D locational encoding
y_train_SC_seq_2d = selo(y_train_SC_seq,len(y_train_SC_seq))

## Build WGAN-GP

* Build generator

In [10]:
def build_generator():
    """Build the conditional generator.

    Inputs (in this order): a noise vector of size ``latent_dim``, the general
    attributes of size ``nseq_dim`` and the encoded trip-chain of shape
    ``(maxlen, embed_dim)``.

    Output: the concatenation of one Gumbel-softmax block per socioeconomic
    attribute followed by one 6-way Gumbel-softmax block per trip (its trip
    purpose).

    Relies on the notebook globals ``latent_dim``, ``nseq_dim``, ``maxlen``,
    ``embed_dim``, ``num_heads``, ``ff_dim``, ``intermediate_dim``, ``gumbel``
    and ``x_train_cond_R``. The layer creation order is kept stable so that
    previously saved weights still line up.

    :return: An uncompiled Keras ``Model``.
    """
    # Shapes are passed as tuples; a bare int such as `(latent_dim)` is not a
    # shape tuple and is rejected by some Keras versions.
    noise = Input(shape=(latent_dim,))
    label_ns = Input(shape=(nseq_dim,))
    label = Input(shape=(maxlen, embed_dim))

    # A single TransformerBlock instance is applied six times, so all six
    # passes share the same weights.
    # NOTE(review): confirm that the weight sharing is intended.
    transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
    x = label
    for _ in range(6):
        x = transformer_block(x)

    k = Flatten()(x)

    inputs = Concatenate()([noise, label_ns, k])

    # Three Dense + ReLU layers (BatchNormalization / Dropout were tried here
    # and are left disabled).
    h = inputs
    for idx in range(3):
        h = Dense(intermediate_dim[idx])(h)
        h = Activation('relu')(h)

    # Six socioeconomic factors (qualitative attributes): one head each, as
    # wide as the number of distinct values of that attribute.
    cat_outputs = []
    for i in ['Home_income', 'Home_car', 'Home_drive', 'Age', 'Gender', 'Home_type']:
        t = Dense(x_train_cond_R[i].nunique())(h)
        #t = Activation('softmax')(t) # You can choose the softmax rather than gumbel
        t = gumbel(t, 6)
        cat_outputs.append(t)

    # Trip purpose of each trip in the chain: the heads read the per-trip
    # transformer output and are appended to the same output list.
    p = Dense(48, activation='relu')(x)
    p = Dense(24, activation='relu')(p)
    p = Dense(12, activation='relu')(p)
    for i in range(maxlen):
        t = Dense(6)(p[:, i, :])
        #t = Activation('softmax')(t) # You can choose the softmax rather than gumbel
        t = gumbel(t, 6)
        cat_outputs.append(t)

    concat = Concatenate()(cat_outputs)

    model = Model([noise, label_ns, label], concat)

    return model
    

* Build critic(discriminator)

In [11]:
def build_critic():
    """Build the conditional critic (discriminator without a sigmoid).

    Inputs (in this order): the one-hot qualitative attributes (as wide as
    ``x_train_cond``), the general attributes of size ``nseq_dim`` and the
    encoded trip-chain of shape ``(maxlen, embed_dim)``.

    Output: one unbounded score per sample.

    Relies on the notebook globals ``x_train_cond``, ``nseq_dim``, ``maxlen``,
    ``embed_dim``, ``num_heads``, ``ff_dim`` and ``intermediate_dim``.

    :return: An uncompiled Keras ``Model``.
    """
    # Shapes are passed as tuples; a bare int is not a shape tuple and is
    # rejected by some Keras versions.
    img = Input(shape=(x_train_cond.shape[1],))
    label = Input(shape=(maxlen, embed_dim))
    label_ns = Input(shape=(nseq_dim,))

    # A single TransformerBlock instance is applied six times, so all six
    # passes share the same weights.
    # NOTE(review): confirm that the weight sharing is intended.
    transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
    x = label
    for _ in range(6):
        x = transformer_block(x)

    x = Flatten()(x)
    inputs = Concatenate()([img, label_ns, x])

    # Dense + LeakyReLU stack; the widths are read from intermediate_dim in
    # reverse order (index 2, 1, 0).
    h = inputs
    for idx in (2, 1, 0):
        h = Dense(intermediate_dim[idx])(h)
        h = LeakyReLU(alpha=0.2)(h)
    validity = Dense(1)(h)

    model = Model(inputs=[img, label_ns, label], outputs=validity)

    return model

# Model training

* Define functions for WGAN-GP

In [12]:
def wasserstein_loss(y_true, y_pred):
    """Wasserstein loss: the mean of the element-wise product of the target
    signs ``y_true`` and the critic scores ``y_pred``."""
    signed_scores = y_true * y_pred
    return K.mean(signed_scores)

def RandomWeightedAverage(inputs):
    """Return a random per-sample interpolation between two batches.

    One weight ``alpha ~ U(0, 1)`` is drawn per sample and the result is
    ``alpha * inputs[0] + (1 - alpha) * inputs[1]``.

    The number of weights is taken from the batch dimension of ``inputs[0]``
    rather than from the global ``BATCH_SIZE``, so the function works for any
    batch size and no longer depends on that global being defined and in sync
    with the data that is fed.

    :param inputs: Sequence ``[real_samples, fake_samples]`` of two 2D tensors
        with identical shape (batch, features).
    :return: Tensor with the same shape as the inputs.
    """
    real_samples, fake_samples = inputs[0], inputs[1]
    batch_size = K.shape(real_samples)[0]
    alpha = K.random_uniform((batch_size, 1))
    return (alpha * real_samples) + ((1 - alpha) * fake_samples)

def gradient_penalty_loss(y_true, y_pred, averaged_samples):
    """WGAN-GP gradient penalty.

    Takes the gradient of the critic output ``y_pred`` with respect to the
    interpolated samples, computes its Euclidean norm per sample and returns
    the batch mean of ``(1 - norm) ** 2``. ``y_true`` is not used; it is only
    there because Keras loss functions receive it. The penalty weight is not
    applied here - it is set through ``loss_weights`` when the critic model is
    compiled.
    """
    grads = K.gradients(y_pred, averaged_samples)[0]
    squared = K.square(grads)
    # Sum over every axis except the batch axis -> one squared norm per sample.
    per_sample_axes = np.arange(1, len(squared.shape))
    l2_norm = K.sqrt(K.sum(squared, axis=per_sample_axes))
    penalty = K.square(1 - l2_norm)
    return K.mean(penalty)

* Hyperparameter Setting for Conditional WGAN-GP

In [13]:
## Setting hyperparameters from MultiCATGAN
intermediate_dim = [256, 256, 256]  # widths of the three Dense layers in generator and critic
latent_dim = 128                    # size of the generator's noise vector
# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that newer Keras versions reject.
optimizer = Adam(learning_rate=2e-04)
BATCH_SIZE = 256
gumbel = GumbelSoftmax(name='gumbel')  # layer applied to each categorical generator output
embed_dim = num_features            # the transformer works directly on the encoded trip features
nseq_dim = y_train_nseq.shape[1]    # number of general (non-sequential) attributes
num_heads = 4                       # attention heads; embed_dim must be divisible by this
ff_dim = 36                         # hidden width of the transformer feed-forward layer

## Construct the Conditional WGAN-GP

In [14]:
## Model Build
generator = build_generator()
critic = build_critic()

#-------------------------------
# Construct Computational Graph
#       for the Critic
#-------------------------------

## Freeze generator's layers while training critic.
## The order of the `trainable` assignments and the compile() calls below
## matters - do not reorder these statements.
generator.trainable = False

# Real sample: the one-hot encoded qualitative attributes.
# Shapes are passed as tuples; a bare int is not a shape tuple and is rejected
# by some Keras versions.
real_img = Input(shape=(x_train_cond.shape[1],))

# Noise input
z_disc = Input(shape=(latent_dim,))
# Conditioning inputs: encoded trip-chain and general attributes
label = Input(shape=(maxlen, embed_dim))
label_ns = Input(shape=(nseq_dim,))
# Fake sample generated from the noise and the conditioning inputs
fake_img = generator([z_disc, label_ns, label])

# Critic scores of the fake and the real samples
fake = critic([fake_img, label_ns, label])
valid = critic([real_img, label_ns, label])

# Random per-sample interpolation between real and fake samples
interpolated_img = RandomWeightedAverage([real_img, fake_img])

# Critic score of the interpolated samples
validity_interpolated = critic([interpolated_img, label_ns, label])

# Keras losses only receive (y_true, y_pred); bind the interpolated samples
# the gradient penalty needs in addition.
partial_gp_loss = partial(gradient_penalty_loss, averaged_samples=interpolated_img)
partial_gp_loss.__name__ = 'gradient_penalty' # Keras requires function names

critic_model = Model(inputs=[real_img, label_ns, label, z_disc], outputs=[valid, fake, validity_interpolated])
# Wasserstein loss on the real and on the fake scores plus the gradient
# penalty, the latter weighted by 10.
critic_model.compile(loss=[wasserstein_loss,
                           wasserstein_loss,
                           partial_gp_loss],
                           optimizer=optimizer,
                           loss_weights=[1, 1, 10])


#-------------------------------
# Construct Computational Graph
#         for Generator
#-------------------------------

# For the generator we freeze the critic's layers
critic.trainable = False
generator.trainable = True

# Sampled noise for input to generator
z_gen = Input(shape=(latent_dim,))
# Conditioning inputs (new Input tensors; the names `label` / `label_ns` are re-bound)
label = Input(shape=(maxlen, embed_dim))
label_ns = Input(shape=(nseq_dim,))
# Generate samples from the noise and the conditioning inputs
img = generator([z_gen, label_ns, label])

# Critic score of the generated samples (the name `valid` is re-bound)
valid = critic([img, label_ns, label])

# Defines generator model
generator_model = Model([z_gen, label_ns, label], valid)
generator_model.compile(loss=wasserstein_loss, optimizer=optimizer)

## Training the WGAN-GP

In [15]:
import time
# Wall-clock start time of the training run
start = time.time()


## Training configuration
# Number of training iterations.
# NOTE(review): the original note here read "1 hours 7000" - presumably about
# 7000 iterations per hour; confirm.
epochs = 40000
# NOTE(review): presumably the logging/sampling interval in iterations - the
# training loop is not part of this cell; confirm.
sample_interval = 500
# NOTE(review): presumably the number of critic updates per generator update - confirm.
n_critic = 5
# Same value as in the hyperparameter cell
BATCH_SIZE = 256
losslog = []

# Load the dataset
# One-hot encoded qualitative attributes (targets) of the complete data
X_train = x_train_cond.values.astype("float32")
# Complete trip-chain attributes with positional and locational encoding
y_train = y_train_seq_2d
# General attributes of the complete data
y_train_ns = y_train_nseq.values.astype("float32")
# Incomplete (smart card) trip-chain attributes with positional and locational encoding
y_train_SC = y_train_SC_seq_2d
# General attributes of the incomplete (smart card) data
y_train_SC_ns = y_train_SC_nseq.values.astype("float32")

# Evaluation

## Estimate the qualitative attributes using generator

In [16]:
# Define the functions for evaluation
## Convert the dummy (one-hot / probability) qualitative attributes into categorical ones
def wide_to_long(samples_pop):
    """Convert wide per-category columns into one categorical column per attribute.

    The columns of ``samples_pop`` are split into consecutive blocks whose
    boundaries are given by the global ``n_uni_col`` (cumulative block
    widths). For every row and every block, one label is drawn from the
    matching slice of the global ``col_pop`` using the block's values as
    probabilities, so each block has to sum to 1. For one-hot rows the draw
    is deterministic.

    The output columns are named after ``x_train_cond_R.columns[1:7]``
    followed by ``TP_0 .. TP_4``, so ``n_uni_col`` must describe exactly 11
    blocks.

    :param samples_pop: 2D ``np.ndarray`` or ``DataFrame`` of shape
        (n_samples, total number of categories).
    :return: ``DataFrame`` of shape (n_samples, 11) with ``category`` columns.
        An input without rows yields an empty frame (this used to raise
        ``AttributeError``).
    """
    # Materialise the value matrix once instead of on every row.
    values = samples_pop if isinstance(samples_pop, np.ndarray) else samples_pop.values
    n_attributes = len(n_uni_col) - 1

    # Rows are collected in a list; growing an array with np.append /
    # np.concatenate on every row is quadratic in the number of samples.
    rows = []
    for sam in values:
        row = []
        for i in range(n_attributes):
            lo, hi = n_uni_col[i], n_uni_col[i + 1]
            row.append(np.random.choice(col_pop[lo:hi], p=sam[lo:hi], size=1)[0])
        rows.append(row)

    columns = x_train_cond_R.columns[1:7].to_list() + ["TP_0", "TP_1", "TP_2", "TP_3", "TP_4"]
    resamples = pd.DataFrame(rows, columns=columns)
    return resamples.astype('category')

## Calculate the Jensen-Shannon distances between two samples
def mean_JSD(samples, resamples):
    """Jensen-Shannon distances between the distributions of two categorical frames.

    Despite its name the function does not average anything; it returns the
    individual distances and leaves the aggregation to the caller.

    :param samples: Reference ``DataFrame`` of categorical columns.
    :param resamples: ``DataFrame`` with the same columns to compare against.
    :return: ``[Marg_JSD, Bi_JSD]`` - the first list holds one distance per
        column (marginal distributions), the second one distance per unordered
        pair of columns (bivariate distributions). For a single-column input
        the second list is empty.
    """
    # Imported here because the notebook's import cell provides neither name.
    from itertools import combinations
    from scipy.spatial.distance import jensenshannon

    def _distance(resam, sam):
        # Align both count tables on their index; a category that is missing
        # on one side counts as 0. jensenshannon() normalises the counts.
        tab = pd.merge(resam, sam, left_index=True, right_index=True, how='outer')
        tab = tab.fillna(0)
        return jensenshannon(tab.iloc[:, 0], tab.iloc[:, 1])

    Marg_JSD = []
    for col in samples.columns:
        resam = resamples[col].value_counts().sort_index()
        sam = samples[col].value_counts().sort_index()
        Marg_JSD.append(_distance(resam, sam))

    Bi_JSD = []
    for col1, col2 in combinations(samples.columns, 2):
        resam = pd.DataFrame(pd.crosstab(resamples[col1], resamples[col2], rownames=[col1], colnames=[col2]).stack().sort_index())
        sam = pd.DataFrame(pd.crosstab(samples[col1], samples[col2], rownames=[col1], colnames=[col2]).stack().sort_index())
        Bi_JSD.append(_distance(resam, sam))

    return [Marg_JSD, Bi_JSD]

# ## Calculate the mean Jenson-Shannon Distanc

# def get_resamples(y_test_SC_seq,y_test_SC_nseq_ns,num_gen=1):
#     resamples_SC = pd.DataFrame()
#     for i in range(num_gen):
#         y_test_SC = selo(y_test_SC_seq,len(y_test_SC_seq))
#         y_test_SC_ns = y_test_SC_nseq.values.astype("float32")
#         idx = sample(range(y_test_SC.shape[0]),x_test_cond.shape[0])
#         samples_act = y_test_SC[idx,:]
#         samples_act_ns = y_test_SC_ns[idx,:]
#         samples_pop_SC = generate_images(samples_act,samples_act_ns)
#         resamples_SC = pd.concat([resamples_SC,wide_to_long(samples_pop_SC)],axis=0)
#     return(resamples_SC)

# Define the generator function
def generate_images(label, label_ns, weights_path='Py_generator/AttnMO_XY_F1'):
    """Generate qualitative attributes with the trained generator.

    Loads the generator weights from ``weights_path`` (on every call), draws
    one standard-normal noise vector of size ``latent_dim`` per sample and
    returns the generator's prediction.

    Relies on the notebook globals ``generator`` and ``latent_dim``.

    :param label: Encoded trip-chain attributes; the first dimension is the
        number of samples.
    :param label_ns: General attributes of the same samples.
    :param weights_path: Checkpoint to load; defaults to the path this
        notebook was written for.
    :return: Whatever ``generator.predict`` returns for
        ``[noise, label_ns, label]``.
    """
    generator.load_weights(weights_path)
    noise = np.random.normal(0, 1, (label.shape[0], latent_dim))
    return generator.predict([noise, label_ns, label])

In [17]:
from random import sample

# Generate the qualitative attributes of the smart card (incomplete) test data.
# All test individuals are used; the inputs get the positional and locational encoding first.
y_test_SC = selo(y_test_SC_seq,len(y_test_SC_seq))
y_test_SC_ns = y_test_SC_nseq.values.astype("float32")

samples_act = y_test_SC
samples_act_ns = y_test_SC_ns
samples_pop_SC = generate_images(samples_act,samples_act_ns)



# Generate the qualitative attributes of travel survey (For validation)
y_test_TS = selo(y_test_seq,len(y_test_seq))
y_test_TS_ns = y_test_nseq.values.astype("float32")
# Draw as many individuals as x_test_cond has rows, without replacement and in
# random order. No seed is set, so the selection differs between runs.
idx = sample(range(y_test_TS.shape[0]),x_test_cond.shape[0])

# NOTE: samples_act / samples_act_ns are re-bound here; from now on they refer
# to the travel survey subset, not to the smart card data.
samples_act = y_test_TS[idx,:]
samples_act_ns = y_test_TS_ns[idx,:]
samples_pop = generate_images(samples_act,samples_act_ns)



## Make Ground Truth & Test
# Cumulative column offsets of the category blocks used by wide_to_long():
# one block per attribute in x_train_cond_R.columns[1:7], as wide as its
# number of distinct values, followed by five trip-purpose blocks of width 6.
n_uni_col = [x_train_cond_R[i].nunique() for i in x_train_cond_R.columns[1:7]]
n_uni_col = [0]+n_uni_col+[6,6,6,6,6]
n_uni_col = np.cumsum(n_uni_col)
# Column names of the one-hot layout; wide_to_long() draws its labels from these.
# NOTE(review): the block widths come from the training data but the names
# from the test data - this assumes both have the same column layout; confirm.
col_pop = x_test_cond.columns



In [18]:
# Ground truth: observed qualitative attributes of the travel survey test data
samples = wide_to_long(x_test_cond)
# Generated qualitative attributes for the travel survey test subset
resamples = wide_to_long(samples_pop)
# Generated qualitative attributes for the smart card test data
resamples_SC = wide_to_long(samples_pop_SC)

# Post processing for Visualization

* Construct the generated data for visualization 

In [19]:
import random

## Load the coordinate data for the column "P_Arrival_code" in the trip-chain data
## LU1: Ratio of Residential area; LU2: Ratio of Commercial area; LU3: Ratio of other area
## P_Arrival_code is TAZ spatial unit in Korea (~1 km radius)
## (only needed by the disabled grid-matching step further down)
TAZ_Coord = pd.read_csv('Data/TAZ_Coord.csv')



## Convert the target attributes from the wide form to the long form:
## one row per (individual, trip)
# Columns 6-10 of resamples_SC are the generated trip purposes TP_0 .. TP_4.
# Only the last character of each label is kept as the trip-purpose code.
TripPurposes = np.array(resamples_SC.iloc[:,6:11]).reshape(-1)
TripPurposes = np.array([purpose_label[-1] for purpose_label in TripPurposes]).reshape((-1,1))
ActivityDuration = y_test_realStay.reshape((-1,1))
# Columns 2-20 of the trip-chain array are read as a 19-way arrival-time
# indicator; argmax gives the index of the active slot.
ArrivalTime = np.array(y_test_SC_seq[:,:,2:21]).reshape((-1,19))
ArrivalTime = np.argmax(ArrivalTime,axis=1).reshape((-1,1))
Lat = y_test_LatLon[:,:,0].reshape((-1,1))
Lon = y_test_LatLon[:,:,1].reshape((-1,1))
df = pd.DataFrame(np.concatenate([Lat,Lon,TripPurposes,ActivityDuration,ArrivalTime],axis=1),
                 columns=['Lat','Lon','TripPurposes','ActivityDuration','ArrivalTime'])


## Remove the dummy trips (Trip purposes = 'Z' (None)).
## .copy() makes the filtered frame independent, so the column assignments
## below do not act on a slice of the original frame (SettingWithCopyWarning).
df = df[df['TripPurposes']!='Z'].copy()

## Assign the value for the attributes
df['ArrivalTime'] = df['ArrivalTime'].astype('int')+5 # 5 ~ 23
df['TripPurposes'] = df['TripPurposes'].replace({
    "0": "Commute",
    "1": "Work",
    "2": "OrganizedHobby",
    "3": "Entertainment",
    "4": "ReturningHome",
})


# ## Matching the Grid ID with Lat and Lon
# Grid = y_test_cond_SC_R[['P_Arrival_code','P_Arrival_x','P_Arrival_y']]
# Grid = Grid.drop_duplicates(subset='P_Arrival_code')
# Grid['Gu_code'] = Grid['P_Arrival_code'].astype("str").str[:4]
# Grid['P_Arrival_x'] = Grid['P_Arrival_x'].astype('object')
# Grid['P_Arrival_y'] = Grid['P_Arrival_y'].astype('object')
# Grid = pd.merge(left=TAZ_Coord,right=Grid,how='inner',on='P_Arrival_code')

# df = pd.merge(left=df,right=Grid,how='inner',on=['P_Arrival_x','P_Arrival_y'])
# df_F = df[['Lon','Lat','ArrivalTime','TripPurposes','ActivityDuration','P_Arr_LU1','P_Arr_LU2','P_Arr_LU3']]

## Save the processed data
df.to_csv("Data/ActivityPattern.csv",index=False)

IndexError: too many indices for array: array is 2-dimensional, but 3 were indexed