In [1]:
from __future__ import print_function, division
import keras.backend as K
import matplotlib.pyplot as plt
import math
import numpy as np
import pandas as pd
from tensorflow.keras import regularizers
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical
import math
import torch.nn as nn
import matplotlib.pyplot as plt
import sklearn.metrics as metrics
import torch
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from tensorflow.keras.layers import Input, Dense, Reshape, Concatenate, Layer, Dropout
from tensorflow.keras.layers import BatchNormalization, Activation, Embedding, Flatten,LeakyReLU,ReLU
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import RMSprop, Adam
from functools import partial
import tensorflow as tf
from gumbel_softmax_EJ import GumbelSoftmax
from IPython.core.interactiveshell import InteractiveShell
pd.options.display.max_rows = 2000

"""
## Implement multi-head self-attention as a Keras layer
"""

class MultiHeadSelfAttention(layers.Layer):
    """Multi-head scaled dot-product self-attention.

    The input is projected to queries, keys and values with three dense
    layers, split into ``num_heads`` heads of size ``embed_dim // num_heads``,
    attended per head, re-assembled and passed through a final dense layer.

    Args:
        embed_dim: Size of the last input axis and of every projection.
        num_heads: Number of attention heads; must divide ``embed_dim``.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...).

    Raises:
        ValueError: If ``embed_dim`` is not divisible by ``num_heads``.
    """

    def __init__(self, embed_dim, num_heads, **kwargs):
        # Forwarding **kwargs lets callers name the layer and lets
        # `from_config` rebuild it from the dict returned by `get_config`.
        super(MultiHeadSelfAttention, self).__init__(**kwargs)
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        if embed_dim % num_heads != 0:
            raise ValueError(
                f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}"
            )
        self.projection_dim = embed_dim // num_heads
        self.query_dense = layers.Dense(embed_dim)
        self.key_dense = layers.Dense(embed_dim)
        self.value_dense = layers.Dense(embed_dim)
        self.combine_heads = layers.Dense(embed_dim)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super(MultiHeadSelfAttention, self).get_config()
        config.update({"embed_dim": self.embed_dim, "num_heads": self.num_heads})
        return config

    def attention(self, query, key, value):
        """Scaled dot-product attention.

        Returns:
            A tuple ``(output, weights)``: the attention-weighted values and
            the softmax-normalised attention weights.
        """
        score = tf.matmul(query, key, transpose_b=True)
        # Scale by sqrt(d_k), where d_k is the size of the last key axis.
        dim_key = tf.cast(tf.shape(key)[-1], tf.float32)
        scaled_score = score / tf.math.sqrt(dim_key)
        weights = tf.nn.softmax(scaled_score, axis=-1)
        output = tf.matmul(weights, value)
        return output, weights

    def separate_heads(self, x, batch_size):
        """Reshape ``x`` to (batch_size, num_heads, seq_len, projection_dim)."""
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs):
        """Apply self-attention.

        Args:
            inputs: Tensor of shape (batch_size, seq_len, embed_dim).

        Returns:
            Tensor of shape (batch_size, seq_len, embed_dim). The attention
            weights are computed but not returned.
        """
        batch_size = tf.shape(inputs)[0]
        query = self.query_dense(inputs)  # (batch_size, seq_len, embed_dim)
        key = self.key_dense(inputs)  # (batch_size, seq_len, embed_dim)
        value = self.value_dense(inputs)  # (batch_size, seq_len, embed_dim)
        query = self.separate_heads(
            query, batch_size
        )  # (batch_size, num_heads, seq_len, projection_dim)
        key = self.separate_heads(
            key, batch_size
        )  # (batch_size, num_heads, seq_len, projection_dim)
        value = self.separate_heads(
            value, batch_size
        )  # (batch_size, num_heads, seq_len, projection_dim)
        attention, weights = self.attention(query, key, value)
        attention = tf.transpose(
            attention, perm=[0, 2, 1, 3]
        )  # (batch_size, seq_len, num_heads, projection_dim)
        concat_attention = tf.reshape(
            attention, (batch_size, -1, self.embed_dim)
        )  # (batch_size, seq_len, embed_dim)
        output = self.combine_heads(
            concat_attention
        )  # (batch_size, seq_len, embed_dim)
        return output

"""
## Implement a Transformer block as a layer
"""

class TransformerBlock(layers.Layer):
    """Transformer encoder block: self-attention followed by a feed-forward net.

    Each of the two sub-layers is followed by dropout, a residual connection
    and layer normalisation.

    Args:
        embed_dim: Size of the last input axis (and of the block output).
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward network.
        rate: Dropout rate applied after each sub-layer.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...).
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super(TransformerBlock, self).__init__(**kwargs)
        # Keep the hyper-parameters so `get_config` can report them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate
        self.att = MultiHeadSelfAttention(embed_dim, num_heads)
        self.ffn = tf.keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim),]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super(TransformerBlock, self).get_config()
        config.update(
            {
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "rate": self.rate,
            }
        )
        return config

    # NOTE(review): `training` defaults to True, so dropout stays active
    # whenever neither the caller nor Keras supplies a value -- confirm that
    # this is intended at inference time.
    def call(self, inputs, training=True):
        """Run the block on ``inputs`` and return a tensor of the same shape."""
        attn_output = self.att(inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)  # residual + norm
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)  # residual + norm

"""
# Positional Embedding
"""
class PositionalEncoding1D(nn.Module):
    """Sinusoidal positional encoding along one sequence axis (PyTorch)."""

    def __init__(self, channels):
        """
        :param channels: The last dimension of the tensor you want to apply pos emb to.
        """
        super(PositionalEncoding1D, self).__init__()
        self.channels = channels
        # One frequency per (sin, cos) pair; for odd `channels` this yields
        # ceil(channels / 2) frequencies, i.e. one column more than needed.
        inv_freq = 1. / (10000 ** (torch.arange(0, channels, 2).float() / channels))
        self.register_buffer('inv_freq', inv_freq)

    def forward(self, tensor):
        """
        :param tensor: A 3d tensor of size (batch_size, x, ch)
        :return: Positional Encoding Matrix of size (1, x, ch); the leading
            axis has length 1 so it broadcasts against the batch.
        :raises RuntimeError: if ``tensor`` is not 3-dimensional.
        """
        if len(tensor.shape) != 3:
            raise RuntimeError("The input tensor has to be 3d!")
        _, x, orig_ch = tensor.shape
        pos_x = torch.arange(x, device=tensor.device).type(self.inv_freq.type())
        sin_inp_x = torch.einsum("i,j->ij", pos_x, self.inv_freq)
        # Layout: all sine columns first, then all cosine columns.
        emb_x = torch.cat((sin_inp_x.sin(), sin_inp_x.cos()), dim=-1)
        emb = torch.zeros((x,self.channels),device=tensor.device).type(tensor.type())
        # Trim the surplus column produced when `channels` is odd; without the
        # slice the assignment fails with a shape mismatch.
        emb[:,:self.channels] = emb_x[:,:self.channels]

        return emb[None,:,:orig_ch]

class PositionalEncoding2D(nn.Module):
    """Sinusoidal positional encoding over two spatial axes (PyTorch).

    The first half of the channels encodes the x position and the second half
    the y position.
    """

    def __init__(self, channels):
        """
        :param channels: The last dimension of the tensor you want to apply pos emb to.
        """
        super(PositionalEncoding2D, self).__init__()
        # Per-axis width, rounded up to an even number so that it splits
        # exactly into sine and cosine halves. For every input where
        # ceil(channels / 2) was already even this is the same value; for the
        # remaining inputs (e.g. 2, 5, 6) the old odd width made `forward`
        # fail with a shape mismatch.
        channels = int(np.ceil(channels / 4) * 2)
        self.channels = channels
        inv_freq = 1. / (10000 ** (torch.arange(0, channels, 2).float() / channels))
        self.register_buffer('inv_freq', inv_freq)

    def forward(self, tensor):
        """
        :param tensor: A 4d tensor of size (batch_size, x, y, ch)
        :return: Positional Encoding Matrix of size (1, x, y, ch); the leading
            axis has length 1 so it broadcasts against the batch.
        :raises RuntimeError: if ``tensor`` is not 4-dimensional.
        """
        if len(tensor.shape) != 4:
            raise RuntimeError("The input tensor has to be 4d!")
        _, x, y, orig_ch = tensor.shape
        pos_x = torch.arange(x, device=tensor.device).type(self.inv_freq.type())
        pos_y = torch.arange(y, device=tensor.device).type(self.inv_freq.type())
        sin_inp_x = torch.einsum("i,j->ij", pos_x, self.inv_freq)
        sin_inp_y = torch.einsum("i,j->ij", pos_y, self.inv_freq)
        # emb_x gets a singleton y axis so it broadcasts over all y positions.
        emb_x = torch.cat((sin_inp_x.sin(), sin_inp_x.cos()), dim=-1).unsqueeze(1)
        emb_y = torch.cat((sin_inp_y.sin(), sin_inp_y.cos()), dim=-1)
        emb = torch.zeros((x,y,self.channels*2),device=tensor.device).type(tensor.type())
        emb[:,:,:self.channels] = emb_x
        emb[:,:,self.channels:2*self.channels] = emb_y
        # Drop any surplus channels introduced by the rounding above.
        return emb[None,:,:,:orig_ch]

In [2]:
# Run TensorFlow in graph (non-eager) mode for the rest of the notebook.
tf.compat.v1.disable_eager_execution()

# Let TensorFlow grow GPU memory on demand instead of reserving it all up
# front. On a CPU-only host the device list is empty; indexing it with [0]
# used to raise IndexError, so `gpu` is now None in that case and the
# memory-growth call is skipped.
_visible_gpus = tf.config.experimental.get_visible_devices('GPU')
gpu = _visible_gpus[0] if _visible_gpus else None
if gpu is not None:
    tf.config.experimental.set_memory_growth(device = gpu, enable = True)

In [3]:
## Load the data pre-processed in R
x_train_cond_R = pd.read_csv('01_Data/x_train_cond_R_Coord_ID_DisR.csv')
y_train_cond_R = pd.read_csv('01_Data/y_train_cond_R_Coord_ID_DisR.csv')
y_test_cond_SC_R = pd.read_csv('01_Data/y_test_cond_SC_R_Coord_ID_DisR.csv')
y_train_cond_SC_R = pd.read_csv('01_Data/y_train_cond_SC_R_Coord_ID_DisR.csv')

## Drop every row whose `Max_seq` equals 6.
# NOTE(review): presumably this keeps sequences within the length of 5 used
# for padding later in the notebook -- confirm.
# The x frame has no mask of its own: it is filtered with the mask computed
# from y_train_cond_R, which relies on both frames sharing the same row index.
# `.copy()` makes each result an independent frame, so the column assignments
# performed on them in later cells do not write into a slice of the raw data
# (and do not raise SettingWithCopyWarning).
_keep_train = y_train_cond_R['Max_seq'] != 6
_keep_test_SC = y_test_cond_SC_R['Max_seq'] != 6
_keep_train_SC = y_train_cond_SC_R['Max_seq'] != 6

x_train_cond_R = x_train_cond_R[_keep_train].copy()
y_train_cond_R = y_train_cond_R[_keep_train].copy()
y_test_cond_SC_R = y_test_cond_SC_R[_keep_test_SC].copy()
y_train_cond_SC_R = y_train_cond_SC_R[_keep_train_SC].copy()

In [4]:
# Columns to be treated as categorical: everything except the identifier and
# trip-sequence columns (and, for y, three further columns that are excluded
# from the categorical cast).
x_catcol = x_train_cond_R.columns.drop(labels=['ID', 'P_Trip_seq'])
y_catcol = y_train_cond_R.columns.drop(
    labels=['ID', 'P_Trip_seq', "JIGA", "P_Home_Meanage", "P_Home_Older"]
)

In [5]:
# Convert the categorical columns to pandas' `category` dtype; the conversion
# is done column by column, so each column keeps its own set of categories.
x_train_cond_R[x_catcol] = x_train_cond_R[x_catcol].astype('category')
for _frame in (y_train_cond_R, y_test_cond_SC_R, y_train_cond_SC_R):
    _frame[y_catcol] = _frame[y_catcol].astype('category')

# Order rows by person and then by trip sequence. The training frames are
# keyed by 'ID', the SC frames by 'NID'.
x_train_cond_R = x_train_cond_R.sort_values(['ID', 'P_Trip_seq'])
y_train_cond_R = y_train_cond_R.sort_values(['ID', 'P_Trip_seq'])
y_train_cond_SC_R = y_train_cond_SC_R.sort_values(['NID', 'P_Trip_seq'])
y_test_cond_SC_R = y_test_cond_SC_R.sort_values(['NID', 'P_Trip_seq'])


## Ground truth of X (label-encoded, one row per ID)
# `samples` keeps the raw category labels: the ID, every categorical column
# except P_Trip_purpose, and one TP_<k> column per row position within the ID.

samples = pd.concat([x_train_cond_R['ID'],x_train_cond_R[x_catcol].drop('P_Trip_purpose',axis=1)],axis=1)
# Work on a copy so the helper columns below are not added to x_train_cond_R yet.
samples_R = x_train_cond_R.copy()
# 0-based position of each row within its ID group (rows were sorted by
# ID and P_Trip_seq beforehand), turned into the column label 'TP_<k>'.
samples_R['idx'] = samples_R.groupby('ID').cumcount()
samples_R['prod_idx'] = 'TP_' + samples_R.idx.astype(str)

# Long -> wide: one row per ID, one TP_<k> column per row position.
Trip_purpose = samples_R.pivot(index='ID',columns='prod_idx',values='P_Trip_purpose')
for col in Trip_purpose.columns:
    # Positions an ID does not have are NaN after the pivot; mark them with
    # the extra category "Z".
    Trip_purpose[col] = Trip_purpose[col].cat.add_categories("Z").fillna("Z")    
# Keep the first row of every ID and attach its TP_<k> columns.
samples =  pd.merge(samples.groupby('ID').head(1),Trip_purpose,on="ID")


## Build x_train_cond (one-hot encoded, one row per ID)
# Same construction as above, but with dummy (one-hot) columns instead of labels.
x_train_cond = pd.concat([x_train_cond_R['ID'],pd.get_dummies(x_train_cond_R[x_catcol].drop('P_Trip_purpose',axis=1))],axis=1)
# NOTE: this time the helper columns are added to x_train_cond_R itself.
x_train_cond_R['idx'] = x_train_cond_R.groupby('ID').cumcount()
x_train_cond_R['prod_idx'] = 'TP_' + x_train_cond_R.idx.astype(str)

Trip_purpose = x_train_cond_R.pivot(index='ID',columns='prod_idx',values='P_Trip_purpose')
for col in Trip_purpose.columns:
    Trip_purpose[col] = Trip_purpose[col].cat.add_categories("Z").fillna("Z")
    
# Drop IDs whose second position (TP_1) is the "Z" filler, i.e. IDs with a
# single row. This filter is applied here but not to `samples` above.
# NOTE(review): confirm that the difference is intentional.
Trip_purpose = Trip_purpose[Trip_purpose['TP_1'] != 'Z']

Trip_purpose = pd.get_dummies(Trip_purpose)
# The merge keeps only IDs present on both sides, so the IDs removed by the
# TP_1 filter are dropped from x_train_cond as well.
x_train_cond =  pd.merge(x_train_cond.groupby('ID').head(1),Trip_purpose,on="ID")






In [6]:
# --- Sequential targets: one row per (ID, P_Trip_seq) -----------------------
# One-hot encode the four categorical columns and append the two arrival
# coordinates plus the identifier and trip-sequence columns on the right.
y_train_cat = y_train_cond_R[['isHome', 'P_Arrival_time', 'stay_time', 'tr_time']]
y_train_seq = pd.concat(
    [
        pd.get_dummies(y_train_cat),
        y_train_cond_R[['P_Arrival_x', 'P_Arrival_y', 'ID', 'P_Trip_seq']],
    ],
    axis=1,
)
# Keep only the IDs that are still present in x_train_cond.
y_train_seq = y_train_seq.loc[y_train_seq['ID'].isin(x_train_cond['ID'])]

y_train_SC_cat = y_train_cond_SC_R[['isHome', 'P_Arrival_time', 'stay_time', 'tr_time']]
y_train_SC_seq = pd.concat(
    [
        pd.get_dummies(y_train_SC_cat),
        y_train_cond_SC_R[['P_Arrival_x', 'P_Arrival_y', 'NID', 'P_Trip_seq']],
    ],
    axis=1,
)

# --- Non-sequential targets: one row per ID / NID ---------------------------
y_train_nseq = pd.concat(
    [
        pd.get_dummies(y_train_cond_R[['Age_SC', 'start_time']]),
        y_train_cond_R[['ID', 'JIGA', 'P_Home_Meanage', 'P_Home_Older']],
    ],
    axis=1,
)
y_train_SC_nseq = pd.concat(
    [
        pd.get_dummies(y_train_cond_SC_R[['Age_SC', 'start_time']]),
        y_train_cond_SC_R[['NID', 'JIGA', 'P_Home_Meanage', 'P_Home_Older']],
    ],
    axis=1,
)

# First row of every ID, restricted to the IDs in x_train_cond, without the key.
y_train_nseq = (
    y_train_nseq.groupby('ID').head(1)
    .loc[lambda frame: frame['ID'].isin(x_train_cond['ID'])]
    .drop(columns=['ID'])
)

# First row of every NID, without the key.
y_train_SC_nseq = y_train_SC_nseq.groupby('NID').head(1).drop(columns=['NID'])

# The ID column was only needed for the filters above. Because it is removed
# here, this cell cannot be re-run without rebuilding x_train_cond first.
x_train_cond = x_train_cond.drop(columns=['ID'])

In [7]:
# The four right-most columns of y_train_seq are the two arrival coordinates,
# the ID and the trip sequence number; everything before them is a feature.
num_features = y_train_seq.shape[1] - 4
maxlen = 5
num_data = y_train_seq['ID'].nunique()
num_data_SC = y_train_SC_seq['NID'].nunique()

## Add dummy dimensions so the feature count is divisible by 4
# Three all-zero columns (labelled 0, 1, 2) are inserted just before the four
# trailing columns of both frames.
# NOTE(review): the count of 3 is hard-coded and the insert position comes from
# y_train_seq only -- this assumes both frames have the same number of dummy
# columns and that the original count leaves a remainder of 1 modulo 4; confirm.
# Re-running this cell fails because the columns already exist.
for i in range(3):
    for _seq_frame in (y_train_seq, y_train_SC_seq):
        _seq_frame.insert(num_features, i, 0)

num_features = y_train_seq.shape[1] - 4

In [8]:
# Zero padding
def pad(x, length=None):
    """Append all-zero rows to ``x`` until it has ``length`` rows.

    Args:
        x: DataFrame holding the rows of one group.
        length: Target number of rows. Defaults to the notebook-level
            ``maxlen`` when omitted, which is how ``groupby(...).apply(pad)``
            uses it.

    Returns:
        A new DataFrame with the rows of ``x`` followed by zero-filled rows,
        re-indexed 0..length-1. The columns are those of ``x``.

    Raises:
        ValueError: If ``x`` already has more than ``length`` rows.
    """
    target_len = maxlen if length is None else length
    n_missing = target_len - len(x)
    if n_missing < 0:
        raise ValueError(
            f"cannot pad a frame with {len(x)} rows to length {target_len}"
        )
    # Take the width from the frame itself rather than from a global counter.
    zero_data = np.zeros(shape=(n_missing, x.shape[1]))
    d = pd.DataFrame(zero_data, columns=x.columns)
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    data = pd.concat([x, d], ignore_index=True)
    return data

In [9]:
# Pad every person's sequence to `maxlen` rows, then stack the result into a
# 3-D array of shape (persons, maxlen, num_features + 4). The 4 extra columns
# are the two coordinates, the identifier and the trip sequence number.
y_train_SC_seq = (
    y_train_SC_seq.groupby('NID').apply(pad)
    .to_numpy()
    .reshape(num_data_SC, maxlen, num_features + 4)
)

y_train_seq = (
    y_train_seq.groupby('ID').apply(pad)
    .to_numpy()
    .reshape(num_data, maxlen, num_features + 4)
)

In [10]:
from sklearn.model_selection import train_test_split

def choice_train_test_split(X, y, y_ns, test_size=0.2,random_state=1004):
    """Split three row-aligned containers into train and test parts.

    The training rows are drawn without replacement using NumPy's global
    random generator, which this function re-seeds with ``random_state``.
    The test rows are the remaining positions in ascending order.

    Args:
        X: DataFrame, indexed positionally.
        y: NumPy array with at least two axes whose first axis matches ``X``.
        y_ns: DataFrame with the same number of rows as ``X``.
        test_size: Fraction of rows for the test part (rounded down).
        random_state: Seed passed to ``np.random.seed``.

    Returns:
        ``(X_train, X_test, y_train, y_test, y_train_ns, y_test_ns)``.
    """
    n_rows = X.shape[0]
    n_test = int(n_rows * test_size)
    n_train = n_rows - n_test

    np.random.seed(random_state)
    train_idx = np.random.choice(n_rows, n_train, replace=False)
    test_idx = np.setdiff1d(range(n_rows), train_idx)

    def take(rows):
        # Same positional selection applied to all three containers.
        return X.iloc[rows, :], y[rows, :], y_ns.iloc[rows, :]

    X_train, y_train, y_train_ns = take(train_idx)
    X_test, y_test, y_test_ns = take(test_idx)

    return X_train, X_test, y_train, y_test, y_train_ns, y_test_ns
 
# Training data: split the conditions, the padded sequences and the
# non-sequential targets with one shared set of row positions.
(
    X_train, X_test,
    y_train, y_test,
    y_train_ns, y_test_ns,
) = choice_train_test_split(
    X=x_train_cond,
    y=y_train_seq,
    y_ns=y_train_nseq,
    test_size=0.2,
    random_state=1004,
)

# SC data: the sequential and non-sequential targets are split together by
# scikit-learn.
y_train_SC, y_test_SC, y_train_SC_ns, y_test_SC_ns = train_test_split(
    y_train_SC_seq,
    y_train_SC_nseq,
    test_size=0.2,
    shuffle=True,
    random_state=1004,
)


In [None]:
# Persist the padded sequence arrays; np.save appends the '.npy' extension.
for _npy_path, _array in (
    ('01_Data/y_train_seq', y_train),
    ('01_Data/y_test_seq', y_test),
    ('01_Data/y_train_SC_seq', y_train_SC),
    ('01_Data/y_test_SC_seq', y_test_SC),
):
    np.save(_npy_path, _array, allow_pickle=True)

# Persist the tabular splits as CSV without the index column.
for _csv_path, _table in (
    ('01_Data/x_train_cond.csv', X_train),
    ('01_Data/x_test_cond.csv', X_test),
    ('01_Data/y_train_nseq.csv', y_train_ns),
    ('01_Data/y_test_nseq.csv', y_test_ns),
    ('01_Data/y_train_SC_nseq.csv', y_train_SC_ns),
    ('01_Data/y_test_SC_nseq.csv', y_test_SC_ns),
):
    _table.to_csv(_csv_path, index=False)