In [37]:
from prepare_data import preprocess_data_czech
from field_info import FieldInfo
from tensor_encoder import TensorEncoder
import pandas as pd
import tensorflow as tf
import numpy as np

In [38]:
# Read the raw transaction table; the path is relative to the notebook's working directory.
raw_data = pd.read_csv('../DATA/tr_by_acct_w_age.csv')
# preprocess_data_czech returns the processed frame plus the constants reused by later cells:
# the three *_SCALE factors, the reference START_DATE and the two tcode lookup tables.
data, LOG_AMOUNT_SCALE, TD_SCALE,ATTR_SCALE, START_DATE, TCODE_TO_NUM, NUM_TO_TCODE = preprocess_data_czech(raw_data)
# Keep only the columns listed here and work on an independent copy.
data2 = data[['account_id','age','age_sc', 'tcode', 'tcode_num', 'datetime', 'month', 'dow', 'day', 'dtme' ,'log_amount_sc','td_sc']]
df= data2.copy()

# Number of entries in the transaction-code lookup; FieldInfo is parameterised by it.
n_tcodes = len(TCODE_TO_NUM)

info = FieldInfo(n_tcodes)

# Sequence-length bounds handed to TensorEncoder (their exact semantics live in tensor_encoder).
max_seq_len = 80
min_seq_len = 20
# NOTE(review): df_test (first 10 rows) is not referenced again in the visible cells.
df_test = df[:10]
encoder = TensorEncoder(df, info, max_seq_len, min_seq_len)


In [39]:
encoder.encode()

Finished encoding 2000 of 14354 seqs
Finished encoding 4000 of 14354 seqs
Finished encoding 6000 of 14354 seqs
Finished encoding 8000 of 14354 seqs
Finished encoding 10000 of 14354 seqs
Finished encoding 12000 of 14354 seqs
Finished encoding 14000 of 14354 seqs
Took 33.39 secs


In [40]:
def make_batches(ds, buffer_size, batch_size):
    """Turn a dataset into a cached, shuffled, batched and prefetched pipeline.

    Args:
        ds: dataset exposing ``cache``/``shuffle``/``batch``/``prefetch``
            (called below with ``tf.data.Dataset`` objects).
        buffer_size: shuffle buffer size.
        batch_size: number of elements per batch.

    Returns:
        The dataset produced by chaining the four transformations in that order.
    """
    shuffled = ds.cache().shuffle(buffer_size)
    batched = shuffled.batch(batch_size)
    return batched.prefetch(tf.data.AUTOTUNE)




In [41]:
from sklearn.model_selection import train_test_split
# Unpack the three dimensions of the encoded input tensor.
n_seqs, n_steps, n_feat_inp = encoder.inp_tensor.shape
# Split inputs, sequence indices and targets consistently: 80% train / 20% validation.
# NOTE(review): no random_state is passed, so the split differs on every run.
x_tr, x_cv, inds_tr, inds_cv, targ_tr, targ_cv = train_test_split(encoder.inp_tensor, np.arange(n_seqs), encoder.tar_tensor, test_size=0.2)

# Create TensorFlow datasets of (input, target) pairs, cast to float32
ds_all = tf.data.Dataset.from_tensor_slices((encoder.inp_tensor.astype(np.float32), encoder.tar_tensor.astype(np.float32)))
ds_tr = tf.data.Dataset.from_tensor_slices((x_tr.astype(np.float32), targ_tr.astype(np.float32)))
ds_cv = tf.data.Dataset.from_tensor_slices((x_cv.astype(np.float32), targ_cv.astype(np.float32)))

# Shuffle buffer sized to the full dataset, which is at least as large as either split.
BUFFER_SIZE = ds_all.cardinality().numpy()
bs = 64  # batch size


# Both splits go through the same cache/shuffle/batch/prefetch pipeline.
train_batches = make_batches(ds_tr, BUFFER_SIZE, bs)
val_batches =  make_batches(ds_cv, BUFFER_SIZE, bs)

In [42]:
from field_info import FieldInfo
# Field layout (start offsets, widths, loss kinds) for the target tensor.
fieldInfo = FieldInfo(n_tcodes)


# Categorical loss on raw logits; reduction='none' keeps one value per element so that
# loss_function can apply its own padding mask before averaging.
loss_scce_logit = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

# Per-field multipliers applied to each field's mean loss before the fields are summed.
LOSS_WEIGHTS = {
 'td_sc':1.,
 'month': 0.015,
 'day': 0.025,
 'dtme': 0.025,
 'dow': 0.01,
 'tcode_num': 1.,
 'log_amount_sc': 2.}

# Lookups used by loss_function: where each field starts in the target's last axis,
# how many columns it spans, and which loss branch ("scce" or "pdf") it uses.
FIELD_STARTS_TAR = fieldInfo.FIELD_STARTS_TAR
FIELD_DIMS_TAR = fieldInfo.FIELD_DIMS_TAR
LOSS_TYPES = fieldInfo.LOSS_TYPES

def log_normal_pdf(sample, mean, logvar, raxis=1):
    """Element-wise log-density of ``sample`` under a Gaussian.

    Args:
        sample: observed values.
        mean: Gaussian mean, broadcastable against ``sample``.
        logvar: natural log of the variance, broadcastable against ``sample``.
        raxis: unused; kept so existing call sites keep working.

    Returns:
        ``-0.5 * ((sample - mean)^2 / exp(logvar) + logvar + log(2*pi))``; no reduction is applied.
    """
    log2pi = tf.math.log(2. * np.pi)
    precision_weighted_sq_err = (sample - mean) ** 2. * tf.exp(-logvar)
    return -.5 * (precision_weighted_sq_err + logvar + log2pi)


def loss_function(real, preds):
    """Masked, weighted sum of the per-field losses.

    Args:
        real: target tensor, sliced as ``real[:, :, start:end]`` per field. A timestep
            whose features sum to zero along axis 2 is treated as padding and ignored.
        preds: mapping from field name to that field's prediction tensor. For "pdf"
            fields, channel 0 is used as the mean and channel 1 as the log-variance.

    Returns:
        Scalar tensor: the sum over fields of ``LOSS_WEIGHTS[k]`` times the field's
        loss averaged over non-padding timesteps. A batch with no valid timestep
        contributes 0 instead of NaN.

    Raises:
        ValueError: if ``LOSS_TYPES[k]`` is neither "scce" nor "pdf". Previously such a
            field silently reused the loss tensor of the preceding field.
    """
    # True where the timestep carries data, False where it is all-zero padding.
    valid_steps = tf.math.logical_not(tf.math.equal(tf.reduce_sum(real, axis=2), 0))

    weighted_losses = []
    for k, k_pred in preds.items():
        st = FIELD_STARTS_TAR[k]
        end = st + FIELD_DIMS_TAR[k]
        target = real[:, :, st:end]
        loss_type = LOSS_TYPES[k]

        if loss_type == "scce":
            loss_ = loss_scce_logit(target, k_pred)
        elif loss_type == "pdf":
            # Negative Gaussian log-likelihood; drop the trailing singleton axis.
            loss_ = -log_normal_pdf(target, k_pred[:, :, 0:1], k_pred[:, :, 1:2])[:, :, 0]
        else:
            raise ValueError(f"Unknown loss type {loss_type!r} for field {k!r}")

        mask = tf.cast(valid_steps, dtype=loss_.dtype)
        # divide_no_nan returns 0 when every timestep in the batch is padding.
        loss_ = tf.math.divide_no_nan(tf.reduce_sum(loss_ * mask), tf.reduce_sum(mask))

        weighted_losses.append(loss_ * LOSS_WEIGHTS[k])
    return tf.reduce_sum(weighted_losses)

class Train(object):
    """Custom training loop with per-epoch validation and early stopping.

    Attributes are plain Python/Keras objects:
      * ``transformer``: the model being trained; called as ``model(inp, tar)`` and
        expected to return ``(predictions, <something>)``.
      * ``train_loss`` / ``validation_loss``: running means, reset every epoch.
      * ``results``: per-epoch history under the keys "loss" and "val_loss".
    """

    def __init__(self, transformer):
        self.transformer = transformer
        self.train_loss = tf.keras.metrics.Mean(name='train_loss')
        self.validation_loss = tf.keras.metrics.Mean(name='val_loss')
        self.results = {"loss": [], "val_loss": []}

    def train(self, train_batches, x_cv, targ_cv, epochs, early_stop, val_batch_size=64):
        """Train ``self.transformer`` and evaluate it on the validation arrays each epoch.

        Args:
            train_batches: iterable of ``(inp, tar)`` training batches.
            x_cv: validation inputs (array-like, first axis = sequences).
            targ_cv: validation targets aligned with ``x_cv``.
            epochs: maximum number of epochs.
            early_stop: patience; training stops once the best validation loss is no
                longer among the last ``early_stop`` epochs.
            val_batch_size: batch size used when iterating the validation arrays.

        The model and the validation data now come from ``self.transformer`` and the
        ``x_cv``/``targ_cv`` arguments; the previous version read the notebook-level
        globals ``transformer`` and ``val_batches`` and overwrote these arguments.
        """
        import time  # local import: the notebook imports `time` only after this class is defined

        model = self.transformer
        optimizer = tf.keras.optimizers.Adam()
        # Build the validation pipeline from the arguments; no shuffling is needed for evaluation.
        val_batches = tf.data.Dataset.from_tensor_slices(
            (np.asarray(x_cv, dtype=np.float32), np.asarray(targ_cv, dtype=np.float32))
        ).batch(val_batch_size)

        for epoch in range(epochs):
            start = time.time()
            self.train_loss.reset_states()
            self.validation_loss.reset_states()

            for batch_no, (inp, tar) in enumerate(train_batches):
                with tf.GradientTape() as tape:
                    predictions, _ = model(inp, tar)
                    loss = loss_function(tar, predictions)
                gradients = tape.gradient(loss, model.trainable_variables)
                optimizer.apply_gradients(zip(gradients, model.trainable_variables))

                self.train_loss(loss)
                if batch_no % 50 == 0:
                    print(f'Epoch {epoch+1} Batch{batch_no} Loss{self.train_loss.result(): .4f}')
            print(f'Epoch {epoch + 1} Loss {self.train_loss.result():.4f}')

            for inp_val, tar_val in val_batches:
                predictions_val, _ = model(inp_val, tar_val)
                self.validation_loss(loss_function(tar_val, predictions_val))
            print(f"** on validation data loss is {self.validation_loss.result():.4f}")
            self.results["loss"].append(self.train_loss.result().numpy())
            self.results["val_loss"].append(self.validation_loss.result().numpy())

            print(f'Time taken for 1 epoch: {time.time() - start:.2f} secs\n')

            # Stop when the overall best validation loss is older than the patience window.
            if min(self.results["val_loss"] ) < min(self.results["val_loss"][-early_stop:] ):
                print(f"Stopping early, last {early_stop} val losses are: {self.results['val_loss'][-early_stop:]} \
                      \nBest was {min(self.results['val_loss'] ):.3f}\n\n")
                break
        
import tensorflow as tf
from modules import Transformer
import time

# Activation names for the two continuous fields, handed to the model through `config`.
ACTIVATIONS = {
    "td_sc": "relu",
    "log_amount_sc": "relu"
}
# Field layout shared with the model: processing order plus input/net offsets and widths.
fieldInfo = FieldInfo(n_tcodes)
config = {}
config["ORDER"] = fieldInfo.DATA_KEY_ORDER
config["FIELD_STARTS_IN"] = fieldInfo.FIELD_STARTS_IN
config["FIELD_DIMS_IN"] = fieldInfo.FIELD_DIMS_IN
config["FIELD_STARTS_NET"] = fieldInfo.FIELD_STARTS_NET
config["FIELD_DIMS_NET"] = fieldInfo.FIELD_DIMS_NET
config["ACTIVATIONS"] = ACTIVATIONS

# Hyper-parameters.
# NOTE(review): batch_size, seq_len and raw_features are defined here but not passed
# to anything in the visible cells.
features = 26
d_embedding = 128
dff = 128
d_model = 128
batch_size = 64
seq_len = 80
maximum_position_encoding = 256
rate = 0.1
num_heads = 2
num_layers = 4
raw_features = 7
# Pass the `rate` variable (previously a duplicated literal 0.1) so editing it above takes effect.
transformer = Transformer(features,dff, d_embedding, d_model, maximum_position_encoding,num_heads, num_layers,config, rate=rate)
epochs = 3
early_stop = 2
train = Train(transformer)
# NOTE(review): the device string is hard-coded to the second GPU; adjust for the host.
with  tf.device('/gpu:1'):
    train.train(train_batches,x_cv, targ_cv, epochs, early_stop)

            

Epoch 1 Batch0 Loss 10.8780
Epoch 1 Batch50 Loss 8.3938
Epoch 1 Batch100 Loss 7.3667
Epoch 1 Batch150 Loss 6.9372
Epoch 1 Loss 6.7568
** on validation data loss is 5.6425
Time taken for 1 epoch: 16.71 secs

Epoch 2 Batch0 Loss 5.7318
Epoch 2 Batch50 Loss 5.4931
Epoch 2 Batch100 Loss 5.3381
Epoch 2 Batch150 Loss 5.2056
Epoch 2 Loss 5.1409
** on validation data loss is 4.7474
Time taken for 1 epoch: 16.53 secs

Epoch 3 Batch0 Loss 4.6637
Epoch 3 Batch50 Loss 4.6603
Epoch 3 Batch100 Loss 4.5716
Epoch 3 Batch150 Loss 4.4975
Epoch 3 Loss 4.4662
** on validation data loss is 4.2645
Time taken for 1 epoch: 16.48 secs



In [43]:
train.transformer

<modules.Transformer at 0x7f5d0c165a90>

In [44]:
START_DATE

Timestamp('1993-01-01 00:00:00')

In [45]:
import datetime
import calendar
from modules import create_masks

# Number of transactions generated per sequence, and the length of the calendar in years.
max_length = 25
MAX_YEARS_SPAN = 15
# Days remaining until the end of d's month (0 on the last day).
get_dtme = lambda d: calendar.monthrange(d.year, d.month)[1] - d.day

END_DATE = START_DATE.replace(year = START_DATE.year+ MAX_YEARS_SPAN)

# Every calendar day from START_DATE (inclusive) to END_DATE (exclusive).
ALL_DATES = [START_DATE + datetime.timedelta(i) for i in range((END_DATE - START_DATE).days)]

# Calendar lookup, one row per day. Columns:
#   0: month % 12 (December -> 0), 1: day % 31 (the 31st -> 0), 2: weekday,
#   3: day index since START_DATE, 4: year, 5: days to month end.
AD = np.array([(d.month % 12, d.day % 31, d.weekday() % 7, i, d.year, get_dtme(d)) for i, d in enumerate(ALL_DATES)])

# Earliest transaction date of every real account.
start_date_opts = df.groupby("account_id")["datetime"].min().dt.date.to_list()   # one entry per account_id
n_seqs_to_generate = 3
# NOTE(review): no seed is set, so sampled start dates/ages change on every run.
start_dates = np.random.choice(start_date_opts, size=n_seqs_to_generate) # sample start dates from real data

attributes = encoder.attributes
seq_ages = np.random.choice(attributes, size=n_seqs_to_generate) # sample ages from real data

# Generate sequences
# Row index into AD for each sampled start date.
start_inds = np.array([(d - START_DATE.date()).days for d in start_dates])    # shape (n_seqs_to_generate,)
print(start_inds)
# First timestep: the scaled age repeated across all input features.
inp = np.repeat(np.array(seq_ages)[:, None, None], repeats=n_feat_inp, axis=2) / ATTR_SCALE   # (n_seqs_to_generate, 1, n_feat_inp)
raw_date_info_list = []
# Autoregressive loop: each pass appends one generated timestep to `inp`.
# NOTE(review): call_to_generate is defined in a later cell, so that cell must be run first.
for i in range(max_length):
    predictions, attn, raw_ps, date_inds, enc_preds, raw_date  = call_to_generate(transformer, inp, start_inds)
    print(date_inds)
    enc_preds = tf.reshape(tf.constant(enc_preds), shape=(-1,1, n_feat_inp))      # (n_seqs_to_generate, 1, n_feat_inp)
    inp = tf.concat([inp, enc_preds], axis=1)   
    raw_date_info_list.append(raw_date)  
    # The newly chosen dates become the starting point for the next step.
    start_inds = date_inds



[ 894 1012  549]
[ 894 1019  555]
[ 905 1077  555]
[ 910 1136  559]
[ 910 1200  573]
[ 919 1238  576]
[ 922 1297  576]
[ 928 1325  581]
[ 935 1326  582]
[ 941 1338  585]
[ 941 1381  588]
[ 949 1473  603]
[ 959 1537  607]
[ 966 1563  607]
[ 966 1567  607]
[ 972 1573  607]
[ 972 1580  608]
[ 983 1676  612]
[ 990 1749  614]
[ 994 1778  616]
[1002 1870  620]
[1002 1929  637]
[1015 1992  637]
[1016 2022  639]
[1022 2082  642]
[1023 2111  643]


In [46]:
raw_date_info_list

[{'month': array([ 6, 10,  7]),
  'day': array([14, 17, 10]),
  'year': array([1995, 1995, 1994])},
 {'month': array([6, 0, 7]),
  'day': array([25, 14, 10]),
  'year': array([1995, 1995, 1994])},
 {'month': array([6, 2, 7]),
  'day': array([30, 11, 14]),
  'year': array([1995, 1996, 1994])},
 {'month': array([6, 4, 7]),
  'day': array([30, 15, 28]),
  'year': array([1995, 1996, 1994])},
 {'month': array([7, 5, 7]),
  'day': array([ 9, 23,  0]),
  'year': array([1995, 1996, 1994])},
 {'month': array([7, 7, 7]),
  'day': array([12, 21,  0]),
  'year': array([1995, 1996, 1994])},
 {'month': array([7, 8, 8]),
  'day': array([18, 18,  5]),
  'year': array([1995, 1996, 1994])},
 {'month': array([7, 8, 8]),
  'day': array([25, 19,  6]),
  'year': array([1995, 1996, 1994])},
 {'month': array([7, 8, 8]),
  'day': array([0, 0, 9]),
  'year': array([1995, 1996, 1994])},
 {'month': array([ 7, 10,  8]),
  'day': array([ 0, 13, 12]),
  'year': array([1995, 1996, 1994])},
 {'month': array([8, 1, 8])

In [47]:
# Transform the data generated by BF back to the original data space
# Timestep 0 holds the scaled age repeated across every feature; the rest are transactions.
seqs = inp
ages = seqs[:, 0, :] * ATTR_SCALE
seqs = seqs[:, 1:, :]
# Every feature of the first timestep must carry the same age. Check each difference
# individually: summing the differences would let offsetting errors cancel out.
assert np.all(np.diff(ages) == 0), f"Bad formating, expected all entries same in each row, got {ages}"
FIELD_STARTS_IN = transformer.FIELD_STARTS_IN
FIELD_DIMS_IN = transformer.FIELD_DIMS_IN
# Undo the amount scaling: rescale, invert the base-10 log, then subtract 1.
amts = seqs[:, :, FIELD_STARTS_IN["log_amount_sc"]].numpy() * LOG_AMOUNT_SCALE
amts = 10 ** amts
amts = np.round(amts - 1.0, 2)
# Days since the previous transaction, back in whole days.
days_passed = np.round(seqs[:, :, FIELD_STARTS_IN["td_sc"]] * TD_SCALE ).astype(int)
# Transaction code = position of the largest entry in the tcode slice of the input.
t_code = np.argmax(seqs[:, :, FIELD_STARTS_IN["tcode_num"]: FIELD_STARTS_IN["tcode_num"] + FIELD_DIMS_IN["tcode_num"]], axis=-1)



In [50]:

# Flatten the per-customer arrays and map integer codes back to their labels.
flattened_amts = amts.flatten()
flattened_tcodes = t_code.flatten()
translated_tcodes = [NUM_TO_TCODE[tcode_id] for tcode_id in flattened_tcodes]

# One row per generated transaction, ordered customer by customer.
num_customers, num_transactions = amts.shape[0], amts.shape[1]
account_ids = np.repeat(range(num_customers), num_transactions)
df_synth = pd.DataFrame({
    'amount': flattened_amts,
    'transaction_code': translated_tcodes,
    'account_id': account_ids,
})


def _gather_date_field(field):
    """Collect one raw date field in the same customer-major order as the flattened arrays."""
    return np.array([
        raw_date_info_list[step][field][cust]
        for cust in range(num_customers)
        for step in range(num_transactions)
    ])


# Raw (still modulo-encoded) calendar parts for every generated transaction.
months = _gather_date_field('month')
days = _gather_date_field('day')
years = _gather_date_field('year')

# Function to substitute month 0 with 12 and adjust days based on the month
def adjust_month_and_day(month, day):
    """Map the modulo-encoded month/day back to calendar values.

    Month 0 becomes 12. A day of 0 becomes the last day of the (adjusted) month:
    31 for the 31-day months, 28 for February (leap years are not considered),
    and 30 otherwise. Any non-zero day is returned unchanged.

    Returns:
        Tuple ``(month, day)`` after adjustment.
    """
    if month == 0:
        month = 12

    # Only the placeholder day 0 needs replacing.
    if day != 0:
        return month, day
    if month == 2:
        return month, 28
    if month in (1, 3, 5, 7, 8, 10, 12):
        return month, 31
    return month, 30

# Apply the adjustment to every (month, day) pair, then split the pairs back into
# two parallel tuples.
adjusted_months, adjusted_days = zip(*[adjust_month_and_day(m, d) for m, d in zip(months, days)])

# Converting to numpy arrays
adjusted_months = np.array(adjusted_months)
adjusted_days = np.array(adjusted_days)


# Attach the calendar parts as columns of the synthetic frame.
df_synth['year'] = years
df_synth['month'] = adjusted_months
df_synth['day'] = adjusted_days

# Assemble a datetime column from the year/month/day columns.
df_synth['date'] = pd.to_datetime(df_synth[['year', 'month', 'day']])

# Handling days passed
# flatten() returns a copy, so zeroing entries here leaves `days_passed` untouched.
flattened_days_passed = days_passed.flatten()
flattened_days_passed[::num_transactions] = 0  # Setting the first transaction's days_passed to 0
df_synth['days_passed'] = flattened_days_passed

In [54]:
pd.set_option('display.max_rows', 80)
df_synth

Unnamed: 0,amount,transaction_code,account_id,year,month,day,date,days_passed
0,417.709991,CREDIT__CREDIT IN CASH__nan,0,1995,6,14,1995-06-14,0
1,521.099976,CREDIT__CREDIT IN CASH__nan,0,1995,6,25,1995-06-25,11
2,44.23,CREDIT__nan__INTEREST CREDITED,0,1995,6,30,1995-06-30,5
3,26.459999,DEBIT__CASH WITHDRAWAL__PAYMENT ON STATEMENT,0,1995,6,30,1995-06-30,0
4,566.080017,DEBIT__CASH WITHDRAWAL__nan,0,1995,7,9,1995-07-09,9
5,1014.849976,CREDIT__CREDIT IN CASH__nan,0,1995,7,12,1995-07-12,3
6,419.980011,CREDIT__nan__INTEREST CREDITED,0,1995,7,18,1995-07-18,6
7,691.919983,DEBIT__CASH WITHDRAWAL__nan,0,1995,7,25,1995-07-25,7
8,82.489998,CREDIT__nan__INTEREST CREDITED,0,1995,7,31,1995-07-31,6
9,6.89,DEBIT__CASH WITHDRAWAL__PAYMENT ON STATEMENT,0,1995,7,31,1995-07-31,0


In [18]:
df

Unnamed: 0,amount,transaction_code,account_id
0,164.610001,DEBIT__CASH WITHDRAWAL__PAYMENT ON STATEMENT,0
1,104.949997,DEBIT__CASH WITHDRAWAL__,0
2,99.790001,CREDIT__nan__INTEREST CREDITED,0
3,14.6,DEBIT__CASH WITHDRAWAL__PAYMENT ON STATEMENT,0
4,2016.709961,CREDIT__COLLECTION FROM ANOTHER BANK__nan,0
5,157.119995,DEBIT__CASH WITHDRAWAL__PAYMENT ON STATEMENT,1
6,36.75,CREDIT__nan__INTEREST CREDITED,1
7,2632.419922,DEBIT__CREDIT CARD WITHDRAWAL__nan,1
8,1330.160034,DEBIT__REMITTANCE TO ANOTHER BANK__,1
9,2438.379883,CREDIT__CREDIT IN CASH__nan,1


In [28]:
t_code

array([[4, 3, 2, 4, 3],
       [3, 2, 3, 0, 3]])

In [33]:
days_passed

array([[ 5, 12,  6,  0,  5],
       [ 1,  4,  4,  2,  4]])

In [31]:
amts

array([[ 115.5 ,  114.69,   29.51,   17.83, 1439.09],
       [ 175.79,   55.53, 1083.36, 1482.6 , 1261.26]], dtype=float32)

In [25]:
FIELD_STARTS_IN

{'tcode_num': 0,
 'dow': 16,
 'month': 18,
 'day': 20,
 'dtme': 22,
 'td_sc': 24,
 'log_amount_sc': 25}

In [24]:
amts

array([[ 115.5 ,  114.69,   29.51,   17.83, 1439.09],
       [ 175.79,   55.53, 1083.36, 1482.6 , 1261.26]], dtype=float32)

In [10]:
def log_normal_pdf_gen(sample, mean, logvar, raxis=1):
    """Gaussian log-density used at generation time, with ``log(2*pi)`` cast to float64.

    Args:
        sample: values at which to evaluate the density.
        mean: Gaussian mean.
        logvar: natural log of the variance.
        raxis: unused; kept for signature compatibility.

    Returns:
        ``-0.5 * ((sample - mean)^2 * exp(-logvar) + logvar + log(2*pi))``, element-wise.
    """
    log2pi = tf.cast(tf.math.log(2. * np.pi), tf.float64)
    precision_weighted_sq_err = (sample - mean) ** 2. * tf.exp(-logvar)
    return -.5 * (precision_weighted_sq_err + logvar + log2pi)

def raw_dates_to_reencoded(raw_preds, start_inds,  max_days = 100, greedy_decode=False):
    """Choose the next transaction date for every sequence and re-encode it as model input.

    For each sequence, the candidates are the rows of the global calendar ``AD`` that
    start at the sequence's current index. Each candidate is scored by the product of
    the softmax probabilities of its month, day, day-of-week and days-to-month-end,
    times a Gaussian density over the number of days passed. The step is then either
    the arg-max (greedy) or a sample from the normalised scores.

    Args:
        raw_preds: mapping field name -> raw network output; only the last timestep
            (``[:, -1]``) of "month", "day", "dow", "dtme" and "td_sc" is read.
        start_inds: current row index into ``AD`` for each sequence.
        max_days: maximum number of candidate days to look ahead.
        greedy_decode: pick the highest-scoring day instead of sampling.

    Returns:
        Tuple ``(encoded, inds, raw_date)``:
          * ``encoded``: dict with "td_sc" (scaled days passed) and the sin/cos encodings
            of "month", "day", "dow", "dtme" for the chosen dates;
          * ``inds``: row indices of the chosen dates in ``AD``;
          * ``raw_date``: dict with the "month", "day" and "year" columns of ``AD``
            for the chosen dates (month/day still in their modulo encoding).

    Raises:
        ValueError: if a start index lies at or beyond the end of ``AD``.

    NOTE(review): the predicted log-variance is multiplied by TD_SCALE. If the network
    predicts it in scaled units, converting to days would instead add 2*log(TD_SCALE).
    Left unchanged; confirm the intent.
    """
    # Only the last timestep of every field matters: it is the only new prediction.
    all_ps = [tf.nn.softmax(raw_preds[k][:,-1]).numpy() for k in ["month", "day", "dow", "dtme"]]
    td_preds = raw_preds["td_sc"][:,-1].numpy()

    timesteps = np.zeros(len(start_inds)).astype(int)
    for i, (month_ps, day_ps, dow_ps, dtme_ps, td_pred, si) in enumerate(zip(*all_ps, td_preds, start_inds)):
        # Near the end of the calendar the window is shorter than max_days.
        window = AD[si:si+max_days]
        n_candidates = len(window)
        if n_candidates == 0:
            raise ValueError(f"start index {si} is outside the calendar of {len(AD)} days")

        density = np.exp(log_normal_pdf_gen(window[:,3]-si, mean = td_pred[0]*TD_SCALE, logvar=td_pred[1]*TD_SCALE))
        ps = np.asarray(
            month_ps[window[:,0]] * day_ps[window[:,1]] * dow_ps[window[:,2]] * dtme_ps[window[:,-1]] * density
        )   # shape (n_candidates,)

        if greedy_decode:
            timesteps[i] = np.argmax(ps)
        else:
            # Sample over the actual number of candidates so `a` and `p` always match in size.
            timesteps[i] = np.random.choice(n_candidates, p=ps / ps.sum())
    inds = start_inds + timesteps

    return_ = {}
    return_["td_sc"] = tf.expand_dims(timesteps.astype(np.float32)/ TD_SCALE, axis=1)
    return_["month"] = bulk_encode_time_value(AD[inds, 0], 12)
    return_["day"] = bulk_encode_time_value(AD[inds, 1], 31)
    return_["dow"] = bulk_encode_time_value(AD[inds, 2], 7)
    return_["dtme"] = bulk_encode_time_value(AD[inds, -1], 31)

    raw_date = {}
    raw_date['month'] = AD[inds, 0]
    raw_date['day'] = AD[inds, 1]
    raw_date['year'] = AD[inds, 4]

    return return_, inds, raw_date


def bulk_encode_time_value(val, max_val):
    """Encode cyclic values as points on the unit circle.

    Args:
        val: array of values, one per row of the output.
        max_val: period of the cycle (e.g. 12 for months).

    Returns:
        Array whose second axis holds ``[sin(angle), cos(angle)]`` with
        ``angle = 2*pi / max_val * val``.
    """
    angle = 2 * np.pi / max_val * val
    return np.stack([np.sin(angle), np.cos(angle)], axis=1)


def reencode_net_prediction(net_name, predictions):
    """Convert one field's raw network output into the form used as model input.

    ``net_name`` is one of 'tcode_num', 'dow', 'month', 'day', 'dtme', 'td_sc',
    'log_amount_sc'. The re-encoded value is also what later fields are conditioned
    on during generation.

    * names containing "_num": sample a class per timestep from the softmax and
      return it one-hot encoded, shape (n_seqs, seq_len, dim);
    * 'month', 'day', 'dtme', 'dow': sample a class per timestep and return its
      sin/cos encoding, shape (n_seqs, seq_len, 2);
    * 'td_sc', 'log_amount_sc': return channel 0 of the prediction unchanged.

    Any other name falls through and returns ``None``. The class count is read from the
    notebook-level ``transformer.FIELD_DIMS_NET``.
    """
    cyclic_fields = {'month': 12, 'day': 31, 'dtme': 31, 'dow': 7}
    n_seqs = predictions.shape[0]
    is_categorical = "_num" in net_name

    if is_categorical or net_name in cyclic_fields:
        n_classes = transformer.FIELD_DIMS_NET[net_name]
        class_ids = np.arange(n_classes)
        # One probability row per (sequence, timestep) pair.
        probs = tf.nn.softmax(predictions, axis=2).numpy().reshape(-1, n_classes)
        sampled = np.array([np.random.choice(class_ids, p=row) for row in probs])

        if is_categorical:
            per_seq = np.reshape(sampled, newshape=(n_seqs, -1))
            return tf.one_hot(per_seq, depth=n_classes)

        encoded = bulk_encode_time_value(sampled, max_val=n_classes)
        return np.reshape(encoded, newshape=(n_seqs, -1, 2))

    if net_name in ['td_sc', "log_amount_sc"]:
        return predictions[:, :, 0:1]
            

In [21]:
transformer.ORDER

ListWrapper(['tcode_num', 'dow', 'month', 'day', 'dtme', 'td_sc', 'log_amount_sc'])

In [11]:
def call_to_generate(transformer, inp, start_inds):
    """Run one generation step through the transformer.

    The fields are predicted one after another in ``transformer.ORDER``; each field's
    re-encoded prediction is concatenated onto the features seen by the next field.
    The predictions cover every timestep, but only the last one is new.

    Returns:
        Tuple ``(preds, attention_weights, raw_preds, inds, encoded_preds, raw_date_info)``:
        re-encoded predictions per field, the decoder's attention weights, raw network
        outputs per field, the new date indices, the new timestep encoded as one input
        row per sequence, and the raw month/day/year of the chosen dates.

    NOTE(review): dropout and the decoder stack are invoked with training=True here,
    i.e. also during generation. Confirm this is intended.
    """
    embedded = transformer.input_layer(inp)
    n_positions = tf.shape(embedded)[1]
    embedded = embedded + transformer.pos_encoding[:, :n_positions, :]
    embedded = transformer.dropout(embedded, training=True)
    mask, _ = create_masks(inp)
    decoded, attention_weights = transformer.DecoderStack(embedded, True, mask)
    final_output = transformer.final_layer(decoded)

    # Predict each field in turn, feeding earlier fields into later ones.
    preds, raw_preds, last_step_encoded = {}, {}, {}
    for net_name in transformer.ORDER:
        field_net = transformer.__getattribute__(net_name)
        raw = field_net(final_output)
        raw_preds[net_name] = raw

        reencoded = reencode_net_prediction(net_name, raw)
        preds[net_name] = reencoded
        last_step_encoded[net_name] = reencoded[:, -1, :]

        final_output = tf.concat([final_output, reencoded], axis=2)

    # Replace the independently sampled date fields with one consistent calendar date.
    date_info, inds, raw_date_info = raw_dates_to_reencoded(raw_preds, start_inds)
    last_step_encoded.update(date_info)

    ordered_fields = [last_step_encoded[k] for k in transformer.ORDER]
    # (n_seqs_to_generate, 1, number of input features)
    encoded_preds = tf.expand_dims(tf.concat(ordered_fields, axis=1), axis=1)

    return preds, attention_weights, raw_preds, inds, encoded_preds, raw_date_info
    

     

In [34]:
pred

<tf.Tensor: shape=(2, 1, 16), dtype=float32, numpy=
array([[[ 2.606456  ,  1.9652989 ,  1.0356619 ,  1.6328188 ,
          1.0518488 ,  0.50476277, -2.082876  ,  0.17730191,
          0.9945889 , -6.3227634 , -0.73363006, -0.05845775,
         -2.3763046 , -2.1853278 , -3.2781813 , -2.904684  ]],

       [[ 2.3194718 ,  0.90071404,  0.9927165 ,  1.6883271 ,
          0.52983195,  0.76463836, -2.58171   ,  0.20744765,
          0.96543586, -5.3389544 , -1.4356111 ,  0.04906608,
         -2.498517  , -2.2893934 , -3.8080366 , -3.7633495 ]]],
      dtype=float32)>

In [33]:
reencode_net_prediction('tcode_num', pred)

<tf.Tensor: shape=(2, 1, 16), dtype=float32, numpy=
array([[[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

       [[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
      dtype=float32)>

In [25]:
date_inds

array([1284,  201])

In [23]:
START_DATE

Timestamp('1993-01-01 00:00:00')

In [None]:
def generate_data(self, generatedseq_len, START_DATE,ATTR_SCALE,df, n_seqs_to_generate, attributes):
    """Unfinished stub for a packaged version of the generation cell.

    The original cell held only the signature, without a colon or body, so it was a
    SyntaxError. It is now a valid definition that fails loudly until implemented.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError("generate_data has not been implemented yet")

In [18]:
# from modules import InputEmbedLayer,InputEmbedLayer_Res, ResidualLayer, RandomNoise_Simulator_Normal, positional_encoding, MultiHeadAttention, create_masks, DecoderLayer
# import tensorflow as tf
# features = 26
# d_embedding = 128
# dff = 128
# d_model = 128
# batch_size = 64
# seq_len = 80
# maximum_position_encoding = 256
# rate = 0.1
# num_heads = 2
# num_layers = 4
# z = RandomNoise_Simulator_Normal(batch_size, seq_len, features)

# #Transformer Model
# input_ = tf.keras.layers.Input(shape=(None, features))
# x = InputEmbedLayer(features, dff , d_embedding)(input_)

# pos_encoding = positional_encoding(maximum_position_encoding, d_embedding)   #(1, maximum_position_encoding=256, d_model=128)

# seq_len = tf.shape(x)[1]
# x += pos_encoding[:, :seq_len, :]     #x is the output of Input layer

# x = tf.keras.layers.Dropout(rate)(x, training=True)

# attention_weights = {}
# mask, _ = create_masks(tar)
# for i in range(num_layers):
#     d_inp_decoder = tf.keras.backend.int_shape(x)[-1]
#     x, attentionweights = DecoderLayer(d_inp_decoder, d_model, num_heads, dff)(x, True, mask)
#     attention_weights['decoder_layer{}'.format(i+1)] = attentionweights

# final_output = tf.keras.layers.Dense(d_model, activation=None)(x)


# model = tf.keras.models.Model(input_, final_output)
