In [2]:
import tensorflow as tf
import numpy as np
import scipy as sp
import sklearn as skl
import matplotlib.pyplot as plt
import pandas as pd
import pickle
import math
import os
from pathlib import Path
from tensorflow import keras
from keras import layers
from keras import models
from keras import regularizers
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from datetime import datetime, date, timedelta

# This script:
# - loads the data from inidata.csv;
# - generates the training and test data sets for the predictive FCNN model;
# - creates and trains the predictive model. The training history is written to the file 'history_p3.txt'.

# Basic actions:
# 1. The test data set is formed from the last 360 days of history.
# 2. The script selects the following features for the predictive model: client type, client, outbound location,
# supplier, description group id (a group consists of 3 descriptions that match in meaning but differ in word order),
# month (1..12), day (1..31), weekday (1..7), quantity (shipment volume), stop (time threshold).

# All relative paths used below (log, input CSV, history file) are resolved
# against this directory, so switch to it before touching any file.
WORKING_DIRECTORY = 'C:/Pilot/test/'
os.chdir(WORKING_DIRECTORY)

fpLog = Path('log.txt')

# Mode 'w' truncates the log, so each run starts with a fresh file; later
# sections reopen it in append mode.
with fpLog.open('w') as flog:
    print('model p3 v1.3 starts at : ', datetime.now(), file=flog)

#fpini = Path ('intermediate.csv')
fpini = Path('inidata.csv')

# Column -> dtype map handed to read_csv, assembled per dtype family.
_float_columns = [
    'tid', 'cid', 'oid', 'sid', 'did',
    'yy', 'mm', 'dd', 'wd',
    'nsl', 'drn', 'qnt', 'stop',
    *(f'T{i:02d}' for i in range(1, 21)),  # T01 .. T20
]
_int32_columns = ['is_i', 'tid_i', 'cid_i', 'oid_i', 'sid_i', 'did_i']

_column_dtypes = dict.fromkeys(_float_columns, 'float')
_column_dtypes.update(dict.fromkeys(_int32_columns, np.int32))
_column_dtypes['txt'] = 'str'
_column_dtypes['phash'] = np.int64

# Load the whole file, then replace every missing value with 0.
dfdata = pd.read_csv(fpini, dtype=_column_dtypes).fillna(0)

#dfdata = pd.read_csv (fpini, dtype = 'float').fillna(0)
#dfdata.rename (columns = {'is': 'is_i'}, inplace = True)
#dfdata['tid_i'] = dfdata['tid'].astype(np.int32)
#dfdata['cid_i'] = dfdata['cid'].astype(np.int32)
#dfdata['oid_i'] = dfdata['oid'].astype(np.int32)
#dfdata['sid_i'] = dfdata['sid'].astype(np.int32)

#dfdata['did_i'] = dfdata['did'].astype(np.int32)
#dfdid_stats = dfdata['did'].describe().transpose()

#dfdata['did'] = (dfdata['did'] - dfdid_stats['mean'])/dfdid_stats['std']

rng = np.random.default_rng()

# Largest value found in the 'is_i' column; the train/test cut-off below is
# expressed relative to it.
ismax = dfdata['is_i'].max()
with fpLog.open('a') as flog:
    print('ismax: ', ismax, file=flog)

# Rows with is_i >= ismax - 360 go to the test set, all remaining rows to the
# training set.
# NOTE(review): if is_i is a day counter, this window covers 361 distinct
# values (ismax - 360 .. ismax) rather than 360 - confirm which is intended.
_in_test_window = dfdata['is_i'] >= (ismax - 360)
dftest = dfdata[_in_test_window].copy()
dftrain = dfdata[~_in_test_window].copy()

# Record the sizes of both partitions in the run log.
with fpLog.open('a') as flog:
    print('dftest: ', dftest.shape, file=flog)
    print('dftrain: ', dftrain.shape, file=flog)

# Input (feature) columns of the model.
lx = [
    'tid', 'cid', 'oid', 'sid', 'did',
    'mm', 'dd', 'wd',
    'qnt', 'stop',
]
# Target columns: 'T01' .. 'T20'.
ly = [f"T{i:02d}" for i in range(1, 21)]
    
# Split each partition into its feature frame (columns lx) and its target
# frame (columns ly).
dfXtrain = dftrain[lx]
dfYtrain = dftrain[ly]
dfXtest = dftest[lx]
dfYtest = dftest[ly]

# Fully connected regression network: one input vector with as many entries
# as there are feature columns in dfXtrain, four hidden blocks, and a
# 20-unit output layer.
ktrInputA = layers.Input(shape=(dfXtrain.shape[1],), name="INPUT_A")

# Hidden stack: four identical blocks of Dense(256, ELU, L2) followed by
# BatchNormalization. `units` is passed as an int: the documented type is a
# positive integer, and the previous string form ('256') only worked where
# Keras silently coerced it with int().
ktr = ktrInputA
for _ in range(4):
    ktr = layers.Dense(
        units=256,
        kernel_regularizer=regularizers.l2(0.0001),
        activation='elu',
    )(ktr)
    ktr = layers.BatchNormalization()(ktr)
    #ktr = layers.Dropout(0.05)(ktr)

# Output layer with 20 units; the ReLU activation keeps predictions
# non-negative.
ktrOut = layers.Dense(
    units=20,
    kernel_regularizer=regularizers.l2(0.0001),
    activation='relu',
)(ktr)

mdSimple_h = keras.Model(inputs=[ktrInputA], outputs=ktrOut)

mdSimple_h.summary()

# Training configuration: mini-batches of 64 samples, Adam optimiser, MSE loss.
iBatchSize = 64

optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

# `metrics` is documented as a list; a bare metric object is only tolerated
# by some Keras versions, so it is wrapped in a list here.
mdSimple_h.compile(
    optimizer=optimizer,
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=[tf.keras.metrics.RootMeanSquaredError()],
)

# Train for 100 epochs. The test partition is passed as validation data, so
# the per-epoch validation values in the history are computed on it.
history_h = mdSimple_h.fit(
    {"INPUT_A": dfXtrain.to_numpy()},
    dfYtrain.to_numpy(),
    epochs=100,
    batch_size=iBatchSize,
    validation_data=({"INPUT_A": dfXtest.to_numpy()}, dfYtest.to_numpy()),
)

# Persist the training history as CSV: one column per entry of
# history_h.history, without the index column.
fphist = Path('history_p3.txt')
dfhist = pd.DataFrame(data=history_h.history)
dfhist.to_csv(path_or_buf=fphist, index=False)


Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 INPUT_A (InputLayer)        [(None, 10)]              0         
                                                                 
 dense_5 (Dense)             (None, 256)               2816      
                                                                 
 batch_normalization_4 (Batc  (None, 256)              1024      
 hNormalization)                                                 
                                                                 
 dense_6 (Dense)             (None, 256)               65792     
                                                                 
 batch_normalization_5 (Batc  (None, 256)              1024      
 hNormalization)                                                 
                                                                 
 dense_7 (Dense)             (None, 256)               6579