In [15]:
!pip install tensorflow==2.0.0

Collecting tensorflow==2.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/46/0f/7bd55361168bb32796b360ad15a25de6966c9c1beb58a8e30c01c8279862/tensorflow-2.0.0-cp36-cp36m-manylinux2010_x86_64.whl (86.3MB)
[K     |████████████████████████████████| 86.3MB 38kB/s 
Collecting tensorflow-estimator<2.1.0,>=2.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/fc/08/8b927337b7019c374719145d1dceba21a8bb909b93b1ad6f8fb7d22c1ca1/tensorflow_estimator-2.0.1-py2.py3-none-any.whl (449kB)
[K     |████████████████████████████████| 450kB 38.1MB/s 
Collecting tensorboard<2.1.0,>=2.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/76/54/99b9d5d52d5cb732f099baaaf7740403e83fe6b0cedde940fabd2b13d75a/tensorboard-2.0.2-py3-none-any.whl (3.8MB)
[K     |████████████████████████████████| 3.8MB 27.2MB/s 
Collecting google-auth<2,>=1.6.3
[?25l  Downloading https://files.pythonhosted.org/packages/17/83/3cb31033e1ea0bdb8991b6ef327a5bf4960bd3dd31ff355881bfb0ddf199/google_aut

In [2]:
# Third-party dependencies for the notebook.
# NOTE(review): pandas and numpy are not referenced in the cells visible here —
# confirm that later cells actually use them.
import pandas as pd
import numpy as np
import tensorflow as tf
# Show which TensorFlow version was actually loaded (the saved output reads 2.0.0).
print(tf.__version__)

2.0.0


# Method to create windowed dataset for training data

In [0]:
def train_windowed_ds(series, win_w, batch_size, shuffle_size, win_shift=1, seed=1):
    '''
    Build a shuffled, batched tf.data pipeline of (window, next-value) pairs
    for training.

    The series is cut into sliding windows of `win_w + 1` consecutive
    entries. The first `win_w` entries of each window are the features and
    the last entry is the target.

    - series       : values in chronological order (rows = time instances);
                     passed as-is to tf.data.Dataset.from_tensor_slices
    - win_w        : 'int' >= 1, number of time-steps used as features
    - batch_size   : 'int', number of windows per batch
    - shuffle_size : 'int', size of the shuffle buffer
    - win_shift    : 'int' >= 1, forward shift between consecutive windows
    - seed         : seed handed to Dataset.shuffle (default 1, the value that
                     used to be hard-coded)

    - Returns:
        A tensorflow dataset yielding (features, targets) batches.
        For a 1-D series:
            - features have shape (batch, win_w)
                - 0th dimension: sliding-window-index (aka sample-index)
                - 1st dimension: width of the sliding window (no. time-steps)
            - targets have shape (batch,): one scalar per window
        The last batch may hold fewer than `batch_size` windows.

    - Raises:
        ValueError if win_w < 1 or win_shift < 1.

    - Note: valid_windowed_ds yields targets of shape (batch, 1) instead of
      (batch,); keep that in mind when comparing the two pipelines.
    - Note on usage: the features carry no trailing feature axis. When
      defining RNN-type networks, add a Lambda layer at the beginning to
      expand dim to 3, e.g.
        - tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1), input_shape=[None])
    '''
    # Fail fast: win_w < 1 would produce empty feature windows, and a
    # non-positive shift is otherwise only rejected later inside TensorFlow.
    if win_w < 1:
        raise ValueError('win_w must be >= 1, got {}'.format(win_w))
    if win_shift < 1:
        raise ValueError('win_shift must be >= 1, got {}'.format(win_shift))

    ds = tf.data.Dataset.from_tensor_slices(series)
    # Each window holds win_w features plus 1 target; incomplete tail windows are dropped.
    ds = ds.window(win_w + 1, shift=win_shift, drop_remainder=True)
    # window() yields nested datasets; batch each one into a single tensor.
    ds = ds.flat_map(lambda win: win.batch(win_w + 1))
    # Shuffle whole windows before splitting, so every feature window stays
    # paired with its own target.
    ds = ds.shuffle(shuffle_size, seed=seed)
    ds = ds.map(lambda win: (win[:-1], win[-1]))
    return ds.batch(batch_size).prefetch(1)

# Method to create windowed dataset for validation data

In [0]:
# Create windowed-dataset
def valid_windowed_ds(series, win_w, batch_size, win_shift = 1):
    '''
    Build a batched (unshuffled) tf.data pipeline of (window, next-value)
    pairs for validation.

    The series is cut into sliding windows of `win_w + 1` consecutive
    entries. The first `win_w` entries of each window are the features and
    the last entry is the target. Windows are emitted in chronological order.

    - series     : values in chronological order (rows = time instances);
                   passed as-is to tf.data.Dataset.from_tensor_slices
    - win_w      : 'int' >= 1, number of time-steps used as features
    - batch_size : 'int', number of windows per batch
    - win_shift  : 'int' >= 1, forward shift between consecutive windows

    - Returns:
        A tensorflow dataset yielding (features, targets) batches.
        For a 1-D series:
            - features have shape (batch, win_w)
                - 0th dimension: sliding-window-index (aka sample-index)
                - 1st dimension: width of the sliding window (no. time-steps)
            - targets have shape (batch, 1): the target is sliced with
              `win[-1:]`, so it keeps a length-1 axis
        The last batch may hold fewer than `batch_size` windows.

    - Raises:
        ValueError if win_w < 1 or win_shift < 1.

    - Note: train_windowed_ds yields targets of shape (batch,) instead of
      (batch, 1); keep that in mind when comparing the two pipelines.
    - Note on usage: the features carry no trailing feature axis. When
      defining RNN-type networks, add a Lambda layer at the beginning to
      expand dim to 3
        - tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1), input_shape=[None])
    '''
    # Fail fast: win_w < 1 would produce empty feature windows, and a
    # non-positive shift is otherwise only rejected later inside TensorFlow.
    if win_w < 1:
        raise ValueError('win_w must be >= 1, got {}'.format(win_w))
    if win_shift < 1:
        raise ValueError('win_shift must be >= 1, got {}'.format(win_shift))

    ds = tf.data.Dataset.from_tensor_slices(series)
    # Each window holds win_w features plus 1 target; incomplete tail windows are dropped.
    ds = ds.window(win_w + 1, shift=win_shift, drop_remainder=True)
    # window() yields nested datasets; batch each one into a single tensor.
    ds = ds.flat_map(lambda win: win.batch(win_w + 1))
    # win[-1:] (not win[-1]) keeps the target as a length-1 vector.
    ds = ds.map(lambda win: (win[:-1], win[-1:]))
    return ds.batch(batch_size).prefetch(1)

# Example 

In [10]:
# Toy series 1..16, small enough that every window can be checked by eye
data = list(range(1, 17))
train_valid_split = 0.7

# Chronological split: the first 70% of the points train, the rest validate
split_idx = int(train_valid_split * len(data))
train, valid = data[:split_idx], data[split_idx:]

# Show the raw series and both partitions
for caption, values in (
    ('\nThis is the original list (contrived data for illustration only): ', data),
    ('\nThis is the training data : ', train),
    ('\nThis is the validation data : ', valid),
):
    print(caption)
    display(values)

separator = '--'*20

# Training pipeline: 3 time-steps in, 1 label out, shuffled through a 3-element buffer
train_dataset = train_windowed_ds(train, win_w=3, win_shift=1, shuffle_size=3, batch_size=10)
print(separator)
print('\nThis is the shuffled train windowed dataset: ')

for x, y in train_dataset:
    print('features = ', x.numpy())
    print('labels = ', y.numpy())

# -----------------------------------------------------------------------------------------

# Validation pipeline: same window geometry, chronological order preserved
valid_dataset = valid_windowed_ds(valid, win_w=3, win_shift=1, batch_size=10)
print(separator)
print('\nThis is the valid windowed dataset: ')

for x, y in valid_dataset:
    print('features = ', x.numpy())
    print('labels = ', y.numpy())


This is the original list (contrived data for illustration only): 


[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]


This is the training data : 


[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]


This is the validation data : 


[12, 13, 14, 15, 16]

----------------------------------------

This is the shuffled train windowed dataset: 
features =  [[ 2  3  4]
 [ 4  5  6]
 [ 1  2  3]
 [ 6  7  8]
 [ 3  4  5]
 [ 7  8  9]
 [ 8  9 10]
 [ 5  6  7]]
labels =  [ 5  7  4  9  6 10 11  8]
----------------------------------------

This is the valid windowed dataset: 
features =  [[12 13 14]
 [13 14 15]]
labels =  [[15]
 [16]]
