In [1]:
# Install the exact TensorFlow release (2.0.0) that the rest of this notebook is run with.
!pip install tensorflow==2.0.0

Collecting tensorflow==2.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/46/0f/7bd55361168bb32796b360ad15a25de6966c9c1beb58a8e30c01c8279862/tensorflow-2.0.0-cp36-cp36m-manylinux2010_x86_64.whl (86.3MB)
[K     |████████████████████████████████| 86.3MB 40kB/s 
Collecting tensorflow-estimator<2.1.0,>=2.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/fc/08/8b927337b7019c374719145d1dceba21a8bb909b93b1ad6f8fb7d22c1ca1/tensorflow_estimator-2.0.1-py2.py3-none-any.whl (449kB)
[K     |████████████████████████████████| 450kB 48.0MB/s 
Collecting tensorboard<2.1.0,>=2.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/76/54/99b9d5d52d5cb732f099baaaf7740403e83fe6b0cedde940fabd2b13d75a/tensorboard-2.0.2-py3-none-any.whl (3.8MB)
[K     |████████████████████████████████| 3.8MB 39.9MB/s 
Collecting google-auth<2,>=1.6.3
[?25l  Downloading https://files.pythonhosted.org/packages/17/83/3cb31033e1ea0bdb8991b6ef327a5bf4960bd3dd31ff355881bfb0ddf199/google_aut

In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
# Show the version of the imported TensorFlow package so the active release is visible in the output.
print(tf.__version__)

2.0.0


# Function to create a windowed dataset for the training data

In [0]:
'''
# Create windowed-dataset 
'''
def train_windowed_ds(df: 'pd.DataFrame', win_w=2, win_shift=1, shuffle_size=100, batch_size=32) -> 'tf.data.Dataset':
    '''
    Build a shuffled, batched tf.data.Dataset of sliding windows for training.

    The rows of `df` must represent time instances in chronological order.
    Every column except the last one is treated as a feature; the last
    column holds the targets (labels).

    Parameters
    ----------
    df           : Pandas DataFrame of features with the targets as last column.
    win_w        : int >= 1, width of the sliding window (number of time-steps
                   per sample). Must not exceed the number of rows in `df`.
    win_shift    : int >= 1, number of rows the window moves forward between
                   two consecutive samples.
    shuffle_size : buffer size handed to `Dataset.shuffle` (fixed seed of 1).
    batch_size   : number of windows per emitted batch.

    Returns
    -------
    A tensorflow dataset yielding `(features, labels)` batches, where
      features:
        - 0th dimension is the sliding-window index (aka sample index)
        - 1st dimension is the width of the sliding window (no. time-steps)
        - 2nd dimension is the number of features
        - if win_w == 1 the window dimension is dropped, i.e. the features
          are 2D: (sample index, number of features)
      labels:
        - one target per window with shape (sample index, 1); for win_w > 1
          it is the target of the last time-step inside the window.

    Raises
    ------
    ValueError : if `win_w` or `win_shift` is smaller than 1, or if `win_w`
                 is larger than the number of time-steps in `df`.
    '''
    # Reject non-positive window parameters up front; otherwise the failure
    # only shows up later inside the tf.data pipeline.
    if win_w < 1:
        raise ValueError(f'The width of the time-window must be >= 1, got: {win_w}.')
    if win_shift < 1:
        raise ValueError(f'The shift of the time-window must be >= 1, got: {win_shift}.')

    # Extract features and labels into two Numpy arrays
    X = df.iloc[:, :-1].values                     # Features
    y = df.iloc[:, -1].values.reshape((-1, 1))     # Targets

    # This transpose is taken to make the following dataset operations simpler:
    # every row of `arr` is the full time-series of one feature
    arr = X.T

    # Number of features and number of time instances
    n_features, n_time = arr.shape

    if win_w > n_time:
        raise ValueError(f'The width of the given time-window:{win_w} is not <= total number of time-steps: {n_time}.')

    # Stride between the input elements inside one window (consecutive rows)
    n_stride = 1

    # Total number of complete sliding windows. The window start advances by
    # `win_shift` rows, so the count has to be derived from `win_shift`;
    # deriving it from the element stride only holds for win_shift == 1 and
    # makes the reshape below fail for any larger shift.
    n_win = (n_time - win_w) // win_shift + 1

    # Create a dataset from the 2D numpy array of features (one element per feature)
    dsF = tf.data.Dataset.from_tensor_slices(arr)

    # Create a dataset from the 2D (n by 1) numpy array of targets
    dsL = tf.data.Dataset.from_tensor_slices(y)

    # Generate a tf dataset with flattened rolling windows for each feature
    def generate_flattened_rolling_windows(series):
        # `series` is the 1D time-series tensor of a single feature
        ds = tf.data.Dataset.from_tensor_slices(series)
        ds = ds.window(size=win_w, shift=win_shift, stride=n_stride, drop_remainder=True)
        ds = ds.flat_map(lambda w: w.batch(win_w))
        # Concatenate all windows of this feature into one flat vector
        ds = ds.unbatch()
        ds = ds.batch(n_win * win_w)
        return ds
    dsF = dsF.flat_map(generate_flattened_rolling_windows)
    # ---------------------------------------------------------------------

    # Generate a tf dataset with rolling windows for the targets
    def generate_rolling_windows(ds):
        ds = ds.window(size=win_w, shift=win_shift, stride=n_stride, drop_remainder=True)
        ds = ds.flat_map(lambda w: w.batch(win_w))
        if win_w != 1:
            # Keep only the target of the last time-step of every window
            ds = ds.map(lambda w: w[-1])
        return ds
    dsL = generate_rolling_windows(dsL)
    # ---------------------------------------------------------------------

    # Create a 3D numpy array of windowed time-series features:
    # (n_features, n_win*win_w) -> transpose -> (n_win, win_w, n_features)
    features = np.array([ele.numpy() for ele in dsF]).T.reshape((n_win, win_w, n_features))
    # Create a numpy array of windowed targets
    labels = np.array([win.numpy() for win in dsL])

    # Reduce dimensions if the sliding window has unit width i.e. win_w = 1
    if win_w == 1:
        features = features.reshape((n_win, n_features))
        labels = labels.reshape((-1, 1))

    # Form a tensorflow dataset from numpy features/labels
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))

    # Shuffle, batch and prefetch the data
    dataset = dataset.shuffle(buffer_size=shuffle_size, seed=1).batch(batch_size).prefetch(1)
    return dataset

# Function to create a windowed dataset for the validation data

In [0]:
'''
# Create windowed-dataset 
'''
def valid_windowed_ds(df: 'pd.DataFrame', win_w=2, win_shift=1, batch_size=32) -> 'tf.data.Dataset':
    '''
    Build a batched (NOT shuffled) tf.data.Dataset of sliding windows for validation.

    The rows of `df` must represent time instances in chronological order.
    Every column except the last one is treated as a feature; the last
    column holds the targets (labels). The windows are emitted in their
    original chronological order.

    Parameters
    ----------
    df         : Pandas DataFrame of features with the targets as last column.
    win_w      : int >= 1, width of the sliding window (number of time-steps
                 per sample). Must not exceed the number of rows in `df`.
    win_shift  : int >= 1, number of rows the window moves forward between
                 two consecutive samples.
    batch_size : number of windows per emitted batch.

    Returns
    -------
    A tensorflow dataset yielding `(features, labels)` batches, where
      features:
        - 0th dimension is the sliding-window index (aka sample index)
        - 1st dimension is the width of the sliding window (no. time-steps)
        - 2nd dimension is the number of features
        - if win_w == 1 the window dimension is dropped, i.e. the features
          are 2D: (sample index, number of features)
      labels:
        - one target per window with shape (sample index, 1); for win_w > 1
          it is the target of the last time-step inside the window.

    Raises
    ------
    ValueError : if `win_w` or `win_shift` is smaller than 1, or if `win_w`
                 is larger than the number of time-steps in `df`.
    '''
    # Reject non-positive window parameters up front; otherwise the failure
    # only shows up later inside the tf.data pipeline.
    if win_w < 1:
        raise ValueError(f'The width of the time-window must be >= 1, got: {win_w}.')
    if win_shift < 1:
        raise ValueError(f'The shift of the time-window must be >= 1, got: {win_shift}.')

    # Extract features and targets into two Numpy arrays
    X = df.iloc[:, :-1].values                     # Features
    y = df.iloc[:, -1].values.reshape((-1, 1))     # Targets

    # This transpose is taken to make the following dataset operations simpler:
    # every row of `arr` is the full time-series of one feature
    arr = X.T

    # Number of features and number of time instances
    n_features, n_time = arr.shape

    if win_w > n_time:
        raise ValueError(f'The width of the given time-window:{win_w} is not <= total number of time-steps: {n_time}.')

    # Stride between the input elements inside one window (consecutive rows)
    n_stride = 1

    # Total number of complete sliding windows. The window start advances by
    # `win_shift` rows, so the count has to be derived from `win_shift`;
    # deriving it from the element stride only holds for win_shift == 1 and
    # makes the reshape below fail for any larger shift.
    n_win = (n_time - win_w) // win_shift + 1

    # Create a dataset from the 2D numpy array of features (one element per feature)
    dsF = tf.data.Dataset.from_tensor_slices(arr)

    # Create a dataset from the 2D (n by 1) numpy array of targets
    dsL = tf.data.Dataset.from_tensor_slices(y)

    # Generate a tf dataset with flattened rolling windows for each feature
    def generate_flattened_rolling_windows(series):
        # `series` is the 1D time-series tensor of a single feature
        ds = tf.data.Dataset.from_tensor_slices(series)
        ds = ds.window(size=win_w, shift=win_shift, stride=n_stride, drop_remainder=True)
        ds = ds.flat_map(lambda w: w.batch(win_w))
        # Concatenate all windows of this feature into one flat vector
        ds = ds.unbatch()
        ds = ds.batch(n_win * win_w)
        return ds
    dsF = dsF.flat_map(generate_flattened_rolling_windows)
    # ---------------------------------------------------------------------

    # Generate a tf dataset with rolling windows for the targets
    def generate_rolling_windows(ds):
        ds = ds.window(size=win_w, shift=win_shift, stride=n_stride, drop_remainder=True)
        ds = ds.flat_map(lambda w: w.batch(win_w))
        if win_w != 1:
            # Keep only the target of the last time-step of every window
            ds = ds.map(lambda w: w[-1])
        return ds
    dsL = generate_rolling_windows(dsL)
    # ---------------------------------------------------------------------

    # Create a 3D numpy array of windowed time-series features:
    # (n_features, n_win*win_w) -> transpose -> (n_win, win_w, n_features)
    features = np.array([ele.numpy() for ele in dsF]).T.reshape((n_win, win_w, n_features))
    # Create a numpy array of windowed targets
    labels = np.array([win.numpy() for win in dsL])

    # Reduce dimensions if the sliding window has unit width i.e. win_w = 1
    if win_w == 1:
        features = features.reshape((n_win, n_features))
        labels = labels.reshape((-1, 1))

    # Form a tensorflow dataset from numpy features/labels
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))

    # Batch and prefetch the data (no shuffling: validation keeps chronological order)
    dataset = dataset.batch(batch_size).prefetch(1)
    return dataset

# Example 

In [13]:
# Contrived data: row i (i = 1..10) holds (i, 10*i, 100*i, 1000*i),
# i.e. three feature columns followed by the target column
data = np.outer(np.arange(1, 11), [1, 10, 100, 1000])

df = pd.DataFrame(data, columns=['F1', 'F2', 'F3', 'Target'])

# Chronological split: the first 70% of the rows train, the rest validate
train_valid_split = 0.7
split_idx = int(train_valid_split * len(df))
train = df.iloc[:split_idx, :]
valid = df.iloc[split_idx:, :]

print('\nThis is the original dataframe (contrived data for illustration only): ')
display(df)
print('\nThis is the training data : ')
display(train)
print('\nThis is the validation data : ')
display(valid)


# Windows of 3 time-steps, shuffled, for training
train_dataset = train_windowed_ds(train, win_w=3, win_shift=1, shuffle_size=3, batch_size=10)
print('--'*20)
print('\nThis is the shuffled train windowed dataset: ')


for x, y in train_dataset:
    print('features = ', x.numpy())
    print('labels = ', y.numpy())

# -----------------------------------------------------------------------------------------

# Windows of 2 time-steps, in chronological order, for validation
valid_dataset = valid_windowed_ds(valid, win_w=2, win_shift=1, batch_size=10)
print('--'*20)
print('\nThis is the valid windowed dataset: ')

for x, y in valid_dataset:
    print('features = ', x.numpy())
    print('labels = ', y.numpy())


This is the original dataframe (contrived data for illustration only): 


Unnamed: 0,F1,F2,F3,Target
0,1,10,100,1000
1,2,20,200,2000
2,3,30,300,3000
3,4,40,400,4000
4,5,50,500,5000
5,6,60,600,6000
6,7,70,700,7000
7,8,80,800,8000
8,9,90,900,9000
9,10,100,1000,10000



This is the training data : 


Unnamed: 0,F1,F2,F3,Target
0,1,10,100,1000
1,2,20,200,2000
2,3,30,300,3000
3,4,40,400,4000
4,5,50,500,5000
5,6,60,600,6000
6,7,70,700,7000



This is the validation data : 


Unnamed: 0,F1,F2,F3,Target
7,8,80,800,8000
8,9,90,900,9000
9,10,100,1000,10000


----------------------------------------

This is the shuffled train windowed dataset: 
features =  [[[  2  20 200]
  [  3  30 300]
  [  4  40 400]]

 [[  4  40 400]
  [  5  50 500]
  [  6  60 600]]

 [[  1  10 100]
  [  2  20 200]
  [  3  30 300]]

 [[  5  50 500]
  [  6  60 600]
  [  7  70 700]]

 [[  3  30 300]
  [  4  40 400]
  [  5  50 500]]]
labels =  [[4000]
 [6000]
 [3000]
 [7000]
 [5000]]
----------------------------------------

This is the valid windowed dataset: 
features =  [[[   8   80  800]
  [   9   90  900]]

 [[   9   90  900]
  [  10  100 1000]]]
labels =  [[ 9000]
 [10000]]
