In [0]:
# Pin TensorFlow to exactly 2.0.0 for this notebook: later cells iterate tf.data
# datasets directly in Python and call .numpy() on their elements.
!pip install tensorflow==2.0.0

Collecting tensorflow==2.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/46/0f/7bd55361168bb32796b360ad15a25de6966c9c1beb58a8e30c01c8279862/tensorflow-2.0.0-cp36-cp36m-manylinux2010_x86_64.whl (86.3MB)
[K     |████████████████████████████████| 86.3MB 37kB/s 
Collecting tensorboard<2.1.0,>=2.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/76/54/99b9d5d52d5cb732f099baaaf7740403e83fe6b0cedde940fabd2b13d75a/tensorboard-2.0.2-py3-none-any.whl (3.8MB)
[K     |████████████████████████████████| 3.8MB 28.7MB/s 
Collecting tensorflow-estimator<2.1.0,>=2.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/fc/08/8b927337b7019c374719145d1dceba21a8bb909b93b1ad6f8fb7d22c1ca1/tensorflow_estimator-2.0.1-py2.py3-none-any.whl (449kB)
[K     |████████████████████████████████| 450kB 48.4MB/s 
Collecting google-auth<2,>=1.6.3
[?25l  Downloading https://files.pythonhosted.org/packages/7b/cb/786dc53d93494784935a62947643b48250b84a882474e714f9af5e1a1928/google_aut

In [0]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [0]:
'''
# Create windowed-dataset 
'''
def generate_windowed_ds(df: 'pandas DataFrame',  win_w = 2, win_shift = 1):
    '''
    Build a sliding-window tf.data dataset of (features, target) pairs.

    - Takes in a Pandas dataframe of features and a target (label) column.
      The rows should represent "time" instances in chronological order.
      - Targets (labels) must be the last column of the DataFrame;
        every other column is treated as a feature.

    - win_w     : user-defined 'int' (>= 1) that determines the width of the
                  sliding window, i.e. the number of consecutive rows per sample
    - win_shift : user-defined 'int' (>= 1) that determines the forward shift
                  of the sliding window between two consecutive samples

    - Windows start at rows 0, win_shift, 2*win_shift, ... and only complete
      windows are kept (a trailing partial window is dropped).

    - Returns:
        A tensorflow dataset built with tf.data.Dataset.from_tensor_slices
          With features:
            - 0th dimension representing the sliding-window-index (sample-index)
            - 1st dimension representing window-width: the number
              of time instances in each sliding-window
            - 2nd dimension representing the number of features
            - If win_w = 1 the window-width dimension is dropped, so the
              features are 2D: (sample-index, number of features)
          And targets (always of shape (number of windows, 1)):
            - If win_w = 1, then each time instance has a target (label)
            - If win_w > 1, then each sliding-window of multiple-features has a label
                - The target accompanies the last time instance in the sliding window

    - Raises:
        ValueError if win_w < 1, if win_shift < 1, or if win_w is larger than
        the number of rows (time steps) in the DataFrame.
    '''
    if win_w < 1:
        raise ValueError(f'The width of the time-window must be >= 1, got: {win_w}.')
    if win_shift < 1:
        raise ValueError(f'The shift of the time-window must be >= 1, got: {win_shift}.')

    # Extract features and labels into two Numpy arrays
    X = df.iloc[:, :-1].values                    # Features, shape (n_time, n_features)
    y = df.iloc[:, -1].values.reshape((-1, 1))    # Targets,  shape (n_time, 1)

    # Number of time instances and number of features
    n_time, n_features = X.shape

    if win_w > n_time:
        raise ValueError(f'The width of the given time-window:{win_w} is not <= total number of time-steps: {n_time}.')

    # First row of every complete window. The number of windows must be derived
    # from win_shift: assuming one window per row breaks the final shapes as
    # soon as win_shift > 1.
    win_starts = np.arange(0, n_time - win_w + 1, win_shift)
    n_win = len(win_starts)

    # Row indices of every window, shape (n_win, win_w):
    # row_idx[i, j] = win_starts[i] + j
    row_idx = win_starts[:, None] + np.arange(win_w)[None, :]

    # Gather the windows: (n_win, win_w, n_features)
    features = X[row_idx]
    # The label of a window is the target of its last time instance: (n_win, 1)
    labels = y[row_idx[:, -1]]

    # Reduce dimensions if the sliding window has unit width i.e. win_w = 1
    if win_w == 1:
        features = features.reshape((n_win, n_features))

    # Form a tensorflow dataset from numpy features/labels
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    return dataset

# Example 

In [17]:
# Hand-made table for illustration: every row is one time step, holding three
# feature values followed by the target value in the last column.
data = np.array([
    [1, 10, 100, 1000],
    [2, 20, 200, 2000],
    [3, 30, 300, 3000],
    [4, 40, 400, 4000],
])
df = pd.DataFrame(data=data, columns=['F1', 'F2', 'F3', 'Target'])

print('\nThis is the the original dataframe (contrived data for illustration only): ')
display(df)

# Window the table: three consecutive rows per sample, advancing one row at a time
dataset = generate_windowed_ds(df, win_w=3, win_shift=1)

# Separator line, then the heading (same text as two consecutive prints)
print('--'*20, '\nThis is the windowed dataset: ', sep='\n')

# Show each (feature window, target) pair as plain numpy arrays
for x, y in dataset:
    print(x.numpy(), y.numpy())


This is the the original dataframe (contrived data for illustration only): 


Unnamed: 0,F1,F2,F3,Target
0,1,10,100,1000
1,2,20,200,2000
2,3,30,300,3000
3,4,40,400,4000


----------------------------------------

This is the windowed dataset: 
[[  1  10 100]
 [  2  20 200]
 [  3  30 300]] [3000]
[[  2  20 200]
 [  3  30 300]
 [  4  40 400]] [4000]
