In [None]:
import numpy as np
import pandas as pd

In [None]:
def _require_positive_int(name, value):
    """Raise unless 'value' is an integer >= 1 (booleans are rejected)."""
    if isinstance(value, bool) or not isinstance(value, (int, np.integer)):
        raise TypeError('{} must be a positive integer, got {!r}.'.format(name, value))
    if value < 1:
        raise ValueError('{} must be a positive integer, got {!r}.'.format(name, value))


def convert_to_sequence_uni_output(data = None, timesteps = None, step_size = None, forecast = None, sep_target = None):
    """
    Converts uni or multivariate data into windowed sequence data ready for a sequence machine learning model.

    Each sample is a window of 'timesteps' consecutive rows of data. Its target is the 'forecast' rows that
    immediately follow the window, taken from the last column of data (for univariate data that is the only
    column), or from sep_target when one is supplied. Rows are matched by position, not by index label.

    Only complete windows are returned: a window is produced only if all 'timesteps' input rows and all
    'forecast' target rows exist, so X and y always contain the same number of samples.

    - data : list, numpy array, or pandas dataframe. The input data for the machine learning model
    - timesteps : int >= 1, number of timesteps fed to the model per sample
    - step_size : int >= 1, the interval between the first element of each sample e.g step_size of 2, [0,1,2...],
                  [2,3,4...], [4,5,6..]
    - forecast : int >= 1, the number of elements next in the sequence to forecast
    - sep_target : list, numpy array, or pandas dataframe with a single column. A separate target variable
                   from the input data

    Returns a tuple (X, y):
    - X : numpy array of shape (samples, timesteps, features)
    - y : numpy array with one entry per sample. Per-sample shape depends on the inputs:
            univariate data, forecast == 1            -> scalar          (y is (samples,))
            univariate data, forecast  > 1            -> (forecast,)     (y is (samples, forecast))
            multivariate data                         -> (forecast,)     (y is (samples, forecast))
            sep_target, univariate data, forecast == 1 -> (1,)           (y is (samples, 1))
            sep_target, otherwise                     -> (forecast, 1)   (y is (samples, forecast, 1))
      If the data is too short for a single complete window, X has shape (0, timesteps, features) and y is empty.

    Raises:
    - TypeError : data has no columns, sep_target is not a single column, or timesteps / step_size / forecast
                  is not an integer
    - ValueError : timesteps, step_size or forecast is smaller than 1
    """
    # Convert input and target data into dataframes so lists, arrays and frames are handled uniformly
    data = pd.DataFrame(data)
    if sep_target is not None:
        sep_target = pd.DataFrame(sep_target)
        if sep_target.shape[1] != 1:
            raise TypeError ('sep_target must be a univariate list, numpy array, or pandas dataframe.')

    features = data.shape[1]
    if features < 1:
        raise TypeError ('Data structure/format/type not recognised. Please use a numpy array or pandas dataframe.')
    univariate = features == 1

    _require_positive_int('timesteps', timesteps)
    _require_positive_int('step_size', step_size)
    _require_positive_int('forecast', forecast)

    # Pull the raw arrays out once; slicing numpy arrays in the loop is far cheaper than repeated .iloc calls
    inputs = data.values
    if sep_target is None:
        # Column taken on its own so the targets keep that column's dtype
        target_source = data.iloc[:, -1].values
    else:
        target_source = sep_target.values # shape (rows, 1)

    # Last start position for which both the input window and its forecast horizon are fully available.
    # Slices past the end do not raise, they silently shrink, so the bound has to be enforced explicitly.
    last_start = min(len(inputs), len(target_source)) - (timesteps + forecast)

    samples = list() # predictor X
    targets = list() # target Y
    for start in range(0, last_start + 1, step_size):
        split = start + timesteps
        samples.append(inputs[start:split])
        target = target_source[split:split + forecast]
        if univariate and forecast == 1:
            # Single-step univariate targets are unwrapped to one element per sample
            target = target[0]
        targets.append(target)

    # The reshape is a no-op for real windows and gives the empty result its documented 3D shape
    X = np.array(samples).reshape((len(samples), timesteps, features))
    return X, np.array(targets)