In [1]:
import pandas as pd
import numpy as np

## Main Function

In [2]:
def df_2D_to_3D(df, n_lag: int, n_predict: int):
    """ 
    Turn a dataframe into a 3d data suitable for LSTM.

    Args:
    df: Our dataset (no date or other unncessary columns)
    n_lag: Number of past days we want to use to predict the future.
    n_predict: Number of days we want to look into the future based on the past days.

    Returns:
    trainX: a 3rd array containing our training dataset
    trainY: a 2d array containing our price values (labels)
    """
    trainX = []
    trainY = []
    arr_data = np.array(df)
    for i in range(n_lag, len(arr_data) - n_predict +1):
        trainX.append(arr_data[i - n_lag : i, 0 : arr_data.shape[1]])
        trainY.append(arr_data[i : i + n_predict])
    trainX, trainY = np.array(trainX), np.array(trainY)
    return trainX, trainY

In [3]:
df_data = pd.read_csv('GE-2.csv')
print(f"shape of the dataset: {df_data.shape}")
print(f"list of columns: {list(df_data.columns)}")
df_data

shape of the dataset: (1152, 7)
list of columns: ['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2017-01-03,30.451923,30.615385,30.192308,30.471153,28.363588,33435480
1,2017-01-04,30.528847,30.605768,30.403847,30.480768,28.372541,22300616
2,2017-01-05,30.355768,30.528847,30.105768,30.307692,28.211433,26891072
3,2017-01-06,30.365385,30.548077,30.153847,30.394232,28.291990,23005632
4,2017-01-09,30.423077,30.442308,30.221153,30.250000,28.157734,22113000
...,...,...,...,...,...,...,...
1147,2021-07-26,12.660000,12.940000,12.630000,12.920000,12.920000,53828900
1148,2021-07-27,13.330000,13.470000,12.800000,13.080000,13.080000,119444300
1149,2021-07-28,13.150000,13.480000,13.040000,13.130000,13.130000,68568300
1150,2021-07-29,13.190000,13.380000,13.090000,13.290000,13.290000,54276500


In [4]:
# dropping the Data column
df_data_new = df_data[['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']]
# Adding a syntetic price
df_data_new['price'] = 100

In [5]:
n_predict = 1   
n_lag = 14  
trainX, trainY = df_2D_to_3D(df=df_data_new , n_lag=n_lag, n_predict=n_predict)
print(f"trainX shape: {trainX.shape}, trainY shape: {trainY.shape}")

trainX shape: (1138, 14, 7), trainY shape: (1138, 1, 7)


### Checking the result

In [6]:
len(np.array(df_data_new)) - n_lag + 1

1139