In [1]:
import pandas as pd
from typing import List, Tuple, Union, Set

import numpy as np

In [2]:
trainingset_folder = "D:/data_mt/09_training/"
stock_data_folder = trainingset_folder + "stocks/"
stock_potential_folder = trainingset_folder + "stocks_w_potential/"

## 00_Tools

In [3]:
# load Fundamental Data
def load_reports(tickers: List[str]):
    df = pd.read_csv(trainingset_folder + "company_reports.csv", header=0)
    df.period = pd.to_datetime(df.period)
    df.filed = pd.to_datetime(df.filed)
    
    
    df['i_date'] = df.filed
    df.set_index('i_date', inplace = True)
    df.sort_index(inplace = True)

    return df[df.ticker.isin(tickers)]

In [137]:
def load_stock_history(ticker: str):
    df = pd.read_csv(stock_potential_folder + ticker[0] + "/" + ticker + ".csv")
    df.Date = pd.to_datetime(df.Date)
    
    df = df[df.Date > "2012-01-01"]
    df['i_date'] = df.Date
    df.set_index('i_date', inplace=True)
    df.sort_index(inplace = True)
        
    return df

In [5]:
def load_additional_info() -> pd.DataFrame:
    return pd.read_csv(trainingset_folder + "company_info.csv", sep=',', encoding='utf-8', header=0)

## 01_Basics

In [180]:
features_reports = ['r_workcapToAssets', 'r_currentRatio', 'r_deptToEquity', 'r_deptRatio', 'r_netProfitMargin', 'r_operatingMargin', 'r_netIncomeMargin', 
            'r_cashGenPowerRatio', 'r_extFinancingIndexRatio', 'r_returnOnEquity', 'r_returnOnAssets', 'r_assetsTurnoverRatio', 'r_dividendPayoutRatio', 
            'r_stockRepurchaseRatio', 'r_operatingCashFlowRatio', 'r_assetEfficiencyRatio', 'r_currentLiabCoverageRatio', 'r_longtermDeptCoverageRatio',
            
            # 'gr_revenue_n', 'gr_asscur_n', 'gr_assnoncur_n','gr_liabcur_n', 'gr_liabnoncur_n',
            
            'gr_revenue_p', 'gr_grosspr_n', 'gr_grosspr_p', 'gr_opiincome_n', 'gr_opiincome_p', 'gr_netincome_n', 'gr_netincome_p', 
            'gr_earnings_n', 'gr_earnings_p', 'gr_equity_n', 'gr_equity_p', 
            'gr_asscur_p', 'gr_assnoncur_p', 'gr_liabcur_p',
            'gr_liabnoncur_p', 'gr_cashfrominv_n', 'gr_cashfrominv_p', 
            'gr_cashfromfin_n', 'gr_cashfromfin_p', 'gr_cashfromope_n', 'gr_cashfromope_p', 
            'gr_dividends_n', 'gr_dividends_p',
            
            'fp__FY', 'fp__Q1', 'fp__Q2', 'fp__Q3', 'fp__Q4', ]

features_stockdata = ['high_norm', 'low_norm', 'open_norm', 'volume_norm', 'close_chg', 'volume_chg',
                     'day_of_week', 'day_of_month', 'day_of_year', 'week_of_year', 'month_of_year',
                     'pr_p2e_norm', 'pr_p2b_norm', 'pr_p2egr_1y_norm'
                     ]

features_add_info = [
            'sec__BasicMaterials', 'sec__CommunicationServices', 'sec__ConsumerCyclical', 'sec__ConsumerDefensive', 'sec__Energy', 'sec__FinancialServices',
            'sec__Healthcare',  'sec__IndustrialGoods', 'sec__Industrials', 'sec__RealEstate', 'sec__Technology', 'sec__Utilities',
            
            'ind__AerospaceDefense', 'ind__ApparelRetail', 'ind__AssetManagement', 'ind__AutoParts', 'ind__BanksRegional', 'ind__Biotechnology', 
            'ind__CommunicationEquipment',              'ind__CreditServices', 'ind__DiagnosticsResearch', 'ind__ElectronicComponents', 
            'ind__InformationTechnologyServices', 'ind__InsurancePropertyCasualty', 'ind__MedicalDevices', 'ind__MedicalInstrumentsSupplies',
            'ind__OilGasEP', 'ind__OilGasEquipmentServices', 'ind__OilGasMidstream', 'ind__Other', 'ind__PackagedFoods', 'ind__Restaurants',
            'ind__ScientificTechnicalInstruments', 'ind__SemiconductorEquipmentMaterials', 'ind__Semiconductors', 'ind__SoftwareApplication',
            'ind__SoftwareInfrastructure', 'ind__SpecialtyBusinessServices', 'ind__SpecialtyChemicals', 'ind__SpecialtyIndustrialMachinery',
            'ind__SpecialtyRetail',

            'mc_top10', 'mc_top20', 'mc_top30', 'mc_top50','mc_top100','mc_top200']

tabular_features = list(features_stockdata)
tabular_features.extend(features_add_info)

label = 'r_potential'

In [183]:
len(features_add_info)

47

In [201]:
tickers_to_process = ['AAPL', 'MSFT']

In [202]:
# laod all reports
reports_df = load_reports(tickers_to_process)
reports_df.shape

(68, 85)

In [10]:
from sklearn.preprocessing import MinMaxScaler
reports_features_scaler = MinMaxScaler()
reports_features_scaler.fit(reports_df[features_reports])

MinMaxScaler()

In [11]:
add_info = load_additional_info()

### Params

In [115]:
nr_of_features = len(features_reports)
lookback_size = 8

## 02_build dataset

- sec daten history
- stockdaten
- neue features
- industrie / sektor

In [169]:
def create_sequence_series(reports_ticker_df: pd.DataFrame) -> pd.Series:
    """
    creates the timeseries arrays based on the features in the provided dataframe.
    returns them as a pandas series, so that they can be merged with other data
    """
    # scale the data, float32 should be enough
    np_feature_rows = reports_features_scaler.transform(reports_ticker_df[features_reports]).astype(np.float32)
    
    nr_of_rows      = np_feature_rows.shape[0]
    nr_of_sequences = nr_of_rows - lookback_size + 1 # example: if there are 8 rows and the lookback size is 8, we can create 1 timeseries arr

    # create empty array to hold the data
    feature_seq_arr = np.zeros((nr_of_sequences, lookback_size, nr_of_features))
    
    # create the timeseries (sequences) arrays
    for i in range(0, nr_of_sequences):
        inputs[i] = np_feature_rows[i : i + lookback_size]
    
    # convert into pandas series, there might be a more efficient way, but it works
    flat_input = inputs.reshape(-1, lookback_size * nr_of_features)    
    pd_features_col = pd.Series(flat_input.tolist(), name='features').apply(lambda field: np.array(field).reshape(lookback_size, nr_of_features))

    return pd_features_col

In [154]:
features_to_forward = ['filed', 'c_NetIncomeLoss', 'c_PaymentsOfDividendsTotal', 'Assets', 'Liabilities', 'gr_netincome_p']

def merge_report_data_with_sequence_series(reports_ticker_df: pd.DataFrame, pd_features_col: pd.Series) -> pd.DataFrame:
    
    # only select those cols, which are needed for the further processing, drop index, so that the feature col can be added
    reports_ticker_merge_df = reports_ticker_df[lookback_size - 1:][features_to_forward].reset_index(drop=True)
    
    df_combined = pd.concat([reports_ticker_merge_df, pd_features_col], axis=1)
    df_combined['i_date'] = df_combined.filed
    df_combined.set_index('i_date', inplace=True)
    
    return df_combined

In [155]:
def merge_stock_data(pd_combined_df: pd.DataFrame, stock_data_df: pd.DataFrame) -> pd.DataFrame:
    df_with_stock = pd.merge(pd_combined_df, stock_data_df, left_index=True, right_index=True, how='outer')
    df_with_stock.sort_index(inplace=True)
    
    df_with_stock = df_with_stock.fillna(method="ffill")
    df_with_stock = df_with_stock.dropna(subset=['filed', 'Date'])
    return df_with_stock

In [159]:
def create_price_ratio_features_in_place(combined_data: pd.DataFrame, shares_outstanding:int):
    """Calculates stock price depending features. 
    """
    
    combined_data['pr_p2e'] = combined_data.Close * shares_outstanding / (
            combined_data.c_NetIncomeLoss - combined_data.c_PaymentsOfDividendsTotal)
    
    combined_data['pr_p2b'] = combined_data.Close * shares_outstanding / (
            combined_data.Assets - combined_data.Liabilities)
    
    combined_data['pr_p2egr_1y'] = combined_data.pr_p2e / (
            combined_data.gr_netincome_p * 100)  # approximated / denominator in percent

    # caping p2e: in order to prevent meaningless values, we need to restrict the range. The max value is limited to 100.
    # if new_df.c_NetIncomeLoss - new_df.c_PaymentsOfDividendsTotal results in a negativ value, we set p2e to 100, which is rather a "bad" value.
    combined_data.loc[(combined_data.pr_p2e < 0) | (combined_data.pr_p2e > 100), 'pr_p2e'] = 100
    
    # caping p2egr: the lower the better. generally you would like to see a ratio lower than 1, so a 5 can be considered a really bad value so we restrict it to 5
    # if growth number is 0 or less, we set p2egr to 5
    combined_data.loc[(combined_data.pr_p2egr_1y > 5) | (combined_data.pr_p2egr_1y <= 0.0), 'pr_p2egr_1y'] = 5
    
    # normalize
    combined_data['pr_p2e_norm'] = combined_data['pr_p2e'] / 100  # 100 is the max as defined above
    combined_data['pr_p2b_norm'] = combined_data['pr_p2b'] / 100  # p2b of 100 is pretty extreme, values above are very rare
    combined_data['pr_p2egr_1y_norm'] = combined_data['pr_p2egr_1y'] / 5  # 5 is max as defined above

In [160]:
def build_set_for_ticker(ticker) -> pd.DataFrame:
    add_info_ticker = add_info[add_info.ticker == ticker]
    reports_ticker_df = reports_df[reports_df.ticker == ticker]
    stock_data_df = load_stock_history(ticker)
      
    pd_features_col = create_sequence_series(reports_ticker_df)
    pd_combined_df = merge_report_data_with_sequence_series(reports_ticker_df, pd_features_col)
    pd_with_stock_df = merge_stock_data(pd_combined_df, stock_data_df)
    
    shares_outstanding = add_info_ticker.sharesOutstanding.to_list()[0]
    create_price_ratio_features_in_place(pd_with_stock_df, shares_outstanding)
    
    pd_complete_df = pd.merge(pd_with_stock_df, add_info_ticker, how="outer", on="ticker")
    
    return pd_complete_df

In [190]:
def build_train_testset(tickers: List[str]):
    
    # traindaten < 1.1.2018
    # traindaten nur jedes 2. nehmen
    # traindaten label -> muss am Ende normalisiert werden
    # => 3 numpy Arrays
    #    timeseries arrays
    #    tables arrays
    #    labels arrays
    # testdaten >= 1.1.2018 -> können als Pandas bleiben, oder?
    
    train_series_x   = [] # timeseries features
    train_features_x = [] # tabular features
    train_y          = [] # target label
    train_pd_list    = [] # list t
    
    test_pd_list     = []
    
    
    for ticker in tickers:
        pd_complete_ticker = build_set_for_ticker(ticker)
        pd_train_ticker = pd_complete_ticker[pd_complete_ticker.filed < "2018-01-01"]
        pd_test_ticker = pd_complete_ticker[pd_complete_ticker.filed  >= "2018-01-01"]
        
        
        test_pd_list.append(pd_complete_ticker[pd_complete_ticker.filed >= "2018-01-01"])
        

        
        arr_of_arr_of_series_features = pd_train_ticker.features.to_numpy()
        train_series_x_ticker = np.concatenate(arr_of_arr_of_series_features) \
                                  .reshape(arr_of_arr_of_series_features.shape[0], lookback_size, nr_of_features)
    
        train_features_x_ticker = pd_train_ticker[tabular_features].to_numpy()
        
        train_y_ticker = pd_train_ticker.r_potential.to_numpy()
        
        if len(train_y) == 0:
            train_series_x   = train_series_x_ticker
            train_features_x = train_features_x_ticker
            train_y          = train_y_ticker
        else:
            train_series_x   = np.concatenate((train_series_x, train_series_x_ticker))
            train_features_x = np.concatenate((train_features_x, train_features_x_ticker))
            train_y          = np.concatenate((train_y, train_y_ticker))
    
    return train_series_x, train_features_x, train_y, test_pd_list

In [203]:
train_series_x, train_features_x, train_y, test_list = build_train_testset(['AAPL', 'MSFT'])
print(train_series_x.shape)
print(train_features_x.shape)
print(train_y.shape)
print(len(test_list))

# Todo: Label skalieren und testset builden .. 

(2025, 8, 46)
(2025, 61)
(2025,)
2


## label am schluss skalieren !

In [200]:
aapl_df = reports_df[reports_df.ticker == 'AAPL']
print(aapl_df.shape)
msft_df = reports_df[reports_df.ticker == 'MSFT']
print(msft_df.shape)

(34, 85)
(0, 85)


In [177]:
build_set_for_ticker('AAPL').r_potential.to_numpy().shape

(1731,)

In [174]:
np_arr_of_arr = build_set_for_ticker('AAPL').features.to_numpy()
np_arr_of_arr.shape

(1731,)

In [175]:
np.concatenate(np_arr_of_arr).reshape(np_arr_of_arr.shape[0], lookback_size, nr_of_features).shape

(1731, 8, 46)

In [199]:
create_sequence_series(msft_df)

ValueError: Found array with 0 sample(s) (shape=(0, 46)) while a minimum of 1 is required.

## XX_Trials

In [107]:
np_df_features = reports_features_scaler.transform(reports_df[features_reports]).astype(np.float32)
print(np_df_features.dtype)
np_df_features.shape

float32


(34, 46)

In [79]:
lookback_size = 8
number_of_features = len(features_reports)
number_of_rows = len(np_df_features)

number_of_sequences = number_of_rows - lookback + 1

inputs = np.zeros((number_of_sequences, lookback, number_of_features))
print(inputs.shape)

(27, 8, 46)


In [96]:
for i in range(0, number_of_sequences):
    inputs[i] = np_df_features[i : i + lookback]
    
print (inputs.shape)

(27, 8, 46)


In [98]:
flat_input = inputs.reshape(-1, lookback_size * number_of_features)
flat_input.shape

(27, 368)

In [102]:
features_col = pd.Series(flat_input.tolist(), name='features').apply(lambda field: np.array(field).reshape(lookback, number_of_features))
features_col.shape

(27,)

In [103]:
date_col = reports_df.filed[lookback - 1:].reset_index(drop=True)
date_col.shape

(27,)

In [104]:
 df_combined = pd.concat([date_col, features_col], axis=1)

Unnamed: 0,filed,features
0,2014-01-28,"[[0.7868078389231806, 0.6630857047421554, 0.01..."
1,2014-04-24,"[[0.776629286748127, 0.6496306048392506, 0.006..."
2,2014-07-23,"[[0.7078445773271538, 0.5637078809160074, 0.01..."
3,2014-10-27,"[[0.7519515180156731, 0.6178265461170032, 0.03..."
4,2015-01-28,"[[0.9394095609049016, 0.8921025625874786, 0.0,..."
5,2015-04-28,"[[0.9999999999999999, 1.0000000000000002, 0.06..."
6,2015-07-22,"[[0.8640773461987664, 0.7722151793702814, 0.08..."
7,2015-10-28,"[[0.706357307915412, 0.5619409510885764, 0.101..."
8,2016-01-27,"[[0.8280166003209201, 0.7196892650544637, 0.09..."
9,2016-04-27,"[[0.6832097720805139, 0.5349050497826269, 0.13..."


In [88]:
np_df_features

array([[0.78680784, 0.6630857 , 0.0119307 , ..., 1.        , 0.        ,
        0.        ],
       [0.77662929, 0.6496306 , 0.00691177, ..., 0.        , 1.        ,
        0.        ],
       [0.70784458, 0.56370788, 0.01764061, ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.79937715, 0.67998255, 0.80159741, ..., 0.        , 0.        ,
        0.        ],
       [0.70795353, 0.56383747, 0.89674084, ..., 1.        , 0.        ,
        0.        ],
       [0.68206885, 0.53359463, 1.        , ..., 0.        , 1.        ,
        0.        ]])

In [83]:
new_df = pd.DataFrame(np_df_features)
new_df.shape

(34, 46)

In [90]:
filed_col = reports_df.filed.reset_index(drop=True)
print(filed_col.shape)
result = pd.concat([filed_col, new_df], axis=1, ignore_index=True)
result

(34,)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,37,38,39,40,41,42,43,44,45,46
0,2012-04-25,0.786808,0.663086,0.011931,0.035571,1.0,1.0,1.0,1.0,1.0,...,1.0,0.0,0.353778,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,2012-07-25,0.776629,0.649631,0.006912,0.020817,0.565625,0.65095,0.603353,0.0,0.997016,...,1.0,0.0,0.353778,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,2012-10-31,0.707845,0.563708,0.017641,0.051999,0.66649,0.777428,0.733998,1.0,0.968144,...,1.0,0.0,0.353778,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,2013-01-24,0.751952,0.617827,0.034704,0.098949,0.167814,0.568224,0.49638,0.622115,0.856895,...,1.0,0.0,0.353778,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,2013-04-24,0.93941,0.892103,0.0,0.0,0.059409,0.412637,0.310148,1.0,0.85987,...,0.0,0.0,0.538709,0.0,0.0,0.0,0.0,1.0,0.0,0.0
5,2013-07-24,1.0,1.0,0.061922,0.167797,0.0,0.258043,0.100323,1.0,0.849345,...,0.0,0.0,0.41853,0.0,0.0,0.0,0.0,0.0,1.0,0.0
6,2013-10-30,0.864077,0.772215,0.080637,0.211303,0.071725,0.405263,0.290171,1.0,0.777455,...,1.0,0.0,0.427019,0.0,0.0,1.0,0.0,0.0,0.0,0.0
7,2014-01-28,0.706357,0.561941,0.101279,0.256077,0.100912,0.498011,0.381372,0.0,0.751768,...,1.0,0.0,0.311,0.0,0.0,0.0,1.0,0.0,0.0,0.0
8,2014-04-24,0.828017,0.719689,0.093698,0.240003,0.232948,0.467587,0.354666,0.848017,0.446036,...,1.0,0.0,0.364034,0.0,0.0,0.0,0.0,1.0,0.0,0.0
9,2014-07-23,0.68321,0.534905,0.136333,0.325314,0.237416,0.337811,0.203833,0.0,0.656839,...,1.0,0.0,0.435719,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [85]:
# funktioniert
filed_col = reports_df.filed.reset_index().drop(['i_date'], axis = 1).filed
print(filed_col.shape)
result = pd.concat([filed_col, new_df], axis=1, ignore_index=True)
result

(34,)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,37,38,39,40,41,42,43,44,45,46
0,2012-04-25,0.786808,0.663086,0.011931,0.035571,1.0,1.0,1.0,1.0,1.0,...,1.0,0.0,0.353778,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,2012-07-25,0.776629,0.649631,0.006912,0.020817,0.565625,0.65095,0.603353,0.0,0.997016,...,1.0,0.0,0.353778,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,2012-10-31,0.707845,0.563708,0.017641,0.051999,0.66649,0.777428,0.733998,1.0,0.968144,...,1.0,0.0,0.353778,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,2013-01-24,0.751952,0.617827,0.034704,0.098949,0.167814,0.568224,0.49638,0.622115,0.856895,...,1.0,0.0,0.353778,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,2013-04-24,0.93941,0.892103,0.0,0.0,0.059409,0.412637,0.310148,1.0,0.85987,...,0.0,0.0,0.538709,0.0,0.0,0.0,0.0,1.0,0.0,0.0
5,2013-07-24,1.0,1.0,0.061922,0.167797,0.0,0.258043,0.100323,1.0,0.849345,...,0.0,0.0,0.41853,0.0,0.0,0.0,0.0,0.0,1.0,0.0
6,2013-10-30,0.864077,0.772215,0.080637,0.211303,0.071725,0.405263,0.290171,1.0,0.777455,...,1.0,0.0,0.427019,0.0,0.0,1.0,0.0,0.0,0.0,0.0
7,2014-01-28,0.706357,0.561941,0.101279,0.256077,0.100912,0.498011,0.381372,0.0,0.751768,...,1.0,0.0,0.311,0.0,0.0,0.0,1.0,0.0,0.0,0.0
8,2014-04-24,0.828017,0.719689,0.093698,0.240003,0.232948,0.467587,0.354666,0.848017,0.446036,...,1.0,0.0,0.364034,0.0,0.0,0.0,0.0,1.0,0.0,0.0
9,2014-07-23,0.68321,0.534905,0.136333,0.325314,0.237416,0.337811,0.203833,0.0,0.656839,...,1.0,0.0,0.435719,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [71]:
df1 = pd.DataFrame(
     {
         "A": ["A0", "A1", "A2", "A3"],
         "B": ["B0", "B1", "B2", "B3"],
         "C": ["C0", "C1", "C2", "C3"],
         "D": ["D0", "D1", "D2", "D3"],
     },
     index=[0, 1, 2, 3],
 )

s1 = pd.Series(["X0", "X1", "X2", "X3"], name="X")

result = pd.concat([df1, s1], axis=1)

In [72]:
result

Unnamed: 0,A,B,C,D,X
0,A0,B0,C0,D0,X0
1,A1,B1,C1,D1,X1
2,A2,B2,C2,D2,X2
3,A3,B3,C3,D3,X3


### reshaping

In [55]:
input_np = np.arange(1, 33).reshape(8,4)
data_rows = input_np.shape[0]
input_np

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12],
       [13, 14, 15, 16],
       [17, 18, 19, 20],
       [21, 22, 23, 24],
       [25, 26, 27, 28],
       [29, 30, 31, 32]])

In [57]:
test_lookback = 3
nr_of_sequences = data_rows - test_lookback + 1

test_inputs = np.zeros((nr_of_sequences, test_lookback, 4))
print(test_inputs.shape)

(6, 3, 4)


In [52]:
#for i in range(lookback, len(np_df_features)):
#    inputs[i-lookback] = np_df_features[i-lookback:i]

In [63]:
for i in range(0, nr_of_sequences):
    test_inputs[i] = input_np[i : i + test_lookback]

In [64]:
print(test_inputs.shape)
test_inputs

(6, 3, 4)


array([[[ 1.,  2.,  3.,  4.],
        [ 5.,  6.,  7.,  8.],
        [ 9., 10., 11., 12.]],

       [[ 5.,  6.,  7.,  8.],
        [ 9., 10., 11., 12.],
        [13., 14., 15., 16.]],

       [[ 9., 10., 11., 12.],
        [13., 14., 15., 16.],
        [17., 18., 19., 20.]],

       [[13., 14., 15., 16.],
        [17., 18., 19., 20.],
        [21., 22., 23., 24.]],

       [[17., 18., 19., 20.],
        [21., 22., 23., 24.],
        [25., 26., 27., 28.]],

       [[21., 22., 23., 24.],
        [25., 26., 27., 28.],
        [29., 30., 31., 32.]]])

In [65]:
test_inputs = test_inputs.reshape(-1, test_lookback, 4)
print (test_inputs.shape)
test_inputs

(6, 3, 4)


array([[[ 1.,  2.,  3.,  4.],
        [ 5.,  6.,  7.,  8.],
        [ 9., 10., 11., 12.]],

       [[ 5.,  6.,  7.,  8.],
        [ 9., 10., 11., 12.],
        [13., 14., 15., 16.]],

       [[ 9., 10., 11., 12.],
        [13., 14., 15., 16.],
        [17., 18., 19., 20.]],

       [[13., 14., 15., 16.],
        [17., 18., 19., 20.],
        [21., 22., 23., 24.]],

       [[17., 18., 19., 20.],
        [21., 22., 23., 24.],
        [25., 26., 27., 28.]],

       [[21., 22., 23., 24.],
        [25., 26., 27., 28.],
        [29., 30., 31., 32.]]])

In [68]:
flat = test_inputs.reshape(-1, test_lookback * 4)
flat

array([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.],
       [ 5.,  6.,  7.,  8.,  9., 10., 11., 12., 13., 14., 15., 16.],
       [ 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20.],
       [13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24.],
       [17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28.],
       [21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.]])

In [71]:
flat.tolist()

[[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0],
 [5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0],
 [9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0],
 [13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0],
 [17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0],
 [21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0]]

In [72]:
pd.Series(flat.tolist())

0    [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, ...
1    [5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13...
2    [9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0...
3    [13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20....
4    [17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24....
5    [21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28....
dtype: object

In [94]:
pd.Series(flat.tolist()).apply(lambda field: np.array(field).reshape(test_lookback, 4))

0    [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0], [...
1    [[5.0, 6.0, 7.0, 8.0], [9.0, 10.0, 11.0, 12.0]...
2    [[9.0, 10.0, 11.0, 12.0], [13.0, 14.0, 15.0, 1...
3    [[13.0, 14.0, 15.0, 16.0], [17.0, 18.0, 19.0, ...
4    [[17.0, 18.0, 19.0, 20.0], [21.0, 22.0, 23.0, ...
5    [[21.0, 22.0, 23.0, 24.0], [25.0, 26.0, 27.0, ...
dtype: object