In [114]:
import numpy as np
import pandas as pd
import logging
import os
from scipy import interpolate

In [119]:
def _alignSheet(df):
    """
    Resample EPS onto the longer time base and trim LP/TRI to the same length.
    input:
        df: DataFrame with the columns 'TIME', 'TIME.1', 'EPS', 'LP', 'TRI'
    output:
        DataFrame with the columns ['TIME', 'EPS', 'LP', 'TRI']
    """
    # Row 0 of every column is skipped, exactly as the original code did
    # (presumably a units/sub-header row -- TODO confirm against the raw files).
    time = df.loc[:, 'TIME'].values[1:]
    time1 = df.loc[:, 'TIME.1'].values[1:]
    time = time[~np.isnan(time)]
    time1 = time1[~np.isnan(time1)]

    # The shorter time column is used as the EPS time base, the longer one as
    # the LP/TRI time base (assumes EPS was sampled on the shorter -- TODO confirm).
    (timeEps, timeLP) = (time, time1) if len(time) < len(time1) else (time1, time)
    eps = df.loc[:, 'EPS'].values[1:]
    eps = eps[~np.isnan(eps)]

    # Keep only the samples strictly before the last EPS time stamp so the
    # interpolation is never asked for a value past the end of the EPS record.
    timeLP = timeLP[timeLP < np.max(timeEps)]
    funcInterp = interpolate.interp1d(timeEps, eps, bounds_error=None)
    epsFullLength = funcInterp(timeLP)

    # LP and TRI are cut to the first len(timeLP) rows to match the trimmed time base.
    nRows = len(timeLP)
    LP = df.loc[:, 'LP'].values[1:][:nRows]
    TRI = df.loc[:, 'TRI'].values[1:][:nRows]

    data = np.column_stack([timeLP, epsFullLength, LP, TRI])
    return pd.DataFrame(data, columns=['TIME', 'EPS', 'LP', 'TRI'])


def preProcess(cwd):
    """
    Clean every .xlsx workbook found in `cwd` and collect the results in
    'clearedData.xlsx' (written to the current working directory).
    input:
        cwd: directory that contains the raw workbooks; 'Sheet1' of each one
             must provide the columns 'TIME', 'TIME.1', 'EPS', 'LP', 'TRI'
    output:
        None; 'clearedData.xlsx' gets one sheet per input workbook, named
        after the input file without its extension
    """
    outputName = 'clearedData.xlsx'
    outputPath = os.path.abspath(outputName)

    # Sorted so the sheet order does not depend on the arbitrary order of
    # os.listdir. A previous output file that happens to live in `cwd` is
    # skipped so the function never tries to re-ingest its own result.
    workbooks = [
        f for f in sorted(os.listdir(cwd))
        if os.path.splitext(f)[1] == '.xlsx'
        and os.path.abspath(os.path.join(cwd, f)) != outputPath
    ]

    # The context manager saves and closes the workbook even if a sheet fails;
    # ExcelWriter.save() no longer exists in pandas >= 2.0.
    with pd.ExcelWriter(outputName) as clearedData:
        for f in workbooks:
            # Read from the directory that was listed, not a hard-coded 'data'.
            df = pd.read_excel(os.path.join(cwd, f), sheet_name='Sheet1')
            dff = _alignSheet(df)
            dff.to_excel(clearedData, sheet_name=os.path.splitext(f)[0])

In [124]:
def readXlsx(fileName):
    """
    read all sheets in xlsx file
    input:
        fileName: path of the workbook; every sheet must contain the
                  columns 'TRI', 'LP' and 'EPS'
    output:
        one (nRows, 3) array per sheet, in sheet order, whose columns are
        [eta (TRI), thetaBar (LP), epsilon (EPS)].
        If all sheets have the same shape the result is a regular stacked
        (nSheets, nRows, 3) array; otherwise it is a 1-D object array whose
        elements are the per-sheet arrays.
    """
    etaThetaEpsilon = []
    # The context manager releases the file handle once all sheets are parsed.
    with pd.ExcelFile(fileName) as xlsx:
        for sheet in xlsx.sheet_names:
            df = xlsx.parse(sheet)
            etaThetaEpsilon.append(df.loc[:, ['TRI', 'LP', 'EPS']].to_numpy())

    logging.debug(etaThetaEpsilon)

    # Equal-shaped sheets stack into an ordinary ndarray.
    if len({ete.shape for ete in etaThetaEpsilon}) <= 1:
        return np.array(etaThetaEpsilon)

    # Sheets with different row counts: np.array(list) raises ValueError on
    # NumPy >= 1.24, so the object array is filled element by element.
    ragged = np.empty(len(etaThetaEpsilon), dtype=object)
    for i, ete in enumerate(etaThetaEpsilon):
        ragged[i] = ete
    return ragged

In [125]:
# Load every sheet of the cleaned workbook; each entry of `tests` holds one
# sheet as an array with the columns [TRI, LP, EPS].
tests = readXlsx(fileName='clearedData.xlsx')
tests

array([array([[0.      , 0.      , 0.      ],
       [0.      , 0.      , 0.      ],
       [0.      , 0.      , 0.      ],
       ...,
       [0.81169 , 0.99731 , 0.355796],
       [0.80737 , 0.99868 , 0.356196],
       [0.78827 , 0.99426 , 0.356592]]),
       array([[0.     , 0.     , 0.     ],
       [0.     , 0.     , 0.     ],
       [0.     , 0.     , 0.     ],
       ...,
       [0.67932, 0.99067, 0.697  ],
       [0.67933, 0.99072, 0.69799],
       [0.67935, 0.99076, 0.69899]]),
       array([[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [-4.57160000e-01, -5.75220000e-01,  0.00000000e+00],
       [ 5.10720000e-01,  8.72550000e-01,  0.00000000e+00],
       [ 4.27700000e-01,  9.26720000e-01,  0.00000000e+00],
       [ 4.15610000e-01,  8.80250000e-01,  0.00000000e+00],
       [ 4.12770000e-01,  8.59630000e-01,  0.00000000e+00],
       [ 4.11770000e-01,  8.54090000e-01,  0.00000000e+00],
       [ 4.11240000e-01,  8.48100000e-01,  0.00000000e+00],
       [ 4.11090000e-