In [83]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
from datetime import datetime as tm
import datetime
import seaborn as sns
t = time.time()

In [84]:
### converts varying datatypes into unix time for easier computation
def convert2unix(time):
    ### for string format
    if type(time) == type('hey'):
        time = (tm.strptime(time,'%Y-%m-%dT%H:%M:%S') - tm(1970,1,1,0,0,0)).total_seconds()
        #time = tm.strptime(time,'%m/%d/%y %H:%M:%S %p').timestamp()
    ### for datetime format
    elif type(time) == tm:
        time = time.timestamp()
        print('tm')
    ### this is already in unix time
    elif type(time) == float:
        #print(time)
        pass
    ### for pandas format
    elif type(time) == pd._libs.tslibs.timestamps.Timestamp:
        time = time.timestamp()
    ### notifies the user that the format is not recognized
    else:
        print('NONE OF THE ABOVE')
        print(type(time))
    return time


In [85]:


### $ Filename imported by field engineer
filename = 'C:/Users/sambe/Desktop/Chevron Project/chevron_test_template.xlsx'
### list of all possible motor variable inputs
allvars = ['Inlet Flowrate',
 'Inlet Pressure',
 'Outlet Pressure',
 'Inlet Temperature',
 'Column Level',
 'STD API',
 'Pump Driver Vibration',
 'Pump Vibration',
 'Sister Pump Driver Vibration',
 'Sister Pump Vibration']
### $ Convert to pandas data frame
df = pd.read_excel(filename, sheet_name = 'Time-Series Data')
print(f'Elapsed time is {(time.time() - t)/60} minutes')
### grab columns from input dataframe
a = df.columns

### $ Get the number of rows and columns in the file (# of covariates and sampling will probably be different)
number_rows = len(df.index)
number_cols = len(df.columns)


Elapsed time is 1.4804417610168457 minutes


In [86]:
### $ Create empty data frames with the three points per covariate (value, times, good/bad)
variables = pd.DataFrame([])
times = pd.DataFrame()
goodbad = pd.DataFrame()

In [87]:
### formatting
df.columns = df.iloc[0].fillna('Unnamed')
df.drop(0, inplace = True)
df.reset_index(drop = True, inplace = True)
df

Unnamed: 0,Pen Name:,16FI226,Units: MBPD,Unnamed,Pen Name:.1,16pi2115,Units: PSIG,Unnamed.1,Pen Name:.2,16pi2115.1,...,16ve1133,Units: IN/SEC,Unnamed.2,EMPTY 1,EMPTY 2,EMPTY 3,Unnamed.3,EMPTY 1.1,EMPTY 2.1,EMPTY 3.1
0,,,,,,,,,,,...,,,,,,,,,,
1,2038.28,2018-02-20T15:01:30,GOOD,,44.0469,2018-02-20T15:01:30,GOOD,,5574.77,2018-02-20T15:01:30,...,2018-02-20T15:01:30,GOOD,,,,,,,,
2,2039.22,2018-02-20T15:01:59,GOOD,,44.0703,2018-02-20T15:01:59,GOOD,,5565.62,2018-02-20T15:01:59,...,2018-02-20T15:01:59,GOOD,,,,,,,,
3,2039.06,2018-02-20T15:02:29,GOOD,,44.1953,2018-02-20T15:02:29,GOOD,,5623.01,2018-02-20T15:02:29,...,2018-02-20T15:02:58,GOOD,,,,,,,,
4,2039.22,2018-02-20T15:03:29,GOOD,,44.0469,2018-02-20T15:03:29,GOOD,,5582.38,2018-02-20T15:02:58,...,2018-02-20T15:05:26,BAD,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
507005,,,,,,,,,,,...,,,,,,,,,,
507006,,,,,,,,,,,...,,,,,,,,,,
507007,,,,,,,,,,,...,,,,,,,,,,
507008,,,,,,,,,,,...,,,,,,,,,,


In [88]:
### $ Sort out whole excel file to place the column in the right data frame (value, times, good/bad)
### $ Relabeled with Variable i names
### $ Iterates over the number of columns
t = time.time()
missingcols = []
counter = 0
for i in range(number_cols):
    ### $ Puts column title into str
    str = df.columns[i]
    ### $ if searches for a title that starts with 'Pen Name'
    if str.startswith('Pen Name'):
        ### $ takes variables from column and puts in separate 'variables' dataframe
        s = df.iloc[1:,i]
        variables = pd.concat([variables,s.rename('Value')], axis = 1)
    ### $ elif searches for a title that starts with 'Units'
    elif str.startswith('Units'):
        ### $ takes variables from column and puts in separate 'goodbad' dataframe
        s = df.iloc[1:,i]
        goodbad = pd.concat([goodbad,s.rename('GOOD/BAD')],axis = 1)
    ### $ This is for the blank columns in between variable columns
    elif str.startswith('Unnamed'):
        ### $ Skips over empty columns
        continue
    elif str.startswith('EMPTY 1'):
        variables = pd.concat([variables,pd.DataFrame([np.nan])],axis = 1)
    elif str.startswith('EMPTY 2'):
        times = pd.concat([times,pd.DataFrame([np.nan])],axis = 1)
        missingcols.append(int((i-1)/4))
    elif str.startswith('EMPTY 3'):
        goodbad = pd.concat([goodbad,pd.DataFrame([np.nan])],axis = 1)
    ### $ This is an else because the pen names vary and will be different for every pump/variable
    else:
        ### $ takes variables from column and puts in separate 'times' dataframe
        s = df.iloc[1:,i].apply(convert2unix)
        times = pd.concat([times,s.rename('Time')], axis = 1)
print(f'Elapsed time is {(time.time() - t)/60} minutes')
### relabels times, goodbad, and variables to have the same column names
times.columns = allvars
goodbad.columns = allvars
variables.columns = allvars

Elapsed time is 0.48040519952774047 minutes


In [89]:
### formatting
times.drop(0,inplace = True)
times.reset_index(drop = True, inplace = True)
variables.drop(0,inplace = True)
variables.reset_index(drop = True, inplace = True)
goodbad.drop(0,inplace = True)
goodbad.reset_index(drop = True, inplace = True)

In [90]:
### finds the available vairables for that motor (i.e. those variables that were input into the program)
availvars = []
for i in range(len(times.columns)):
    if i in missingcols:
        pass
    else:
        availvars.append(times.columns[i])

In [91]:
availvars

['Inlet Flowrate',
 'Inlet Pressure',
 'Outlet Pressure',
 'Pump Driver Vibration',
 'Pump Vibration']

In [92]:
### selects only the columns with variables in them
times = times[availvars]
goodbad = goodbad[availvars]
variables = variables[availvars]

In [93]:
### formatting for later
times = times.append(pd.Series([], dtype = np.float64),ignore_index = True)

In [94]:
### $ interpolates for BAD Values
### $ Excludes last row because you can't interpolate a machine being on or off
t = time.time()
for j in range(len(variables.columns)):
    for i in range(len(variables.iloc[:,j].dropna())):
        str = goodbad.iloc[i,j]
        ### $ If the value is bad and the value has no prior value to interpolate from, just use next good value
        ### $ Needs improvement: could back interpolate and take care of case in which there are consecutive bad values     
        if str.startswith('B'):# and i == 0:
            variables.iloc[i,j] = variables.iloc[i-1,j]
            goodbad.iloc[i,j] = 'OK'
        ### $ Else if bad, then interpolate between the two good points
#        elif str.startswith('B'):
#            t1 = times.iloc[i-1,j]
#            t2 = times.iloc[i,j]
#            t3 = times.iloc[i+1,j]
#            a = variables.iloc[i-1,j]
#            c = variables.iloc[i+1,j]
#            variables.iloc[i,j] = ((t2-t1)/(t3-t1))*(c-a)+a
#            goodbad.iloc[i,j] = 'INT'
print(f'Elapsed time is {(time.time() - t)/60} minutes')

Elapsed time is 0.7016374230384826 minutes


In [95]:
### $ Align times via binning
### $ kappa designates the grouping size
### $ Any timestamp outside of the bin will be group later



### $ Sets to dataframes: windows and shutters. windows is the number row that is being looked at for that variable. shutters is end of the length of that variable
### $ column titles designate which variable is being investigated
windows = pd.DataFrame()
shutters = pd.DataFrame()
for j in range(len(variables.columns)):
    w = pd.DataFrame([0], columns = [f'{j}'])
    windows = pd.concat([windows, w], axis = 1)
    temp = variables.iloc[:,j]
    s = pd.DataFrame([len(temp)], columns = [f'{j}'])
    shutters = pd.concat([shutters, s], axis = 1)
    
binsize = 60*2#seconds

### $ abstime is the merged and grouped times of all the variables
abstime = pd.DataFrame([])
### $ vars is the merged and grouped variables
vars = pd.DataFrame([], columns = allvars)

In [96]:
############# binsize for binning in seconds
binsize = 60*2
#############
timestart = times.dropna(thresh = 1).min().min()
timenow = timestart
timeend = times.dropna(thresh = 1).max().max()
t = time.time()

abstime = []
vars = []
### Flag denotes if the timenow is less than or equal to timeend
Flag = 1
while Flag:
    ### vlist is list of variables in that timebin
    vlist = []
    ### below iterates over columns
    for j in range(len(variables.columns)):
        ### Flag1 denotes if the end of the available times in the column has been reached
        Flag1 = 1
        ### v is the list of variables that fall within the time bin (say three flowrates occur within that two minute timebin)
        v = []
        while Flag1:
            ###
            timecomp = times.iloc[windows.iloc[0,j],j]
            if timecomp >= timenow and timecomp < timenow + binsize:
                v.append(variables.iloc[windows.iloc[0,j],j])
                windows.iloc[0,j] += 1 
            elif timecomp >= timenow + binsize:
                Flag1 = 0
            elif np.isnan(times.iloc[windows.iloc[0,j],j]):
                Flag1 = 0   

            else:
                print('HELP')
        if len(v) == 0:
            v = np.nan
        else:
            v = sum(v)/float(len(v)) 
        vlist.append(v)

        
    abstime.append(timenow)
    vars.append(vlist)
    timenow += binsize
    if timenow > timeend:
        Flag = 0
print(f'Elapsed time is {(time.time() - t)/60} minutes')

Elapsed time is 10.19702915350596 minutes


In [97]:
abstime = pd.DataFrame(abstime, columns = ['Time'])

In [98]:
vars = pd.DataFrame(vars, columns = availvars)

In [99]:
for j in range(len(vars.columns)):
    for i in range(len(vars)):
        if np.isnan(vars.iloc[i,j]) and i == 0:
            vars.iloc[i,j] = vars.iloc[i+1,j]
        elif np.isnan(vars.iloc[i,j]):
            vars.iloc[i,j] = vars.iloc[i-1,j]


In [100]:
vars

Unnamed: 0,Inlet Flowrate,Inlet Pressure,Outlet Pressure,Pump Driver Vibration,Pump Vibration
0,2038.945312,44.089844,5589.695313,0.072129,0.022539
1,2040.156250,44.150391,5585.048829,0.073216,0.022109
2,2040.937500,44.190104,5592.539063,0.074336,0.023398
3,2042.578125,44.185547,5595.966797,0.074385,0.022520
4,2042.395833,44.158203,5592.539063,0.073105,0.022721
...,...,...,...,...,...
525597,1727.500000,40.437500,7743.125000,0.121094,0.021367
525598,1727.656250,40.257812,7718.242188,0.111328,0.021367
525599,1728.593750,40.375000,7781.210938,0.105977,0.024687
525600,1726.093750,40.171875,7798.476563,0.111094,0.024687


In [101]:
a = ['Time']
for s in availvars:
    a.append(s)


In [102]:

fixed = pd.concat([abstime,vars], axis = 1)
fixed.columns = a
fixed.dropna(thresh = 2, inplace = True)



fixed.to_excel('interpolatedvar.xlsx')


In [103]:
fixed

Unnamed: 0,Time,Inlet Flowrate,Inlet Pressure,Outlet Pressure,Pump Driver Vibration,Pump Vibration
0,1.519139e+09,2038.945312,44.089844,5589.695313,0.072129,0.022539
1,1.519139e+09,2040.156250,44.150391,5585.048829,0.073216,0.022109
2,1.519139e+09,2040.937500,44.190104,5592.539063,0.074336,0.023398
3,1.519139e+09,2042.578125,44.185547,5595.966797,0.074385,0.022520
4,1.519139e+09,2042.395833,44.158203,5592.539063,0.073105,0.022721
...,...,...,...,...,...,...
525597,1.582211e+09,1727.500000,40.437500,7743.125000,0.121094,0.021367
525598,1.582211e+09,1727.656250,40.257812,7718.242188,0.111328,0.021367
525599,1.582211e+09,1728.593750,40.375000,7781.210938,0.105977,0.024687
525600,1.582211e+09,1726.093750,40.171875,7798.476563,0.111094,0.024687


In [104]:
filename = 'C:/Users/sambe/Desktop/Chevron Project/chevron_test_template.xlsx'
faildata = pd.read_excel(filename, sheet_name = 'Failure Mode & Date')

In [105]:
faildata

Unnamed: 0,Date,Mode,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,2018-09-24,Valve_Alignment,,2018-02-20 15:04:00,Valve_Alignment,,,,2018-09-24,Valve_Alignment
1,2019-03-08,Cracked_Seal,,2019-03-08 00:00:00,Cracked_Seal,,,,2019-03-08,Cracked_Seal
2,2019-05-16,Broken_Impeller,,2019-05-16 00:00:00,Broken_Impeller,,,,2019-05-16,Broken_Impeller
3,2019-06-03,Broken_Valve,,2019-06-03 00:00:00,Broken_Valve,,,,2019-06-03,Broken_Valve
4,2019-08-05,Leaking_Valve,,2019-08-05 00:00:00,Leaking_Valve,,,,2019-08-05,Leaking_Valve
5,2019-09-28,Motor_Distorted,,2020-02-20 15:02:00,Valve_Alignment,,,,2019-09-28,Motor_Distorted


In [106]:
faildata['Date'] = faildata['Date'].apply(convert2unix)

In [107]:
faildata.sort_values(by = ['Date'], inplace = True)

In [108]:
allfailmodes = ['Motor_Distorted','Broken_Valve','Valve_Alignment','Broken_Impeller','Leaking_Valve', 'Cracked_Seal']
allfailmodeswindows = [60*60*24*7,60*60*24,60*60*24,60*60*24,60*60*24,60*60*24]

In [109]:
failmodeswindows = pd.DataFrame([allfailmodeswindows], columns = allfailmodes)

In [110]:
failmodeswindows

Unnamed: 0,Motor_Distorted,Broken_Valve,Valve_Alignment,Broken_Impeller,Leaking_Valve,Cracked_Seal
0,604800,86400,86400,86400,86400,86400


In [111]:
list = []
for mode in faildata['Mode']:
    list.append(failmodeswindows[mode][0])

In [112]:
faildata['Time Window'] = list

In [113]:
faildata.reset_index(drop = True, inplace = True)

In [114]:
faildata

Unnamed: 0,Date,Mode,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Time Window
0,1537747000.0,Valve_Alignment,,2018-02-20 15:04:00,Valve_Alignment,,,,2018-09-24,Valve_Alignment,86400
1,1552003000.0,Cracked_Seal,,2019-03-08 00:00:00,Cracked_Seal,,,,2019-03-08,Cracked_Seal,86400
2,1557965000.0,Broken_Impeller,,2019-05-16 00:00:00,Broken_Impeller,,,,2019-05-16,Broken_Impeller,86400
3,1559520000.0,Broken_Valve,,2019-06-03 00:00:00,Broken_Valve,,,,2019-06-03,Broken_Valve,86400
4,1564963000.0,Leaking_Valve,,2019-08-05 00:00:00,Leaking_Valve,,,,2019-08-05,Leaking_Valve,86400
5,1569629000.0,Motor_Distorted,,2020-02-20 15:02:00,Valve_Alignment,,,,2019-09-28,Motor_Distorted,604800


In [115]:
list = []
for faildate in faildata['Date']:
    faildiff = faildate - faildata['Date']
    Flag = 0
    for diff in faildiff:
        if diff > 0 and diff < faildata.loc[faildata['Date'] == faildate]['Time Window'].values:
            x = diff
            Flag = 1
    if Flag == 1:
        list.append(x)
    else:
        list.append(faildata.loc[faildata['Date'] == faildate]['Time Window'].values[0])
        

In [116]:
faildata.drop(axis = 1,columns = 'Time Window', inplace = True)

In [117]:
faildata['Time Window'] = list

In [118]:
faildata

Unnamed: 0,Date,Mode,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Time Window
0,1537747000.0,Valve_Alignment,,2018-02-20 15:04:00,Valve_Alignment,,,,2018-09-24,Valve_Alignment,86400
1,1552003000.0,Cracked_Seal,,2019-03-08 00:00:00,Cracked_Seal,,,,2019-03-08,Cracked_Seal,86400
2,1557965000.0,Broken_Impeller,,2019-05-16 00:00:00,Broken_Impeller,,,,2019-05-16,Broken_Impeller,86400
3,1559520000.0,Broken_Valve,,2019-06-03 00:00:00,Broken_Valve,,,,2019-06-03,Broken_Valve,86400
4,1564963000.0,Leaking_Valve,,2019-08-05 00:00:00,Leaking_Valve,,,,2019-08-05,Leaking_Valve,86400
5,1569629000.0,Motor_Distorted,,2020-02-20 15:02:00,Valve_Alignment,,,,2019-09-28,Motor_Distorted,604800


In [119]:
labeled = pd.DataFrame([], columns = a)

In [120]:
for mode in allfailmodes:
    modedates = faildata[faildata['Mode'] == mode]['Date']
    for failuredate in modedates:
        window = faildata[(faildata['Mode'] == mode) & (faildata['Date'] == failuredate)]['Time Window'].values[0]
        z = pd.DataFrame(fixed.loc[(fixed['Time'] < failuredate) & (fixed['Time'] > failuredate - window)])
        z['Condition'] = mode
        labeled = pd.concat([labeled,z])

In [121]:
window

86400

In [122]:
labeled.sort_index(inplace = True)

In [123]:
labeled

Unnamed: 0,Time,Inlet Flowrate,Inlet Pressure,Outlet Pressure,Pump Driver Vibration,Pump Vibration,Condition
154350,1.537661e+09,1838.75000,41.218750,6596.484375,0.076953,0.033477,Valve_Alignment
154351,1.537661e+09,1837.96875,41.203125,6537.578125,0.088125,0.030391,Valve_Alignment
154352,1.537661e+09,1838.75000,41.125000,6587.343750,0.093750,0.028867,Valve_Alignment
154353,1.537661e+09,1839.84375,41.210938,6597.500000,0.087461,0.031055,Valve_Alignment
154354,1.537661e+09,1840.00000,41.273438,6644.218750,0.093867,0.034961,Valve_Alignment
...,...,...,...,...,...,...,...
420745,1.569628e+09,45.31250,45.421875,2.031250,0.027461,0.001641,Motor_Distorted
420746,1.569628e+09,45.31250,45.507812,28.437500,0.049023,0.002891,Motor_Distorted
420747,1.569629e+09,45.62500,45.789062,391.015625,0.042383,0.010352,Motor_Distorted
420748,1.569629e+09,45.46875,45.406250,-1.015625,0.052109,0.002852,Motor_Distorted


In [124]:
list = []
for index1 in fixed.index:
    if index1  not in labeled.index:
        list.append(index1)
        

In [125]:
z = pd.DataFrame(fixed.loc[list,:])
z['Condition'] = 'Normal'
labeled = pd.concat([labeled,z])

In [126]:
labeled.sort_index(inplace = True)

In [127]:
labeled['Condition'].value_counts()

Normal             516962
Motor_Distorted      5040
Broken_Valve          720
Cracked_Seal          720
Valve_Alignment       720
Broken_Impeller       720
Leaking_Valve         720
Name: Condition, dtype: int64

labeled.to_excel('HEYYY.xlsx')

In [128]:
list = []
#####
minvib = 0.05
#####
vibdiff = labeled['Pump Driver Vibration'] - minvib
for item in vibdiff:
    if item > 0:
        list.append('On')
    else:
        list.append('Off')
labeled['Status'] = list

In [129]:
labeled = labeled[labeled['Status'] == 'On']

In [130]:
labeled.reset_index(drop = True, inplace = True)

In [131]:
len(labeled)

301373

In [132]:
allfailmodescols = []
allfailtimescols = []
for mode in allfailmodes:
    allfailmodescols.append(f'{mode} Failure')
    allfailtimescols.append(f'{mode} TT Failure')

In [133]:
allfailtimescols

['Motor_Distorted TT Failure',
 'Broken_Valve TT Failure',
 'Valve_Alignment TT Failure',
 'Broken_Impeller TT Failure',
 'Leaking_Valve TT Failure',
 'Cracked_Seal TT Failure']

In [134]:
def pos(lst):
    return [x for x in lst if x > 0] or None

timeend = labeled['Time'].max()
timestart = labeled['Time'].min()
ttime = timeend-timestart
mainlist = []
for mode in allfailmodes:

    list = []
    check = []
    m = faildata[faildata['Mode'] == mode]['Date']
    lastfailtime = 0
    for timefail in m:
        print(mode,timefail)
        Flag1 = 1
        timediff = timefail - labeled['Time']
        truetimefail = timefail - min(pos(timediff)) 
        for time in labeled['Time']:
            if Flag1:
                if time != truetimefail and time > lastfailtime:
                    list.append(0)
                elif time <= lastfailtime:
                    pass
                else:
                    print('sweet')
                    list.append(1)
                    Flag1 = 0
                    lastfailtime = time

#    for timefail in m:
#        print(mode,timefail)
#        Flag1 = 1
#        timediff = timefail - labeled['Time']
#        truetimefail = timefail - min(pos(timediff)) 
#
#        while Flag1:
#            for time in labeled['Time']:
#                if time != truetimefail:
#                    list.append(0)
#                else:
#                    print('sweet')
#                    list.append(1)
#                    Flag1 = 0
                

    if list == []:
        for time in labeled['Time']:
            list.append(0)
#            check.append()
    mainlist.append(list)
    
#for time in labeled['Time']:
 #   if time != 

mainlist = pd.DataFrame(mainlist)

mainlist = mainlist.transpose()

mainlist.columns = allfailmodescols

mainlist

labeled = pd.concat([labeled,mainlist],axis = 1)

labeled

Motor_Distorted 1569628800.0
sweet
Broken_Valve 1559520000.0
sweet
Valve_Alignment 1537747200.0
sweet
Broken_Impeller 1557964800.0
sweet
Leaking_Valve 1564963200.0
sweet
Cracked_Seal 1552003200.0
sweet


Unnamed: 0,Time,Inlet Flowrate,Inlet Pressure,Outlet Pressure,Pump Driver Vibration,Pump Vibration,Condition,Status,Motor_Distorted Failure,Broken_Valve Failure,Valve_Alignment Failure,Broken_Impeller Failure,Leaking_Valve Failure,Cracked_Seal Failure
0,1.519139e+09,2038.945312,44.089844,5589.695313,0.072129,0.022539,Normal,On,0.0,0.0,0.0,0.0,0.0,0.0
1,1.519139e+09,2040.156250,44.150391,5585.048829,0.073216,0.022109,Normal,On,0.0,0.0,0.0,0.0,0.0,0.0
2,1.519139e+09,2040.937500,44.190104,5592.539063,0.074336,0.023398,Normal,On,0.0,0.0,0.0,0.0,0.0,0.0
3,1.519139e+09,2042.578125,44.185547,5595.966797,0.074385,0.022520,Normal,On,0.0,0.0,0.0,0.0,0.0,0.0
4,1.519139e+09,2042.395833,44.158203,5592.539063,0.073105,0.022721,Normal,On,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
301368,1.582211e+09,1727.500000,40.437500,7743.125000,0.121094,0.021367,Normal,On,,,,,,
301369,1.582211e+09,1727.656250,40.257812,7718.242188,0.111328,0.021367,Normal,On,,,,,,
301370,1.582211e+09,1728.593750,40.375000,7781.210938,0.105977,0.024687,Normal,On,,,,,,
301371,1.582211e+09,1726.093750,40.171875,7798.476563,0.111094,0.024687,Normal,On,,,,,,


In [135]:
mainlist

Unnamed: 0,Motor_Distorted Failure,Broken_Valve Failure,Valve_Alignment Failure,Broken_Impeller Failure,Leaking_Valve Failure,Cracked_Seal Failure
0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...
210173,0.0,,,,,
210174,0.0,,,,,
210175,0.0,,,,,
210176,0.0,,,,,


In [136]:
onoffint = 60*60

In [137]:
off = pd.DataFrame(labeled[labeled['Time'].diff(periods = -1) < -onoffint])
offindex = off.index
off.reset_index(drop = True, inplace = True)
off['Time'] = off['Time'] - timestart

In [138]:
on = pd.DataFrame(labeled[labeled['Time'].diff(periods = 1) > onoffint])
onindex = on.index
on.reset_index(drop = True, inplace = True)
on['Time'] = on['Time'] - timestart

In [139]:
onindex

Int64Index([    86,   2988,   3071,   7707,   9307,  11123,  14041,  18486,
             20169,  26871,
            ...
            293272, 293331, 293402, 293514, 293735, 295477, 295773, 295953,
            301134, 301288],
           dtype='int64', length=827)

In [140]:
### Fix to time on
timestart = labeled['Time'].min()
timestart

1519138890.0

In [141]:
a = labeled['Time'] - timestart

In [142]:
a

0                0.0
1              120.0
2              240.0
3              360.0
4              480.0
             ...    
301368    63071640.0
301369    63071760.0
301370    63071880.0
301371    63072000.0
301372    63072120.0
Name: Time, Length: 301373, dtype: float64

In [143]:
timeoff = on['Time'] - off['Time']

In [144]:
timeoff

0       9120.0
1       3840.0
2       3840.0
3       4560.0
4       5280.0
        ...   
822    28920.0
823     7800.0
824     8400.0
825    10200.0
826     8400.0
Name: Time, Length: 827, dtype: float64

In [145]:
timeoff.index = onindex

In [146]:
labeledlist = a.tolist()

In [147]:
for index in labeled.index:
    if index in timeoff.index:
        labeledlist[index:] -= timeoff[index]

In [148]:
timeonlist = pd.DataFrame(labeledlist,columns = ['Time On'])

In [149]:
timeonlist

Unnamed: 0,Time On
0,0.0
1,120.0
2,240.0
3,360.0
4,480.0
...,...
301368,37553160.0
301369,37553280.0
301370,37553400.0
301371,37553520.0


labeled.drop(columns = 'Time', inplace = True)

In [150]:
labeled = pd.concat([timeonlist,labeled], axis = 1)

labeled.to_excel('Cleaned_Shit.xlsx')

In [151]:
labeled['Valve_Alignment Failure'].value_counts()

0.0    103561
1.0         1
Name: Valve_Alignment Failure, dtype: int64

In [152]:
labeled

Unnamed: 0,Time On,Time,Inlet Flowrate,Inlet Pressure,Outlet Pressure,Pump Driver Vibration,Pump Vibration,Condition,Status,Motor_Distorted Failure,Broken_Valve Failure,Valve_Alignment Failure,Broken_Impeller Failure,Leaking_Valve Failure,Cracked_Seal Failure
0,0.0,1.519139e+09,2038.945312,44.089844,5589.695313,0.072129,0.022539,Normal,On,0.0,0.0,0.0,0.0,0.0,0.0
1,120.0,1.519139e+09,2040.156250,44.150391,5585.048829,0.073216,0.022109,Normal,On,0.0,0.0,0.0,0.0,0.0,0.0
2,240.0,1.519139e+09,2040.937500,44.190104,5592.539063,0.074336,0.023398,Normal,On,0.0,0.0,0.0,0.0,0.0,0.0
3,360.0,1.519139e+09,2042.578125,44.185547,5595.966797,0.074385,0.022520,Normal,On,0.0,0.0,0.0,0.0,0.0,0.0
4,480.0,1.519139e+09,2042.395833,44.158203,5592.539063,0.073105,0.022721,Normal,On,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
301368,37553160.0,1.582211e+09,1727.500000,40.437500,7743.125000,0.121094,0.021367,Normal,On,,,,,,
301369,37553280.0,1.582211e+09,1727.656250,40.257812,7718.242188,0.111328,0.021367,Normal,On,,,,,,
301370,37553400.0,1.582211e+09,1728.593750,40.375000,7781.210938,0.105977,0.024687,Normal,On,,,,,,
301371,37553520.0,1.582211e+09,1726.093750,40.171875,7798.476563,0.111094,0.024687,Normal,On,,,,,,


In [153]:
def find_zero(lst, lastfail):
    newlst = []
    for item in lst:
        if item >= 0 and item >= lastfail:
            newlst.append(item - lastfail) 
    return(newlst)

In [154]:
allttfaillist = []
for mode in allfailmodes:
#mode = 'Motor_Distorted'
    ttfaillist = []
    timelastfail = 0
    for fail in labeled.loc[labeled[f'{mode} Failure'] == 1]['Time On']:
        a = (fail - labeled['Time On']).tolist()
        ttfaillist += find_zero(a,timelastfail)#.append(find_zero(a,timelastfail))
        timelastfail = fail

    if ttfaillist == []:  
        y = pd.DataFrame(index = np.arange(len(labeled)), columns = ['0'])
        y = np.array(y.values).flatten().tolist()
        allttfaillist.append(y)
    else:
        allttfaillist.append(ttfaillist)

In [155]:
allttfaillist

[[26529240.0,
  26529120.0,
  26529000.0,
  26528880.0,
  26528760.0,
  26528640.0,
  26528520.0,
  26528400.0,
  26528280.0,
  26528160.0,
  26528040.0,
  26527920.0,
  26527800.0,
  26527680.0,
  26527560.0,
  26527440.0,
  26527320.0,
  26527200.0,
  26527080.0,
  26526960.0,
  26526840.0,
  26526720.0,
  26526600.0,
  26526480.0,
  26526360.0,
  26526240.0,
  26526120.0,
  26526000.0,
  26525880.0,
  26525760.0,
  26525640.0,
  26525520.0,
  26525400.0,
  26525280.0,
  26525160.0,
  26525040.0,
  26524920.0,
  26524800.0,
  26524680.0,
  26524560.0,
  26524440.0,
  26524320.0,
  26524200.0,
  26524080.0,
  26523960.0,
  26523840.0,
  26523720.0,
  26523600.0,
  26523480.0,
  26523360.0,
  26523240.0,
  26523120.0,
  26523000.0,
  26522880.0,
  26522760.0,
  26522640.0,
  26522520.0,
  26522400.0,
  26522280.0,
  26522160.0,
  26522040.0,
  26521920.0,
  26521800.0,
  26521680.0,
  26521560.0,
  26521440.0,
  26521320.0,
  26521200.0,
  26521080.0,
  26520960.0,
  26520840.0,
  2652

In [156]:
allttfail = pd.DataFrame(allttfaillist)

In [157]:
allttfail

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,210168,210169,210170,210171,210172,210173,210174,210175,210176,210177
0,26529240.0,26529120.0,26529000.0,26528880.0,26528760.0,26528640.0,26528520.0,26528400.0,26528280.0,26528160.0,...,4560.0,4320.0,3480.0,3360.0,3240.0,3120.0,3000.0,2760.0,120.0,0.0
1,24136200.0,24136080.0,24135960.0,24135840.0,24135720.0,24135600.0,24135480.0,24135360.0,24135240.0,24135120.0,...,,,,,,,,,,
2,12758040.0,12757920.0,12757800.0,12757680.0,12757560.0,12757440.0,12757320.0,12757200.0,12757080.0,12756960.0,...,,,,,,,,,,
3,23948880.0,23948760.0,23948640.0,23948520.0,23948400.0,23948280.0,23948160.0,23948040.0,23947920.0,23947800.0,...,,,,,,,,,,
4,26148840.0,26148720.0,26148600.0,26148480.0,26148360.0,26148240.0,26148120.0,26148000.0,26147880.0,26147760.0,...,,,,,,,,,,
5,23465760.0,23465640.0,23465520.0,23465400.0,23465280.0,23465160.0,23465040.0,23464920.0,23464800.0,23464680.0,...,,,,,,,,,,


In [158]:
allttfail = allttfail.transpose()

In [159]:
allttfail.columns = allfailtimescols

In [160]:
allttfail

Unnamed: 0,Motor_Distorted TT Failure,Broken_Valve TT Failure,Valve_Alignment TT Failure,Broken_Impeller TT Failure,Leaking_Valve TT Failure,Cracked_Seal TT Failure
0,26529240.0,24136200.0,12758040.0,23948880.0,26148840.0,23465760.0
1,26529120.0,24136080.0,12757920.0,23948760.0,26148720.0,23465640.0
2,26529000.0,24135960.0,12757800.0,23948640.0,26148600.0,23465520.0
3,26528880.0,24135840.0,12757680.0,23948520.0,26148480.0,23465400.0
4,26528760.0,24135720.0,12757560.0,23948400.0,26148360.0,23465280.0
...,...,...,...,...,...,...
210173,3120.0,,,,,
210174,3000.0,,,,,
210175,2760.0,,,,,
210176,120.0,,,,,


In [161]:
labeledplustt = pd.concat([labeled, allttfail], axis = 1)

In [162]:
labeledplustt

Unnamed: 0,Time On,Time,Inlet Flowrate,Inlet Pressure,Outlet Pressure,Pump Driver Vibration,Pump Vibration,Condition,Status,Motor_Distorted Failure,...,Valve_Alignment Failure,Broken_Impeller Failure,Leaking_Valve Failure,Cracked_Seal Failure,Motor_Distorted TT Failure,Broken_Valve TT Failure,Valve_Alignment TT Failure,Broken_Impeller TT Failure,Leaking_Valve TT Failure,Cracked_Seal TT Failure
0,0.0,1.519139e+09,2038.945312,44.089844,5589.695313,0.072129,0.022539,Normal,On,0.0,...,0.0,0.0,0.0,0.0,26529240.0,24136200.0,12758040.0,23948880.0,26148840.0,23465760.0
1,120.0,1.519139e+09,2040.156250,44.150391,5585.048829,0.073216,0.022109,Normal,On,0.0,...,0.0,0.0,0.0,0.0,26529120.0,24136080.0,12757920.0,23948760.0,26148720.0,23465640.0
2,240.0,1.519139e+09,2040.937500,44.190104,5592.539063,0.074336,0.023398,Normal,On,0.0,...,0.0,0.0,0.0,0.0,26529000.0,24135960.0,12757800.0,23948640.0,26148600.0,23465520.0
3,360.0,1.519139e+09,2042.578125,44.185547,5595.966797,0.074385,0.022520,Normal,On,0.0,...,0.0,0.0,0.0,0.0,26528880.0,24135840.0,12757680.0,23948520.0,26148480.0,23465400.0
4,480.0,1.519139e+09,2042.395833,44.158203,5592.539063,0.073105,0.022721,Normal,On,0.0,...,0.0,0.0,0.0,0.0,26528760.0,24135720.0,12757560.0,23948400.0,26148360.0,23465280.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
301368,37553160.0,1.582211e+09,1727.500000,40.437500,7743.125000,0.121094,0.021367,Normal,On,,...,,,,,,,,,,
301369,37553280.0,1.582211e+09,1727.656250,40.257812,7718.242188,0.111328,0.021367,Normal,On,,...,,,,,,,,,,
301370,37553400.0,1.582211e+09,1728.593750,40.375000,7781.210938,0.105977,0.024687,Normal,On,,...,,,,,,,,,,
301371,37553520.0,1.582211e+09,1726.093750,40.171875,7798.476563,0.111094,0.024687,Normal,On,,...,,,,,,,,,,


In [163]:
labeledplustt.to_excel('Polished_Shit.xlsx')

In [None]:
#for mode in allfailmodes:
mode = allfailmodes[0]
m = faildata[faildata['Mode'] == mode]
for failureinst in m['Date']:
    labeled.loc[labeled['Time']]

In [None]:
a['Date'][4]