In [2]:
import numpy as np
import pandas as pd
from scipy.stats import entropy
fnenm = pd.read_csv("pdm_fnenm.csv")
scaled_f_telem = pd.read_csv("abszscale_telemetry.csv")
OverallTelemHists = {
    "volt": [np.histogram(scaled_f_telem[["volt"]],bins=np.arange(0,6,0.01),density=True)],
    "rotate": [np.histogram(scaled_f_telem[["rotate"]],bins=np.arange(0,6,0.01),density=True)],
    "pressure": [np.histogram(scaled_f_telem[["pressure"]],bins=np.arange(0,8,0.01),density=True)],
    "vibration": [np.histogram(scaled_f_telem[["vibration"]],bins=np.arange(0,7,0.01),density=True)],
}

In [3]:
fnenm

Unnamed: 0.1,Unnamed: 0,datetime,machineID,errorID,failure,maint_comp
0,0,2014-06-01 06:00:00,1,,,comp2
1,1,2014-07-16 06:00:00,1,,,comp4
2,2,2014-07-31 06:00:00,1,,,comp3
3,3,2014-12-13 06:00:00,1,,,comp1
4,0,2015-01-03 07:00:00,1,error1,,
...,...,...,...,...,...,...
7961,3917,2015-12-08 06:00:00,100,error3,,
7962,760,2015-12-09 06:00:00,100,,comp2,
7963,3284,2015-12-09 06:00:00,100,,,comp2
7964,3918,2015-12-22 03:00:00,100,error3,,


In [1]:
def getTelemetryFeatures(datetime, machineID):
    datetime = pd.to_datetime(datetime)
    telem_features = {}
    mactelem = scaled_f_telem[scaled_f_telem['machineID']==machineID]
    mactelem = mactelem[datetime - pd.to_datetime(mactelem['datetime']) <= pd.Timedelta("7 days")]
    mactelem = mactelem[datetime - pd.to_datetime(mactelem['datetime']) >= pd.Timedelta("0")]
    binNumber = {'volt':6, 'rotate':6, 'pressure':8, 'vibration':7}
    currentTelemHists = {}
    currentTelemBins = {}
    
    for metric in ['volt','rotate','pressure','vibration']:
        currentTelemHists[metric] = [np.histogram(mactelem[[metric]],bins=np.arange(0,binNumber[metric],0.01),density=True)]  
        telem_features[metric+"_KLD"] =  entropy(currentTelemHists[metric][0][0], OverallTelemHists[metric][0][0])
        telem_features[metric+"_max"] = np.max(mactelem[metric])
        currentTelemBins[metric] = [np.histogram(mactelem[[metric]],bins=np.arange(0,binNumber[metric]+1))]
        for bnm_ in range(1,binNumber[metric]):
            telem_features[metric+"_bin"+ str(bnm_) +"ct"] = currentTelemBins[metric][0][0][bnm_]
        telem_features[metric+"_avg"] = np.mean(mactelem[metric])
        telem_features[metric+"_topavg"] = np.mean(mactelem[metric].sort_values(ascending=False)[:10])  
    return telem_features

In [86]:
def sampleByMacIDN(machineID):
    fnenm_cur_mac = fnenm[fnenm['machineID']==machineID]
    sampleDF_curmac = pd.DataFrame()
    last_maint_date = None
    n_maint_dates = []
    for index,line in fnenm_cur_mac.iterrows():
        if not pd.isna(line['maint_comp']) and pd.to_datetime(line['datetime'])> pd.to_datetime('2015-01-01'):
            if last_maint_date == None:
                last_maint_date = line['datetime']
            else:
                timeD = (pd.to_datetime(line['datetime']) - pd.to_datetime(last_maint_date)).days
                if timeD >= 15:
                    for _ in range(1, timeD-7):
                        n_maint_dates.append(pd.to_datetime(last_maint_date)+pd.Timedelta(str(_)+" days"))
                
    n_maint_dates = set(n_maint_dates)
    
    df_alldates = pd.DataFrame()
    
    for date in n_maint_dates:
        last_maint = None
        errorVec = [0,0,0,0,0]
        failureVec = [0,0,0,0]
        df_curdate = pd.DataFrame()
        for index,line in fnenm_cur_mac.iterrows():
            del1 = pd.to_datetime(date) - pd.to_datetime(line['datetime']) 
            if del1>= pd.Timedelta("0"):
                if line['maint_comp'] in ["comp1","comp2","comp3","comp4"]:
                    last_maint = line['datetime']
                if del1 <= pd.Timedelta("15 days"):
                    if not  pd.isna(line['errorID']):
                        errorVec[int(line['errorID'][-1])-1] += 1
                if del1 <= pd.Timedelta("2 days"):
                    if not pd.isna(line['failure']):
                        failureVec[int(line['failure'][-1])-1] += 1
        df_curdate.loc[0, 'time_since_last_maint']= (pd.to_datetime(date) - pd.to_datetime(last_maint)).days
        for _ in range(5):
            df_curdate.loc[0, 'error'+ str(_+1) +'_in_last_month'] = int(errorVec[_])
        for _ in range(4):
            df_curdate.loc[0, 'failure'+ str(_+1) +'_in_2_days'] = int(failureVec[_])
        telem_feats = getTelemetryFeatures(date, machineID)
        for tf_ in telem_feats:
            df_curdate.loc[0, tf_] = telem_feats[tf_]
        
        df_alldates = pd.concat([df_alldates,df_curdate], axis=0)
        
    df_alldates = df_alldates[df_alldates['time_since_last_maint']>2]
    return df_alldates


In [87]:
def getAllNotMaints():
    all_df = pd.DataFrame()
    for i in range(1,101):
        df = sampleByMacIDN(i)
        all_df = pd.concat([all_df,df], axis=0)
    return all_df

In [88]:
res = getAllNotMaints()

In [89]:
res

Unnamed: 0,time_since_last_maint,error1_in_last_month,error2_in_last_month,error3_in_last_month,error4_in_last_month,error5_in_last_month,failure1_in_2_days,failure2_in_2_days,failure3_in_2_days,failure4_in_2_days,...,vibration_KLD,vibration_max,vibration_bin1ct,vibration_bin2ct,vibration_bin3ct,vibration_bin4ct,vibration_bin5ct,vibration_bin6ct,vibration_avg,vibration_topavg
0,8.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,...,0.676823,2.515216,39.0,8.0,0.0,0.0,0.0,0.0,0.729369,2.163214
0,13.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.776455,3.490870,42.0,8.0,1.0,0.0,0.0,0.0,0.742263,2.471737
0,9.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.664273,2.194054,43.0,4.0,0.0,0.0,0.0,0.0,0.689027,1.925594
0,13.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.689563,2.206774,42.0,4.0,0.0,0.0,0.0,0.0,0.679283,1.941967
0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.694022,2.961420,46.0,5.0,0.0,0.0,0.0,0.0,0.760339,2.072868
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,13.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.747424,2.724610,47.0,8.0,0.0,0.0,0.0,0.0,0.815017,2.237344
0,14.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.867264,3.346064,56.0,17.0,2.0,0.0,0.0,0.0,1.007068,2.732158
0,13.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.777984,2.313609,39.0,2.0,0.0,0.0,0.0,0.0,0.683615,1.940534
0,13.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.742676,2.763350,47.0,8.0,0.0,0.0,0.0,0.0,0.790832,2.278165


In [90]:
res.to_csv("notMaints.csv")