#### Подготовка матрицы «объекты–признаки» для классификации событий изменения уровня топлива:
- сливы-заправки
- штатный расход

In [173]:
# imports
import pandas as pd
import numpy as np

# Keep rendered DataFrames short: show at most 6 rows before truncating.
pd.set_option('display.max_rows', 6)

#### Константы
- пути к данным
- признаки, описывающие данные
- типы "столбцов", или "подпризнаков"
- ID ТС

In [174]:
# Locations of the raw and the pre-processed data sets.
# NOTE(review): 'proccessed' looks like a misspelling, but it presumably has to
# match the directory name on disk — confirm before renaming.
DATA_PATH = 'data/vehicle_dataset_public/'
DATA_PROC_PATH = 'data/vehicle_proccessed/'

# Tag names: the feature tags, the target tag, and both combined
# (features first, target last).
TAGS_FEATURES = [
    'fuelLevel',
    'ingection',
    'speedAndHeight',
    'tachometer',
]
TAGS_TARGET = [
    'refueling2',
]
TAGS = [*TAGS_FEATURES, *TAGS_TARGET]

# Column names grouped by the kind of value they are expected to hold.
TIME_COLUMNS = [
    'DTIME',
    'STARTDATE',
    'ENDDATE',
]
FLOAT_COLUMNS = [
    'BEVALUE',
    'SPEED',
    'HEIGHT',
    'STARTLEVEL',
    'ENDLEVEL',
]

# Identifiers of the vehicles covered by the data set.
VEHICLE_IDS = [
    1,
    3,
    5,
    19,
    28,
]

In [175]:
def timeDeltaToSeconds(TimeDeltaToMinutes):
    """Convert a Series of timedeltas to durations expressed in seconds.

    Parameters
    ----------
    TimeDeltaToMinutes : pandas.Series
        Series of timedelta values (it must expose the ``.dt`` timedelta
        accessor).  The parameter name is kept for backward compatibility;
        the result is in seconds, not minutes.

    Returns
    -------
    numpy.ndarray
        One float per input element: the whole duration in seconds,
        including any fractional (sub-second) part.
    """
    # The earlier implementation multiplied the ``.dt.components`` columns by
    # hand-written weights and scaled milliseconds / microseconds /
    # nanoseconds by 1/60, 1/60**2 and 1/60**3 instead of 1e-3, 1e-6 and
    # 1e-9, so any duration with a sub-second part came out wrong.
    # ``total_seconds`` does the conversion correctly; ``to_numpy`` keeps the
    # ndarray return type callers already receive.
    return TimeDeltaToMinutes.dt.total_seconds().to_numpy()

In [176]:
# Load the pre-processed summary of vehicle 1, using the 'i' column as the
# index, then clean it up in a single chain.
summary_path = DATA_PROC_PATH + 'vehicle1_summary.csv'
df = (
    pd.read_csv(summary_path, index_col='i')
    .dropna()  # discard every row that has a missing value
    .assign(dtime=lambda frame: pd.to_datetime(frame['dtime']))  # parse timestamps
)

df

Unnamed: 0_level_0,dtime,fuellevel,ingection,speed,height,tachometer
i,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,2020-01-08 21:16:02,0.0,0,0,-22.9,0
1,2020-01-08 21:17:04,0.0,0,0,-22.9,0
2,2020-01-08 21:18:04,0.0,0,0,-22.9,0
...,...,...,...,...,...,...
19029,2020-06-27 01:15:14,54.0,1,21,-64.8,1408
19030,2020-06-27 01:16:14,54.2,1,6,-111.9,928
19031,2020-06-27 01:17:14,54.4,1,6,-111.9,832


In [177]:
DELTA_FEATURES = ['dtime', 'fuellevel', 'tachometer']

# Difference between every record of `df` and the record that follows it,
# one array per feature; each array is one element shorter than `df`.
step_deltas = {}
for feature_name in DELTA_FEATURES:
    column = df[feature_name].to_numpy()
    step_deltas[feature_name] = column[1:] - column[:-1]

timestamps = df['dtime'].to_numpy()
fuel_delta = step_deltas['fuellevel']

# One row per pair of consecutive records.
df_fuel_change = pd.DataFrame()
df_fuel_change['dtime_start'] = timestamps[:-1]
df_fuel_change['dtime_end'] = timestamps[1:]
# Length of the interval in seconds (timeDeltaToSeconds expects a Series).
df_fuel_change['delta_seconds'] = timeDeltaToSeconds(
    pd.Series(step_deltas['dtime'], name='deltaDate')
)
df_fuel_change['delta_tachometer'] = step_deltas['tachometer']
# The fuel-level change is stored as magnitude and direction separately
# instead of a single signed column.
df_fuel_change['delta_fuellevel_abs'] = np.abs(fuel_delta)
df_fuel_change['delta_fuellevel_sign'] = np.sign(fuel_delta)

df_fuel_change

Unnamed: 0,dtime_start,dtime_end,delta_seconds,delta_tachometer,delta_fuellevel_abs,delta_fuellevel_sign
0,2020-01-08 21:16:02,2020-01-08 21:17:04,62.0,0,0.0,0.0
1,2020-01-08 21:17:04,2020-01-08 21:18:04,60.0,0,0.0,0.0
2,2020-01-08 21:18:04,2020-01-09 10:05:26,46042.0,1248,49.7,1.0
...,...,...,...,...,...,...
19028,2020-06-27 00:49:05,2020-06-27 01:15:14,1569.0,576,0.2,-1.0
19029,2020-06-27 01:15:14,2020-06-27 01:16:14,60.0,-480,0.2,1.0
19030,2020-06-27 01:16:14,2020-06-27 01:17:14,60.0,-96,0.2,1.0


In [179]:
# Rate of fuel-level change per interval: the absolute level change divided by
# the interval length.
# NOTE(review): the column is named 'lpm', but the divisor 'delta_seconds' is
# in seconds, so these are per-second rates — confirm the intended unit.
fuel_delta_abs = df_fuel_change['delta_fuellevel_abs']
df_fuel_change['lpm'] = fuel_delta_abs.div(df_fuel_change['delta_seconds'])

df_fuel_change

Unnamed: 0,dtime_start,dtime_end,delta_seconds,delta_tachometer,delta_fuellevel_abs,delta_fuellevel_sign,lpm
0,2020-01-08 21:16:02,2020-01-08 21:17:04,62.0,0,0.0,0.0,0.000000
1,2020-01-08 21:17:04,2020-01-08 21:18:04,60.0,0,0.0,0.0,0.000000
2,2020-01-08 21:18:04,2020-01-09 10:05:26,46042.0,1248,49.7,1.0,0.001079
...,...,...,...,...,...,...,...
19028,2020-06-27 00:49:05,2020-06-27 01:15:14,1569.0,576,0.2,-1.0,0.000127
19029,2020-06-27 01:15:14,2020-06-27 01:16:14,60.0,-480,0.2,1.0,0.003333
19030,2020-06-27 01:16:14,2020-06-27 01:17:14,60.0,-96,0.2,1.0,0.003333


In [183]:
# Summary statistics of the 'lpm' column, shown as a {statistic: value} mapping.
lpm_summary = df_fuel_change['lpm'].describe()
dict(lpm_summary)


{'count': 19031.0,
 'mean': 0.014651142971424454,
 'std': 0.0832652531452787,
 'min': 0.0,
 '25%': 0.0,
 '50%': 0.0033333333333333214,
 '75%': 0.006666666666666643,
 'max': 0.995}