In [27]:
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
df = pd.read_csv('data/task1/dataset.csv', encoding='cp1251')

In [3]:
stops = pd.read_csv('data/task1/stops.csv')

In [4]:
pd.set_option('display.max_colwidth', 200)

In [5]:
df.date = pd.to_datetime(df.date, infer_datetime_format=True)

In [6]:
stops.date = pd.to_datetime(stops.date, dayfirst=True)
print(stops)

                  date       type
0  2017-01-11 11:03:00       stop
1  2017-01-30 08:51:00       stop
2  2017-02-24 17:45:00       stop
3  2017-03-07 00:16:00       stop
4  2017-03-08 11:23:00  vibrosito
5  2017-03-08 15:56:00  vibrosito
6  2017-03-10 17:55:00       stop
7  2017-03-31 01:42:00  vibrosito
8  2017-04-01 07:45:00       stop
9  2017-04-03 20:48:00       stop
10 2017-04-21 10:02:00       stop
11 2017-04-24 13:51:00       stop
12 2017-06-04 13:23:00       stop
13 2017-06-06 13:32:00  vibrosito
14 2017-06-07 16:29:00  vibrosito
15 2017-10-06 03:21:00       stop
16 2017-10-08 16:00:00       stop
17 2017-10-14 14:13:00       stop
18 2017-10-15 09:34:00  vibrosito
19 2017-10-21 02:23:00       stop
20 2017-11-07 04:13:00       stop
21 2017-11-09 04:00:00       stop
22 2017-11-10 13:06:00       stop
23 2017-11-14 05:20:00       stop
24 2017-11-16 21:25:00       stop
25 2017-11-19 01:55:00  vibrosito
26 2017-11-19 11:21:00  vibrosito
27 2017-11-21 22:53:00       stop
28 2017-12-09 

In [7]:
df.isna().sum()

date                                            0
RF.21304.Ток...213MII904A                   77760
S.C.ВПУСК.ПП.ДАВЛ...214PI226AA             151199
S.C.ВПУСК.ПП.ДАВЛ...214PI226AB             151199
S.C.ВПУСК.ПП.ТЕМП...214TI232A              151199
SPEED.CONTROLLER...250MSIC001.PV           155519
XX21401.МАСС.РАСХ.ГРАНУЛ...214FIC112.PV     77760
ДАВЛ.ВАЛ.ВПЕР.УПР...214PIC232A             151199
ДВИГАТЕЛЬ.ГРАНУЛЯТОРА...214MSIC981          77760
Е.21402А....ГР.ВОДА.ВЫХ...214TIC101A       151199
ЕХ21401.АЕ01.MFR...214AI200A                77760
НАГНЕТАНИЕ.В.EX.21401...250PI001            77760
ПОЛОЖ.НОЖА..ГРАНУЛЯТОРА...214ZI211A        151199
ПОЛОЖ.ЩЕЛ.ДИСКА...214ZI202A                155519
РАСХОД.В....EX.21401...250FIC001.PV         77760
РАСХОД.ЛИНИИ.P.W...214FI205A               151199
СЕКТ.ПИТАТЕЛЬ.RF21304...213MSIC904.PV       77760
СПЕЦ.ЭНЕРГИЯ...214JYY200A                   77760
ТЕМП.ЛИН.Г.В..ДР.ЛИН....214TI247A           77760
ТЕМП.ЛИН.Г.В..ЛИН.РАЗ....214TI246A         151199


In [8]:
df = df.merge(stops, left_on='date', right_on='date', how='outer')

In [9]:
df['type'].replace(['vibrosito', 'stop'], 1, inplace=True)
df['type'].fillna(method='backfill', limit=180, inplace=True)
df.fillna(0, inplace=True)

In [10]:
df = df.rename(index=str, columns={"type": "target"})

In [11]:
n = df[df.date > pd.Timestamp('2018-01-01 00:00:00')].count()[0]
df.drop(df.index[-n:], inplace=True)

In [12]:
target = df['target']

In [133]:
from keras.callbacks import ModelCheckpoint
from keras.models import Model, load_model, Sequential
from keras.layers import Dense, Activation, Dropout, Input, Masking, TimeDistributed, LSTM, Conv1D
from keras.layers import GRU, Bidirectional, BatchNormalization, Reshape
from keras.optimizers import Adam
from keras import backend as K

In [134]:
def f1_score(true, pred):
    p = precision(true, pred)
    r = recall(true, pred)
    return 2 * (p * r) / (p + r + 1e-6)

def recall(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

In [14]:
def get_model(input_shape):
    
    X_input = Input(shape = input_shape)
    
    X = Conv1D(196, kernel_size=20, strides=10)(X_input)                           
    X = BatchNormalization()(X)                                
    X = Activation('relu')(X)                                 
    X = Dropout(0.8)(X)                                 

    X = GRU(units = 128, return_sequences = True)(X) 
    X = Dropout(0.8)(X)                              
    X = BatchNormalization()(X)                      
    
    X = GRU(units = 128, return_sequences = True)(X) 
    X = Dropout(0.8)(X)                              
    X = BatchNormalization()(X)                      
    X = Dropout(0.8)(X)                              
    
    X = TimeDistributed(Dense(1, activation = "sigmoid"))(X) 

    model = Model(inputs = X_input, outputs = X)
    
    return model

In [15]:
X_df = df.iloc[:, 1:-1].values
y_df = df.iloc[:, -1].values

In [54]:
sample = df.count()[0]
seq_len = 2160

In [29]:
40*30*6/3151440

0.002284669865204478

In [48]:
X_0_train = []
while len(X_0_train) < 500:
    n = random.randint(seq_len, sample*3//4)
    X_0_train.append(df.iloc[n-seq_len:n, 1:].values)

In [49]:
indices_with_ones = df[df['target'] == 1].index.tolist()
indices_with_ones_train = indices_with_ones[:len(indices_with_ones)*3//4]
X_1_train = []
while len(X_1_train) < 500:
    i = int(random.choice(indices_with_ones))
    corr = random.randint(0, seq_len//2)
    X_1_train.append(df.iloc[i+corr-seq_len:i+corr, 1:].values)

In [55]:
X_0_test = []
while len(X_0_test) < 200:
    n = random.randint(sample*3//4, sample)
    X_0_test.append(df.iloc[n-seq_len:n, 1:].values)

In [56]:
indices_with_ones_test = indices_with_ones[len(indices_with_ones)*3//4:]
X_1_test = []
while len(X_1_test) < 200:
    i = int(random.choice(indices_with_ones))
    corr = random.randint(0, seq_len//2)
    X_1_test.append(df.iloc[i+corr-seq_len:i+corr, 1:].values)

In [86]:
X_train = X_0_train + X_0_train
X_test = X_0_test + X_1_test
X_train = np.array(X_train)
X_test = np.array(X_test)

In [90]:
X = X_train[:,:,:-1]
y = X_train[:,:-10:10,-1:]
print(X.shape, y.shape)

(1000, 2160, 43) (1000, 215, 1)


In [95]:
model = get_model(input_shape = (2160, 43))

In [135]:
opt = Adam(lr=0.1, beta_1=0.9, beta_2=0.999, decay=0.01)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=["accuracy", f1_score])

In [97]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 2160, 43)          0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 215, 196)          168756    
_________________________________________________________________
batch_normalization_7 (Batch (None, 215, 196)          784       
_________________________________________________________________
activation_3 (Activation)    (None, 215, 196)          0         
_________________________________________________________________
dropout_9 (Dropout)          (None, 215, 196)          0         
_________________________________________________________________
gru_5 (GRU)                  (None, 215, 128)          124800    
_________________________________________________________________
dropout_10 (Dropout)         (None, 215, 128)          0         
__________

In [None]:
model.fit(X, y, batch_size = 10, epochs=1, validation_data = (X_val, y_val))

Train on 1000 samples, validate on 400 samples
Epoch 1/1
 170/1000 [====>.........................] - ETA: 46s - loss: 0.0243 - acc: 0.9985 - f1_score: 0.0000e+00

In [127]:
X_val = X_test[:,:,:-1]
y_val = X_test[:,:-10:10,-1:]
print(X_val.shape, y_val.shape)

(400, 2160, 43) (400, 215, 1)


In [130]:
y_pred = model.predict(X)
y_pred.shape

(1000, 215, 1)

In [121]:
from sklearn.metrics import f1_score
print(f1_score(y_val, y_pred))


0.0


  'precision', 'predicted', average, warn_for)


In [120]:
print(y_val, y_pred)

[0. 0. 0. ... 0. 0. 0.] [0. 0. 0. ... 0. 0. 0.]


In [132]:
sum(sum(y_pred))

array([6.951381e-12], dtype=float32)