In [3]:
!pip install openpyxl


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [156]:
import numpy as np
import pandas as pd
import dask.dataframe as dd

import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import TensorDataset, DataLoader

PATH_TO_TEST_DATA = 'X_test.parquet'
PATH_TO_TEST_INTERVALS = 'test_intervals.xlsx'

In [2]:
class _SepConv1d(nn.Module):
    """A simple separable convolution implementation.
    
    The separable convlution is a method to reduce number of the parameters 
    in the deep learning network for slight decrease in predictions quality.
    """
    def __init__(self, ni, no, kernel, stride, pad):
        super().__init__()
        self.depthwise = nn.Conv1d(ni, ni, kernel, stride, padding=pad, groups=ni)
        self.pointwise = nn.Conv1d(ni, no, kernel_size=1)

    def forward(self, x):
        return self.pointwise(self.depthwise(x))
    
class SepConv1d(nn.Module):
    """Separable 1-d convolution with optional activation and dropout.

    The layers are stacked in this order: separable convolution, activation
    (when ``activ`` is truthy), dropout (when ``drop`` is not None).

    Args:
        ni: number of input channels.
        no: number of output channels.
        kernel: kernel size of the depthwise convolution.
        stride: stride of the depthwise convolution.
        pad: padding of the depthwise convolution.
        drop: dropout probability strictly between 0 and 1, or None to skip
            the dropout layer.
        activ: zero-argument factory returning the activation module; pass a
            falsy value to skip the activation.
    """

    def __init__(self, ni, no, kernel, stride, pad, drop=None,
                 activ=lambda: nn.ReLU(inplace=True)):
        super().__init__()
        assert drop is None or (0.0 < drop < 1.0)

        stack = [_SepConv1d(ni, no, kernel, stride, pad)]
        if activ:
            stack.extend([activ()])
        if drop is not None:
            stack.extend([nn.Dropout(drop)])

        # Attribute name `layers` is part of the checkpoint key layout.
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Run the input through the stacked layers."""
        return self.layers(x)

class Flatten(nn.Module):
    """Converts N-dimensional tensor into 'flat' one.

    Args:
        keep_batch_dim: when True (default) the first dimension is kept and
            all remaining dimensions are collapsed into one, giving a
            ``(batch, features)`` tensor, the same result as ``nn.Flatten()``.
            When False the whole tensor is collapsed into a 1-d tensor.
    """

    def __init__(self, keep_batch_dim=True):
        super().__init__()
        self.keep_batch_dim = keep_batch_dim

    def forward(self, x):
        """Return the flattened view/copy of ``x``."""
        # The previous implementation returned x.transpose(1, 0) in both
        # branches, which neither flattened anything nor honoured the flag.
        if self.keep_batch_dim:
            return torch.flatten(x, start_dim=1)
        return torch.flatten(x)

In [3]:
class Classifier(nn.Module):
    """Two-branch classifier over raw and FFT-transformed features.

    Each branch is a stack of separable convolutions followed by two dense
    layers and yields 64 features per sample. The two 64-feature vectors are
    concatenated (128 features) and passed to a dense head that returns ``no``
    raw scores per sample; no final activation is applied here.

    The first dense layer of every branch expects exactly 256 flattened
    features, so the convolution stack has to reduce the sequence length to 1.

    Args:
        raw_ni: number of input channels of the raw branch.
        fft_ni: number of input channels of the FFT branch.
        no: number of output scores.
        drop: dropout probability used inside both branches.
    """

    def __init__(self, raw_ni, fft_ni, no, drop=.5):
        super().__init__()

        # Attribute names and the position of every layer inside the
        # Sequential containers define the checkpoint keys; keep them stable.
        self.raw = self._feature_branch(raw_ni, drop)
        self.fft = self._feature_branch(fft_ni, drop)

        self.out1 = nn.Sequential(
            nn.Linear(128, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, no),
        )

    @staticmethod
    def _feature_branch(ni, drop):
        """Build one convolutional branch mapping ``ni`` channels to 64 features."""
        return nn.Sequential(
            SepConv1d(ni, 32, 7, 2, 3, drop=drop),
            SepConv1d(32, 64, 5, 4, 2, drop=drop),
            SepConv1d(64, 128, 5, 4, 2, drop=drop),
            SepConv1d(128, 256, 5, 4, 2),
            nn.Flatten(),
            nn.Dropout(drop),
            nn.Linear(256, 128),
            nn.PReLU(),
            nn.BatchNorm1d(128),
            nn.Dropout(drop),
            nn.Linear(128, 64),
            nn.PReLU(),
            nn.BatchNorm1d(64),
        )

    def forward(self, t_raw, t_fft):
        """Return the head's scores for a batch of raw and FFT inputs."""
        branch_features = [self.raw(t_raw), self.fft(t_fft)]
        # Concatenate along the feature axis: 64 + 64 -> 128.
        merged = torch.cat(branch_features, dim=1)
        return self.out1(merged)

In [4]:
def preprocess_data(series):
    """Interpolate and min-max scale the history of a single feature.

    Missing values are filled with ``Series.interpolate()`` on a copy that has
    a fresh positional index, then the values are scaled to the range [0, 1].

    NOTE(review): the scaler is fitted on the series passed in, so the scaling
    statistics come from this data itself.

    Args:
        series: pandas Series holding one feature.

    Returns:
        numpy array of shape ``(len(series), 1)`` with the scaled values.
    """
    # Rebuild the Series from raw values so interpolation ignores the index.
    filled = pd.Series(series.values).interpolate()

    # MinMaxScaler expects a 2-d array: one column, one row per observation.
    column = np.array(filled)
    column = column.reshape((len(column), 1))

    scaler = MinMaxScaler(feature_range=(0, 1)).fit(column)
    return scaler.transform(column)

def preprocess_dataset(dataset):
    """Apply ``preprocess_data`` to every column of ``dataset``.

    Args:
        dataset: pandas DataFrame with one feature per column.

    Returns:
        New DataFrame with the same column labels and the same index values,
        holding the interpolated and scaled version of each column.
    """
    column_names = list(dataset.columns)
    row_labels = list(dataset.index)

    # preprocess_data returns an (n, 1) array; take its single column.
    scaled_columns = {
        name: preprocess_data(dataset[name])[:, 0] for name in column_names
    }
    return pd.DataFrame(scaled_columns, index=row_labels, columns=column_names)

In [5]:
# Open the test parquet file through dask using the pyarrow engine.
data_test = dd.read_parquet(PATH_TO_TEST_DATA, engine="pyarrow")

# compute() turns the dask frame into an in-memory pandas DataFrame.
data_test = data_test.compute()
# NOTE(review): the message says "saved", but this cell only loads data.
print('data saved')

data saved


In [6]:
# Keep only the first 16 columns (matches raw_feat / fft_feat = 16 below).
# data_test is overwritten, so the remaining columns are no longer available.
data_test = data_test.iloc[:,:16]

In [7]:
# Interpolate and min-max scale every selected column of the test data.
X_test = preprocess_dataset(data_test)

In [8]:
# Back-fill the NaNs that are still present after preprocessing.
# DataFrame.bfill() replaces the deprecated fillna(method="bfill") and the
# reassignment avoids mutating the frame in place.
X_test = X_test.bfill()

In [9]:
X_test.head(5)

Unnamed: 0,ЭКСГАУСТЕР 4. ТОК РОТОРА 1,ЭКСГАУСТЕР 4. ТОК РОТОРА2,ЭКСГАУСТЕР 4. ТОК СТАТОРА,ЭКСГАУСТЕР 4. ДАВЛЕНИЕ МАСЛА В СИСТЕМЕ,ЭКСГАУСТЕР 4. ТЕМПЕРАТУРА ПОДШИПНИКА НА ОПОРЕ 1,ЭКСГАУСТЕР 4. ТЕМПЕРАТУРА ПОДШИПНИКА НА ОПОРЕ 2,ЭКСГАУСТЕР 4. ТЕМПЕРАТУРА ПОДШИПНИКА НА ОПОРЕ 3,ЭКСГАУСТЕР 4. ТЕМПЕРАТУРА ПОДШИПНИКА НА ОПОРЕ 4,ЭКСГАУСТЕР 4. ТЕМПЕРАТУРА МАСЛА В СИСТЕМЕ,ЭКСГАУСТЕР 4. ТЕМПЕРАТУРА МАСЛА В МАСЛОБЛОКЕ,ЭКСГАУСТЕР 4. ВИБРАЦИЯ НА ОПОРЕ 1,ЭКСГАУСТЕР 4. ВИБРАЦИЯ НА ОПОРЕ 2,ЭКСГАУСТЕР 4. ВИБРАЦИЯ НА ОПОРЕ 3,ЭКСГАУСТЕР 4. ВИБРАЦИЯ НА ОПОРЕ 3. ПРОДОЛЬНАЯ.,ЭКСГАУСТЕР 4. ВИБРАЦИЯ НА ОПОРЕ 4,ЭКСГАУСТЕР 4. ВИБРАЦИЯ НА ОПОРЕ 4. ПРОДОЛЬНАЯ.
2022-01-01 00:00:00,0.925756,0.925756,0.045268,0.46898,0.846354,0.9125,0.85661,0.859533,0.068821,0.8588,0.024667,0.012,0.0036,0.062667,0.783077,0.060333
2022-01-01 00:00:10,0.92613,0.92613,0.045357,0.46921,0.846354,0.90736,0.85661,0.864726,0.068821,0.8588,0.025333,0.0116,0.0036,0.062667,0.782615,0.046333
2022-01-01 00:00:20,0.926504,0.926504,0.044777,0.46944,0.845156,0.908645,0.85767,0.859533,0.068821,0.8588,0.024333,0.0115,0.0037,0.063,0.787769,0.041
2022-01-01 00:00:30,0.925756,0.925756,0.045357,0.46852,0.843957,0.90993,0.858731,0.864726,0.068821,0.8588,0.025333,0.0116,0.0035,0.069667,0.789692,0.049667
2022-01-01 00:00:40,0.925007,0.925007,0.045082,0.46898,0.842759,0.911215,0.859792,0.86213,0.068821,0.8588,0.028,0.0122,0.0036,0.059333,0.779846,0.043333


In [10]:
def absfft(x):
    """Return the magnitude of the discrete Fourier transform of ``x``."""
    spectrum = np.fft.fft(x)
    return np.absolute(spectrum)

# np.copy turns the DataFrame into an independent numpy array.
feat_fft_array = np.copy(X_test)
# axis=1: absfft is applied to each row, i.e. across the feature columns of a
# single timestamp, not along the time axis.
# NOTE(review): confirm this matches how the FFT branch was fed during training.
feat_fft_array = np.apply_along_axis(absfft, 1, feat_fft_array)

In [11]:
# Device used for the first inference chunk; later cells reassign `device`.
device = 'cuda:3'
# Input channels of the raw and FFT branches (16 feature columns were kept).
raw_feat = 16
fft_feat = 16
# Number of model outputs; equals the number of entries in name_labels.
num_classes = 23

In [12]:
# Build the network and restore the trained weights from the 'Ex4_M3' file.
model_load = Classifier(raw_feat, fft_feat, num_classes)

# map_location makes the checkpoint load onto `device` regardless of the
# device it was saved from.
model_load.load_state_dict(torch.load('Ex4_M3', map_location=device))
model_load = model_load.to(device)

# Switch Dropout and BatchNorm layers to inference behaviour; without this the
# predictions are randomised by dropout and depend on the batch statistics.
model_load.eval()

In [13]:
def prediction(model_out):
    """Turn raw model scores into binary labels.

    A sigmoid is applied to every score and the result is thresholded:
    probabilities strictly above 0.5 become 1, everything else 0.

    Args:
        model_out: tensor of raw scores.

    Returns:
        numpy int32 array with the same shape as ``model_out``.
    """
    probabilities = torch.sigmoid(model_out)
    is_positive = probabilities.sub(0.5).gt(0)
    labels = is_positive.to(torch.int32)
    return labels.cpu().numpy()

In [14]:
# Copy the scaled features and their FFT magnitudes into torch tensors.
X_tensor = torch.tensor(X_test.values)
feat_fft_tensor = torch.tensor(feat_fft_array)

In [15]:
# Take detached copies of both tensors.
# NOTE(review): the tensors were just created with torch.tensor, so this copy
# looks redundant.
X_tensor = X_tensor.clone().detach()
feat_fft_tensor = feat_fft_tensor.clone().detach()

# Cast to float32.
X_tensor = X_tensor.float()
feat_fft_tensor = feat_fft_tensor.float()

# Move the complete inputs to the currently selected device.
X_tensor = X_tensor.to(device)
feat_fft_tensor = feat_fft_tensor.to(device)

In [16]:
X_tensor.shape

torch.Size([4008961, 16])

In [17]:
# Chunk 1: rows [0, 1008961). The trailing None adds a length-1 axis so each
# row becomes a (channels, 1) sequence for the Conv1d layers.
X_tensor1 = X_tensor[:1008961,:,None]
feat_fft_tensor1 = feat_fft_tensor[:1008961,:,None]

# Inference only: disable autograd so no graph is kept for this large batch.
with torch.no_grad():
    out = model_load(X_tensor1, feat_fft_tensor1)

answer1 = prediction(out)

In [21]:
# Move the model and both full input tensors to GPU 2 before the next chunk.
# NOTE(review): presumably done to get around memory still held on the
# previous device - confirm.
device = 'cuda:2'
model_load = model_load.to(device)
X_tensor = X_tensor.to(device)
feat_fft_tensor = feat_fft_tensor.to(device)

In [22]:
# Chunk 2: rows [1008961, 2008961), with a trailing length-1 sequence axis.
X_tensor2 = X_tensor[1008961:2008961,:,None]
feat_fft_tensor2 = feat_fft_tensor[1008961:2008961,:,None]

# Inference only: disable autograd so no graph is kept for this large batch.
with torch.no_grad():
    out = model_load(X_tensor2, feat_fft_tensor2)

answer2 = prediction(out)

In [25]:
# Move the model and both full input tensors to GPU 1 before the next chunk.
# NOTE(review): presumably done to get around memory still held on the
# previous device - confirm.
device = 'cuda:1'
model_load = model_load.to(device)
X_tensor = X_tensor.to(device)
feat_fft_tensor = feat_fft_tensor.to(device)

In [26]:
# Chunk 3: rows [2008961, 3008961), with a trailing length-1 sequence axis.
X_tensor3 = X_tensor[2008961:3008961,:,None]
feat_fft_tensor3 = feat_fft_tensor[2008961:3008961,:,None]

# Inference only: disable autograd so no graph is kept for this large batch.
with torch.no_grad():
    out = model_load(X_tensor3, feat_fft_tensor3)

answer3 = prediction(out)

In [27]:
# Move the model and both full input tensors to GPU 0 before the last chunk.
# NOTE(review): presumably done to get around memory still held on the
# previous device - confirm.
device = 'cuda:0'
model_load = model_load.to(device)
X_tensor = X_tensor.to(device)
feat_fft_tensor = feat_fft_tensor.to(device)

In [28]:
# Chunk 4: rows from 3008961 to the end, with a trailing length-1 sequence axis.
X_tensor4 = X_tensor[3008961:,:,None]
feat_fft_tensor4 = feat_fft_tensor[3008961:,:,None]

# Inference only: disable autograd so no graph is kept for this large batch.
with torch.no_grad():
    out = model_load(X_tensor4, feat_fft_tensor4)

answer4 = prediction(out)

In [32]:
# Stack the four chunk predictions back together in row order.
answer = np.concatenate((answer1, answer2, answer3, answer4))

In [35]:
# Second name for the same array; this is the name used when saving below.
predict_M3 = answer

In [37]:
# Persist the predictions so they can be reloaded without re-running inference.
with open('test.npy', 'wb') as f:
    np.save(f, predict_M3)

In [2]:
# Reload the saved predictions into `tt`.
# NOTE(review): the execution counter restarts here, so this cell was run in a
# fresh kernel; later cells also use data_test, which must be rebuilt first.
with open('test.npy', 'rb') as f:
    tt = np.load(f)

In [3]:
tt

array([[1, 0, 1, ..., 0, 0, 0],
       [1, 0, 1, ..., 0, 0, 0],
       [1, 0, 1, ..., 0, 0, 0],
       ...,
       [1, 0, 1, ..., 0, 0, 0],
       [1, 0, 1, ..., 0, 0, 0],
       [1, 0, 1, ..., 0, 0, 0]], dtype=int32)

In [157]:
# Table of test intervals; the 'start' and 'finish' columns are used below.
table = pd.read_excel(PATH_TO_TEST_INTERVALS)

# NOTE(review): these two column Series are overwritten with per-row scalars
# inside the interval loop and are not otherwise used in the visible cells.
start_interval_time = table['start']
finish_interval_time = table['finish']

In [105]:
# For every interval collect the timestamps that fall inside it and the
# corresponding rows of the prediction array.
list_index = []
target = []

# Look the index up once; the boundaries are found by counting timestamps on
# the index itself instead of copying a filtered frame for every lookup.
timestamps = data_test.index

# total= lets the progress bar show real progress for the row generator.
for index, row in tqdm(table.iterrows(), total=len(table)):
    start_interval_time = row['start']
    finish_interval_time = row['finish']

    # Counts of timestamps <= boundary are used as positional offsets, so the
    # selected rows satisfy start < timestamp <= finish.
    # NOTE(review): this assumes data_test.index is sorted in ascending order.
    idx_start = int((timestamps <= start_interval_time).sum())
    idx_end = int((timestamps <= finish_interval_time).sum())

    interval = timestamps[idx_start:idx_end]
    target_value = tt[idx_start:idx_end,:]

    list_index.append(interval)
    target.append(target_value)

0it [00:00, ?it/s]

In [109]:
list_index[:2]

[DatetimeIndex(['2022-01-01 10:55:00', '2022-01-01 10:55:10',
                '2022-01-01 10:55:20', '2022-01-01 10:55:30',
                '2022-01-01 10:55:40', '2022-01-01 10:55:50',
                '2022-01-01 10:56:00', '2022-01-01 10:56:10',
                '2022-01-01 10:56:20', '2022-01-01 10:56:30',
                ...
                '2022-01-01 16:42:00', '2022-01-01 16:42:10',
                '2022-01-01 16:42:20', '2022-01-01 16:42:30',
                '2022-01-01 16:42:40', '2022-01-01 16:42:50',
                '2022-01-01 16:43:00', '2022-01-01 16:43:10',
                '2022-01-01 16:43:20', '2022-01-01 16:43:30'],
               dtype='datetime64[ns]', name='DT', length=2092, freq=None),
 DatetimeIndex(['2022-01-02 12:26:50', '2022-01-02 12:27:00',
                '2022-01-02 12:27:10', '2022-01-02 12:27:20',
                '2022-01-02 12:27:30', '2022-01-02 12:27:40',
                '2022-01-02 12:27:50', '2022-01-02 12:28:00',
                '2022-01-02 12:28:10

In [111]:
len(list_index)

189

In [112]:
len(target)

189

In [110]:
target[:2]

[array([[1, 0, 1, ..., 0, 0, 0],
        [1, 0, 1, ..., 0, 0, 0],
        [1, 0, 1, ..., 0, 0, 0],
        ...,
        [1, 0, 1, ..., 0, 0, 0],
        [1, 0, 1, ..., 0, 0, 0],
        [1, 0, 1, ..., 0, 0, 0]], dtype=int32),
 array([[1, 0, 1, ..., 0, 0, 0],
        [1, 0, 1, ..., 0, 0, 0],
        [1, 0, 1, ..., 0, 0, 0],
        ...,
        [1, 0, 1, ..., 0, 0, 0],
        [1, 0, 1, ..., 0, 0, 0],
        [1, 0, 1, ..., 0, 0, 0]], dtype=int32)]

In [90]:
np.concatenate((target[0],target[1]))

array([[1, 0, 1, ..., 0, 0, 0],
       [1, 0, 1, ..., 0, 0, 0],
       [1, 0, 1, ..., 0, 0, 0],
       ...,
       [1, 0, 1, ..., 0, 0, 0],
       [1, 0, 1, ..., 0, 0, 0],
       [1, 0, 1, ..., 0, 0, 0]], dtype=int32)

In [79]:
# Shallow copy of `target` into a new list (same arrays, new list object).
# NOTE(review): A is only displayed afterwards; it looks like leftover scratch.
A = []
for i in target:
    A.append(i)

In [82]:
A

[array([[1, 0, 1, ..., 0, 0, 0],
        [1, 0, 1, ..., 0, 0, 0],
        [1, 0, 1, ..., 0, 0, 0],
        ...,
        [1, 0, 1, ..., 0, 0, 0],
        [1, 0, 1, ..., 0, 0, 0],
        [1, 0, 1, ..., 0, 0, 0]], dtype=int32),
 array([[1, 0, 1, ..., 0, 0, 0],
        [1, 0, 1, ..., 0, 0, 0],
        [1, 0, 1, ..., 0, 0, 0],
        ...,
        [1, 0, 1, ..., 0, 0, 0],
        [1, 0, 1, ..., 0, 0, 0],
        [1, 0, 1, ..., 0, 0, 0]], dtype=int32),
 array([[1, 0, 1, ..., 0, 0, 0],
        [1, 0, 1, ..., 0, 0, 0],
        [1, 0, 1, ..., 0, 0, 0],
        ...,
        [1, 0, 1, ..., 0, 0, 0],
        [1, 0, 1, ..., 0, 0, 0],
        [1, 0, 1, ..., 0, 0, 0]], dtype=int32),
 array([[1, 0, 1, ..., 0, 0, 0],
        [1, 0, 1, ..., 0, 0, 0],
        [1, 0, 1, ..., 0, 0, 0],
        ...,
        [1, 0, 1, ..., 0, 0, 0],
        [1, 0, 1, ..., 0, 0, 0],
        [1, 0, 1, ..., 0, 0, 0]], dtype=int32),
 array([[1, 0, 1, ..., 0, 0, 0],
        [1, 0, 1, ..., 0, 0, 0],
        [1, 0, 1, ..., 0, 0, 0

In [31]:
list_index[0]

DatetimeIndex(['2022-01-01 10:55:00', '2022-01-01 10:55:10',
               '2022-01-01 10:55:20', '2022-01-01 10:55:30',
               '2022-01-01 10:55:40', '2022-01-01 10:55:50',
               '2022-01-01 10:56:00', '2022-01-01 10:56:10',
               '2022-01-01 10:56:20', '2022-01-01 10:56:30',
               ...
               '2022-01-01 16:42:00', '2022-01-01 16:42:10',
               '2022-01-01 16:42:20', '2022-01-01 16:42:30',
               '2022-01-01 16:42:40', '2022-01-01 16:42:50',
               '2022-01-01 16:43:00', '2022-01-01 16:43:10',
               '2022-01-01 16:43:20', '2022-01-01 16:43:30'],
              dtype='datetime64[ns]', name='DT', length=2092, freq=None)

In [32]:
# Column names of the submission, one per model output (23 entries).
# The order matters: entry k names column k of the prediction array.
name_labels = ['Y_ЭКСГАУСТЕР А/М №4_ЭЛЕКТРОДВИГАТЕЛЬ ДСПУ-140-84-4 ЭКСГ. №4',
 'Y_ЭКСГАУСТЕР А/М №4_ПОДШИПНИК ОПОРНЫЙ ЭКСГ. №4',
 'Y_ЭКСГАУСТЕР А/М №4_УЛИТА ЭКСГ. №4',
 'Y_ЭКСГАУСТЕР А/М №4_РЕДУКТОР ГАЗ. ЗАДВИЖКИ ЭКСГ. №4',
 'Y_ЭКСГАУСТЕР А/М №4_МАСЛОНАСОС РАБОЧИЙ ЭКСГ. №4',
 'Y_ЭКСГАУСТЕР А/М №4_ЭЛЕКТРОАППАРАТУРА ЭКСГ. №4',
 'Y_ЭКСГАУСТЕР А/М №4_ПОДШИПНИК ОПОРНЫЙ №2 ЭКСГ. №4',
 'Y_ЭКСГАУСТЕР А/М №4_ЗАДВИЖКА ЭКСГ. №4',
 'Y_ЭКСГАУСТЕР А/М №4_ГСМ ЭКСГ. №4',
 'Y_ЭКСГАУСТЕР А/М №4_ВК 310С ВИБРОПРЕОБРАЗОВАТЕЛЬ ЭКСГ.№4 Т.1',
 'Y_ЭКСГАУСТЕР А/М №4_ТСМТ-101-010-50М-400 ТЕРМОПР.ПОДШ.Т.1',
 'Y_ЭКСГАУСТЕР А/М №4_РОТОР ЭКСГ. №4',
 'Y_ЭКСГАУСТЕР А/М №4_МАСЛООХЛАДИТЕЛЬ  М-05-1 ЭКСГ. №4',
 'Y_ЭКСГАУСТЕР А/М №4_ПОДШИПНИК ОПОРНО-УПОРНЫЙ ЭКСГ. №4',
 'Y_ЭКСГАУСТЕР А/М №4_ЭКСГАУСТЕР А/М №4',
 'Y_ЭКСГАУСТЕР А/М №4_ЗАП. И РЕГ. АРМАТУРА ЭКСГ.№4',
 'Y_ЭКСГАУСТЕР А/М №4_ПОДШИПНИК ОПОРНЫЙ №1 ЭКСГ. №4',
 'Y_ЭКСГАУСТЕР А/М №4_ЭЛ/ДВИГАТЕЛЬ ГАЗ. ЗАДВИЖКИ ЭКСГ. №4',
 'Y_ЭКСГАУСТЕР А/М №4_КЛ1 ТР№4 ДО ЭД ЭКСГАУСТЕРА №4',
 'Y_ЭКСГАУСТЕР А/М №4_КЛ2 ТР№4 ДО ЭД ЭКСГАУСТЕРА №4',
 'Y_ЭКСГАУСТЕР А/М №4_ТР-Р ТМ-4000-10/6 ЭКСГ. №4',
 'Y_ЭКСГАУСТЕР А/М №4_ТИРИСТ. ВОЗБУДИТЕЛЬ ВТ-РЭМ-400 ЭКСГ4 ВУ1',
 'Y_ЭКСГАУСТЕР А/М №4_МАСЛОПРОВОДЫ ЭКСГ №4'
]

In [117]:
# Number of collected intervals (one entry per row of the intervals table).
int_counts = len(list_index)
int_counts

189

In [139]:
# One frame per interval: predictions as data, interval timestamps as index.
interval_frames = [
    pd.DataFrame(data=target[i], index=list_index[i], columns=name_labels)
    for i in range(int_counts)
]

# Concatenate once instead of growing the frame inside a loop, which re-copied
# all accumulated rows on every iteration. The empty seed frame stays first so
# the result is assembled from the same pieces as before and an empty interval
# list still gives an empty frame with the right columns.
df_columns = pd.concat([pd.DataFrame(columns=name_labels), *interval_frames])

# The submission encodes a positive prediction as 2 instead of 1.
df_columns = df_columns.replace(1, 2)

In [146]:
# Write the submission; the timestamp index is written as the first CSV column.
file_name = 'submit_M3.csv'
df_columns.to_csv(file_name)

In [158]:
table

Unnamed: 0.1,Unnamed: 0,start,finish,machine,tm
0,97,2022-01-01 10:54:53,2022-01-01 16:43:39,,
1,118,2022-01-02 12:26:42,2022-01-02 15:16:06,,
2,126,2022-01-04 12:06:07,2022-01-04 15:04:34,,
3,163,2022-01-04 20:13:57,2022-01-05 00:19:40,,
4,0,2022-01-07 09:05:16,2022-01-07 14:05:15,,
...,...,...,...,...,...
184,66,2023-04-07 15:36:42,2023-04-07 19:27:38,,
185,138,2023-04-09 19:55:43,2023-04-09 23:12:53,,
186,67,2023-04-10 18:08:25,2023-04-10 21:47:02,,
187,68,2023-04-18 02:28:43,2023-04-18 06:38:47,,
