In [438]:
import sys
import tqdm
import os
import itertools
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import RepeatVector
from keras.layers import TimeDistributed
from keras.layers import Dropout
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from sklearn.metrics import classification_report
from sklearn import preprocessing

In [None]:
from google.colab import files
files.upload()

In [None]:
df = pd.read_excel('./2016-2020.xlsx', dtype={'UWAGI': str, 'DATE': str})
df_copy = df.copy()

------------------------------------------------------------------

In [636]:
def _from_hour_to_part_of_day(hour, n):
  period = 24/n
  return int(hour//period)

def split_day_into_parts(df, n=6):
  df = df.copy()
  df['part_of_day'] = df.GODZ.apply(_from_hour_to_part_of_day, args=(n,))
  return df

def add_missing_rows(df, n=6):
  df = df.copy()
  dates = [date.strftime("%Y.%m.%d") for date in pd.date_range(start="2016-01-01",end="2020-12-31")]
  for date in tqdm(dates):
    for part in range(n):
      if not ((df['DATA'] == date)&(df['part_of_day'] == part)).any():
        new_row = {'DATA':date, 'part_of_day':part}
        df = df.append(new_row, ignore_index=True)
  df = df.fillna(0)
  return df.sort_values(by=['DATA', 'part_of_day']).reset_index(drop=True)

def get_X_Y(df, days, n, power_limit, used_columns):
  df = df.copy()
  number_of_days = int(df.shape[0]/n - days)
  X = []
  input_len = days*n
  for day in range(number_of_days):
    x = df[used_columns][day*n:day*n+input_len].values.flatten()
    X.append(x)
  df.loc[df.ENG_not_notmalized < 10000, 'ENG_not_notmalized'] = 0
  df.loc[df.ENG_not_notmalized > 10000, 'ENG_not_notmalized'] = 1  
  Y = df['ENG_not_notmalized'][input_len:].values.reshape(number_of_days,n)
  return np.array(X), np.array(Y)

def normalize_columns(df, columns):
  x = df[columns].values # returns a numpy array
  min_max_scaler = preprocessing.MinMaxScaler()
  x_scaled = min_max_scaler.fit_transform(x)
  df[columns] = pd.DataFrame(x_scaled)
  return df

In [642]:
n = 3 # na ile okrwsów dzielimy dzień
days = 14 # na podstawie ilu dni przewidujemy
power_limit = 5000 # od jakiego wybuchu, uważamy go za duży
used_columns = ['X', 'Y', 'Z', 'ENG', 'TYP_0',	'TYP_O',	'TYP_SL',	'TYP_T',	'TYP_W']
df = df_copy
df = pd.get_dummies(df, columns=['TYP'])
df['REJON_ODDZAL'] = df['REJON'] + df['ODDZIAL']
df = df[df['REJON_ODDZAL'] == 'RGG-1']
df = split_day_into_parts(df, n)
df = df.groupby(by=["DATA", "part_of_day"]).sum().reset_index()[['DATA', 'part_of_day'] + used_columns]
df = add_missing_rows(df, n)
df['ENG_not_notmalized'] = df['ENG']
# df = normalize_columns(df, used_columns)
X, Y = get_X_Y(df, days, n, power_limit, used_columns)
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, random_state=42, shuffle=True)

100%|██████████| 1827/1827 [00:06<00:00, 261.59it/s]


In [643]:
model = Sequential()
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(n, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adadelta', metrics=['mse'])
model.fit(X_train, y_train, epochs=10, verbose=2, validation_split=0.2)

Epoch 1/10
31/31 - 1s - loss: 2673395.7500 - mse: 34329.2500 - val_loss: 735164.7500 - val_mse: 0.3347
Epoch 2/10
31/31 - 0s - loss: -1.6923e+07 - mse: 34322.3750 - val_loss: 733082.5000 - val_mse: 0.3347
Epoch 3/10
31/31 - 0s - loss: -1.9680e+07 - mse: 34322.3828 - val_loss: 731164.1875 - val_mse: 0.3347
Epoch 4/10
31/31 - 0s - loss: -2.7591e+07 - mse: 34322.3672 - val_loss: 729446.9375 - val_mse: 0.3347
Epoch 5/10
31/31 - 0s - loss: -4.0117e+07 - mse: 34322.3711 - val_loss: 727664.8125 - val_mse: 0.3333
Epoch 6/10
31/31 - 0s - loss: -2.7033e+07 - mse: 34322.3828 - val_loss: 725977.0625 - val_mse: 0.3333
Epoch 7/10
31/31 - 0s - loss: -3.5585e+06 - mse: 34322.3828 - val_loss: 724239.7500 - val_mse: 0.3320
Epoch 8/10
31/31 - 0s - loss: -9.7755e+06 - mse: 34322.3750 - val_loss: 722535.9375 - val_mse: 0.3306
Epoch 9/10
31/31 - 0s - loss: -5.4923e+07 - mse: 34322.3672 - val_loss: 720959.0000 - val_mse: 0.3306
Epoch 10/10
31/31 - 0s - loss: -1.8631e+07 - mse: 34322.3633 - val_loss: 719380.4

<keras.callbacks.History at 0x7fbadd5e9050>

In [644]:
y_pred = model.predict(X_test)

results = pd.DataFrame({'actual1': y_test[:,0], 'predicted1': y_pred[:,0].flatten()})
print(results.groupby('actual1').describe())
print('-'*50)

y_pred_class = model.predict(X_test).round()
for i in range(n):
  print(classification_report(y_pred=y_pred_class[:,i], y_true=y_test[:,i]))
  print('-'*50)

        predicted1                                             
             count      mean       std  min  25%  50%  75%  max
actual1                                                        
0.0          527.0  0.041746  0.200197  0.0  0.0  0.0  0.0  1.0
1.0           72.0  0.041667  0.201229  0.0  0.0  0.0  0.0  1.0
--------------------------------------------------
              precision    recall  f1-score   support

         0.0       0.88      0.96      0.92       527
         1.0       0.12      0.04      0.06        72

    accuracy                           0.85       599
   macro avg       0.50      0.50      0.49       599
weighted avg       0.79      0.85      0.81       599

--------------------------------------------------
              precision    recall  f1-score   support

         0.0       0.94      0.18      0.30       555
         1.0       0.08      0.86      0.14        44

    accuracy                           0.23       599
   macro avg       0.51      0.52

In [1]:
10E3

10000.0