In [1]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from pandas import DataFrame
from matplotlib import pyplot
from matplotlib.pyplot import figure
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
from sklearn.model_selection import train_test_split
import statsmodels.api as sm
from warnings import simplefilter
from statsmodels.tools.sm_exceptions import ConvergenceWarning
from statsmodels.tools.sm_exceptions import HessianInversionWarning
from statsmodels.tools.sm_exceptions import ValueWarning
from detecta import detect_cusum
import datetime
import pytz
import sklearn.metrics as skm
import copy
import seaborn as sns

In [2]:
def plot(vals, detected_anomalies=None, continuous_anomalies=None, margins=None, title='', save=False, img_name=None, show=True, sec_plots=None):
	"""Draw a series with its anomalies and optionally save and/or show the figure.

	Parameters:
		vals: indexable sequence of values drawn as the main line.
		detected_anomalies: indices into ``vals`` marked with a red "x".
		continuous_anomalies: iterable of ``(start, end)`` index pairs shaded in
			red. ``end`` is treated as inclusive, which is the convention used
			by ``sep_anomalies`` in this file.
		margins: optional pair ``(upper, lower)`` of equally long sequences;
			the area between them is shaded in light blue.
		title: figure title; also the output file name when ``img_name`` is None.
		save: when true, the figure is written with ``fig.savefig``.
		img_name: output path used when saving (falls back to ``title``).
		show: when true the figure is shown, otherwise it is closed.
		sec_plots: extra series drawn as lines on the same axes.

	Returns:
		None.
	"""
	# None replaces the former mutable list defaults; omitting the arguments
	# behaves exactly as before.
	detected_anomalies = [] if detected_anomalies is None else detected_anomalies
	continuous_anomalies = [] if continuous_anomalies is None else continuous_anomalies
	sec_plots = [] if sec_plots is None else sec_plots

	fig, ax = plt.subplots(figsize=(20, 4))
	ax.set_title(title)
	# Draw on the explicit axes instead of relying on pyplot's "current axes".
	sns.lineplot(data=vals, ax=ax)
	for subplot in sec_plots:
		sns.lineplot(data=subplot, ax=ax)
	for i in detected_anomalies:
		ax.scatter(i, vals[i], s=800, marker="x", color="r", alpha=0.6)

	# Half-height of the red band: the full range of the series. It is computed
	# lazily so a call without continuous anomalies never needs max()/min().
	anom_dev = None
	for span in continuous_anomalies:
		if anom_dev is None:
			anom_dev = max(vals) - min(vals)
		# Include the last anomalous point of the run (the end index is
		# inclusive); clamp to len(vals) so a caller passing an exclusive end
		# equal to len(vals) still works.
		xs = range(span[0], min(span[1] + 1, len(vals)))
		anom_top_margins = [vals[point] + anom_dev for point in xs]
		anom_bottom_margins = [vals[point] - anom_dev for point in xs]
		ax.fill_between(xs, anom_top_margins, anom_bottom_margins, alpha=0.7, color="r", interpolate=True)

	if margins:
		ax.fill_between(range(len(margins[0])), margins[0], margins[1], alpha=0.1, color="b")
	fig.tight_layout()
	if save:
		fig.savefig(img_name if img_name is not None else title, bbox_inches='tight')
	if show:
		plt.show()
	else:
		# Release the figure so figures created in a loop do not accumulate.
		plt.close(fig)
	
def sep_anomalies(anomalies):
	"""Split anomaly indices into isolated points and runs of consecutive indices.

	Parameters:
		anomalies: indexable sequence of integer positions. An element belongs
			to a run when a neighbouring element in the sequence differs from
			it by exactly one.

	Returns:
		A tuple ``(isolated, continuous)`` where ``isolated`` lists the indices
		with no consecutive neighbour and ``continuous`` lists ``(start, end)``
		pairs holding the first and last index of each run.
	"""
	singles = []
	runs = []
	run_start = -1
	last = len(anomalies) - 1
	for idx, current in enumerate(anomalies):
		# Is the previous / next element the directly adjacent index?
		follows_prev = idx > 0 and anomalies[idx - 1] == current - 1
		precedes_next = idx < last and anomalies[idx + 1] == current + 1
		if precedes_next:
			# The next one is an anomaly too: this opens a run unless we are
			# already inside one.
			if not follows_prev:
				run_start = current
			continue
		if follows_prev:
			# Previous is an anomaly and the next is not (or the list ends):
			# the run closes here.
			runs.append((run_start, current))
			run_start = -1
		else:
			# Neither neighbour is an anomaly: isolated point.
			singles.append(current)
	return singles, runs

def seq_data(data, window_size=96):
	"""Build sliding windows over ``data`` and the value that follows each one.

	The number of windows is ``len(data) - (len(data) % window_size) - window_size``
	(no windows when that is not positive), so the samples left over after the
	last whole multiple of ``window_size`` are never used as targets.

	Parameters:
		data: sliceable sequence of samples.
		window_size: number of samples per window (default 96).

	Returns:
		A tuple ``(X, Y)`` of numpy arrays where ``X[i]`` is
		``data[i:i + window_size]`` and ``Y[i]`` is ``data[i + window_size]``.
	"""
	n_windows = len(data) - (len(data) % window_size) - window_size
	# i + window_size is at most len(data) - (len(data) % window_size) - 1, so
	# the target lookup is always in range. The former bare ``except`` only hid
	# unrelated errors and would have left X and Y misaligned, so it is gone.
	data_X = [data[i:i + window_size] for i in range(n_windows)]
	data_Y = [data[i + window_size] for i in range(n_windows)]
	return np.array(data_X), np.array(data_Y)

In [3]:
# Silence these warning categories for the rest of the session.
for _category in (ConvergenceWarning, HessianInversionWarning, RuntimeWarning, ValueWarning):
	simplefilter("ignore", category=_category)

In [4]:
# Station identifiers, read from the "stations" column of the station list file.
stations = pd.read_csv('../Dados/stations_list.csv', sep=';')['stations'].tolist()

In [5]:
# Sliding-window length in samples (original note: "1 day").
window_size = 96
# Multiplier that keeps the deviation from getting too small.
# In CUSUM the recommended limits are 4 deviations; with a slack of half a
# deviation this comes out to roughly that.
s = 3.2
# Lower and upper bounds for the scaled deviation.
# NOTE(review): with a lower bound of 0 a constant window gives a tolerance of
# exactly 0, so any numerical noise in the forecast is flagged - confirm this
# is intended.
min_std, max_std = 0, 15
# Initial value in case there is an anomaly from the very start.
std = min_std

In [6]:
for station in stations:
	# Load the pressure ("pressao") export for this station.
	df = pd.read_csv('../Dados/features/station_{}/export_automaticas_{}_pressao.csv'.format(station, station),sep=';')

	# n_anom and values are filled below but not read anywhere in this cell.
	n_anom = 0
	values = []
	positions = []

	data = df.pressao.to_list()

	# Data split into windows: reshaped_data[i] is data[i:i+window_size] and
	# targets[i] is the sample that immediately follows that window.
	reshaped_data, targets = seq_data(data, window_size=window_size)

	error_margin1 = []
	error_margin2 = []
	prec = []
	# Tolerance used at each step, stored directly. Rebuilding it later as
	# prediction - (prediction - std) would carry floating-point round-off.
	std_values = []

	for (i,x) in enumerate(reshaped_data):
		# Tolerance: window standard deviation scaled by s, clamped to
		# [min_std, max_std].
		std = max(min(np.std(x) * s, max_std), min_std)
		# Fit an ARIMA(1,0,0) model on the window and take its first forecast value.
		arima_model = ARIMA(x, order=(1,0,0)).fit()
		prediction = arima_model.forecast()[0]
		error_margin1.append(prediction-std)
		error_margin2.append(prediction+std)
		prec.append(prediction)
		std_values.append(float(std))
		# Progress report every 10000 windows.
		if(i%10000 == 0):
			e = datetime.datetime.now()
			print("Currently at position:", i, "Time:%s:%s:%s" % (e.hour, e.minute, e.second))
			print("std:",std)
		# Anomaly detection: the observed value is further from the forecast
		# than the tolerance allows.
		if(abs(prediction - targets[i]) > std):
			print('anomalia', prediction, "|", targets[i], "|", i, "|", std)
			n_anom = n_anom + 1
			values.append(targets[i])
			positions.append(i)

	# Observed value matching each prediction (the sample right after window i).
	bob_series = [data[i + window_size] for i in range(len(prec))]
	bob_predicted_series = prec
	bob_is_anom = positions
	bob_standart_d = std_values

	isolated, continuous = sep_anomalies(bob_is_anom)

	plot(bob_series, detected_anomalies=isolated, continuous_anomalies=continuous, sec_plots=[bob_predicted_series], margins=(np.array(bob_series) + np.array(bob_standart_d), np.array(bob_series) - np.array(bob_standart_d)), save=True, img_name='./results/images/{}_pressao_result'.format(station), show=False)

	# Set membership keeps building the 0/1 flag column linear in the number
	# of predictions instead of scanning the positions list for every row.
	anomaly_positions = set(positions)
	result = pd.DataFrame({
		'real_value': bob_series,
		'predictions': prec,
		'anomaly': [1 if i in anomaly_positions else 0 for i in range(len(prec))],
		'std': std_values,
		'ceil_margin': error_margin2,
		'floor_margin': error_margin1,
	})
	result.to_csv('./results/data/{}_pressao_result.csv'.format(station), sep=';',index=False)

Currently at position: 0 Time:10:39:31
std: 3.9042711654460343


  warn('Non-stationary starting autoregressive parameters'


Currently at position: 10000 Time:10:46:19
std: 3.3914926245271744
Currently at position: 20000 Time:10:53:11
std: 3.1657893521556706
anomalia 954.9494395185058 | 951.0 | 22303 | 2.543619468395381
anomalia 951.380610225469 | 956.0 | 22304 | 2.678100487700606
Currently at position: 30000 Time:11:0:30
std: 3.7088482788536328
anomalia 950.8733060120554 | 933.0 | 34417 | 3.6799758453313545
anomalia 939.1244571958504 | 952.0 | 34418 | 6.2960304954788775
Currently at position: 40000 Time:11:7:20
std: 3.80861596325431
Currently at position: 50000 Time:11:14:15
std: 3.0795021242618645
anomalia 946.0223283862485 | 956.0 | 50696 | 4.79583152331272
Currently at position: 60000 Time:11:20:56
std: 3.5752233807445117
Currently at position: 70000 Time:11:27:58
std: 2.901149197588202
Currently at position: 80000 Time:11:34:22
std: 5.968714732298366
anomalia 945.1061745902992 | 950.0 | 80660 | 3.956288945744764
anomalia 950.0794537926328 | 954.0 | 84281 | 3.115374063504342
anomalia 946.2025758978318 | 

  warn('Non-stationary starting autoregressive parameters'


Currently at position: 10000 Time:12:43:16
std: 4.039251855920296
Currently at position: 20000 Time:12:50:27
std: 3.9731040202284604
Currently at position: 30000 Time:12:57:59
std: 4.7772609539591
anomalia 963.9487508233797 | 948.0 | 34954 | 4.3772137256478585
anomalia 951.7958460868703 | 964.0 | 34955 | 6.056034639560408
Currently at position: 40000 Time:13:5:40
std: 5.203417680290095
Currently at position: 50000 Time:13:13:27
std: 3.3424874303760994
Currently at position: 60000 Time:13:26:27
std: 2.8023799409311607
Currently at position: 70000 Time:13:37:52
std: 5.799616845581745
anomalia 959.9708077931537 | 738.4 | 73422 | 3.6508750853581513
anomalia 963.3312709274257 | 506.6 | 73423 | 15
anomalia 528.3781039363648 | 879.0 | 73427 | 15
anomalia 887.2834901297792 | 959.0 | 73428 | 15
anomalia 961.0844699194325 | 938.0 | 74144 | 2.4212026396446507
anomalia 940.0424519243968 | 879.0 | 74145 | 8.26559132790873
anomalia 880.2656337564991 | 960.0 | 74146 | 15
anomalia 960.8545182821368 | 

  warn('Non-stationary starting autoregressive parameters'


Currently at position: 10000 Time:15:44:0
std: 4.822055808700498
anomalia 968.9738113625784 | 960.0 | 19299 | 3.656349120213891
anomalia 960.9405452698105 | 969.0 | 19300 | 4.543004390146337
Currently at position: 20000 Time:15:50:16
std: 3.547142825179474
Currently at position: 30000 Time:15:56:44
std: 5.580521680113986
anomalia 964.0079966742325 | 947.0 | 31378 | 5.045129664670011
anomalia 948.8847296717372 | 964.0 | 31379 | 7.207095269400996
Currently at position: 40000 Time:16:3:8
std: 5.2627622658321425
anomalia 964.9878542931517 | 961.0 | 42984 | 3.959657673194602
Currently at position: 50000 Time:16:9:27
std: 3.383620677453205
Currently at position: 60000 Time:16:15:46
std: 5.170751073748056
Currently at position: 70000 Time:16:22:19
std: 4.217292443684165
Currently at position: 80000 Time:16:28:46
std: 3.8157568056677826
anomalia 967.9191063261387 | 958.0 | 88731 | 3.971006029822781
anomalia 959.4975662595554 | 968.0 | 88732 | 4.629854809338577
Currently at position: 90000 Time

  warn('Non-stationary starting autoregressive parameters'


anomalia 955.4144795412728 | 954.0 | 3455 | 1.4138206706965668
Currently at position: 10000 Time:17:46:46
std: 3.5388321990924254
Currently at position: 20000 Time:17:53:3
std: 10.146865963877167
Currently at position: 30000 Time:17:59:32
std: 3.9697467033664617
Currently at position: 40000 Time:18:5:48
std: 3.5476126557948167
Currently at position: 50000 Time:18:14:30
std: 3.1324466830613766
anomalia 960.0348013355983 | 955.0 | 58557 | 4.446221866808818
Currently at position: 60000 Time:18:25:38
std: 4.291464396527911
anomalia 954.0360216183217 | 533.6 | 63411 | 4.974602161647368
anomalia 702.5453435730295 | 525.9 | 63412 | 15
anomalia 551.8818994586222 | 526.6 | 63413 | 15
anomalia 546.1656074860696 | 526.8 | 63414 | 15
anomalia 543.1415319585917 | 526.9 | 63415 | 15
anomalia 531.3822222392884 | 957.0 | 63901 | 8.233329959513476
anomalia 695.6204078973776 | 957.0 | 63902 | 15
anomalia 926.5939422048169 | 957.0 | 63903 | 15
anomalia 935.1477290830287 | 957.0 | 63904 | 15
anomalia 939.

  warn('Non-stationary starting autoregressive parameters'


anomalia 946.0560383880998 | 897.0 | 1971 | 3.898575238326033
anomalia 906.0863381823775 | 946.0 | 1972 | 15
anomalia 947.9744871069472 | 892.0 | 2163 | 4.864383025854586
anomalia 909.9753279263634 | 892.0 | 2164 | 15
anomalia 897.9870511929624 | 952.0 | 9835 | 4.422166387140534
Currently at position: 10000 Time:20:46:28
std: 3.8051135185285716
anomalia 950.1049996152133 | 947.0 | 15206 | 2.0952326839756963
anomalia 954.967824395954 | 947.0 | 18320 | 3.0521395046025592
anomalia 948.2255328882788 | 955.0 | 18321 | 3.6586275508113086
Currently at position: 20000 Time:20:53:23
std: 3.1480152477394387
anomalia 953.9822443392588 | 905.0 | 25507 | 3.1552425509864612
anomalia 937.1681809723149 | 955.0 | 25508 | 15
anomalia 950.9948383395014 | 877.0 | 26253 | 5.013315602805527
anomalia 896.7433037959277 | 951.0 | 26254 | 15
anomalia 950.0176531513083 | 3.3 | 28331 | 4.052982440952177
anomalia 841.9270451425414 | 2.4 | 28332 | 15
anomalia 70.57199595330894 | 2.3 | 28333 | 15
anomalia 51.0634845

  warn('Non-stationary starting autoregressive parameters'


Currently at position: 10000 Time:23:19:0
std: 4.100406483915141
Currently at position: 20000 Time:23:31:40
std: 3.8848709402272585
Currently at position: 30000 Time:23:42:53
std: 5.235349929936765
Currently at position: 40000 Time:23:56:45
std: 4.816522489191646
anomalia 954.9727737405539 | 956.0 | 45277 | 0.9977753031397176
anomalia 955.7788381795581 | 957.0 | 45283 | 1.0241527663824812
Currently at position: 50000 Time:0:13:3
std: 3.842308333974947
Currently at position: 60000 Time:0:24:38
std: 2.592081960295409
Currently at position: 70000 Time:0:35:56
std: 5.48290271865389
Currently at position: 80000 Time:0:47:17
std: 5.157087894883658
Currently at position: 90000 Time:0:58:39
std: 3.414999593297519
Currently at position: 100000 Time:1:11:35
std: 4.1649329726499404
Currently at position: 110000 Time:1:22:56
std: 8.223475880402129
anomalia 956.1038677800378 | 953.0 | 111611 | 3.0169889330626027
anomalia 951.0387582955949 | 956.0 | 113351 | 4.289392601393453
anomalia 956.8661827505

  warn('Non-stationary starting autoregressive parameters'


Currently at position: 10000 Time:2:15:13
std: 4.429697757434723
Currently at position: 20000 Time:2:27:33
std: 4.039251855920296
Currently at position: 30000 Time:2:39:8
std: 5.235668268924439
Currently at position: 40000 Time:2:56:52
std: 5.471136384091822
anomalia 970.9896053350035 | 968.0 | 45273 | 2.5066578014027634
anomalia 968.2825774330802 | 971.0 | 45274 | 2.6681246014549043
anomalia 975.3113338394618 | 973.0 | 49581 | 1.8366636418607882
Currently at position: 50000 Time:3:11:18
std: 3.7232005347848536
Currently at position: 60000 Time:3:18:6
std: 3.0455796748001123
Currently at position: 70000 Time:3:24:30
std: 5.329060788627663
anomalia 965.9792353539791 | 960.0 | 79696 | 5.221749387577564
anomalia 960.2396466623502 | 966.0 | 79697 | 5.493834928556029
Currently at position: 80000 Time:3:30:48
std: 4.949073987457991
Currently at position: 90000 Time:3:37:18
std: 3.704501765869917
anomalia 972.9627663231156 | 970.0 | 93407 | 2.932575659723036
anomalia 970.1533829332366 | 975.0

  warn('Non-stationary starting autoregressive parameters'


Currently at position: 10000 Time:4:42:19
std: 3.394112549695428
Currently at position: 20000 Time:4:48:29
std: 3.0919249667480617
Currently at position: 30000 Time:4:54:51
std: 5.286040315985323
anomalia 926.9603192520914 | 714.1 | 31135 | 5.675679107678071
anomalia 902.4655133258015 | 555.7 | 31136 | 15
anomalia 560.0126811921123 | 501.6 | 31137 | 15
anomalia 501.80096791849303 | 544.5 | 31476 | 0.7411402626283815
anomalia 502.35752566351823 | 631.1 | 31486 | 14.079412866546196
anomalia 564.2276173504148 | 506.4 | 31487 | 15
anomalia 503.8301903565815 | 764.9 | 31556 | 15
anomalia 515.8729503363152 | 578.7 | 31557 | 15
anomalia 523.4868514631156 | 538.5 | 31558 | 15
anomalia 504.7864370624529 | 540.2 | 31591 | 15
anomalia 502.21764604855 | 516.1 | 31667 | 12.54551004233078
anomalia 502.7820678706618 | 525.7 | 31668 | 13.324119733275701
anomalia 503.09219759405727 | 518.2 | 31684 | 15
anomalia 505.4382324289099 | 676.3 | 31685 | 15
anomalia 650.2643187235346 | 747.2 | 31686 | 15
anoma

  warn('Non-stationary starting autoregressive parameters'


Currently at position: 10000 Time:10:58:28
std: 4.747865017270628
Currently at position: 20000 Time:11:5:42
std: 4.398863489584555
Currently at position: 30000 Time:11:13:0
std: 4.344600736034965
anomalia 980.9657327099522 | 968.0 | 35049 | 4.8767019821004265
anomalia 969.7168704698887 | 981.0 | 35050 | 5.977364711048581
Currently at position: 40000 Time:11:20:56
std: 6.079473661428266
Currently at position: 50000 Time:11:28:57
std: 3.215932558717956
Currently at position: 60000 Time:11:36:6
std: 2.9662921111867737
anomalia 979.9306081856596 | 24.2 | 66136 | 7.892612579705996
anomalia 1235.2432197793003 | 980.0 | 66137 | 15
anomalia 966.7174606890987 | 982.0 | 66145 | 15
anomalia 966.7884175981189 | 982.0 | 66146 | 15
anomalia 966.8760821279918 | 982.0 | 66147 | 15
anomalia 966.9636904926477 | 982.0 | 66148 | 15
Currently at position: 70000 Time:11:42:50
std: 4.8413726244618775
anomalia 972.9792773737767 | 966.0 | 79711 | 5.406066551158574
anomalia 966.3004268523237 | 973.0 | 79712 | 5

  warn('Non-stationary starting autoregressive parameters'


anomalia 947.0278188732466 | 939.0 | 3975 | 6.588542243082978
anomalia 939.1873171220981 | 947.0 | 3976 | 7.239935542991159
anomalia 953.9597777045898 | 946.0 | 9394 | 4.044200236827493
anomalia 946.8625975481092 | 954.0 | 9395 | 4.448095722391274
Currently at position: 10000 Time:13:8:45
std: 3.8987177379235853
anomalia 952.004315858544 | 924.0 | 11674 | 3.9592367390136647
anomalia 928.8362319768104 | 952.0 | 11675 | 9.952554111047744
anomalia 955.0552415923537 | 945.0 | 15160 | 2.5157283018817616
anomalia 946.2014881741399 | 954.0 | 15161 | 4.180377441757569
anomalia 954.0327417863032 | 917.0 | 16021 | 4.113797920818831
anomalia 922.3263652293102 | 954.0 | 16022 | 12.91338668032347
Currently at position: 20000 Time:13:15:36
std: 5.728583303633339
Currently at position: 30000 Time:13:22:56
std: 3.131914572412359
Currently at position: 40000 Time:13:30:6
std: 3.948417404479927
anomalia 947.1069931007065 | 952.0 | 49929 | 4.60422511275121
anomalia 948.2027810139143 | 953.0 | 49948 | 4.4

  warn('Non-stationary starting autoregressive parameters'


Currently at position: 10000 Time:14:23:9
std: 3.422799374131582
anomalia 933.8879634683755 | 9.0 | 15608 | 2.7680719322702263
anomalia 1037.9535557861175 | 934.0 | 15609 | 15
Currently at position: 20000 Time:14:30:9
std: 2.14993539954628
anomalia 927.0227618600544 | 932.0 | 21366 | 3.0608640755330656
Currently at position: 30000 Time:14:37:32
std: 5.899058305262704
anomalia 932.978589289169 | 670.7 | 32263 | 5.655282682786265
anomalia 892.3980318098495 | 500.0 | 32264 | 15
anomalia 499.9999951754418 | 500.0 | 32360 | 0.0
anomalia 499.9999951754418 | 500.0 | 32361 | 0.0
anomalia 499.9999951754418 | 500.0 | 32362 | 0.0
anomalia 499.9999951754418 | 500.0 | 32363 | 0.0
anomalia 499.9999951754418 | 500.0 | 32364 | 0.0
anomalia 499.9999951754418 | 500.0 | 32365 | 0.0
anomalia 499.9999951754418 | 500.0 | 32366 | 0.0
anomalia 499.9999951754418 | 500.0 | 32367 | 0.0
anomalia 499.9999951754418 | 500.0 | 32368 | 0.0
anomalia 499.9999951754418 | 500.0 | 32369 | 0.0
anomalia 499.9999951754418 | 5