In [1]:
from copy import deepcopy
import numpy as np
import matplotlib.pyplot as plt
import rpy2.robjects as robjects
import rpy2.robjects.numpy2ri
from rpy2.robjects.packages import importr
import openpyxl
import pandas as pd
from utils.hmatr import Hmatr

from utils.modelling import modellingSeriesStatistics
from utils.modelling import fixSeriesStatistics

%load_ext autoreload
%load_ext rpy2.ipython
%autoreload 2

rpy2.robjects.numpy2ri.activate()
utils = importr('utils')
utils.chooseCRANmirror(ind=1)

utils.install_packages('Rssa')

rssa = importr('Rssa')

  from pandas.core.index import Index as PandasIndex


In [88]:
# Series length.
N = 700

# Q = 301 is the change-point number: with 0-based indexing the change is meant
# to land on the 302nd point.
# NOTE(review): the series generators in the generation cell switch regime once n >= Q-1;
# confirm which index is intended.
Q = 301

# Sine parameters: frequencies, amplitudes and phases before/after the change.
w1, w2 = 1/10, 1/5
C1, C2 = 1, 2
phi1, phi2 = 0, np.pi/2

# Arguments forwarded to Hmatr (as B, T_, L, neig=r).
B, T_, L, r = 100, 100, 50, 2

# Noise level used when generating the noisy series and passed on as `vareps`.
noiseVariance = 0.5

# Forwarded to Hmatr as svdMethod.
method = "svd"

In [3]:
def plotSeries(s, title='Series', w=16, h=4):
    """Draw `s` as a line plot on a new figure of size (w, h) with the given title."""
    _, axes = plt.subplots(figsize=(w, h))
    axes.set_title(title)
    axes.plot(s)

In [4]:
def _sineAt(n, amplitude, freq, phase):
    """Return amplitude * sin(2*pi*freq*n + phase)."""
    return amplitude*np.sin(2*np.pi*freq*n + phase)


def seriesPermanent(n):
    """Sine with amplitude C1 and phase phi1 whose frequency switches w1 -> w2 once n >= Q-1."""
    return _sineAt(n, C1, w1 if n < Q-1 else w2, phi1)


def seriesTemporary(n):
    """Sine with frequency w1 and phase phi1 whose amplitude switches C1 -> C2 once n >= Q-1."""
    return _sineAt(n, C1 if n < Q-1 else C2, w1, phi1)


def seriesShifted(n):
    """Sine with amplitude C1 and frequency w1 whose phase switches phi1 -> phi2 once n >= Q-1."""
    return _sineAt(n, C1, w1, phi1 if n < Q-1 else phi2)


def seriesOutlier(n):
    """Unchanged sine (C1, w1, phi1); the outlier itself is injected after generation."""
    return _sineAt(n, C1, w1, phi1)


def _hmatrPair(series, noise):
    """Return (noisy series, Hmatr of the clean series, Hmatr of the noisy series)."""
    # `series` is a plain list and `noise` an ndarray, so `+` adds element-wise
    # and the result is an ndarray.
    noisy = series + noise
    cleanHm = Hmatr(series, B, T_, L, neig=r, svdMethod=method)
    noisyHm = Hmatr(noisy, B, T_, L, neig=r, svdMethod=method)
    return noisy, cleanHm, noisyHm


np.random.seed(0)
# randn() draws unit-variance samples, so the scale factor must be the standard
# deviation sqrt(noiseVariance) for the noise to have variance noiseVariance.
# (The former factor noiseVariance**2 produced a variance of noiseVariance**4.)
# NOTE(review): confirm the modelling helpers that receive `vareps` use the same convention.
eps = np.random.randn(N) * np.sqrt(noiseVariance)

fPerm = [seriesPermanent(i) for i in range(N)]
fPermNoise, hmPerm, hmPermNoise = _hmatrPair(fPerm, eps)

fTemp = [seriesTemporary(i) for i in range(N)]
# Noise for the amplitude-change series: halved over the first Q samples.
# NOTE(review): seriesTemporary switches amplitude at n = Q-1, one sample before this
# noise boundary, and the outlier below sits at index Q; confirm the intended index.
epsTemp = eps.copy()
epsTemp[:Q] /= 2
fTempNoise, hmTemp, hmTempNoise = _hmatrPair(fTemp, epsTemp)

fShifted = [seriesShifted(i) for i in range(N)]
fShiftedNoise, hmShifted, hmShiftedNoise = _hmatrPair(fShifted, eps)

fOutlier = [seriesOutlier(i) for i in range(N)]
# Single spike of height 10*C1 added at index Q.
fOutlier[Q] += C1*10
fOutlierNoise, hmOutlier, hmOutlierNoise = _hmatrPair(fOutlier, eps)

In [41]:
# Four "meanMax" labels followed by four "mean95" labels.
tmp = [label for label in ("meanMax", "mean95") for _ in range(4)]
tmp

['meanMax',
 'meanMax',
 'meanMax',
 'meanMax',
 'mean95',
 'mean95',
 'mean95',
 'mean95']

In [45]:
# Scratch check: the label list paired with the integers 1..4.
[tmp, list(range(1, 5))]

[['meanMax',
  'meanMax',
  'meanMax',
  'meanMax',
  'mean95',
  'mean95',
  'mean95',
  'mean95'],
 [1, 2, 3, 4]]

In [51]:
# Turn the label list into a frame with column "T" and attach a running counter "A" (1..8).
tmp = pd.DataFrame(tmp, columns=["T"]).assign(A=list(range(1, 9)))

In [57]:
# Display the frame with columns "T" and "A" assembled in the previous cell.
tmp

Unnamed: 0,T,A
0,meanMax,1
1,meanMax,2
2,meanMax,3
3,meanMax,4
4,mean95,5
5,mean95,6
6,mean95,7
7,mean95,8


## Новое моделирование статистик шума

In [76]:
from utils.modellingNew import modellingNoiseStatistics

In [82]:
# IPython help lookup: shows the signature and docstring of modellingNoiseStatistics.
?modellingNoiseStatistics

Signature:
modellingNoiseStatistics(
    dictSeries: dict,
    iterNum: int,
    N: int,
    B: int,
    T: int,
    Q: int,
    L: int,
    r: int,
    method: str,
    vareps: float,
)
Docstring:
Моделирование статистик ряда (средний 95й процентиль и средний максимум) при различных реализациях шума до момента разладки методом Монте-Карло.
Внимание, шум добавляется внутри метода!
:param dict dictSeries: The dictionary where ke

In [83]:
%%time
dictSeries = dict(zip(['Permanent', 'Temporary', 'Shifted', 'Outlier'], [fPerm, fTemp, fShifted, fOutlier]))
statistics = modellingNoiseStatistics(dictSeries, 400, N, B, T_, Q, L, r, method, noiseVariance)
statistics

ValueError: Length of values (1600) does not match length of index (8)

In [87]:
%%time
dictSeries = dict(zip(['Permanent', 'Temporary', 'Shifted', 'Outlier'], [fPerm, fTemp, fShifted, fOutlier]))
statistics = modellingNoiseStatistics(dictSeries, 3, N, B, T_, Q, L, r, method, noiseVariance)
statistics

[0.10960619 0.13301544 0.14376383] [0.03478567 0.03501156 0.03364103] [0.12935306 0.1328779  0.11362963] [0.17760457 0.13246908 0.12629445]


ValueError: Length of values (12) does not match length of index (8)

In [84]:
# Write `statistics` to a CSV file under tables/.
statistics.to_csv(path_or_buf="tables/newStatistics.csv")

## Промоделированные значения

In [None]:
# Load the 'Modelling' sheet and replace missing cells with a single space for display.
# fillna is chained instead of using inplace=True, so the cell builds its result in one
# expression and never mutates an already-bound frame.
resModelling = (
    pd.read_excel('tables/results.xlsx', sheet_name='Modelling', engine='openpyxl')
    .fillna(' ')
)
resModelling

In [None]:
%%time
modellingSeriesStatistics(
    dictSeries=dict(zip(['Permanent', 'Temporary', 'Shifted', 'Outlier'], [fPerm, fTemp, fShifted, fOutlier])),
    iterNum=200,
    N=N,
    B=B,
    T=T_,
    Q=Q,
    L=L,
    r=r,
    method=method,
    destFile='tables/results.xlsx',
    modellingResultsPath = 'tables/results.xlsx',
    title='withNoise',
    vareps=noiseVariance
)