In [1]:
import numpy as np
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm

import sys
from pathlib import Path

# Resolve paths relative to the parent of the current working directory.
base_dir = Path().resolve().parent
data_dir = base_dir / 'data'

# Make modules under base_dir importable. The membership check keeps the cell
# idempotent: re-running it no longer appends a duplicate sys.path entry.
if str(base_dir) not in sys.path:
    sys.path.append(str(base_dir))

In [6]:
# Load the source workbook from the data directory into a DataFrame.
data = pd.read_excel(data_dir.joinpath('originalData.xlsx'))

In [7]:
# Preview the first five rows of the loaded frame.
data.head(n=5)

Unnamed: 0,Date,Income,Outcome,Balance
0,2017-01-09 00:00:00.000,1.343028,1.487865,-0.155904
1,2017-01-10 00:00:00.000,1.06861,1.194182,-0.125572
2,2017-01-11 00:00:00.000,0.944429,0.936663,0.007767
3,2017-01-12 00:00:00.000,1.672202,0.875379,0.800391
4,2017-01-13 00:00:00.000,0.955924,0.975645,-0.019721


#### Discord Detection using Cumulative Sum:

The cumulative sum method was employed to detect discord in the mean of the time series. By calculating the cumulative sum, we can identify significant shifts or anomalies in the data.

#### Manual Hyperparameter Selection:

For this implementation, hyperparameters were manually selected based on visual correspondence. Although manual selection can be effective, it is recommended to explore automated approaches for hyperparameter optimization to ensure better generalization and scalability of the model.

In [8]:
# Imported here rather than in the first cell: the setup cell appends base_dir
# to sys.path, and this import has to run after that.
# NOTE(review): presumably tslib lives under base_dir — confirm.
import tslib.cusum_finder as cusum_finder

# Hand-picked detector settings.
var = 1.  # NOTE(review): not passed to CusumFinder below — confirm it is still needed
alpha = 0.05
beta = 0.005
mean_diff = -0.01  # descending discords

cusum = cusum_finder.CusumFinder(
    alpha, beta, mean_diff,
    trsh=0.03, slice_length=5, breaks_max=3,
)

# One entry per observation: the detector's metric and its running mean estimate.
stat_trajectory = []
mean_values = []

# Feed the Balance series to the detector one observation at a time.
for observation in data['Balance'].values:
    cusum.update(observation)
    cusum.count_metric()
    stat_trajectory.append(cusum.metric)
    mean_values.append(cusum.mean_hat)

In [None]:
# Statistic trajectory, drawn one segment at a time so that every segment can
# take its own colour from cusum.breakpoints.
# NOTE(review): breakpoints[i] is presumably a matplotlib colour, red marking a
# detected discord (per the title) — confirm in tslib.
# Run this before the Shiryaev-Roberts cell, which rebinds stat_trajectory.
fig, ax = plt.subplots(figsize=(15, 8))

segments = zip(stat_trajectory, stat_trajectory[1:])
for step, (previous, current) in enumerate(segments, start=1):
    ax.plot([step - 1, step], [previous, current], color=cusum.breakpoints[step])

ax.set_title('Statistics values. Red values - discord')
plt.show()

In [None]:
# Balance
# The series is drawn segment by segment, each coloured from cusum.breakpoints,
# with the detector's running mean estimate overlaid in black.
# Run this before the Shiryaev-Roberts cell, which rebinds stat_trajectory and
# mean_values.
fig, ax = plt.subplots(figsize=(15, 8))

balance_values = data['Balance'].values  # read once instead of on every iteration
segments = zip(balance_values[:-1], balance_values[1:])
for step, (previous, current) in enumerate(segments, start=1):
    ax.plot([step - 1, step], [previous, current], color=cusum.breakpoints[step])

# The first mean estimate is skipped, so the overlay starts at x = 1.
ax.plot(
    list(range(1, len(stat_trajectory))),
    mean_values[1:],
    color='black',
    label='Mean value',
)
ax.set_title('Balance values. Red values - discord')
ax.legend()
plt.show()

### Shiryaev-Roberts Statistics for Discord Detection in Variance
The Shiryaev-Roberts statistics method was utilized to detect discord in the variance of the time series. This statistical technique allows us to identify significant changes or anomalies in the variability of the data.

#### Manual Selection of Hyperparameters

In this implementation, hyperparameters were manually selected based on visual correspondence. While manual selection can provide initial insights, it is advisable to explore automated approaches for hyperparameter optimization. Automated methods, such as grid search or Bayesian optimization, can help identify the optimal hyperparameters for improved model performance and generalization. By automating the selection process, we can ensure better scalability and reproducibility of the model.

In [11]:
# Imported here rather than in the first cell: the setup cell appends base_dir
# to sys.path, and this import has to run after that.
# NOTE(review): presumably tslib lives under base_dir — confirm.
import tslib.shiryaev_roberts_finder as sr_finder

# Hand-picked detector settings.
# NOTE: alpha, beta, stat_trajectory and mean_values are also assigned by the
# CUSUM cell; running this cell rebinds them, so the CUSUM plots must be drawn
# before it.
alpha = 0.01
beta = 0.05
sigma_diff = 0.5

sr = sr_finder.SRFinder(
    alpha=alpha, beta=beta, sigma_diff=sigma_diff, ceil=200,
    trsh=20, slice_length=5, breaks_max=3,
)

# One entry per observation: the detector's metric and its running mean estimate.
stat_trajectory = []
mean_values = []

# Feed the Balance series to the detector one observation at a time.
for observation in data['Balance'].values:
    sr.update(observation)
    sr.count_metric()
    stat_trajectory.append(sr.metric)
    mean_values.append(sr.mean_hat)

In [None]:
# Statistic trajectory, drawn one segment at a time so that every segment can
# take its own colour from sr.breakpoints.
# NOTE(review): breakpoints[i] is presumably a matplotlib colour, red marking a
# detected discord (per the title) — confirm in tslib.
fig, ax = plt.subplots(figsize=(15, 8))

segments = zip(stat_trajectory, stat_trajectory[1:])
for step, (previous, current) in enumerate(segments, start=1):
    ax.plot([step - 1, step], [previous, current], color=sr.breakpoints[step])

ax.set_title('Statistics values. Red values - discord')
plt.show()

In [None]:
# Balance series drawn segment by segment, each coloured from sr.breakpoints,
# with the detector's running mean estimate overlaid in black.
fig, ax = plt.subplots(figsize=(15, 8))

balance_values = data['Balance'].values  # read once instead of on every iteration
segments = zip(balance_values[:-1], balance_values[1:])
for step, (previous, current) in enumerate(segments, start=1):
    ax.plot([step - 1, step], [previous, current], color=sr.breakpoints[step])

# The first mean estimate is skipped, so the overlay starts at x = 1.
ax.plot(
    list(range(1, len(stat_trajectory))),
    mean_values[1:],
    color='black',
    label='Mean value',
)
ax.set_title('Balance values. Red values - discord')
ax.legend()
plt.show()