# Alerting system for monitoring the passage of payments

In [67]:
import numpy as np
import pandas as pd
from pmdarima import auto_arima
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.metrics import mean_absolute_error, mean_squared_error
from statsmodels.tsa.seasonal import STL


### Loading and transformation

In [70]:
df=pd.read_csv('data.csv')
df.head(10)

Unnamed: 0,user_id,country_id,date,payment_id,status
0,100276365,96,2022-01-01,11434866,fail
1,98953875,54,2022-01-01,11434865,fail
2,94570279,54,2022-01-01,11434823,fail
3,93106544,96,2022-01-01,11435040,fail
4,99293681,54,2022-01-01,11434821,success
5,92146030,75,2022-01-01,11434813,fail
6,98362355,75,2022-01-01,11434884,fail
7,99293681,75,2022-01-01,11434803,processing
8,94570279,75,2022-01-01,11434822,fail
9,98953875,54,2022-01-01,11434884,fail


In [71]:
# Calculation of the number of users with successful payment by day
success = df[df['status'] == 'success'].groupby('date')['user_id'].nunique()
# Calculation of the number of users with any payment by day
all = df.groupby('date')['user_id'].nunique()
# Successful rate by day
data = pd.DataFrame(success/all).reset_index().sort_values('date')
data.rename(columns={'user_id': 'metric'}, inplace=True)

#Ratio of the number of users per day
coef = df.groupby('date')['user_id'].nunique().mean()/df.groupby('date')['user_id'].nunique().rolling(window=7, min_periods=1).mean()
coef = pd.DataFrame(coef).reset_index().sort_values('date')
data['user_count']=coef['user_id']

# Drop n/a, set index
data.dropna(subset=['metric'], inplace=True)
data["date"] = pd.to_datetime(data["date"])
data.set_index("date", inplace=True)

In [72]:
data.head(10)

Unnamed: 0_level_0,metric,user_count
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-01-01,0.216393,1.427582
2022-01-02,0.364508,1.206129
2022-01-03,0.448037,1.130942
2022-01-04,0.351351,1.115013
2022-01-05,0.369524,1.043154
2022-01-06,0.439506,1.048345
2022-01-07,0.423174,1.054998
2022-01-08,0.397619,1.01461
2022-01-09,0.41519,1.022095
2022-01-10,0.392936,1.015286


### S ARIMA model

In [73]:
sarima_model = auto_arima(data['metric'], seasonal=True, year=1)
sarima_model

In [74]:
data['sarima_forecast'] = sarima_model.predict_in_sample()


print('MSE data',mean_squared_error(data['metric'], data['sarima_forecast']))
print('MAE data',mean_absolute_error(data['metric'], data['sarima_forecast']))

MSE data 0.0012591644958773662
MAE data 0.025487883988243917


### Calculation of the upper and lower limits of alerting

In [75]:
# Calculation of the difference between real and predicted data
data['sarima_residual'] = data['metric'] - data['sarima_forecast']
#Calculation of the alert threshold
alert_threshold =  data['sarima_forecast'].std() * data['user_count']
# Аnomalies
anomalies = data[data['sarima_residual'].abs() > alert_threshold]
# Upper border
up = data['sarima_forecast'] + alert_threshold
# Lower border
low = data['sarima_forecast'] - alert_threshold

### Linear monitoring diagram

In [77]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=data.index, y=data['metric'], mode='lines',line=dict( width=1.5), name='Дані'))
fig.add_trace(go.Scatter(x=data.index, y=data['sarima_forecast'], mode='lines',line=dict( width=1.5), name='SARIMA'))
fig.add_trace(go.Scatter(x=anomalies.index, y=anomalies['metric'], mode='markers', marker=dict(color='red'), name='Аномалії'))
fig.add_trace(go.Scatter(x=data.index.tolist() + data.index.tolist()[::-1],
                         y=up.tolist() + low.tolist()[::-1],
                         fill='toself', fillcolor='green',opacity=0.2 , line=dict(color='green'),
                         name=' Normal Area'))
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Metric',
    showlegend=True,
    width=2500,
    height=500,
    plot_bgcolor='white',
    xaxis_gridcolor='white', 
    yaxis_gridcolor='lightgray' ,
)
fig.show(renderer='iframe')

In [78]:
data.head(4)

Unnamed: 0_level_0,metric,user_count,sarima_forecast,sarima_residual
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-01-01,0.216393,1.427582,0.000299,0.216094
2022-01-02,0.364508,1.206129,0.216693,0.147815
2022-01-03,0.448037,1.130942,0.316104,0.131933
2022-01-04,0.351351,1.115013,0.367488,-0.016136


In [81]:
anomalies.count()

metric             39
user_count         39
sarima_forecast    39
sarima_residual    39
dtype: int64

In [82]:
anomalies.head(10)

Unnamed: 0_level_0,metric,user_count,sarima_forecast,sarima_residual
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-01-01,0.216393,1.427582,0.000299,0.216094
2022-01-02,0.364508,1.206129,0.216693,0.147815
2022-01-03,0.448037,1.130942,0.316104,0.131933
2022-01-06,0.439506,1.048345,0.355002,0.084504
2022-01-13,0.466667,1.07928,0.397579,0.069087
2022-02-17,0.484594,1.109937,0.40977,0.074824
2022-03-17,0.364532,1.145392,0.438721,-0.074189
2022-04-01,0.243169,1.19525,0.425646,-0.182476
2022-04-02,0.169892,1.148413,0.349096,-0.179203
2022-04-03,0.140665,1.151015,0.324186,-0.183521
