### データセット取得
https://raw.githubusercontent.com/oreilly-japan/ml-security-jp/master/ch06/HostLogons-demo.csv  
を取得して、このノートブックと同じディレクトリに格納する。

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from statsmodels.tsa.seasonal import seasonal_decompose
from prophet import Prophet
from msticpy.analysis.timeseries import timeseries_anomalies_stl
from msticpy.nbtools.timeseries import display_timeseries_anomalies

In [None]:
# Load the host logon demo data; the 'Date' column is parsed as datetimes.
df = pd.read_csv(
    'HostLogons-demo.csv',
    parse_dates=['Date'],
)

In [None]:
# Show the loaded DataFrame as the cell output.
df

In [None]:
# Sum TotalLogons per Date so the result holds exactly one row per date,
# with the columns ['Date', 'TotalLogons'].
# Grouping by ['Date', 'ComputerName'] gives the same frame while the data
# contains a single ComputerName, but would emit several rows per date as soon
# as more hosts appear, which breaks the one-observation-per-date series that
# the decomposition and forecasting cells work on.
# Rows with a missing ComputerName are now counted too (groupby drops NaN
# group keys by default).
df_LogonSum = df.groupby('Date')['TotalLogons'].sum().reset_index()

In [None]:
# Show the aggregated per-date logon totals as the cell output.
df_LogonSum

In [None]:
# Plot the daily logon totals together with their 7-row rolling mean.
fig = plt.figure(figsize=(15, 7))
ax = fig.add_subplot(1, 1, 1)
ax.plot(df_LogonSum['Date'], df_LogonSum['TotalLogons'], label='original')
# The rolling mean is NaN until the 7-row window is full, so the dashed line
# starts a few points after the original series.
ax.plot(df_LogonSum['Date'], df_LogonSum['TotalLogons'].rolling(window=7).mean(), label='rolling', ls='dashed')

ax.set_title('Daily TotalLogons')

# Render x tick labels as YYYY-MM-DD and slant them so they do not overlap.
daysFmt = mdates.DateFormatter('%Y-%m-%d')
ax.xaxis.set_major_formatter(daysFmt)
fig.autofmt_xdate()

ax.grid(True)

# The label= values passed to plot() are only shown once a legend is drawn.
ax.legend()

plt.show()

In [None]:
# Decompose the logon totals with a multiplicative model and a period of
# 7 observations; the components are plotted in the following cells.
result = seasonal_decompose(
    df_LogonSum['TotalLogons'], model='multiplicative', period=7
)

In [None]:
# Draw the four decomposition components in stacked panels sharing one x axis.
fig, axes = plt.subplots(nrows=4, ncols=1, figsize=(15, 7), sharex=True)
fig.subplots_adjust(hspace=0.5)  # vertical gap so the panel titles stay readable

_panels = (
    ('Observed', result.observed),
    ('Trend', result.trend),
    ('Seasonal', result.seasonal),
    ('Residual', result.resid),
)
for _axis, (_title, _component) in zip(axes, _panels):
    _axis.set_title(_title)
    _axis.plot(_component)

plt.show()

In [None]:
# Pair the decomposed trend with its dates and plot it as a date-indexed line.
trend = pd.DataFrame({'trend': result.trend, 'date': df_LogonSum['Date']})
trend['date'] = pd.to_datetime(trend['date'], format='%Y-%m-%d')
# NOTE: `trend` ends up bound to the return value of DataFrame.plot(), not to
# the frame; assigning it also keeps the cell from echoing that return value.
trend = trend.set_index('date').plot()

In [None]:
def fit_predict_model(dataframe):
    """Fit a Prophet model on ``dataframe`` and return its in-sample forecast.

    The model switches off Prophet's built-in daily, weekly and yearly
    seasonalities and adds a single custom 'monthly' seasonality (period
    30.5, Fourier order 5) in multiplicative mode, using an uncertainty
    interval width of 0.99.

    Args:
        dataframe: Frame with a ``ds`` column (timestamps) and a ``y`` column
            (observed values).

    Returns:
        The frame produced by ``model.predict(dataframe)`` with an extra
        ``fact`` column holding the observed ``y`` value for each forecast
        row.

    Side effects:
        Draws the forecast with ``model.plot``.
    """
    model = Prophet(
        daily_seasonality=False,
        weekly_seasonality=False,
        yearly_seasonality=False,
        seasonality_mode='multiplicative',
        interval_width=0.99,
        changepoint_range=0.8
    )
    model.add_seasonality(name='monthly', period=30.5, fourier_order=5)

    model.fit(dataframe)

    forecast = model.predict(dataframe)

    # Prophet orders the frame it predicts on by ``ds``, so the forecast rows
    # are in timestamp order even when ``dataframe`` is not. Sort the observed
    # values the same way (stable sort on parsed timestamps) before attaching
    # them; copying them in input order would pair ``fact`` with the wrong
    # ``yhat`` for unsorted input.
    observed = dataframe.assign(ds=pd.to_datetime(dataframe['ds']))
    observed = observed.sort_values('ds', kind='mergesort')
    forecast['fact'] = observed['y'].reset_index(drop=True)

    model.plot(forecast)

    return forecast

In [None]:
# Relabel the two columns positionally to 'ds' and 'y', the names read by
# fit_predict_model, then fit the model and forecast over the same rows.
df_LogonSum = df_LogonSum.set_axis(['ds', 'y'], axis=1)
pred = fit_predict_model(df_LogonSum)

In [None]:
# Preview the first rows of the forecast frame.
pred.head()

In [None]:
def detect_anomalies(forecast):
    """Flag rows whose observed value lies above the forecast's upper bound.

    Args:
        forecast: Frame containing at least the columns ``ds``, ``trend``,
            ``yhat``, ``yhat_lower``, ``yhat_upper`` and ``fact``.

    Returns:
        A copy limited to those columns plus an integer ``anomalies`` column:
        1 where ``fact`` is strictly greater than ``yhat_upper``, otherwise 0.
        Values below ``yhat_lower`` are not flagged. The input frame is left
        unmodified.
    """
    kept_columns = ['ds', 'trend', 'yhat', 'yhat_lower', 'yhat_upper', 'fact']
    flagged = forecast[kept_columns].copy()

    # Comparisons involving NaN evaluate to False, so such rows stay at 0.
    above_upper = flagged['fact'] > flagged['yhat_upper']
    flagged['anomalies'] = above_upper.astype('int64')

    return flagged


In [None]:
# Replace the forecast with the trimmed frame that carries the 'anomalies' flag column.
pred = detect_anomalies(pred)

In [None]:
# Show only the rows flagged as anomalies.
pred.loc[pred['anomalies'] == 1]

In [None]:
# Rebuild the per-date totals from the CSV; the earlier df_LogonSum had its
# columns relabelled to 'ds'/'y' for Prophet.
df = pd.read_csv('HostLogons-demo.csv', parse_dates=['Date'])

# One TotalLogons row per Date, indexed by Date. Grouping by Date alone keeps
# the index unique even if the data contains more than one ComputerName;
# grouping by ['Date', 'ComputerName'] would repeat a date once per host.
# The result is identical while there is a single ComputerName.
df_LogonSum = df.groupby('Date')[['TotalLogons']].sum()

# NOTE(review): only `seasonal` is set here. msticpy's `period` argument is
# left at its default, which appears to target hourly data. Confirm it suits
# this daily series.
output = timeseries_anomalies_stl(df_LogonSum, seasonal=31)

In [None]:
# Show only the rows that the STL-based detection marked as anomalies.
output.loc[output['anomalies'] == 1]

In [None]:
# Plot the STL result: 'TotalLogons' values over the 'Date' column.
timeseries_anomalies_plot = display_timeseries_anomalies(
    data=output, time_column='Date', y='TotalLogons'
)