In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import plotly.express as px
import matplotlib as mpl
from pycaret.anomaly import *
import plotly.graph_objects as go
# Notebook-wide matplotlib defaults: figure size and no background grid.
mpl.rcParams.update({
    'figure.figsize': (10, 8),
    'axes.grid': False,
})

## Boeing

In [None]:
# Load the Boeing export and normalise it into a two-column frame:
#   date  -> datetime64, value -> int
df = (
    pd.read_csv("../data/google_trends_boeing_5y.csv")
    .iloc[1:]                              # drop the first parsed row (presumably the in-file header row; confirm)
    .reset_index(level=0)                  # move the index into a regular column
    .set_axis(['date', 'value'], axis=1)   # name the two resulting columns
    .replace("<\xa01", "0")                # "<1" placeholder cells become "0" so the int cast succeeds
    .assign(
        value=lambda frame: frame['value'].astype(int),
        date=lambda frame: pd.to_datetime(frame['date']),
    )
)

In [None]:
# Preview the first rows of the cleaned frame (columns: date, value).
df.head()

In [None]:
# Plot the entire series with plotly express.
fig = px.line(df, x='date', y='value', template='plotly_dark')
# Add a range slider plus quick-zoom buttons. Each button's `count` must match
# its label: the '5y' button previously used count=2 and zoomed to two years.
fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector=dict(
        buttons=[
            dict(count=1, label='1y', step="year", stepmode="backward"),
            dict(count=2, label='2y', step="year", stepmode="backward"),
            dict(count=5, label='5y', step="year", stepmode="backward"),
        ]
    ),
)
fig.show()

In [None]:
# Create moving averages of `value` over 3 and 12 consecutive rows.
# With the default min_periods, the first (window - 1) rows of each are NaN.
df['rolling_3'] = df['value'].rolling(3).mean()
df['rolling_12'] = df['value'].rolling(12).mean()
# Overlay the raw series and both moving averages.
# (plotly.express is already imported as `px` in the first cell; the duplicate
# import that used to live here was removed.)
fig = px.line(df, x="date", y=['value', 'rolling_3', 'rolling_12'], template='plotly_dark')
fig.show()

In [None]:
# Rolling statistics are only defined for numeric data. `df` still carries the
# datetime `date` column at this point, which recent pandas versions refuse to
# aggregate (older versions silently dropped it), so select the numeric
# columns explicitly before rolling.
numeric_df = df.select_dtypes(include='number')
smooth_path = numeric_df.rolling(6).mean()         # 6-row rolling mean per column
path_deviation = 2 * numeric_df.rolling(6).std()   # two rolling standard deviations per column

In [None]:
# Display the lower edge of the band: rolling mean minus the deviation term.
smooth_path - path_deviation

In [None]:
# Lower and upper edges of the band around the rolling mean of `value`.
value_mean = smooth_path["value"]
value_spread = path_deviation["value"]
under_line = value_mean - value_spread
over_line = value_mean + value_spread

In [None]:
# Rolling-mean curve(s): every column of `smooth_path` against its index.
plt.plot(smooth_path, linewidth=2)
# Shaded band between the lower and upper lines computed for `value`.
plt.fill_between(
    x=path_deviation.index,
    y1=under_line,
    y2=over_line,
    color='b',
    alpha=.1,
)

In [None]:
# Inspect the frame, which by now also holds the rolling_3 / rolling_12 columns.
df

In [None]:
# Use the dates as the index. Guarded and non-inplace so that re-running this
# cell does not raise KeyError once `date` has already become the index.
if 'date' in df.columns:
    df = df.set_index('date')

In [None]:
# Disabled: optional aggregation of the date-indexed frame into monthly sums.
#df = df.resample('M').sum()

In [None]:
# Inspect the frame again after the index change.
df

In [None]:
# Create features from the date: the month number of each index entry.
# `.month` on the datetime index is vectorized, replacing the per-row Python
# loop over the index.
df['month'] = df.index.month
df.head()

In [None]:
# Initialise the PyCaret anomaly experiment on `df` with a fixed session id.
s = setup(data=df, session_id=123)

In [None]:
# Fit an Isolation Forest with the outlier fraction set to 1%.
iforest = create_model(model='iforest', fraction=0.01)
# Attach the model's output to the data and preview the result.
iforest_results = assign_model(model=iforest)
iforest_results.head()

In [None]:
# First few rows labelled as anomalies (Anomaly == 1).
is_anomaly = iforest_results['Anomaly'] == 1
iforest_results[is_anomaly].head()

In [None]:
# Plot value on the y-axis and date (the frame's index) on the x-axis.
# (`go` is already imported in the first cell; the duplicate import was removed.)
fig = px.line(iforest_results, x=iforest_results.index, y="value", template='plotly_dark')

# Select the flagged rows once and read both marker coordinates from them.
# The previous per-date `.loc[i]['value']` lookup was a Python-level loop and
# returned a frame instead of a scalar whenever a date occurred more than once.
anomalies = iforest_results[iforest_results['Anomaly'] == 1]
outlier_dates = anomalies.index
y_values = anomalies['value']

# Overlay the anomalies as red markers on top of the line.
fig.add_trace(
    go.Scatter(
        x=outlier_dates,
        y=y_values,
        mode='markers',
        name='Anomaly',
        marker=dict(color='red', size=10),
    )
)

fig.show()

## General

In [None]:
# Name fragment of the dataset to analyse; it is interpolated into the CSV
# path and into the plot titles in the cells below.
data_name = "facebookdata_5y"

In [None]:
# Load the export selected by `data_name` and normalise it into a two-column
# frame: date -> datetime64, value -> int.
df = (
    pd.read_csv(f"../data/google_trends_{data_name}.csv")
    .iloc[1:]                              # drop the first parsed row (presumably the in-file header row; confirm)
    .reset_index(level=0)                  # move the index into a regular column
    .set_axis(['date', 'value'], axis=1)   # name the two resulting columns
    .replace("<\xa01", "0")                # "<1" placeholder cells become "0" so the int cast succeeds
    .assign(
        value=lambda frame: frame['value'].astype(int),
        date=lambda frame: pd.to_datetime(frame['date']),
    )
)

In [None]:
# Disabled: full-series plot with range slider and quick-zoom buttons.
# (The '5y' button's count is corrected to 5 here so the snippet is right if
# it is ever re-enabled.)
# #express to plot entire data
# fig = px.line(df, x='date', y='value', template = 'plotly_dark')
# #slider
# fig.update_xaxes(
#     rangeslider_visible = True,
#     rangeselector = dict(
#         buttons = list([
#               dict(count=1, label='1y', step="year", stepmode="backward"),
#               dict(count=2, label='2y', step="year", stepmode="backward"),
#               dict(count=5, label='5y', step="year", stepmode="backward")
#         ])
#     )
# )
# fig.show()

In [None]:
# Add 3- and 12-row moving averages of `value` as rolling_3 / rolling_12.
for window in (3, 12):
    df[f'rolling_{window}'] = df['value'].rolling(window).mean()

# Overlay the raw series and both moving averages in one figure.
plotted_columns = ['value', 'rolling_3', 'rolling_12']
fig = px.line(
    df,
    x="date",
    y=plotted_columns,
    template='plotly_dark',
    title=f"Anomalies {data_name}",
)
fig.show()

In [None]:
# Rolling statistics are only defined for numeric data. `df` still carries the
# datetime `date` column at this point, which recent pandas versions refuse to
# aggregate (older versions silently dropped it), so select the numeric
# columns explicitly before rolling.
numeric_df = df.select_dtypes(include='number')
smooth_path = numeric_df.rolling(6).mean()         # 6-row rolling mean per column
path_deviation = 2 * numeric_df.rolling(6).std()   # two rolling standard deviations per column

# Band around the rolling mean of `value`.
under_line = (smooth_path - path_deviation)["value"]
over_line = (smooth_path + path_deviation)["value"]

plt.plot(smooth_path, linewidth=2)  # rolling-mean curve(s), one per numeric column
plt.fill_between(path_deviation.index, under_line, over_line, color='b', alpha=.1)  # shaded deviation band

In [None]:
# Use the dates as the index. Guarded and non-inplace so that re-running this
# cell does not raise KeyError once `date` has already become the index.
if 'date' in df.columns:
    df = df.set_index('date')

# Set up the PyCaret anomaly experiment, fit an Isolation Forest with the
# outlier fraction set to 1%, and attach the model's output to the data.
s = setup(df, session_id = 123)
iforest = create_model('iforest', fraction = 0.01)
iforest_results = assign_model(iforest)

# Plot value on the y-axis and date (the frame's index) on the x-axis.
fig = px.line(iforest_results,
              x=iforest_results.index,
              y="value",
              template = 'plotly_dark',
              title=f"Anomalies {data_name}")

# Select the flagged rows once and read both marker coordinates from them.
# The previous per-date `.loc[i]['value']` lookup was a Python-level loop and
# returned a frame instead of a scalar whenever a date occurred more than once.
anomalies = iforest_results[iforest_results['Anomaly'] == 1]
outlier_dates = anomalies.index
y_values = anomalies['value']

# Overlay the anomalies as red markers on top of the line.
fig.add_trace(go.Scatter(x=outlier_dates, y=y_values, mode = 'markers',
                name = 'Anomaly',
                marker=dict(color='red',size=10)))

fig.show()

In [None]:
# First few rows labelled as anomalies (Anomaly == 1).
is_anomaly = iforest_results['Anomaly'] == 1
iforest_results[is_anomaly].head()

In [None]:
# Display the per-row anomaly score column.
iforest_results['Anomaly_Score']

In [None]:
# Line plot of the anomaly score against the frame's index.
anomaly_score = iforest_results['Anomaly_Score']
plt.plot(anomaly_score, linewidth=2)