# STL Modeling

#### Seasonal and Trend decomposition using Loess (STL)

Links:
- [STL Decomposition Documentation](https://www.statsmodels.org/dev/generated/statsmodels.tsa.seasonal.STL.html)
- [STL Decomposition Example](https://www.statsmodels.org/dev/examples/notebooks/generated/stl_decomposition.html)
- [Anomaly Detection using STL](https://medium.com/wwblog/anomaly-detection-using-stl-76099c9fd5a7)

Import modules:

In [None]:
from dotenv import load_dotenv
import os
import sys

# Put the parent of the current working directory on sys.path (once) so the
# `amodely` import below can resolve -- presumably the package sits one level
# above this notebook; confirm against the repository layout.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

# NOTE(review): star import. Later cells use `Amodely`, `pl` and `DATE`
# without importing them explicitly, so they appear to come from here.
from amodely.amodely import *

from statsmodels.tsa.seasonal import STL
from statsmodels.api import qqplot
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from scipy.stats import norm

# Load variables from a .env file (if present) into the environment, then look
# up the dataset location. DATASET_PATH is None when the variable is unset.
load_dotenv()
DATASET_PATH = os.environ.get("DATASET_PATH")

Load the dataset into the `Amodely` class:

In [None]:
# Build the model from the Excel workbook stored under DATASET_PATH, passing
# "conversion_rate" as the measure. os.path.join keeps the path valid whether
# or not DATASET_PATH ends with a path separator (plain string concatenation
# silently produced a wrong file name when it did not).
model = Amodely(
    pd.read_excel(os.path.join(DATASET_PATH, "Conversion Data Extended Period.xlsx")),
    measure="conversion_rate",
)

In [None]:
# reset_working() runs first -- presumably to discard changes left on the
# model's working data by earlier runs (confirm in Amodely).
model.reset_working()

# Run the "stl" detection method with STATE_CODE as the dimension; the bare
# `results` expression makes the notebook render the returned object.
results = model.detect_anomalies(
    dimension="STATE_CODE",
    method="stl",
)
results

In [None]:
# Draw one figure per category: the measure over time as a line, with the
# anomalies reported in `results` for that category overlaid in red.
dimension = "STATE_CODE"
response = model.measure.upper()
categories = sorted(set(model.df[dimension]))

for category in categories:
    # Series for the current category (presumably the pipeline restricts the
    # full dataset to this category -- confirm in `pl.category_pipeline`).
    category_pipeline = pl.category_pipeline(dimension, [category])
    df = category_pipeline.fit_transform(model.df)

    # Rows of the detection output that belong to this category.
    belongs_to_category = results[dimension] == category
    anomalies = results[belongs_to_category]

    plt.plot(df[DATE], df[response])
    plt.scatter(anomalies[DATE], anomalies[response], c="red")
    plt.show()

For each category, decompose the time series with STL and keep the residuals. Then fit a normal distribution to the residuals and flag as outliers the observations that fall outside its central 95% interval (significance level 0.05):

In [None]:
def calc_stl(ts, measure="CONVERSION_RATE", period=12):
    """Fit an STL decomposition to one column of a time-series frame.

    Args:
        ts: DataFrame containing a ``DATE`` column and the ``measure`` column.
            It is not modified.
        measure: Name of the column to decompose. Defaults to
            ``"CONVERSION_RATE"``, the column this notebook analyses.
        period: Seasonal period handed to ``STL``. Defaults to 12.

    Returns:
        The object returned by ``STL(...).fit()``.
    """
    # set_index returns a new frame, so the caller's data stays untouched
    # without an explicit copy followed by an in-place modification.
    indexed = ts.set_index(DATE)
    return STL(indexed[measure], period=period).fit()


# Per-category residual analysis: decompose each series with STL, fit a normal
# distribution to the residuals and flag the observations that fall outside
# its central (1 - sig) interval.
#
# NOTE(review): this cell used to read categories and per-category data from
# an `adm` object that is not defined anywhere in the notebook. It now uses
# `model` and the category pipeline, the same way the plotting cell above does.
dimension = "STATE_CODE"
sig = 0.05  # two-sided significance level -> central 95% interval
plt.rcParams["figure.figsize"] = (9, 6)  # loop-invariant, so set it once

anomalies = []

for category in sorted(set(model.df[dimension])):
    df = pl.category_pipeline(dimension, [category]).fit_transform(model.df)
    stl = calc_stl(df)
    stl.plot()
    plt.show()

    # Normal fit to the residuals and the bounds of the central interval.
    data = stl.resid
    mean, std = np.mean(data), np.std(data)
    min_bound, max_bound = norm.ppf([sig / 2, 1 - sig / 2], loc=mean, scale=std)
    plt.plot(data)
    plt.axhline(y=min_bound, color='r')
    plt.axhline(y=max_bound, color='r')
    plt.show()

    # Vectorised replacement for the element-by-element loop. It is written as
    # a negated "inside the bounds" test so NaN residuals are still flagged,
    # as they were before. Rows are picked by position because `data` is
    # indexed by date while `df` keeps its own index.
    inside = ((data > min_bound) & (data < max_bound)).to_numpy()
    flagged = df.iloc[np.flatnonzero(~inside)].copy()
    # Make sure every flagged row carries its category label so the combined
    # frame can be filtered per category later (no effect if the pipeline
    # output already contains this column).
    flagged[dimension] = category
    anomalies.append(flagged)

anomaly_df = pd.concat(anomalies)
display(anomaly_df[anomaly_df[DATE] > datetime(2021, 10, 1)].sort_values(DATE))

Plot the outliers found after 1 September 2021 on top of each category's time series:

In [None]:
# One figure per category: the measure over time as a line, with the outliers
# in `anomaly_df` dated after 2021-09-01 overlaid in red.
#
# NOTE(review): this cell used to iterate over an `adm` object that is not
# defined anywhere in the notebook. It now uses `model` and the category
# pipeline, the same way the first plotting cell does.
dimension = "STATE_CODE"
response = model.measure.upper()

# The date cut-off and the sort do not depend on the category, so they are
# done once instead of on every iteration.
recent_anomalies = anomaly_df[anomaly_df[DATE] > datetime(2021, 9, 1)].sort_values(DATE)

for category in sorted(set(model.df[dimension])):
    df_plot = pl.category_pipeline(dimension, [category]).fit_transform(model.df)
    anomaly_df_new = recent_anomalies[recent_anomalies[dimension] == category]

    plt.scatter(anomaly_df_new[DATE], anomaly_df_new[response], c="red")
    plt.plot(df_plot[DATE], df_plot[response])
    plt.show()