In [35]:
import requests
import json
import plotly as py
import plotly.figure_factory as ff
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
import numpy as np
import ipywidgets as widgets
from ipywidgets import interact, interact_manual
from IPython.display import display

In [36]:
## Static variables
MAX_PAGE = 100000

"""
This is the API call:
GET https://connection.keboola.com/v2/storage/tables/{{table_id}}/events?sinceId=sinceId&maxId=maxId&component=component&configurationId=configurationId&runId=runId&q=q&limit=limit&offset=offset
"""

stacks = {
    "US": "https://connection.keboola.com/v2/storage/tables",
    "EU-N": "https://connection.north-europe.azure.keboola.com/v2/storage/tables",
    "EU-C": "https://connection.eu-central-1.keboola.com/v2/storage/tables"
    }

In [37]:
## User-entered variables
TOKEN = "7114-"
TABLEID = "in.c-apac-ex-gmail-attachments-586740274.daily_fleet_size"

## How to use this
1) Insert storage token
2) Get table id from the UI `out.c-keboola-apps.component`
3) Select stact and run API calls

In [38]:
#URL = list(stacks.keys())[0]

headers = {
    'X-StorageApi-Token': TOKEN,
    "Accept": "application/json"}
params = {
 #   "component": "keboola.snowflake-transformation",
 #   "configurationId": "809414277",
    "q": "event:storage.tableImportDone",
    "limit": 100,
    "offset": 0
}

stack_selection = widgets.RadioButtons(
    options=list(stacks.keys()),
    value=list(stacks.keys())[0],
    description='Stack:',
)

def func1(x):
    URL = stacks[x]
    print("Selected stack URL: "+URL)
    

#stack_selection.observe(stack_selection_eventhandler, names='value')

interact(func1, x=stack_selection)
#display(stack_selection)



interactive(children=(RadioButtons(description='Stack:', options=('US', 'EU-N', 'EU-C'), value='US'), Output()…

<function __main__.func1(x)>

In [39]:
## List of events

URL = stacks[stack_selection.value]+"/"+TABLEID+"/events"

def grab_events(URL):
    print("Grabbing list of events from API")
    print(URL)
    data_response = []
    raw_data = pd.DataFrame(data_response)
    params["offset"] = 0

    while params["offset"]<MAX_PAGE:
        r = requests.get(URL, headers=headers, params = params)
        #print("Pagination:", params["offset"])
        print(r.status_code,":", r.url)
        raw_data = raw_data.append(pd.DataFrame(json.loads(r.text)), ignore_index = True)
        if len(json.loads(r.text))==0:
            params["offset"] = MAX_PAGE
        params["offset"] = params["offset"]+params["limit"] # offset +1?
    
    return(raw_data)

data = grab_events(URL)

Grabbing list of events from API
https://connection.keboola.com/v2/storage/tables/in.c-apac-ex-gmail-attachments-586740274.daily_fleet_size/events
200 : https://connection.keboola.com/v2/storage/tables/in.c-apac-ex-gmail-attachments-586740274.daily_fleet_size/events?q=event%3Astorage.tableImportDone&limit=100&offset=0



The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



200 : https://connection.keboola.com/v2/storage/tables/in.c-apac-ex-gmail-attachments-586740274.daily_fleet_size/events?q=event%3Astorage.tableImportDone&limit=100&offset=100



The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



200 : https://connection.keboola.com/v2/storage/tables/in.c-apac-ex-gmail-attachments-586740274.daily_fleet_size/events?q=event%3Astorage.tableImportDone&limit=100&offset=200


In [40]:
data[:3]

Unnamed: 0,id,event,component,message,description,type,runId,created,configurationId,objectId,objectName,objectType,context,params,results,performance,token,idBranch,uri,attachments
0,6880228839,storage.tableImportDone,storage,Imported table in.c-apac-ex-gmail-attachments-...,,success,966152052,2023-04-05T06:02:34+0200,,in.c-apac-ex-gmail-attachments-586740274.daily...,daily_fleet_size,table,"{'remoteAddr': None, 'httpReferer': None, 'htt...","{'importId': '966152281', 'incremental': False...","{'rowsCount': '455', 'sizeBytes': 11776, 'warn...","{'rowsCountDuration': 0.6443438529968262, 'imp...","{'id': 486330, 'name': '[_internal] Upload Fle...",,https://connection.keboola.com/v2/storage/even...,"[{'id': 966152279, 'isSliced': False, 'uploadT..."
1,6876879921,storage.tableImportDone,storage,Imported table in.c-apac-ex-gmail-attachments-...,,success,965827685,2023-04-04T06:02:21+0200,,in.c-apac-ex-gmail-attachments-586740274.daily...,daily_fleet_size,table,"{'remoteAddr': None, 'httpReferer': None, 'htt...","{'importId': '965827899', 'incremental': False...","{'rowsCount': '455', 'sizeBytes': 11776, 'warn...","{'rowsCountDuration': 0.7140810489654541, 'imp...","{'id': 486330, 'name': '[_internal] Upload Fle...",,https://connection.keboola.com/v2/storage/even...,"[{'id': 965827896, 'isSliced': False, 'uploadT..."
2,6873475045,storage.tableImportDone,storage,Imported table in.c-apac-ex-gmail-attachments-...,,success,965499501,2023-04-03T06:02:41+0200,,in.c-apac-ex-gmail-attachments-586740274.daily...,daily_fleet_size,table,"{'remoteAddr': None, 'httpReferer': None, 'htt...","{'importId': '965499857', 'incremental': False...","{'rowsCount': '460', 'sizeBytes': 12288, 'warn...","{'rowsCountDuration': 0.6021811962127686, 'imp...","{'id': 486330, 'name': '[_internal] Upload Fle...",,https://connection.keboola.com/v2/storage/even...,"[{'id': 965499855, 'isSliced': False, 'uploadT..."


In [41]:
stats_df = data[["id","event","component","type","runId","created","params","results","performance","token"]]
stats_df["table_event_id"] = stats_df["id"]
stats_df = stats_df[stats_df['event'] == 'storage.tableImportDone'] 

# Column names that contain JSON
del_list = ["id","token","performance", "warnings", "results", "event","component"]

normalize_list = ["token", "performance","results"]

#stats_df = pd.concat([stats_df, pd.json_normalize(stats_df["token"])], axis=1)

for b in normalize_list:
    try:
        stats_df = pd.concat([stats_df, pd.json_normalize(stats_df[b])], axis=1)
    except:
        print("Issue when normalizing {}".format(a))


for a in del_list:
    #print(a)
    try:
        stats_df.drop(a, axis=1, inplace=True)
    except:
        print("Issue when deleting {}".format(a))


## Once again, in a separate step (I bounced on errors doing everything at once)
normalize_list = ["params"]
del_list = ["params","importId","withoutHeaders", "source.origin", "source.fileId", "source.fileName","csv.delimiter","csv.enclosure","csv.escapedBy","fromSnapshot","async","source.tableName","source.type","source.dataObject","source.workspaceId","columns"]

for b in normalize_list:
    try:
        stats_df = pd.concat([stats_df, pd.json_normalize(stats_df[b])], axis=1)
    except:
        print("Issue when normalizing {}".format(a))

for a in del_list:
    #print(a)
    try:
        stats_df.drop(a, axis=1, inplace=True)
    except:
        print("Issue when deleting {}".format(a))

stats_df[:3]

Issue when deleting source.tableName
Issue when deleting source.type
Issue when deleting source.dataObject
Issue when deleting source.workspaceId


Unnamed: 0,type,runId,created,table_event_id,name,rowsCountDuration,importDuration,importDecomposed,rowsCount,sizeBytes,importedColumns,incremental,source.fileSize
0,success,966152052,2023-04-05T06:02:34+0200,6880228839,[_internal] Upload Fleet Metrics (daily) Sched...,0.644344,9.787265,"[{'name': 'copyToStaging', 'durationSeconds': ...",455,11776,"[date, organization_id, container_type, camera...",False,4451
1,success,965827685,2023-04-04T06:02:21+0200,6876879921,[_internal] Upload Fleet Metrics (daily) Sched...,0.714081,8.154749,"[{'name': 'copyToStaging', 'durationSeconds': ...",455,11776,"[date, organization_id, container_type, camera...",False,4458
2,success,965499501,2023-04-03T06:02:41+0200,6873475045,[_internal] Upload Fleet Metrics (daily) Sched...,0.602181,8.487464,"[{'name': 'copyToStaging', 'durationSeconds': ...",460,12288,"[date, organization_id, container_type, camera...",False,4495


In [42]:
stats_df["created"] = pd.to_datetime(stats_df['created'])
stats_df["rowsCount"] = stats_df["rowsCount"].astype(int)
stats_df = stats_df.sort_values('created',ascending=True)

## Lag columns
stats_df["lag_columns"] = stats_df["importedColumns"].shift()
stats_df['lag_rows'] = stats_df["rowsCount"].shift()

## convert columns to list
stats_df["importedColumns"] = stats_df["importedColumns"].tolist()
stats_df["lag_columns"] = stats_df["lag_columns"].tolist()

## test output
stats_df[:20]



Unnamed: 0,type,runId,created,table_event_id,name,rowsCountDuration,importDuration,importDecomposed,rowsCount,sizeBytes,importedColumns,incremental,source.fileSize,lag_columns,lag_rows
180,success,917309287.917309608.917309788,2022-11-02 05:01:34+01:00,6291836080,[_internal] Upload Fleet Metrics (daily) Sched...,0.607682,7.893643,"[{'name': 'copyToStaging', 'durationSeconds': ...",435,11776,"[date, organization_id, container_type, camera...",False,4244,,
179,success,917675333.917675504.917675605,2022-11-03 05:01:04+01:00,6295962748,[_internal] Upload Fleet Metrics (daily) Sched...,0.556171,8.017117,"[{'name': 'copyToStaging', 'durationSeconds': ...",435,11776,"[date, organization_id, container_type, camera...",False,4253,"[date, organization_id, container_type, camera...",435.0
178,success,918044737.918045157.918045366,2022-11-04 05:02:03+01:00,6300055112,[_internal] Upload Fleet Metrics (daily) Sched...,0.66266,8.542227,"[{'name': 'copyToStaging', 'durationSeconds': ...",435,11776,"[date, organization_id, container_type, camera...",False,4254,"[date, organization_id, container_type, camera...",435.0
177,success,918455630.918455819.918455975,2022-11-05 05:02:57+01:00,6304163994,[_internal] Upload Fleet Metrics (daily) Sched...,0.73042,7.907769,"[{'name': 'copyToStaging', 'durationSeconds': ...",435,11776,"[date, organization_id, container_type, camera...",False,4253,"[date, organization_id, container_type, camera...",435.0
176,success,918747837.918748115.918748268,2022-11-06 05:02:59+01:00,6307832265,[_internal] Upload Fleet Metrics (daily) Sched...,0.551704,7.195343,"[{'name': 'copyToStaging', 'durationSeconds': ...",435,11776,"[date, organization_id, container_type, camera...",False,4257,"[date, organization_id, container_type, camera...",435.0
175,success,919056488.919056663.919056782,2022-11-07 06:02:44+01:00,6311750290,[_internal] Upload Fleet Metrics (daily) Sched...,0.672174,8.118134,"[{'name': 'copyToStaging', 'durationSeconds': ...",435,11776,"[date, organization_id, container_type, camera...",False,4264,"[date, organization_id, container_type, camera...",435.0
174,success,919415366.919415520.919415709,2022-11-08 06:02:48+01:00,6315689367,[_internal] Upload Fleet Metrics (daily) Sched...,0.569942,7.138777,"[{'name': 'copyToStaging', 'durationSeconds': ...",435,11776,"[date, organization_id, container_type, camera...",False,4248,"[date, organization_id, container_type, camera...",435.0
173,success,919868055.919868256.919868480,2022-11-09 06:03:08+01:00,6319977139,[_internal] Upload Fleet Metrics (daily) Sched...,0.660005,8.387535,"[{'name': 'copyToStaging', 'durationSeconds': ...",435,11776,"[date, organization_id, container_type, camera...",False,4260,"[date, organization_id, container_type, camera...",435.0
172,success,920243896.920244091.920244353,2022-11-10 06:02:58+01:00,6323986695,[_internal] Upload Fleet Metrics (daily) Sched...,0.953723,8.549387,"[{'name': 'copyToStaging', 'durationSeconds': ...",435,11776,"[date, organization_id, container_type, camera...",False,4261,"[date, organization_id, container_type, camera...",435.0
171,success,920572341.920572486.920572615,2022-11-11 06:02:37+01:00,6327947663,[_internal] Upload Fleet Metrics (daily) Sched...,0.611991,7.752781,"[{'name': 'copyToStaging', 'durationSeconds': ...",435,11776,"[date, organization_id, container_type, camera...",False,4260,"[date, organization_id, container_type, camera...",435.0


In [43]:
stats_df.tail(40).to_csv('compology.csv', index=False)

In [44]:
## calculate deltas and changes
stats_df['schema_evolution'] = stats_df['importedColumns'].equals(stats_df['lag_columns'])
#stats_df['schema_evolution'] = stats_df['importedColumns']==stats_df['lag_columns']
stats_df['rows_delta'] = stats_df['rowsCount']-stats_df['lag_rows']


In [45]:
del_list = ["lag_columns", "lag_rows"]

for a in del_list:
    #print(a)
    try:
        stats_df.drop(a, axis=1, inplace=True)
    except:
        print("Issue when deleting {}".format(a))

stats_df[:3] 

stats_df["rows_delta"] = stats_df["rows_delta"].fillna(0)


## TODO

Job event frequency

- Informace o neaktivních tabulkách / nejvíce aktivních tabulkách
- Kolikrát v posledních 30ti dnech byla tabulka aktualizovaná
- Přírůstek v tabulkách
- Kolik je aliasů tabulky a kolik je mappings na tabulku
- Jestli existuje extractor, který do tabulky zapisuje

## Charting

In [46]:
## cutting the massage for the graph
#import_data['message'] = import_data['message'].str[:100]

fig = px.line(
    stats_df,
    x="created",
    y="rowsCount",
    title="Table row count",
    markers=True)

print("Note: row count is absolute, not imported chunk count")
fig.show()

Note: row count is absolute, not imported chunk count


In [47]:
fig = px.line(
    stats_df,
    x="created",
    y="importDuration",
    title="Table import duration (s)",
    markers=True)

fig.show()

In [48]:
fig = px.line(
    stats_df,
    x="created",
    y="sizeBytes",
    title="Table size (Bytes)",
    markers=True)

fig.show()

In [49]:
fig = px.line(
    stats_df,
    x="created",
    y="rows_delta",
    title="Rows increments",
    markers=True)

fig.show()

In [50]:
fig = go.Figure()

# https://plotly.com/python/line-charts/

fig.add_trace(go.Scatter(x=stats_df["created"], y=stats_df["schema_evolution"],
                    mode='markers',
                    line = dict(color='blue', width=1, dash='dot'),
                    name='schema_evolution'))


fig.show()

# Anomalies

In [51]:
import scipy
import scipy.stats as stats
import numpy as np
from sklearn.ensemble import IsolationForest

ModuleNotFoundError: No module named 'sklearn'

In [None]:
## anomaly on row imports (deltas => zscore should work)
zscore_rate = stats.zscore(stats_df["rows_delta"])
stats_df = stats_df.assign(zscore=zscore_rate)
stats_df[:3]


Unnamed: 0,type,runId,created,table_event_id,name,rowsCountDuration,importDuration,importDecomposed,rowsCount,sizeBytes,importedColumns,incremental,schema_evolution,rows_delta,zscore
775,success,820822887.8208251,2022-03-01 01:23:58+01:00,5075481904,Orchestrator Jira,0.254694,8.986974,"[{'name': 'copyToStaging', 'durationSeconds': 1.8857030868530273}, {'name': 'updateTargetTable', 'durationSeconds': 1.30153489112854}, {'name': 'deleteUpdatedRowsFromStaging', 'durationSeconds': 0.9591710567474365}, {'name': 'dedupStaging', 'durationSeconds': 2.2404870986938477}, {'name': 'insertIntoTargetFromStaging', 'durationSeconds': 0.8644988536834717}]",327,43008,"[tasklist_id, task_id, task_key, task_name, task_description, task_type_id, task_type_name, time_estimate, status_name, priority, start_date, due_date, task_created, project_name, project_id, assignee_user_id, created_by_user_id, task_type, billable]",True,False,0.0,-0.215312
774,success,820887619.8208892,2022-03-01 05:23:45+01:00,5076373795,Orchestrator Jira,0.239649,10.461283,"[{'name': 'copyToStaging', 'durationSeconds': 2.2743239402770996}, {'name': 'updateTargetTable', 'durationSeconds': 1.193911075592041}, {'name': 'deleteUpdatedRowsFromStaging', 'durationSeconds': 1.409174919128418}, {'name': 'dedupStaging', 'durationSeconds': 2.4119930267333984}, {'name': 'insertIntoTargetFromStaging', 'durationSeconds': 0.9938549995422363}]",327,43008,"[tasklist_id, task_id, task_key, task_name, task_description, task_type_id, task_type_name, time_estimate, status_name, priority, start_date, due_date, task_created, project_name, project_id, assignee_user_id, created_by_user_id, task_type, billable]",True,False,0.0,-0.215312
773,success,821074877.8210781,2022-03-01 14:24:22+01:00,5078590565,Orchestrator Jira,0.331043,9.436553,"[{'name': 'copyToStaging', 'durationSeconds': 1.6282389163970947}, {'name': 'updateTargetTable', 'durationSeconds': 1.3524930477142334}, {'name': 'deleteUpdatedRowsFromStaging', 'durationSeconds': 0.9235239028930664}, {'name': 'dedupStaging', 'durationSeconds': 2.607877016067505}, {'name': 'insertIntoTargetFromStaging', 'durationSeconds': 0.9739360809326172}]",327,43008,"[tasklist_id, task_id, task_key, task_name, task_description, task_type_id, task_type_name, time_estimate, status_name, priority, start_date, due_date, task_created, project_name, project_id, assignee_user_id, created_by_user_id, task_type, billable]",True,False,0.0,-0.215312


In [None]:
## Calculate anomalies with IsolationForest

model=IsolationForest(n_estimators=50, max_samples='auto', contamination=float(0.1),max_features=1.0)
model.fit(stats_df[["rows_delta"]])

stats_df["scores"]=model.decision_function(stats_df[["rows_delta"]])
stats_df['anomaly']=model.predict(stats_df[["rows_delta"]])
stats_df[:3]


X does not have valid feature names, but IsolationForest was fitted with feature names



Unnamed: 0,type,runId,created,table_event_id,name,rowsCountDuration,importDuration,importDecomposed,rowsCount,sizeBytes,importedColumns,incremental,schema_evolution,rows_delta,zscore,scores,anomaly
775,success,820822887.8208251,2022-03-01 01:23:58+01:00,5075481904,Orchestrator Jira,0.254694,8.986974,"[{'name': 'copyToStaging', 'durationSeconds': 1.8857030868530273}, {'name': 'updateTargetTable', 'durationSeconds': 1.30153489112854}, {'name': 'deleteUpdatedRowsFromStaging', 'durationSeconds': 0.9591710567474365}, {'name': 'dedupStaging', 'durationSeconds': 2.2404870986938477}, {'name': 'insertIntoTargetFromStaging', 'durationSeconds': 0.8644988536834717}]",327,43008,"[tasklist_id, task_id, task_key, task_name, task_description, task_type_id, task_type_name, time_estimate, status_name, priority, start_date, due_date, task_created, project_name, project_id, assignee_user_id, created_by_user_id, task_type, billable]",True,False,0.0,-0.215312,0.0,1
774,success,820887619.8208892,2022-03-01 05:23:45+01:00,5076373795,Orchestrator Jira,0.239649,10.461283,"[{'name': 'copyToStaging', 'durationSeconds': 2.2743239402770996}, {'name': 'updateTargetTable', 'durationSeconds': 1.193911075592041}, {'name': 'deleteUpdatedRowsFromStaging', 'durationSeconds': 1.409174919128418}, {'name': 'dedupStaging', 'durationSeconds': 2.4119930267333984}, {'name': 'insertIntoTargetFromStaging', 'durationSeconds': 0.9938549995422363}]",327,43008,"[tasklist_id, task_id, task_key, task_name, task_description, task_type_id, task_type_name, time_estimate, status_name, priority, start_date, due_date, task_created, project_name, project_id, assignee_user_id, created_by_user_id, task_type, billable]",True,False,0.0,-0.215312,0.0,1
773,success,821074877.8210781,2022-03-01 14:24:22+01:00,5078590565,Orchestrator Jira,0.331043,9.436553,"[{'name': 'copyToStaging', 'durationSeconds': 1.6282389163970947}, {'name': 'updateTargetTable', 'durationSeconds': 1.3524930477142334}, {'name': 'deleteUpdatedRowsFromStaging', 'durationSeconds': 0.9235239028930664}, {'name': 'dedupStaging', 'durationSeconds': 2.607877016067505}, {'name': 'insertIntoTargetFromStaging', 'durationSeconds': 0.9739360809326172}]",327,43008,"[tasklist_id, task_id, task_key, task_name, task_description, task_type_id, task_type_name, time_estimate, status_name, priority, start_date, due_date, task_created, project_name, project_id, assignee_user_id, created_by_user_id, task_type, billable]",True,False,0.0,-0.215312,0.0,1


In [None]:
stats_df['anomaly'] = stats_df['anomaly'].replace(1, "NaN")
stats_df['anomaly'] = stats_df['anomaly'].replace(-1, 1)
stats_df[:3]

Unnamed: 0,type,runId,created,table_event_id,name,rowsCountDuration,importDuration,importDecomposed,rowsCount,sizeBytes,importedColumns,incremental,schema_evolution,rows_delta,zscore,scores,anomaly
775,success,820822887.8208251,2022-03-01 01:23:58+01:00,5075481904,Orchestrator Jira,0.254694,8.986974,"[{'name': 'copyToStaging', 'durationSeconds': 1.8857030868530273}, {'name': 'updateTargetTable', 'durationSeconds': 1.30153489112854}, {'name': 'deleteUpdatedRowsFromStaging', 'durationSeconds': 0.9591710567474365}, {'name': 'dedupStaging', 'durationSeconds': 2.2404870986938477}, {'name': 'insertIntoTargetFromStaging', 'durationSeconds': 0.8644988536834717}]",327,43008,"[tasklist_id, task_id, task_key, task_name, task_description, task_type_id, task_type_name, time_estimate, status_name, priority, start_date, due_date, task_created, project_name, project_id, assignee_user_id, created_by_user_id, task_type, billable]",True,False,0.0,-0.215312,0.0,
774,success,820887619.8208892,2022-03-01 05:23:45+01:00,5076373795,Orchestrator Jira,0.239649,10.461283,"[{'name': 'copyToStaging', 'durationSeconds': 2.2743239402770996}, {'name': 'updateTargetTable', 'durationSeconds': 1.193911075592041}, {'name': 'deleteUpdatedRowsFromStaging', 'durationSeconds': 1.409174919128418}, {'name': 'dedupStaging', 'durationSeconds': 2.4119930267333984}, {'name': 'insertIntoTargetFromStaging', 'durationSeconds': 0.9938549995422363}]",327,43008,"[tasklist_id, task_id, task_key, task_name, task_description, task_type_id, task_type_name, time_estimate, status_name, priority, start_date, due_date, task_created, project_name, project_id, assignee_user_id, created_by_user_id, task_type, billable]",True,False,0.0,-0.215312,0.0,
773,success,821074877.8210781,2022-03-01 14:24:22+01:00,5078590565,Orchestrator Jira,0.331043,9.436553,"[{'name': 'copyToStaging', 'durationSeconds': 1.6282389163970947}, {'name': 'updateTargetTable', 'durationSeconds': 1.3524930477142334}, {'name': 'deleteUpdatedRowsFromStaging', 'durationSeconds': 0.9235239028930664}, {'name': 'dedupStaging', 'durationSeconds': 2.607877016067505}, {'name': 'insertIntoTargetFromStaging', 'durationSeconds': 0.9739360809326172}]",327,43008,"[tasklist_id, task_id, task_key, task_name, task_description, task_type_id, task_type_name, time_estimate, status_name, priority, start_date, due_date, task_created, project_name, project_id, assignee_user_id, created_by_user_id, task_type, billable]",True,False,0.0,-0.215312,0.0,


In [None]:
fig = go.Figure()

# https://plotly.com/python/line-charts/

fig.add_trace(go.Scatter(x=stats_df["created"], y=stats_df["rowsCount"],
                    mode='lines+markers',
                    line = dict(color='blue', width=1, dash='dot'),
                    name='rows'))

fig.add_trace(go.Scatter(x=stats_df["created"], y=stats_df["zscore"],
                    mode='lines+markers',
                    line = dict(color='green', width=1),
                    name='zscore'))

fig.add_trace(go.Scatter(x=stats_df["created"], y=stats_df["anomaly"],
                    mode='markers',
                    line = dict(color='red', width=1),
                    name='anomaly',
                    ))


fig.show()

## testing

In [None]:
column1 = "[project_id, project_name, epic_id, epic_name, task_id, task_name, worklog_id, user_name, comment, worklog_date, worklog_start, worklog_end, duration_s, duration_h, link]"
column2 = "[project_id, project_name, epic_id, epic_name, task_id, task_name, worklog_id, user_name, comment, worklog_date, worklog_start, worklog_end, duration_s, duration_h, link]"
print(column1)
print(column2)

column1==column2

[project_id, project_name, epic_id, epic_name, task_id, task_name, worklog_id, user_name, comment, worklog_date, worklog_start, worklog_end, duration_s, duration_h, link]
[project_id, project_name, epic_id, epic_name, task_id, task_name, worklog_id, user_name, comment, worklog_date, worklog_start, worklog_end, duration_s, duration_h, link]


True