This tutorial describes how to evaluate rules that compare two consecutive reporting periods (two consecutive years or two consecutive quarters).

In [1]:
from arelle import ModelManager, Cntlr, ModelFormulaObject, ModelXbrl, ViewFileFormulae, XbrlConst, ViewFileRenderedGrid
from arelle import RenderingEvaluator 

In [2]:
import pandas as pd
import numpy as np
from os import listdir
from os.path import join, isfile
import pickle
import re
from src import Evaluator
import logging
import data_patterns
import datetime

In [3]:
# Locations (relative to the notebook folder) of the rule files, the converted
# XBRL-instances and the datapoint definitions.
DATAPOINTS_PATH = join('..', 'data', 'datapoints')
INSTANCES_DATA_PATH = join('..', 'data', 'instances')
RULES_PATH = join('..', 'ftk-rules')

# Number of decimals; not referenced by the cells visible in this part of the notebook.
DECIMALS = 0

### Import rules

We start by importing the (t-1)-t rules, i.e. the rules that compare a period t with the preceding period t-1. We import one set of rules for yearly data (JS) and one set of rules for quarterly data (BEL).

#### FTK_betweenperiods_JS

In [4]:
# Read the between-periods rules for yearly data (JS) from the Excel workbook in RULES_PATH.
dfr_JS = pd.read_excel(
    io=join(RULES_PATH, 'FTK_betweenperiods_JS.xlsx'),
    engine='openpyxl',
)

#### FTK_betweenperiods_BEL

In [5]:
# Read the between-periods rules for quarterly data (BEL) from the Excel workbook
# in RULES_PATH and show them as the cell output.
dfr_BEL = pd.read_excel(
    io=join(RULES_PATH, 'FTK_betweenperiods_BEL.xlsx'),
    engine='openpyxl',
)
dfr_BEL

Unnamed: 0.1,Unnamed: 0,pattern_id,cluster,pattern_def,support,exceptions,confidence,pattern status,encodings,pandas co,pandas ex,xbrl co,xbrl ex,Error message
0,0,"FTK.K000,kwartaal consistently reported",0,"(({""FTK.K000,kwartaal (t)""} != 0) & ({""FTK.K00...",17019,0,1.0000,statistical validation rule,{},"df[((((((df[""FTK.K000,kwartaal (t)""]!=0)) & ((...","df[~((((((df[""FTK.K000,kwartaal (t)""]!=0)) & (...",,,
1,1,"FTK.K050,kwartaal consistently reported",0,"(({""FTK.K050,kwartaal (t)""} != 0) & ({""FTK.K05...",5291,0,1.0000,statistical validation rule,{},"df[((((((df[""FTK.K050,kwartaal (t)""]!=0)) & ((...","df[~((((((df[""FTK.K050,kwartaal (t)""]!=0)) & (...",,,
2,2,"FTK.K101-1,kwartaal consistently reported",0,"(({""FTK.K101-1,kwartaal (t)""} != 0) & ({""FTK.K...",16827,0,1.0000,statistical validation rule,{},"df[((((((df[""FTK.K101-1,kwartaal (t)""]!=0)) & ...","df[~((((((df[""FTK.K101-1,kwartaal (t)""]!=0)) &...",,,
3,3,"FTK.K101-1,R010,C010 consistently reported",0,"(({""FTK.K101-1,R010,C010 (t)""} != 0) & ({""FTK....",16809,18,0.9989,statistical validation rule,{},"df[((((((df[""FTK.K101-1,R010,C010 (t)""]!=0)) &...","df[~((((((df[""FTK.K101-1,R010,C010 (t)""]!=0)) ...",,,
4,4,"FTK.K101-1,R020,C010 consistently reported",0,"(({""FTK.K101-1,R020,C010 (t)""} != 0) & ({""FTK....",16809,18,0.9989,statistical validation rule,{},"df[((((((df[""FTK.K101-1,R020,C010 (t)""]!=0)) &...","df[~((((((df[""FTK.K101-1,R020,C010 (t)""]!=0)) ...",,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,1995,"PKB_K802,R140,C550 consistently reported",0,"(({""PKB_K802,R140,C550 (t)""} != 0) & ({""PKB_K8...",538,0,1.0000,statistical validation rule,{},"df[((((((df[""PKB_K802,R140,C550 (t)""]!=0)) & (...","df[~((((((df[""PKB_K802,R140,C550 (t)""]!=0)) & ...",,,
1996,1996,"PKB_K802,R160,C550 consistently reported",0,"(({""PKB_K802,R160,C550 (t)""} != 0) & ({""PKB_K8...",534,4,0.9926,statistical validation rule,{},"df[((((((df[""PKB_K802,R160,C550 (t)""]!=0)) & (...","df[~((((((df[""PKB_K802,R160,C550 (t)""]!=0)) & ...",,,
1997,1997,"PKB_K802,R180,C550 consistently reported",0,"(({""PKB_K802,R180,C550 (t)""} != 0) & ({""PKB_K8...",525,13,0.9758,statistical validation rule,{},"df[((((((df[""PKB_K802,R180,C550 (t)""]!=0)) & (...","df[~((((((df[""PKB_K802,R180,C550 (t)""]!=0)) & ...",,,
1998,1998,"PKB_K802,R190,C550 consistently reported",0,"(({""PKB_K802,R190,C550 (t)""} != 0) & ({""PKB_K8...",538,0,1.0000,statistical validation rule,{},"df[((((((df[""PKB_K802,R190,C550 (t)""]!=0)) & (...","df[~((((((df[""PKB_K802,R190,C550 (t)""]!=0)) & ...",,,


### Import templates

Next we import the reporting data of two consecutive periods. In the tutorial 'Convert XBRL-instances to CSV, HTML and pickles' the XBRL-instances are converted to pickle files, one per template, which are written to the data/instances folder. The rules are applicable to all tables with closed axes, so we import the pickle files of those tables. When comparing two periods, a different taxonomy may apply to each period; make sure the right taxonomy is selected in the tutorial 'Convert XBRL-instances to CSV, HTML and pickles' so that each XBRL-instance is converted properly.

The list _instances_JS_ contains the names of the folders with the converted XBRL-instances for yearly data. The list _instances_BEL_ contains the names of the folders with the converted XBRL-instances for two consecutive quarters. Finally, we also have to define the category of the insurer. The rules are set up for each type of insurer separately.

In [6]:
# Folder names (under INSTANCES_DATA_PATH) of the converted XBRL-instances to
# evaluate: yearly instances in instances_JS, quarterly instances in instances_BEL.
# Both start empty; fill them in when working with your own data.
instances_JS, instances_BEL = [], []

#### FTK_betweenperiods_JS

In [7]:
# NOTE: this cell is intentionally commented out. The tutorial uses the dummy
# data loaded in the next cell instead. Uncomment the code below to build df_JS
# from your own converted instances (folder names listed in instances_JS):
# it reads the pickle of every JS table found per instance, skips tables with an
# open axis, joins the closed-axis tables and keeps the numerical columns.
# NOTE(review): DataFrame.append (used below) was removed in pandas 2.0; replace
# it with pd.concat before enabling this cell on a recent pandas version.
# NOTE(review): assigning df_JS['period'] on a column selection of dft may raise
# SettingWithCopyWarning; consider dft[numerical_columns].copy().
#
# with open(join(DATAPOINTS_PATH, 'JS.pkl'), 'rb') as handle:
#     JS = pickle.load(handle)
# dft = pd.DataFrame()
# for instance in instances_JS:
#     df_closed_axis = pd.DataFrame()
#     tables_closed_axis = []  # for listing all input tables with closed axis
#     tables = [table for table in JS 
#         if isfile(join(INSTANCES_DATA_PATH,instance,table + '.pickle'))]  # JS tables found in the specified instance path
#     for table in [table for table in tables]:  #tables:
#         if isfile(join(INSTANCES_DATA_PATH,instance, table + '.pickle')):
#             df = pd.read_pickle(join(INSTANCES_DATA_PATH,instance, table + '.pickle'))  # read dataframe
#         else:
#             continue   
#         if df.index.nlevels > 2:  # if more than 2 indexes (entity, period), then the table has an open axis
#             continue
#         else:  # closed axis
#             tables_closed_axis.append(table)  # add to relevant list
#             # Add table to dataframe with all data from closed axis tables
#             if len(df_closed_axis) == 0:  # no data yet --> copy dataframe
#                 df_closed_axis = df.copy()
#             else:  # join to existing dataframe
#                 df_closed_axis = df_closed_axis.join(df)
#     if len(dft) == 0:  # no data yet 
#         dft = df_closed_axis
#     else:  # join to existing dataframe
#         dft=dft.append(df_closed_axis)
# dft=dft.reset_index()
# numerical_columns = ['entity','period'] + [dft.columns[c] for c in range(len(dft.columns))
#                          if ((dft.dtypes[c] == 'float64') or (dft.dtypes[c] == 'int64'))]
# df_JS = dft[numerical_columns]
# df_JS['period']=df_JS['period'].apply(lambda x: datetime.datetime.strptime(x,'%Y-%m-%d')) #convert to datetime
# df_JS.fillna(0,inplace=True)

In this tutorial we work with dummy data so that results can be shown. The commented-out cell above shows how to load your own converted instances instead.

In [8]:
# Load the pickled demo data for the yearly (JS) rules and display it.
# Only unpickle files from a trusted source: pickle.load can execute arbitrary code.
demo_js_file = join('..', 'tests', 'data', 'demo', 'FTK_JS.pkl')
with open(demo_js_file, mode='rb') as pickle_file:
    df_JS = pickle.load(pickle_file)
df_JS

Unnamed: 0,entity,period,"FTK.J101-1,R060,C010","FTK.J101-1,R061,C010","FTK.J101-1,R070,C010","FTK.J101-1,R080,C010","FTK.J101-1,R090,C010","FTK.J301-1,R010,C010","FTK.J301-1,R020,C010","FTK.J301-1,R030,C010",...,"FTK.J605-1,R850,C020","FTK.J605-1,R860,C010","FTK.J605-1,R860,C020","FTK.J605-1,R860,C030","FTK.J605-1,R860,C040","FTK.J605-1,R860,C050","FTK.J902,R090,C010","FTK.J902,R100,C010","FTK.J902,R110,C010","FTK.J902,R120,C010"
0,LEGALIDENTIFIER20P32,2018-12-31,2.0,3.0,4.0,5.0,6.0,1132000.0,1177000.0,1134000.0,...,2259000.0,2260000.0,2261000.0,2262000.0,2263000.0,2264000.0,0.038,2292000.0,187.0,188.0
0,LEGALIDENTIFIER20P32,2019-12-31,3002.0,3003.0,3004.0,3005.0,3006.0,1135000.0,1180000.0,1137000.0,...,2262000.0,2263000.0,2264000.0,2265000.0,2266000.0,2267000.0,3000.038,2295000.0,3187.0,3188.0


#### FTK_betweenperiods_BEL

In [9]:
# NOTE: this cell is intentionally commented out. The tutorial uses the dummy
# data loaded in the next cell instead. Uncomment the code below to build df_BEL
# from your own converted instances (folder names listed in instances_BEL):
# it reads the pickle of every BEL table found per instance, skips tables with an
# open axis, joins the closed-axis tables and keeps the numerical columns.
# NOTE(review): DataFrame.append (used below) was removed in pandas 2.0; replace
# it with pd.concat before enabling this cell on a recent pandas version.
# NOTE(review): assigning df_BEL['period'] on a column selection of dft may raise
# SettingWithCopyWarning; consider dft[numerical_columns].copy().
#
# with open(join(DATAPOINTS_PATH, 'BEL.pkl'), 'rb') as handle:
#     BEL = pickle.load(handle)
# dft = pd.DataFrame()
# for instance in instances_BEL:
#     df_closed_axis = pd.DataFrame()
#     tables_closed_axis = []  # for listing all input tables with closed axis
#     tables = [table for table in BEL 
#         if isfile(join(INSTANCES_DATA_PATH,instance,table + '.pickle'))]  # BEL tables found in the specified instance path
#     for table in [table for table in tables]:  #tables:
#         if isfile(join(INSTANCES_DATA_PATH,instance, table + '.pickle')):
#             df = pd.read_pickle(join(INSTANCES_DATA_PATH,instance, table + '.pickle'))  # read dataframe
#         else:
#             continue   
#         if df.index.nlevels > 2:  # if more than 2 indexes (entity, period), then the table has an open axis
#             continue
#         else:  # closed axis
#             tables_closed_axis.append(table)  # add to relevant list
#             # Add table to dataframe with all data from closed axis tables
#             if len(df_closed_axis) == 0:  # no data yet --> copy dataframe
#                 df_closed_axis = df.copy()
#             else:  # join to existing dataframe
#                 df_closed_axis = df_closed_axis.join(df)
#     if len(dft) == 0:  # no data yet 
#         dft = df_closed_axis
#     else:  # join to existing dataframe
#         dft=dft.append(df_closed_axis)
# dft=dft.reset_index()
# numerical_columns = ['entity','period'] + [dft.columns[c] for c in range(len(dft.columns))
#                          if ((dft.dtypes[c] == 'float64') or (dft.dtypes[c] == 'int64'))]
# df_BEL = dft[numerical_columns]
# df_BEL['period']=df_BEL['period'].apply(lambda x: datetime.datetime.strptime(x,'%Y-%m-%d')) #convert to datetime
# df_BEL.fillna(0,inplace=True)

In this tutorial we work with dummy data so that results can be shown. The commented-out cell above shows how to load your own converted instances instead.

In [10]:
# Load the pickled demo data for the quarterly (BEL) rules and display it.
# Only unpickle files from a trusted source: pickle.load can execute arbitrary code.
demo_bel_file = join('..', 'tests', 'data', 'demo', 'FTK_BEL.pkl')
with open(demo_bel_file, mode='rb') as pickle_file:
    df_BEL = pickle.load(pickle_file)
df_BEL

Unnamed: 0,entity,period,"FTK.K101-1,R010,C010","FTK.K101-1,R020,C010","FTK.K101-1,R030,C010","FTK.K101-1,R040,C010","FTK.K101-1,R050,C010","FTK.K101-1,R060,C010","FTK.K101-1,R070,C010","FTK.K101-1,R080,C010",...,"FTK.K206,R270,C040","FTK.K206,R270,C050","FTK.K206,R270,C060","FTK.K206,R270,C070","FTK.K206,R270,C090","FTK.K206,R270,C100","FTK.K206,R270,C110","FTK.K206,R270,C120","FTK.K206,R270,C130","FTK.K206,R270,C140"
0,LEGALIDENTIFIER20P32,2018-12-31,1056000.0,1057000.0,1058000.0,1059000.0,1060000.0,1061000.0,1062000.0,1063000.0,...,0.14,0.15,0.16,0.17,0.18,0.19,0.2,0.21,0.22,0.23
0,LEGALIDENTIFIER20P32,2019-12-31,1057500.0,1058500.0,1059500.0,1060500.0,1061500.0,1062500.0,1063500.0,1064500.0,...,1500.14,1500.15,1500.16,1500.17,1500.18,1500.19,1500.2,1500.21,1500.22,1500.23


### Evaluate rules

#### Evaluate FTK_betweenperiods_JS

In [11]:
# Evaluate the yearly (JS) rules against the yearly data.
# Build a PatternMiner around the imported rule definitions.
miner = data_patterns.PatternMiner(df_patterns=dfr_JS)
# Attach the reporting data that the rules are evaluated against.
miner.df_data = df_JS
# Called with the 'entity' key column and the 'period' column.
# NOTE(review): presumably this pairs each period (t) with the preceding one
# (t-1), since the rule definitions reference "(t)" columns -- confirm in the
# data_patterns documentation.
miner.convert_to_time(['entity'], 'period')
# Move the index levels back into regular columns before analyzing.
miner.df_data = miner.df_data.reset_index()

# Run the analysis of the rules on the data and display the outcome as cell output.
results = miner.analyze()
results

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 766/766 [00:02<00:00, 297.63it/s]


Unnamed: 0_level_0,result_type,pattern_id,cluster,support,exceptions,confidence,pattern_def,P values,Q values
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,True,"FTK.J101-1,R061,C010 consistently reported",0,1,0,1.0,"(({""FTK.J101-1,R061,C010 (t)""} != 0) & ({""FTK....","[3003.0, 3.0, 3003.0, 3.0]",[]
0,True,"FTK.J101-1,R070,C010 consistently reported",0,1,0,1.0,"(({""FTK.J101-1,R070,C010 (t)""} != 0) & ({""FTK....","[3004.0, 4.0, 3004.0, 4.0]",[]
0,True,"FTK.J101-1,R080,C010 consistently reported",0,1,0,1.0,"(({""FTK.J101-1,R080,C010 (t)""} != 0) & ({""FTK....","[3005.0, 5.0, 3005.0, 5.0]",[]
0,True,"FTK.J101-1,R090,C010 consistently reported",0,1,0,1.0,"(({""FTK.J101-1,R090,C010 (t)""} != 0) & ({""FTK....","[3006.0, 6.0, 3006.0, 6.0]",[]
0,True,"Delta-FTK.J301-12,R010,C010 < 10%",0,1,0,1.0,"IF ({""FTK.J301-12,R010,C010 (t)""} != 0) & ({""F...","[1192000.0, 1189000.0]","[1192000.0, 1189000.0, 1189000.0]"
...,...,...,...,...,...,...,...,...,...
0,False,"Delta-FTK.J601-4,R770,C020 < 10%",0,0,1,0.0,"IF ({""FTK.J601-4,R770,C020 (t)""} != 0) & ({""FT...","[3002.04, 2.04]","[3002.04, 2.04, 2.04]"
0,False,"Delta-FTK.J601-4,R780,C020 < 10%",0,0,1,0.0,"IF ({""FTK.J601-4,R780,C020 (t)""} != 0) & ({""FT...","[3002.06, 2.06]","[3002.06, 2.06, 2.06]"
0,False,"Delta-FTK.J601-4,R790,C020 < 10%",0,0,1,0.0,"IF ({""FTK.J601-4,R790,C020 (t)""} != 0) & ({""FT...","[3002.08, 2.08]","[3002.08, 2.08, 2.08]"
0,False,"Delta-FTK.J601-4,R800,C020 < 10%",0,0,1,0.0,"IF ({""FTK.J601-4,R800,C020 (t)""} != 0) & ({""FT...","[3002.1, 2.1]","[3002.1, 2.1, 2.1]"


#### Evaluate FTK_betweenperiods_BEL

In [12]:
# Evaluate the quarterly (BEL) rules against the quarterly data.
# Build a PatternMiner around the imported rule definitions.
miner = data_patterns.PatternMiner(df_patterns=dfr_BEL)
# Attach the reporting data that the rules are evaluated against.
miner.df_data = df_BEL
# Called with the 'entity' key column and the 'period' column; unlike the yearly
# evaluation, set_year=False is passed here.
# NOTE(review): presumably set_year=False keeps full period dates rather than
# reducing them to a year, as needed for quarters -- confirm in the
# data_patterns documentation.
miner.convert_to_time(['entity'], 'period', set_year=False)
# Move the index levels back into regular columns before analyzing.
miner.df_data = miner.df_data.reset_index()

# Run the analysis of the rules on the data and display the outcome as cell output.
results = miner.analyze()
results

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1008/1008 [00:03<00:00, 299.17it/s]


Unnamed: 0_level_0,result_type,pattern_id,cluster,support,exceptions,confidence,pattern_def,P values,Q values
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,True,"FTK.K101-1,R010,C010 consistently reported",0,1,0,1.0,"(({""FTK.K101-1,R010,C010 (t)""} != 0) & ({""FTK....","[1057500.0, 1056000.0, 1057500.0, 1056000.0]",[]
0,True,"FTK.K101-1,R020,C010 consistently reported",0,1,0,1.0,"(({""FTK.K101-1,R020,C010 (t)""} != 0) & ({""FTK....","[1058500.0, 1057000.0, 1058500.0, 1057000.0]",[]
0,True,"FTK.K101-1,R030,C010 consistently reported",0,1,0,1.0,"(({""FTK.K101-1,R030,C010 (t)""} != 0) & ({""FTK....","[1059500.0, 1058000.0, 1059500.0, 1058000.0]",[]
0,True,"FTK.K101-1,R040,C010 consistently reported",0,1,0,1.0,"(({""FTK.K101-1,R040,C010 (t)""} != 0) & ({""FTK....","[1060500.0, 1059000.0, 1060500.0, 1059000.0]",[]
0,True,"FTK.K101-1,R050,C010 consistently reported",0,1,0,1.0,"(({""FTK.K101-1,R050,C010 (t)""} != 0) & ({""FTK....","[1061500.0, 1060000.0, 1061500.0, 1060000.0]",[]
...,...,...,...,...,...,...,...,...,...
0,False,"Delta-FTK.K101-1,R300,C010 < 10%",0,0,1,0.0,"IF ({""FTK.K101-1,R300,C010 (t)""} != 0) & ({""FT...","[1500.3, 0.3]","[1500.3, 0.3, 0.3]"
0,False,"Delta-FTK.K201-1,R550,C050 < 10%",0,0,1,0.0,"IF ({""FTK.K201-1,R550,C050 (t)""} != 0) & ({""FT...","[1500.0061, 0.0061]","[1500.0061, 0.0061, 0.0061]"
0,False,"Delta-FTK.K201-1,R550,C060 < 10%",0,0,1,0.0,"IF ({""FTK.K201-1,R550,C060 (t)""} != 0) & ({""FT...","[1500.0123, 0.0123]","[1500.0123, 0.0123, 0.0123]"
0,False,"Delta-FTK.K201-1,R190,C070 < 10%",0,0,1,0.0,"IF ({""FTK.K201-1,R190,C070 (t)""} != 0) & ({""FT...","[1500.0147, 0.0147]","[1500.0147, 0.0147, 0.0147]"
