This tutorial describes how to evaluate new rules applicable to the assets and derivatives data.

In [1]:
from arelle import ModelManager, Cntlr, ModelFormulaObject, ModelXbrl, ViewFileFormulae, XbrlConst, ViewFileRenderedGrid
from arelle import RenderingEvaluator 

In [2]:
import logging
import os
import pickle
import re
from os import listdir
from os.path import join, isfile

import numpy as np
import pandas as pd

import data_patterns
from src import Evaluator

In [3]:
# Configuration: every location is a relative path, resolved against the
# working directory of the kernel.
DECIMALS = 0
RULES_PATH = join('..', 'ftk-rules')                   # Excel files with the rule definitions
INSTANCES_DATA_PATH = join('..', 'data', 'instances')  # pickles of converted XBRL-instances
TEST_DATA_PATH = join('..', 'tests', 'data', 'demo')   # demo pickles read by default in this tutorial
FILENAME_DATAPOINTS = 'QRS.csv'
RESULTS_PATH = join('..', 'results')                   # the log file is written here
DATA_PATH = join('..', 'data')

# logging.basicConfig opens the log file when it installs the file handler and
# raises FileNotFoundError if the target folder is missing (e.g. on a fresh
# checkout), so make sure the results folder exists first.
os.makedirs(RESULTS_PATH, exist_ok=True)
logging.basicConfig(filename=join(RESULTS_PATH, 'rules.log'), level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

### Import rules

We start with importing the new rules applicable to the assets and derivatives data. There are several sets of rules applicable to different templates:
* K208A (Information on positions held)
* K208B (Information on assets)
* K208A (Information on positions held) and K208B (Information on assets)
* K210A (Information on positions held) and K210B (Information on derivatives)
* K210B (Information on derivatives)

#### S06 (assets: K208A and K208B)

In [4]:
# Read the three S06 rule sets: rules spanning K208A and K208B, rules for
# K208B only and rules for K208A only.
dfr_208 = pd.read_excel(join(RULES_PATH,'FTK_K208.xlsx'))
dfr_208B = pd.read_excel(join(RULES_PATH,'FTK_K208B.xlsx'))
dfr_208A = pd.read_excel(join(RULES_PATH,'FTK_K208A.xlsx'))

# Capitalize row-column references: every lowercase reference of the form
# c<4 digits> found in the 'pandas ex' column is replaced by its capitalized
# form throughout the whole rules dataframe.
# NOTE(review): the references visible in the rule preview look like "C070"
# (already uppercase, three digits), so this step may be a no-op here - confirm.
for rules in (dfr_208, dfr_208B, dfr_208A):
    # dropna: a rule without a 'pandas ex' expression yields NaN from
    # .str.findall, which cannot be iterated and would raise a TypeError.
    found_per_rule = rules['pandas ex'].dropna().str.findall(r'c\d\d\d\d')
    column_replace = {ref for found in found_per_rule for ref in found}
    for ref in column_replace:
        rules.replace(to_replace=ref, value=ref.capitalize(), inplace=True, regex=True)

In [5]:
# Preview of the rule set read from FTK_K208.xlsx.
dfr_208

Unnamed: 0,pattern_id,cluster,pattern_def,support,exceptions,confidence,pattern status,encodings,pandas co,pandas ex,xbrl co,xbrl ex,Error message
0,Total solvency 1,0,"IF ({""FTK.K208A,C070""} != 0) THEN (ABS({""FTK.K...",0,0,0.0000,non-blocking taxonomy rule,{},"df[((df[""FTK.K208A,C070""]!=0)) & ((ABS(df[""FTK...","df[((df[""FTK.K208A,C070""]!=0)) & ~((ABS(df[""FT...",,,
1,Total solvency 2,0,"IF ({""FTK.K208A,C060""} != 0) THEN (ABS({""FTK.K...",0,0,0.0000,non-blocking taxonomy rule,{},"df[((df[""FTK.K208A,C060""]!=0)) & ((ABS(df[""FTK...","df[((df[""FTK.K208A,C060""]!=0)) & ~((ABS(df[""FT...",,,
2,CIC 6,0,"IF ({""FTK.K208B,C220""} = ""NL72"") THEN ({""FTK.K...",0,0,0.0000,non-blocking taxonomy rule,{},"df[((df[""FTK.K208B,C220""]==""NL72"")) & ((df[""FT...","df[((df[""FTK.K208B,C220""]==""NL72"")) & ~((df[""F...",,,
3,CIC 6,0,"IF ({""FTK.K208B,C220""} = ""XT74"") THEN ({""FTK.K...",0,0,0.0000,non-blocking taxonomy rule,{},"df[((df[""FTK.K208B,C220""]==""XT74"")) & ((df[""FT...","df[((df[""FTK.K208B,C220""]==""XT74"")) & ~((df[""F...",,,
4,CIC 6,0,"IF ({""FTK.K208B,C220""} = ""XT72"") THEN ({""FTK.K...",0,0,0.0000,non-blocking taxonomy rule,{},"df[((df[""FTK.K208B,C220""]==""XT72"")) & ((df[""FT...","df[((df[""FTK.K208B,C220""]==""XT72"")) & ~((df[""F...",,,
5,CIC 6,0,"IF ({""FTK.K208B,C220""} = ""XT81"") THEN ({""FTK.K...",0,0,0.0000,non-blocking taxonomy rule,{},"df[((df[""FTK.K208B,C220""]==""XT81"")) & ((df[""FT...","df[((df[""FTK.K208B,C220""]==""XT81"")) & ~((df[""F...",,,
6,CIC 6,0,"IF ({""FTK.K208B,C220""} = ""XT89"") THEN ({""FTK.K...",0,0,0.0000,non-blocking taxonomy rule,{},"df[((df[""FTK.K208B,C220""]==""XT89"")) & ((df[""FT...","df[((df[""FTK.K208B,C220""]==""XT89"")) & ~((df[""F...",,,
7,CIC 6,0,"IF ({""FTK.K208B,C220""} = ""XT84"") THEN ({""FTK.K...",0,0,0.0000,non-blocking taxonomy rule,{},"df[((df[""FTK.K208B,C220""]==""XT84"")) & ((df[""FT...","df[((df[""FTK.K208B,C220""]==""XT84"")) & ~((df[""F...",,,
8,CIC 6,0,"IF ({""FTK.K208B,C220""} = ""XT86"") THEN ({""FTK.K...",0,0,0.0000,non-blocking taxonomy rule,{},"df[((df[""FTK.K208B,C220""]==""XT86"")) & ((df[""FT...","df[((df[""FTK.K208B,C220""]==""XT86"")) & ~((df[""F...",,,
9,CIC 6,0,"IF ({""FTK.K208B,C220""} = ""XT75"") THEN ({""FTK.K...",0,0,0.0000,non-blocking taxonomy rule,{},"df[((df[""FTK.K208B,C220""]==""XT75"")) & ((df[""FT...","df[((df[""FTK.K208B,C220""]==""XT75"")) & ~((df[""F...",,,


#### S08 (derivatives: K210A and K210B)

In [6]:
# Read the two S08 rule sets: rules spanning K210A and K210B, and rules for
# K210B only.
dfr_210 = pd.read_excel(join(RULES_PATH,'FTK_K210.xlsx'))
dfr_210B = pd.read_excel(join(RULES_PATH,'FTK_K210B.xlsx'))

# Capitalize row-column references: every lowercase reference of the form
# c<4 digits> found in the 'pandas ex' column is replaced by its capitalized
# form throughout the whole rules dataframe.
for rules in (dfr_210, dfr_210B):
    # dropna: a rule without a 'pandas ex' expression yields NaN from
    # .str.findall, which cannot be iterated and would raise a TypeError.
    found_per_rule = rules['pandas ex'].dropna().str.findall(r'c\d\d\d\d')
    column_replace = {ref for found in found_per_rule for ref in found}
    for ref in column_replace:
        rules.replace(to_replace=ref, value=ref.capitalize(), inplace=True, regex=True)

In [7]:
# Preview of the rule set read from FTK_K210.xlsx.
dfr_210

Unnamed: 0,pattern_id,cluster,pattern_def,support,exceptions,confidence,pattern status,encodings,pandas co,pandas ex,xbrl co,xbrl ex,Error message
0,Contract size 1,0,"IF {""FTK.K210B,C290""} = ""XTD1"" THEN {""FTK.K210...",0,0,0,non-blocking taxonomy rule,{},"df[(df[""FTK.K210B,C290""]==""XTD1"") & (df[""FTK.K...","df[(df[""FTK.K210B,C290""]==""XTD1"") & ~(df[""FTK....",,,
1,Contract size 1,0,"IF {""FTK.K210B,C290""} = ""XTD9"" THEN {""FTK.K210...",0,0,0,non-blocking taxonomy rule,{},"df[(df[""FTK.K210B,C290""]==""XTD9"") & (df[""FTK.K...","df[(df[""FTK.K210B,C290""]==""XTD9"") & ~(df[""FTK....",,,
2,Contract size 1,0,"IF {""FTK.K210B,C290""} = ""XTF1"" THEN {""FTK.K210...",0,0,0,non-blocking taxonomy rule,{},"df[(df[""FTK.K210B,C290""]==""XTF1"") & (df[""FTK.K...","df[(df[""FTK.K210B,C290""]==""XTF1"") & ~(df[""FTK....",,,
3,Contract size 1,0,"IF {""FTK.K210B,C290""} = ""XLE2"" THEN {""FTK.K210...",0,0,0,non-blocking taxonomy rule,{},"df[(df[""FTK.K210B,C290""]==""XLE2"") & (df[""FTK.K...","df[(df[""FTK.K210B,C290""]==""XLE2"") & ~(df[""FTK....",,,
4,Contract size 1,0,"IF {""FTK.K210B,C290""} = ""XLD1"" THEN {""FTK.K210...",0,0,0,non-blocking taxonomy rule,{},"df[(df[""FTK.K210B,C290""]==""XLD1"") & (df[""FTK.K...","df[(df[""FTK.K210B,C290""]==""XLD1"") & ~(df[""FTK....",,,
5,Contract size 1,0,"IF {""FTK.K210B,C290""} = ""XLD3"" THEN {""FTK.K210...",0,0,0,non-blocking taxonomy rule,{},"df[(df[""FTK.K210B,C290""]==""XLD3"") & (df[""FTK.K...","df[(df[""FTK.K210B,C290""]==""XLD3"") & ~(df[""FTK....",,,
6,Contract size 1,0,"IF {""FTK.K210B,C290""} = ""XLD9"" THEN {""FTK.K210...",0,0,0,non-blocking taxonomy rule,{},"df[(df[""FTK.K210B,C290""]==""XLD9"") & (df[""FTK.K...","df[(df[""FTK.K210B,C290""]==""XLD9"") & ~(df[""FTK....",,,
7,Contract size 1,0,"IF {""FTK.K210B,C290""} = ""XTD8"" THEN {""FTK.K210...",0,0,0,non-blocking taxonomy rule,{},"df[(df[""FTK.K210B,C290""]==""XTD8"") & (df[""FTK.K...","df[(df[""FTK.K210B,C290""]==""XTD8"") & ~(df[""FTK....",,,
8,Contract size 1,0,"IF {""FTK.K210B,C290""} = ""XTF4"" THEN {""FTK.K210...",0,0,0,non-blocking taxonomy rule,{},"df[(df[""FTK.K210B,C290""]==""XTF4"") & (df[""FTK.K...","df[(df[""FTK.K210B,C290""]==""XTF4"") & ~(df[""FTK....",,,
9,Contract size 1,0,"IF {""FTK.K210B,C290""} = ""XTD3"" THEN {""FTK.K210...",0,0,0,non-blocking taxonomy rule,{},"df[(df[""FTK.K210B,C290""]==""XTD3"") & (df[""FTK.K...","df[(df[""FTK.K210B,C290""]==""XTD3"") & ~(df[""FTK....",,,


### Import templates

Next we import the reporting data. In the tutorial 'Convert XBRL-instances to CSV, HTML and pickles' the XBRL-instances are converted to pickle files per template, which are written to the data/instances folder. By default this tutorial reads the demo pickle files from tests/data/demo; to evaluate your own instance, use the commented-out lines that read from data/instances instead. For the sets of rules that are applicable to two templates, we merge the dataframes of both templates.

#### S06 (assets: K208A and K208B)

In [8]:
# Key columns of each template; the third entry is the template-specific key
# on which K208A and K208B rows are matched.
keys_208A = ['entity', 'period', 'K208A,020']
keys_208B = ['entity', 'period', 'K208B,130']

# Demo pickles; reset_index moves the index levels into ordinary columns.
df_208A = pd.read_pickle(join(TEST_DATA_PATH, 'FTK.K208A.pickle')).reset_index()
df_208B = pd.read_pickle(join(TEST_DATA_PATH, 'FTK.K208B.pickle')).reset_index()
# To use a converted instance instead of the demo data:
#df_208A = pd.read_pickle(join(join(INSTANCES_DATA_PATH,'DNB-NR_FTK-2019-06_2019-12-31_MOD_FTK-BEL'),'FTK.K208A.pickle')).reset_index()
#df_208B = pd.read_pickle(join(join(INSTANCES_DATA_PATH,'DNB-NR_FTK-2019-06_2019-12-31_MOD_FTK-BEL'), 'FTK.K208B.pickle')).reset_index()

# Combined frame for the rules that span both templates: inner join on the
# key columns, indexed by the K208A keys.
df_208 = df_208A.merge(
    df_208B,
    how='inner',
    left_on=keys_208A,
    right_on=keys_208B,
).set_index(keys_208A)

# Index the single-template frames by their own keys.
df_208B = df_208B.set_index(keys_208B)
df_208A = df_208A.set_index(keys_208A)

# Copy the third index level back into a regular column
# (presumably so that rules can refer to it by name - confirm).
df_208B['K208B,130'] = df_208B.index.get_level_values(2)
df_208['K208A,020'] = df_208.index.get_level_values(2)

In [9]:
# Inner join of the K208A and K208B data built in the previous cell.
df_208

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,"K208A,010","FTK.K208A,C030","FTK.K208A,C040","FTK.K208A,C050","FTK.K208A,C060","FTK.K208A,C070","FTK.K208A,C080","FTK.K208A,C090","FTK.K208A,C100","FTK.K208A,C110",...,"FTK.K208B,C240","FTK.K208B,C250","FTK.K208B,C260","FTK.K208B,C270","FTK.K208B,C280","FTK.K208B,C290","FTK.K208B,C300","FTK.K208B,C310","FTK.K208B,C320","K208A,020"
entity,period,"K208A,020",Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
LEGALIDENTIFIER20P32,2018-12-31,Key 2,1,Defined benefit and Defined benefit part of Mixed,AFGHANISTAN,Text 20,0.24,1038000.0,Quoted market price in active markets for the ...,1039000.0,1040000.0,1041000.0,...,Text 27,Euler Hermes Rating GmbH (LEI code: 391200QXGL...,0.25,1900-12-31 00:00:00,1042000.0,0.0315,Not an infrastructure investment,Credit quality step 0,Text 28,Key 2


#### S08 (derivatives: K210A and K210B)

In [10]:
# Key columns of each template; the third entry is the template-specific key
# on which K210A and K210B rows are matched.
keys_210A = ['entity', 'period', 'K210A,020']
keys_210B = ['entity', 'period', 'K210B,200']

# Demo pickles; reset_index moves the index levels into ordinary columns.
df_210A = pd.read_pickle(join(TEST_DATA_PATH, 'FTK.K210A.pickle')).reset_index()
df_210B = pd.read_pickle(join(TEST_DATA_PATH, 'FTK.K210B.pickle')).reset_index()
# To use a converted instance instead of the demo data:
#df_210A = pd.read_pickle(join(join(INSTANCES_DATA_PATH,'DNB-NR_FTK-2019-06_2019-12-31_MOD_FTK-BEL'),'FTK.K210A.pickle')).reset_index()
#df_210B = pd.read_pickle(join(join(INSTANCES_DATA_PATH,'DNB-NR_FTK-2019-06_2019-12-31_MOD_FTK-BEL'), 'FTK.K210B.pickle')).reset_index()

# Combined frame for the rules that span both templates: inner join on the
# key columns, indexed by the K210A keys.
df_210 = df_210A.merge(
    df_210B,
    how='inner',
    left_on=keys_210A,
    right_on=keys_210B,
).set_index(keys_210A)

# Index the single-template frames by their own keys.
df_210A = df_210A.set_index(keys_210A)
df_210B = df_210B.set_index(keys_210B)

# Copy the third index level back into a regular column
# (presumably so that rules can refer to it by name - confirm).
df_210B['K210B,200'] = df_210B.index.get_level_values(2)
df_210['K210A,020'] = df_210.index.get_level_values(2)

In [11]:
# Inner join of the K210A and K210B data built in the previous cell.
df_210

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,"K210A,010","FTK.K210A,C030","FTK.K210A,C040","FTK.K210A,C050","FTK.K210A,C060","FTK.K210A,C070","FTK.K210A,C080","FTK.K210A,C090","FTK.K210A,C100","FTK.K210A,C110",...,"FTK.K210B,C260","FTK.K210B,C270","FTK.K210B,C280","FTK.K210B,C290","FTK.K210B,C300","FTK.K210B,C310","FTK.K210B,C320","FTK.K210B,C330","FTK.K210B,C340","K210A,020"
entity,period,"K210A,020",Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
LEGALIDENTIFIER20P32,2018-12-31,Key 9,8,Defined benefit and Defined benefit part of Mixed,Text 38,Micro hedge [MI],0.28,1048000.0,Open,1049000.0,1050000.0,1.0,...,Text 42,Text 43,EUR,USA2,Text 45,Bankruptcy of the underlying or reference enti...,0,0,1903-12-31 00:00:00,Key 9


### Evaluate rules

Now we are ready to evaluate the different sets of rules. First, we construct a PatternMiner-object with the data-patterns package using the rules dataframe. Second, we use the analyze-function to get the results of the rules. We do this for each set of rules separately.

#### S06 (assets: K208A and K208B)


In [12]:
# Rules spanning both templates: build a PatternMiner from dfr_208 and
# evaluate it on the merged K208A/K208B data.
miner = data_patterns.PatternMiner(df_patterns=dfr_208)
results_208 = miner.analyze(df_208)
# Last expression of the cell: display the evaluation results.
results_208

100%|██████████| 1500/1500 [00:04<00:00, 341.13it/s]


Unnamed: 0,Unnamed: 1,Unnamed: 2,result_type,pattern_id,cluster,support,exceptions,confidence,pattern_def,P values,Q values
LEGALIDENTIFIER20P32,2018-12-31,Key 2,False,Total solvency 1,0,0,1,0.0,"IF ({""FTK.K208A,C070""} != 0) THEN (ABS({""FTK.K...",1038000.0,"[1041000.0, 1038000.0, 0.0315, 1040000.0, 1041..."
LEGALIDENTIFIER20P32,2018-12-31,Key 2,False,Total solvency 2,0,0,1,0.0,"IF ({""FTK.K208A,C060""} != 0) THEN (ABS({""FTK.K...",0.24,"[1041000.0, 0.24, 1042000.0, 1040000.0, 104100..."


In [13]:
# K208B-only rules: build a PatternMiner from dfr_208B and evaluate it on the
# K208B data. Note that `miner` is rebound here.
miner = data_patterns.PatternMiner(df_patterns=dfr_208B)
results_208B = miner.analyze(df_208B)
# Last expression of the cell: display the evaluation results.
results_208B

100%|██████████| 11991/11991 [01:04<00:00, 185.33it/s]


Unnamed: 0,Unnamed: 1,Unnamed: 2,result_type,pattern_id,cluster,support,exceptions,confidence,pattern_def,P values,Q values
LEGALIDENTIFIER20P32,2018-12-31,Key 2,False,Unit Solvency 1,0,0,1,0.0,"IF ({""FTK.K208B,C280""} != 0) THEN ({""FTK.K208B...",1042000.0,0.0315
LEGALIDENTIFIER20P32,2018-12-31,Key 2,False,Unit Solvency 2,0,0,1,0.0,"IF ({""FTK.K208B,C290""} != 0) THEN ({""FTK.K208B...",0.0315,1042000.0


In [14]:
# K208A-only rules: build a PatternMiner from dfr_208A and evaluate it on the
# K208A data. Note that `miner` is rebound here.
miner = data_patterns.PatternMiner(df_patterns=dfr_208A)
results_208A = miner.analyze(df_208A)
# Last expression of the cell: display the evaluation results.
results_208A

100%|██████████| 2/2 [00:00<00:00, 136.58it/s]


Unnamed: 0,Unnamed: 1,Unnamed: 2,result_type,pattern_id,cluster,support,exceptions,confidence,pattern_def,P values,Q values
LEGALIDENTIFIER20P32,2018-12-31,Key 2,False,Par,0,0,1,0.0,"IF ({""FTK.K208A,C060""} != 0) THEN ({""FTK.K208A...",0.24,1038000.0
LEGALIDENTIFIER20P32,2018-12-31,Key 2,False,Quantity,0,0,1,0.0,"IF ({""FTK.K208A,C070""} != 0) THEN ({""FTK.K208A...",1038000.0,0.24


#### S08 (derivatives: K210A and K210B)

In [15]:
# Rules spanning both templates: build a PatternMiner from dfr_210 and
# evaluate it on the merged K210A/K210B data. Note that `miner` is rebound here.
miner = data_patterns.PatternMiner(df_patterns=dfr_210)
results_210 = miner.analyze(df_210)
# Last expression of the cell: display the evaluation results.
results_210

100%|██████████| 126/126 [00:00<00:00, 174.87it/s]


Unnamed: 0,Unnamed: 1,Unnamed: 2,result_type,pattern_id,cluster,support,exceptions,confidence,pattern_def,P values,Q values
LEGALIDENTIFIER20P32,2018-12-31,Key 9,True,Contract size 2,0,1,0,1.0,"IF {""FTK.K210B,C290""} = ""USA2"" THEN {""FTK.K210...",USA2,1.051e+06
LEGALIDENTIFIER20P32,2018-12-31,Key 9,True,Buyer/seller 2,0,1,0,1.0,"IF {""FTK.K210B,C290""} = ""USA2"" THEN {""FTK.K210...",USA2,Open


In [16]:
# K210B-only rules: build a PatternMiner from dfr_210B and evaluate it on the
# K210B data.
# NOTE(review): this cell uses `miner2` / `results2_210B` where the other
# evaluation cells use `miner` / `results_<template>`; the names are left
# unchanged because later cells may refer to them.
miner2 = data_patterns.PatternMiner(df_patterns=dfr_210B)
results2_210B = miner2.analyze(df_210B)
# Last expression of the cell: display the evaluation results.
results2_210B

100%|██████████| 568/568 [00:04<00:00, 135.11it/s]


Unnamed: 0,Unnamed: 1,Unnamed: 2,result_type,pattern_id,cluster,support,exceptions,confidence,pattern_def,P values,Q values
LEGALIDENTIFIER20P32,2018-12-31,Key 9,True,Currency 1,0,1,0,1.0,"IF ({""FTK.K210B,C290""} = ""USA2"") THEN (({""FTK....",USA2,"[0, 0, EUR]"
LEGALIDENTIFIER20P32,2018-12-31,Key 9,True,Credit quality step 2,0,1,0,1.0,"IF (({""FTK.K210B,C230""}= ""BBB+"") & ({""FTK.K210...","[BBB+, Fitch]",Credit quality step 3
