This tutorial describes how to evaluate new rules applicable to the assets and derivatives data.

In [1]:
from arelle import ModelManager, Cntlr, ModelFormulaObject, ModelXbrl, ViewFileFormulae, XbrlConst, ViewFileRenderedGrid
from arelle import RenderingEvaluator 

In [2]:
import logging
import math
import re
from os import listdir, makedirs
from os.path import join, isfile

import numpy as np
import pandas as pd

import data_patterns
from src import Evaluator

In [3]:
# Configuration: constants and project folders used by the rest of the notebook.
DECIMALS = 0  # NOTE(review): not referenced in the cells shown here; presumably a rounding precision — confirm

# All paths are relative to the notebook's working directory.
RULES_PATH = join('..', 'solvency2-rules')  # folder with the Excel rule files
INSTANCES_DATA_PATH = join('..','data','instances','L0058_2020Q2') #path of folder with converted xbrl-instance data
TEST_DATA_PATH = join('..', 'tests', 'data', 'demo') #path of folder with demo data
RESULTS_PATH = join('..', 'results')
DATA_PATH = join('..', 'data')

# logging.basicConfig opens the log file right away and raises FileNotFoundError
# when the target folder is missing, so make sure the results folder exists first.
makedirs(RESULTS_PATH, exist_ok=True)
logging.basicConfig(filename = join(RESULTS_PATH, 'rules.log'),level = logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

### Import rules

We start with importing the new rules applicable to the assets and derivatives data. There are several sets of rules applicable to different templates:
* S.06.02.01.01 (Information on positions held)
* S.06.02.01.02 (Information on assets)
* S.06.02.01.01 (Information on positions held) and S.06.02.01.02 (Information on assets)
* S.08.01.01.01 (Information on positions held) and S.08.01.01.02 (Information on derivatives)
* S.08.01.01.02 (Information on derivatives)

#### S06

In [4]:
# Rule sets for S.06.02, one Excel file each:
#   dfr_s06   - evaluated later on the merged S.06.02.01.01 / S.06.02.01.02 frame
#   dfr_s06_2 - evaluated later on S.06.02.01.02 only
#   dfr_s06_1 - evaluated later on S.06.02.01.01 only
dfr_s06 = pd.read_excel(join(RULES_PATH,'S2_06_02.xlsx'))
dfr_s06_2 = pd.read_excel(join(RULES_PATH,'S2_06_02_01_02.xlsx'))
dfr_s06_1 = pd.read_excel(join(RULES_PATH,'S2_06_02_01_01.xlsx'))

#Capitalize row-column references (e.g. c0140 -> C0140):
# every reference found in the 'pandas ex' column is replaced throughout the whole rules frame.
for rules in (dfr_s06, dfr_s06_2, dfr_s06_1):
    # A blank 'pandas ex' cell is read as NaN, and str.findall passes NaN through;
    # iterating over that NaN would raise TypeError, so blank cells are skipped.
    matches = rules['pandas ex'].dropna().astype(str).str.findall(r'c\d\d\d\d')
    column_replace = {ref for found in matches for ref in found}
    for ref in column_replace:
        rules.replace(to_replace=ref, value=ref.capitalize(), inplace=True, regex=True)

#### S08

In [5]:
# Rule sets for S.08.01, one Excel file each:
#   dfr_s08   - evaluated later on the merged S.08.01.01.01 / S.08.01.01.02 frame
#   dfr_s08_2 - evaluated later on S.08.01.01.02 only
dfr_s08 = pd.read_excel(join(RULES_PATH,'S2_08_01_01.xlsx'))
dfr_s08_2 = pd.read_excel(join(RULES_PATH,'S2_08_01_01_02.xlsx'))

#Capitalize row-column references (e.g. c0380 -> C0380):
# every reference found in the 'pandas ex' column is replaced throughout the whole rules frame.
for rules in (dfr_s08, dfr_s08_2):
    # A blank 'pandas ex' cell is read as NaN, and str.findall passes NaN through;
    # iterating over that NaN would raise TypeError, so blank cells are skipped.
    matches = rules['pandas ex'].dropna().astype(str).str.findall(r'c\d\d\d\d')
    column_replace = {ref for found in matches for ref in found}
    for ref in column_replace:
        rules.replace(to_replace=ref, value=ref.capitalize(), inplace=True, regex=True)

### Import templates

Next we import the reporting data. In the tutorial 'Convert XBRL-instances to CSV, HTML and pickles' the XBRL-instances are converted to pickle files per template. The pickle files are written to the data/instances folder. We import these pickle files. We merge dataframes for the sets of rules that are applicable to two templates. For the sake of simplicity we only import the Quarterly Solvency II reporting Solo (QRS) templates.

#### S06

Import data and make index unique if necessary

In [6]:
# Disabled: loader for the real converted instance data in INSTANCES_DATA_PATH
# (the notebook uses the demo pickles from TEST_DATA_PATH instead).
# It copies C0040 into a helper column C0040A, appends '_1', '_2', ... to the second and
# later occurrences of a repeated value, and indexes the frame on (entity, period, C0040A).
# NOTE(review): the chained `df[col].iloc[j] = ...` assignment can raise SettingWithCopyWarning
# and may not write through on recent pandas — verify before re-enabling.
# df_s06_1 = pd.read_pickle(join(INSTANCES_DATA_PATH,'S.06.02.01.01.pickle')).reset_index()
# df_s06_1['S.06.02.01.01,C0040A'] = df_s06_1['S.06.02.01.01,C0040']
# listt=list(df_s06_1['S.06.02.01.01,C0040A'])
# for i in listt:
#     lenn = len(df_s06_1[df_s06_1['S.06.02.01.01,C0040A']==i])
#     if lenn > 1:
#         list_ind = list(df_s06_1.loc[df_s06_1['S.06.02.01.01,C0040A']==i].index)
#         temp = 0
#         for j in list_ind[1:]:
#             temp=temp+1
#             df_s06_1['S.06.02.01.01,C0040A'].iloc[j] = df_s06_1['S.06.02.01.01,C0040A'].iloc[j] + '_' + str(temp)
# df_s06_1 = df_s06_1.set_index(['entity', 'period', 'S.06.02.01.01,C0040A'])

In [7]:
# Disabled: real-data loader for S.06.02.01.02 from INSTANCES_DATA_PATH.
# It indexes the frame on (entity, period, C0040) and copies C0040 back in as a regular column.
# df_s06_2 = pd.read_pickle(join(INSTANCES_DATA_PATH, 'S.06.02.01.02.pickle')).reset_index()
# df_s06_2 = df_s06_2.set_index(['entity', 'period', 'S.06.02.01.02,C0040'])
# df_s06_2['S.06.02.01.02,C0040'] = df_s06_2.index.get_level_values(2)

In [8]:
# Disabled: real-data variant of the merged S.06.02.01.01 / S.06.02.01.02 frame.
# It inner-joins both templates on (entity, period, C0040), then builds a helper column C0040A
# in which repeated values get a '_1', '_2', ... suffix, and indexes on (entity, period, C0040A).
# NOTE(review): the chained `df[col].iloc[j] = ...` assignment can raise SettingWithCopyWarning
# and may not write through on recent pandas — verify before re-enabling.
# df_s06 = pd.merge(pd.read_pickle(join(INSTANCES_DATA_PATH,'S.06.02.01.01.pickle')).reset_index(),pd.read_pickle(join(INSTANCES_DATA_PATH, 'S.06.02.01.02.pickle')).reset_index(),how='inner', left_on=['entity','period','S.06.02.01.01,C0040'], right_on=['entity','period','S.06.02.01.02,C0040']).set_index(['entity', 'period', 'S.06.02.01.01,C0040'])
# df_s06 = df_s06.reset_index()
# df_s06['S.06.02.01.02,C0040A'] = df_s06['S.06.02.01.02,C0040']
# listt=list(df_s06['S.06.02.01.02,C0040A'])
# for i in listt:
#     lenn = len(df_s06[df_s06['S.06.02.01.02,C0040A']==i])
#     if lenn > 1:
#         list_ind = list(df_s06.loc[df_s06['S.06.02.01.02,C0040A']==i].index)
#         temp = 0
#         for j in list_ind[1:]:
#             temp=temp+1
#             df_s06['S.06.02.01.02,C0040A'].iloc[j] = df_s06['S.06.02.01.02,C0040A'].iloc[j] + '_' + str(temp)
# df_s06 = df_s06.set_index(['entity', 'period', 'S.06.02.01.02,C0040A'])

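In this tutorial we work with demo data (read from TEST_DATA_PATH) so that results can be shown; the cells above, which load real instance data, are therefore commented out.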

In [9]:
# Import demo pickles for both S.06.02 templates; reset_index turns the stored index into ordinary columns
df_s06_1 = pd.read_pickle(join(TEST_DATA_PATH, 'S.06.02.01.01.pickle')).reset_index()
df_s06_2 = pd.read_pickle(join(TEST_DATA_PATH, 'S.06.02.01.02.pickle')).reset_index()

# Combined frame for the rules that span both templates: inner join on (entity, period, C0040)
df_s06 = df_s06_1.merge(
    df_s06_2,
    how='inner',
    left_on=['entity', 'period', 'S.06.02.01.01,C0040'],
    right_on=['entity', 'period', 'S.06.02.01.02,C0040'],
).set_index(['entity', 'period', 'S.06.02.01.01,C0040'])

# Give the single-template frames their (entity, period, C0040) index
df_s06_1 = df_s06_1.set_index(['entity', 'period', 'S.06.02.01.01,C0040'])
df_s06_2 = df_s06_2.set_index(['entity', 'period', 'S.06.02.01.02,C0040'])

# set_index removed C0040 from the columns; copy it back from index level 2
df_s06['S.06.02.01.01,C0040'] = df_s06.index.get_level_values(2)
df_s06_2['S.06.02.01.02,C0040'] = df_s06_2.index.get_level_values(2)

#### S08

Import data and make index unique if necessary

In [10]:
# Disabled: real-data loader for S.08.01.01.02 from INSTANCES_DATA_PATH.
# It indexes the frame on (entity, period, C0040) and copies C0040 back in as a regular column.
# df_s08_2 = pd.read_pickle(join(INSTANCES_DATA_PATH, 'S.08.01.01.02.pickle')).reset_index()
# df_s08_2 = df_s08_2.set_index(['entity', 'period', 'S.08.01.01.02,C0040'])
# df_s08_2['S.08.01.01.02,C0040'] = df_s08_2.index.get_level_values(2)

In [11]:
# Disabled: real-data variant of the merged S.08.01.01.01 / S.08.01.01.02 frame.
# It inner-joins both templates on (entity, period, C0040), then builds a helper column C0040A
# in which repeated values get a '_1', '_2', ... suffix, and indexes on (entity, period, C0040A).
# NOTE(review): the chained `df[col].iloc[j] = ...` assignment can raise SettingWithCopyWarning
# and may not write through on recent pandas — verify before re-enabling.
# df_s08 = pd.merge(pd.read_pickle(join(INSTANCES_DATA_PATH,'S.08.01.01.01.pickle')).reset_index(),pd.read_pickle(join(INSTANCES_DATA_PATH, 'S.08.01.01.02.pickle')).reset_index(),how='inner', left_on=['entity','period','S.08.01.01.01,C0040'], right_on=['entity','period','S.08.01.01.02,C0040']).set_index(['entity', 'period', 'S.08.01.01.01,C0040'])
# df_s08 = df_s08.reset_index()
# df_s08['S.08.01.01.02,C0040A'] = df_s08['S.08.01.01.02,C0040']
# listt=list(df_s08['S.08.01.01.02,C0040A'])
# for i in listt:
#     lenn = len(df_s08[df_s08['S.08.01.01.02,C0040A']==i])
#     if lenn > 1:
#         list_ind = list(df_s08.loc[df_s08['S.08.01.01.02,C0040A']==i].index)
#         temp = 0
#         for j in list_ind[1:]:
#             temp=temp+1
#             df_s08['S.08.01.01.02,C0040A'].iloc[j] = df_s08['S.08.01.01.02,C0040A'].iloc[j] + '_' + str(temp)
# df_s08 = df_s08.set_index(['entity', 'period', 'S.08.01.01.02,C0040A'])

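In this tutorial we work with demo data (read from TEST_DATA_PATH) so that results can be shown; the cells above, which load real instance data, are therefore commented out.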

In [12]:
# Import demo pickles for both S.08.01.01 templates; reset_index turns the stored index into ordinary columns
df_s08_1 = pd.read_pickle(join(TEST_DATA_PATH, 'S.08.01.01.01.pickle')).reset_index()
df_s08_2 = pd.read_pickle(join(TEST_DATA_PATH, 'S.08.01.01.02.pickle')).reset_index()

# Combined frame for the rules that span both templates: inner join on (entity, period, C0040)
df_s08 = df_s08_1.merge(
    df_s08_2,
    how='inner',
    left_on=['entity', 'period', 'S.08.01.01.01,C0040'],
    right_on=['entity', 'period', 'S.08.01.01.02,C0040'],
).set_index(['entity', 'period', 'S.08.01.01.01,C0040'])

# Only the second template is also evaluated on its own, so only it gets an index here
df_s08_2 = df_s08_2.set_index(['entity', 'period', 'S.08.01.01.02,C0040'])

### Evaluate rules

Now we are ready to evaluate the different sets of rules. First, we construct a PatternMiner-object with the data-patterns package using the rules dataframe. Second, we use the analyze-function to get the results of the rules. We do this for each set of rules separately.

#### S06


In [13]:
# Evaluate the rule set that spans both S.06.02 templates on the merged frame df_s06.
miner = data_patterns.PatternMiner(df_patterns=dfr_s06)
results_06 = miner.analyze(df_s06)
results_06  # last expression of the cell: rendered as the result table

100%|█████████████████████████████████████████████████████████████████████████████| 1500/1500 [00:09<00:00, 160.55it/s]


Unnamed: 0,Unnamed: 1,Unnamed: 2,result_type,pattern_id,cluster,support,exceptions,confidence,pattern_def,P values,Q values
0LFF1WMNTWG5PTIYYI38,2019-12-31,ISIN/IS8356795570,False,Total value 1,0,0,1,0.0,"IF ({""S.06.02.01.01,C0140""} != 0) THEN (ABS({""...",20005463.35,"[187704761.84, 20005463.35, 0.6037, 324668296...."
0LFF1WMNTWG5PTIYYI38,2019-12-31,ISIN/IS8356795570,False,Total value 2,0,0,1,0.0,"IF ({""S.06.02.01.01,C0130""} != 0) THEN (ABS({""...",191323.0,"[187704761.84, 191323.0, 537740889.72, 3246682..."


In [14]:
# Evaluate the S.06.02.01.02-only rule set on df_s06_2 (rebinds `miner`).
miner = data_patterns.PatternMiner(df_patterns=dfr_s06_2)
results_06_2 = miner.analyze(df_s06_2)
results_06_2  # last expression of the cell: rendered as the result table

100%|███████████████████████████████████████████████████████████████████████████| 13547/13547 [01:34<00:00, 142.70it/s]


Unnamed: 0,Unnamed: 1,Unnamed: 2,result_type,pattern_id,cluster,support,exceptions,confidence,pattern_def,P values,Q values
0LFF1WMNTWG5PTIYYI38,2019-12-31,ISIN/IS8356795570,False,Unit price 1,0,0,1,0.0,"IF ({""S.06.02.01.02,C0370""} != 0) THEN ({""S.06...",537740900.0,0.6037
0LFF1WMNTWG5PTIYYI38,2019-12-31,ISIN/IS8356795570,False,Unit price 2,0,0,1,0.0,"IF ({""S.06.02.01.02,C0380""} != 0) THEN ({""S.06...",0.6037,537740900.0


In [15]:
# Evaluate the S.06.02.01.01-only rule set on df_s06_1 (rebinds `miner`).
miner = data_patterns.PatternMiner(df_patterns=dfr_s06_1)
results_06_1 = miner.analyze(df_s06_1)
results_06_1  # last expression of the cell: rendered as the result table

100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 65.64it/s]


Unnamed: 0,Unnamed: 1,Unnamed: 2,result_type,pattern_id,cluster,support,exceptions,confidence,pattern_def,P values,Q values
0LFF1WMNTWG5PTIYYI38,2019-12-31,ISIN/IS8356795570,False,Par,0,0,1,0.0,"IF ({""S.06.02.01.01,C0130""} != 0) THEN ({""S.06...",191323.0,20005463.35
0LFF1WMNTWG5PTIYYI38,2019-12-31,ISIN/IS8356795570,False,Quantity,0,0,1,0.0,"IF ({""S.06.02.01.01,C0140""} != 0) THEN ({""S.06...",20005463.35,191323.0


#### S08

In [16]:
# Evaluate the rule set that spans both S.08.01.01 templates on the merged frame df_s08 (rebinds `miner`).
miner = data_patterns.PatternMiner(df_patterns=dfr_s08)
results_08 = miner.analyze(df_s08)
results_08  # last expression of the cell: rendered as the result table

100%|███████████████████████████████████████████████████████████████████████████████| 110/110 [00:00<00:00, 146.88it/s]


Unnamed: 0,Unnamed: 1,Unnamed: 2,result_type,pattern_id,cluster,support,exceptions,confidence,pattern_def,P values,Q values
0LFF1WMNTWG5PTIYYI38,2019-12-31,ISIN/EZ3104183600,True,Contract size 2,0,1,0,1.0,"IF {""S.08.01.01.02,C0380""} = ""USA2"" THEN {""S.0...",USA2,760703
0LFF1WMNTWG5PTIYYI38,2019-12-31,ISIN/EZ3104183600,True,Buyer/seller 2,0,1,0,1.0,"IF {""S.08.01.01.02,C0380""} = ""USA2"" THEN {""S.0...",USA2,FX-FL [open]


In [17]:
# Evaluate the S.08.01.01.02-only rule set on df_s08_2; this cell uses its own `miner2` object.
miner2 = data_patterns.PatternMiner(df_patterns=dfr_s08_2)
results_08_2 = miner2.analyze(df_s08_2)
results_08_2  # last expression of the cell: rendered as the result table

100%|███████████████████████████████████████████████████████████████████████████████| 303/303 [00:02<00:00, 118.85it/s]


Unnamed: 0,Unnamed: 1,Unnamed: 2,result_type,pattern_id,cluster,support,exceptions,confidence,pattern_def,P values,Q values
0LFF1WMNTWG5PTIYYI38,2019-12-31,ISIN/EZ3104183600,True,Currency 1,0,1,0,1.0,"IF ({""S.08.01.01.02,C0380""} = ""USA2"") THEN (({...",USA2,"[0, 0, EUR]"
0LFF1WMNTWG5PTIYYI38,2019-12-31,ISIN/EZ3104183600,True,Counterparty group code,0,1,0,1.0,"IF {""S.08.01.01.02,C0330""} = ""BNP Paribas SA"" ...",BNP Paribas SA,LEI/R0MUWSFPU8MPRO8K5P83
0LFF1WMNTWG5PTIYYI38,2019-12-31,ISIN/EZ3104183600,True,External rating,0,1,0,1.0,"IF (({""S.08.01.01.02,C0260""}= ""The Goldman Sac...","[The Goldman Sachs Group, Inc., 0]",BBB+
0LFF1WMNTWG5PTIYYI38,2019-12-31,ISIN/EZ3104183600,False,Currency 3,0,0,1,0.0,"IF {""S.08.01.01.02,C0380""} = ""USA2"" THEN {""S.0...",USA2,EUR
0LFF1WMNTWG5PTIYYI38,2019-12-31,ISIN/EZ3104183600,False,Credit quality step,0,0,1,0.0,"IF (({""S.08.01.01.02,C0260""}= ""The Goldman Sac...","[The Goldman Sachs Group, Inc., 0]",Credit quality step 1
