In [1]:
from arelle import ModelManager, Cntlr, ModelFormulaObject, ModelXbrl, ViewFileFormulae, XbrlConst, ViewFileRenderedGrid
from arelle import RenderingEvaluator

import pandas as pd
import numpy as np
import os
from os.path import join
import re
import ast
import sys
#from Evaluator import Evaluator
import src
import data_patterns

In [2]:
# Folder (relative to the notebook's working directory) and file name of the
# DNB workbook with the additional Solvency II validation rules.
RULES_PATH = join(os.pardir, 'solvency2-rules')
FILENAME_RULES = '2020-01-22 Set aanvullende controleregels Solvency II_tcm46-387021.xlsx'

## Construct a test Solvency II instance (put your own data here)

In [3]:
# Folders one level above the working directory:
# RESULTS_PATH holds the pickled template tables that are read below.
RESULTS_PATH = join(os.pardir, 'results')
DATA_PATH = join(os.pardir, 'data')

In [4]:
# Combine every pickled template table found in RESULTS_PATH into `df`,
# one column per (upper-cased) datapoint label.
# NOTE(review): unpickling can execute arbitrary code -- only point RESULTS_PATH
# at pickle files you generated yourself.
df = pd.DataFrame()
df.index.name = "index"
# os.listdir returns entries in arbitrary order. Because a column is only taken
# from the first file that provides it (and the z-axis key column is overwritten
# by every file that has one), the file order affects the result; sorting makes
# the outcome reproducible across runs and machines.
files = sorted(f for f in os.listdir(RESULTS_PATH)
               if f.endswith('pickle') and os.path.isfile(join(RESULTS_PATH, f)))
for file in files:
    new_df = pd.read_pickle(join(RESULTS_PATH, file))
    new_df.columns = [col.upper() for col in new_df.columns]
    if list(new_df.index) == [0]: # without z-axis for now
        for col in new_df.columns:
            # columns already provided by an earlier file are never overwritten
            if col not in df.columns:
                df[col] = new_df[col]
    else:
        for col in new_df.columns:
            if col not in df.columns:
                # we only pick the first line in the z-axis column
                df.loc[0, col] = new_df.iloc[0, new_df.columns.get_loc(col)]
        # store the index value of that first line under the table's index name
        df.loc[0, new_df.index.name] = new_df.index[0]
# store every column with dtype object
df = df.astype(object)

In [5]:
# Preview the first rows of the assembled instance.
df.head(n=5)

Unnamed: 0_level_0,"E.01.01.16.01,EC0020","E.01.01.16.01,EC0030","E.01.01.16.01,EC0040","E.01.01.16.01,EC0050","E.01.01.16.01,EC0060","E.01.01.16.01,EC0010","E.02.01.16.01,EC0010","E.02.01.16.01,ER0050","E.03.01.16.01,EC0020","E.03.01.16.01,ER0030",...,"SR.27.01.01.28,R2421,C0781","T.99.01.01.01,C0050","T.99.01.01.01,C0060","T.99.01.01.01,C0070","T.99.01.01.01,C0080","T.99.01.01.01,C0090","T.99.01.01.01,C0100","T.99.01.01.01,C0110","T.99.01.01.01,C0120","T.99.01.01.01,C0010"
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,"IRAN, ISLAMIC REPUBLIC OF",s2c_CU:SCR,4594460.0,778090000.0,948777000.0,1,362888000.0,Pension entitlements,840734000.0,Home country,...,0,bpfy aq nmai jr,938006000.0,svvf vvzwwn,1999-10-10 00:00:00,80290,796320,0.6436,True,1


## Create simple taxonomy based on instance

In [6]:
# Derive a minimal taxonomy table from the instance's column labels.
# Labels are cut at fixed character positions: [0:13] template, [14:19] row,
# [20:25] column. NOTE(review): labels without a row part (the rule output
# shows e.g. "S.31.01.01.02,C0160") end up with the column code in "row" and
# an empty "column" -- confirm this is acceptable.
#
# The table is built in one go from a list of records instead of being grown
# cell by cell through .loc, and each dtype is paired with its column via zip
# rather than looked up with an integer key on the label-indexed dtypes Series.
df_taxo = pd.DataFrame(
    [
        {
            'datapoint': col.upper(),
            'template': col[0:13].upper(),
            'row': col[14:19].upper(),
            'column': col[20:25].upper(),
            'dtype': dtype,
        }
        for col, dtype in zip(df.columns, df.dtypes)
    ],
    columns=['datapoint', 'template', 'row', 'column', 'dtype'],
)

# for now we only use the list of templates in the instance
# (unique values, in order of first appearance)
instance_templates = list(df_taxo.loc[:, 'template'].unique())
del df_taxo

## Read DNB's Additional Validation Rules

In [7]:
# Load the rules workbook (column names are on the second sheet row), key the
# rules by their code, drop the two rule codes flagged as double lines and
# replace missing cells with empty strings.
df_rules = (
    pd.read_excel(join(RULES_PATH, FILENAME_RULES), header=1)
    .set_index('ControleRegelCode')
    .drop(['S.28.01_129', 'S.01.03_110'], axis=0)  # double lines, should be removed
    .fillna("")
)

## Parse formulas

In [8]:
# Create the rule evaluator from the instance data, the DNB rule table and the
# list of templates present in the instance.
# NOTE(review): only `import src` is visible above; this relies on the package
# exposing its `evaluator` submodule on import -- confirm.
evaluator = src.evaluator.Evaluator(df, df_rules, instance_templates)

In [9]:
# Presumably rewrites the DNB rule expressions into pattern definitions that can
# be evaluated on the instance -- see src.evaluator for the exact contract.
evaluator.transform_rules()

In [10]:
# Evaluate the transformed rules; the value displayed by this cell is a pattern
# table with one row per rule (pattern_def plus the generated pandas expressions).
evaluator.evaluate_rules()

Unnamed: 0_level_0,pattern_id,cluster,pattern_def,support,exceptions,confidence,pattern status,encodings,pandas co,pandas ex,xbrl co,xbrl ex,Error message
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,S.01.01_112,0,"IF {""S.01.01.02.01,R0580,C0010""} = ""REP|TED"" T...",0,0,0,DNB,{},"df[(df['S.01.01.02.01,R0580,C0010']=='REP|TED'...","df[(df['S.01.01.02.01,R0580,C0010']=='REP|TED'...",,,
1,S.01.01_113,0,"{""S.01.01.02.01,R0590,C0010""} != ""REP|TED """,0,0,0,DNB,{},"df[(df['S.01.01.02.01,R0590,C0010']!='REP|TED ')]","df[~(df['S.01.01.02.01,R0590,C0010']!='REP|TED...",,,
2,S.01.02_102,0,"{""S.01.02.01.01,R0050,C0010""} = ""NETHERL&S""",0,0,0,DNB,{},"df[(df['S.01.02.01.01,R0050,C0010']=='NETHERL&...","df[~(df['S.01.02.01.01,R0050,C0010']=='NETHERL...",,,
3,S.01.02_104,0,"{""S.01.02.01.01,R0070,C0010""} = ""DUTCH"" | {""S....",0,0,0,DNB,{},"df[((df['S.01.02.01.01,R0070,C0010']=='DUTCH')...","df[~((df['S.01.02.01.01,R0070,C0010']=='DUTCH'...",,,
4,S.01.02_110,0,"{""S.01.02.01.01,R0100,C0010""} = ""REGULAR REP|T...",0,0,0,DNB,{},"df[(df['S.01.02.01.01,R0100,C0010']=='REGULAR ...","df[~(df['S.01.02.01.01,R0100,C0010']=='REGULAR...",,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1519,S.31.01_105,0,"IF {""S.31.01.01.02,C0160""} != "" "" THEN {""S.31....",0,0,0,DNB,{},"df[(df['S.31.01.01.02,C0160']!=' ') & (df['S.3...","df[(df['S.31.01.01.02,C0160']!=' ') & ~(df['S....",,,
1520,S.31.01_107,0,"IF {""S.31.01.01.02,C0160""} != "" "" THEN {""S.31....",0,0,0,DNB,{},"df[(df['S.31.01.01.02,C0160']!=' ') & (df['S.3...","df[(df['S.31.01.01.02,C0160']!=' ') & ~(df['S....",,,
1521,S.31.01_111,0,"IF {""S.31.01.01.02,C0160""} != "" "" & {""S.31.01....",0,0,0,DNB,{},"df[((df['S.31.01.01.02,C0160']!=' ') & (df['S....","df[((df['S.31.01.01.02,C0160']!=' ') & (df['S....",,,
1522,S.31.01_112,0,"IF {""S.31.01.01.02,C0160""} != "" "" & {""S.31.01....",0,0,0,DNB,{},"df[((df['S.31.01.01.02,C0160']!=' ') & (df['S....","df[((df['S.31.01.01.02,C0160']!=' ') & (df['S....",,,


In [11]:
# Hand the evaluator's pattern table to the data_patterns miner and run the
# patterns against the instance data; `results` is inspected in the cells below.
miner = data_patterns.PatternMiner(df_patterns = evaluator.df_patterns)
results = miner.analyze(df)

In [12]:
# Show only the patterns for which an error message was recorded.
miner.df_patterns.loc[miner.df_patterns['Error message'].ne('')]

Unnamed: 0_level_0,pattern_id,cluster,pattern_def,support,exceptions,confidence,pattern status,encodings,pandas co,pandas ex,xbrl co,xbrl ex,Error message
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
293,S.15.01_105,0,"IF {""S.15.01.01.01,C0040""}!="" "" THEN {""S.15.01...",,,,DNB,{},"df[(df['S.15.01.01.01,C0040']!=' ') & (df['S.1...","df[(df['S.15.01.01.01,C0040']!=' ') & ~(df['S....",,,unorderable types: str() > int()
294,S.15.01_107,0,"IF {""S.15.01.01.01,C0040""}!="" "" THEN {""S.15.01...",,,,DNB,{},"df[(df['S.15.01.01.01,C0040']!=' ') & (df['S.1...","df[(df['S.15.01.01.01,C0040']!=' ') & ~(df['S....",,,unorderable types: str() > int()
1508,S.30.03_102,0,"IF {""S.30.03.01.01,C0060""} = ""OTHER THAN NON-T...",,,,DNB,{},"df[(df['S.30.03.01.01,C0060']=='OTHER THAN NON...","df[(df['S.30.03.01.01,C0060']=='OTHER THAN NON...",,,unorderable types: str() > int()
1509,S.30.03_103,0,"IF {""S.30.03.01.01,C0060""} = ""OTHER THAN NON-T...",,,,DNB,{},"df[(df['S.30.03.01.01,C0060']=='OTHER THAN NON...","df[(df['S.30.03.01.01,C0060']=='OTHER THAN NON...",,,unorderable types: str() > int()


In [13]:
# Show only the result rows whose "Q values" entry is marked as 'BUG'.
results.loc[results['Q values'].eq('BUG')]

Unnamed: 0_level_0,result_type,pattern_id,cluster,support,exceptions,confidence,pattern_def,P values,Q values
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,True,S.15.01_105,0,,,,"IF {""S.15.01.01.01,C0040""}!="" "" THEN {""S.15.01...",unorderable types: str() > int(),BUG
0,True,S.15.01_107,0,,,,"IF {""S.15.01.01.01,C0040""}!="" "" THEN {""S.15.01...",unorderable types: str() > int(),BUG
0,True,S.30.03_102,0,,,,"IF {""S.30.03.01.01,C0060""} = ""OTHER THAN NON-T...",unorderable types: str() > int(),BUG
0,True,S.30.03_103,0,,,,"IF {""S.30.03.01.01,C0060""} = ""OTHER THAN NON-T...",unorderable types: str() > int(),BUG
