In [1]:
import pandas as pd
import numpy as np
import os
from os.path import join
import re
import ast
import sys

from Evaluator import Evaluator
import data_patterns


In [2]:
# Folder and file name of the Excel workbook with the additional validation
# rules; both are combined with os.path.join when the workbook is read.
RULES_PATH = "..//solvency2-rules//"
FILENAME_RULES = (
    "2020-01-22 Set aanvullende controleregels Solvency II_tcm46-387021.xlsx"
)

## Construct a test Solvency 2 instance (put your own data here)

In [3]:
# Folders with the pickled result files and the data.
# os.path.join keeps the notebook runnable outside Windows (a hard-coded
# backslash is not a path separator on other platforms); the trailing ''
# preserves the trailing separator the constants have always had.
RESULTS_PATH = os.path.join('..', 'results', '')
DATA_PATH = os.path.join('..', 'data', '')

# os.listdir returns names in arbitrary order and the first file that provides
# a datapoint wins, so the list is sorted to keep the instance reproducible.
files = sorted(
    f for f in os.listdir(RESULTS_PATH)
    if f.endswith('pickle') and os.path.isfile(os.path.join(RESULTS_PATH, f))
)

# The instance is a single row. It is collected as {datapoint: value} and the
# DataFrame is built once at the end: inserting the columns one by one
# fragments the frame and triggers pandas' PerformanceWarning.
instance_row = {}
for file in files:
    # NOTE(security): unpickling can execute arbitrary code - only load pickle
    # files that you created yourself.
    new_df = pd.read_pickle(os.path.join(RESULTS_PATH, file))
    new_df.columns = [col.upper() for col in new_df.columns]
    if len(new_df.index) == 0:
        # no rows at all: there is no first line to copy
        continue
    # Only the first line is used; for frames with a z-axis this means that
    # we only pick the first line in the z-axis column.
    for pos, col in enumerate(new_df.columns):
        if col not in instance_row:
            instance_row[col] = new_df.iat[0, pos]
    if list(new_df.index) != [0]:  # frame with a z-axis
        # Store the z-axis key of the selected line under the index name.
        # An unnamed index is skipped: it would add a column labelled None,
        # which cannot be processed as a datapoint name later on.
        if new_df.index.name is not None:
            instance_row[new_df.index.name] = new_df.index[0]

df = pd.DataFrame([instance_row]) if instance_row else pd.DataFrame()
df.index.name = "index"
df = df.astype(object)

In [4]:
# Preview the first rows of the instance DataFrame
df.head()

Unnamed: 0_level_0,"E.01.01.16.01,EC0020","E.01.01.16.01,EC0030","E.01.01.16.01,EC0040","E.01.01.16.01,EC0050","E.01.01.16.01,EC0060","E.01.01.16.01,EC0010","E.02.01.16.01,EC0010","E.02.01.16.01,ER0050","E.03.01.16.01,EC0020","E.03.01.16.01,ER0030",...,"SR.27.01.01.28,R2421,C0781","T.99.01.01.01,C0050","T.99.01.01.01,C0060","T.99.01.01.01,C0070","T.99.01.01.01,C0080","T.99.01.01.01,C0090","T.99.01.01.01,C0100","T.99.01.01.01,C0110","T.99.01.01.01,C0120","T.99.01.01.01,C0010"
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,"IRAN, ISLAMIC REPUBLIC OF",s2c_CU:SCR,4594460.0,778090000.0,948777000.0,1,362888000.0,Pension entitlements,840734000.0,Home country,...,0,bpfy aq nmai jr,938006000.0,svvf vvzwwn,1999-10-10 00:00:00,80290,796320,0.6436,True,1


## Create a simple taxonomy based on the instance

In [5]:
# One taxonomy record per datapoint (= column of the instance).
# Datapoint names are comma separated ("<template>,<code>[,<code>]"), so they
# are split on the comma instead of being sliced at fixed offsets: template
# codes are not always 13 characters long ("SR.27.01.01.28" has 14) and codes
# such as "EC0020" are longer than 5, which the fixed offsets cut off.
# The records are collected in a list and turned into a DataFrame once;
# growing the frame cell by cell with .loc is needlessly slow.
taxo_records = []
for col, dtype in zip(df.columns, df.dtypes):
    datapoint = col.upper()
    parts = datapoint.split(',')
    taxo_records.append({
        "datapoint": datapoint,
        "template": parts[0],
        # the first code after the template is stored as 'row', the second as
        # 'column' (same positions as with the former fixed-offset slicing)
        "row": parts[1] if len(parts) > 1 else "",
        "column": parts[2] if len(parts) > 2 else "",
        "dtype": dtype,
    })
df_taxo = pd.DataFrame(taxo_records, columns=['datapoint', 'template', 'row', 'column', 'dtype'])

# for now we only use the list of templates in the instance
# (unique() keeps the order of first appearance)
instance_templates = list(df_taxo.loc[:, 'template'].unique())
del df_taxo

## Read DNB's Additional Validation Rules

In [6]:
# Load the rules workbook (column names are on the second sheet row), index the
# rules by their code, remove the two rule codes that occur on a double line
# and replace missing cells by empty strings.
df_rules = (
    pd.read_excel(os.path.join(RULES_PATH, FILENAME_RULES), header=1)
    .set_index('ControleRegelCode')
    .drop('S.28.01_129', axis=0)  # double line, should be removed
    .drop('S.01.03_110', axis=0)  # double line, should be removed
    .fillna("")
)


## Parse and evaluate formulas

In [7]:
# Set up the evaluator with the instance (df), the validation rules (df_rules)
# and the list of templates that occur in the instance.
evalu = Evaluator(df, df_rules, instance_templates)
# NOTE(review): presumably converts the rule formulas into an evaluable form - confirm in Evaluator
evalu.transform_rules()
# NOTE(review): presumably applies the transformed rules to the instance - confirm in Evaluator
evalu.evaluate_rules()


In [8]:
# Display the patterns table held by the evaluator
evalu.df_patterns

Unnamed: 0_level_0,pattern_id,cluster,pattern_def,support,exceptions,confidence,pattern status,encodings,pandas co,pandas ex,xbrl co,xbrl ex,Error message
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,Pattern 1,0,"IF {""S.01.01.02.01,R0580,C0010""} = ""REPORTED"" ...",0,0,0.0,not defined,{},"df[(df['S.01.01.02.01,R0580,C0010']=='REPORTED...","df[(df['S.01.01.02.01,R0580,C0010']=='REPORTED...",,,
1,Pattern 1,0,"{""S.01.01.02.01,R0590,C0010""} <> ""REPORTED """,1,0,1.0,not defined,{},"df[(df['S.01.01.02.01,R0590,C0010']!='REPORTED...","df[~(df['S.01.01.02.01,R0590,C0010']!='REPORTE...",,,
2,Pattern 1,0,"{""S.01.02.01.01,R0050,C0010""} = ""NETHERLANDS""",0,1,0.0,not defined,{},"df[(df['S.01.02.01.01,R0050,C0010']=='NETHERLA...","df[~(df['S.01.02.01.01,R0050,C0010']=='NETHERL...",,,
3,Pattern 1,0,"{""S.01.02.01.01,R0070,C0010""} = ""DUTCH"" OR {""S...",0,1,0.0,not defined,{},"df[((df['S.01.02.01.01,R0070,C0010']=='DUTCH')...","df[~((df['S.01.02.01.01,R0070,C0010']=='DUTCH'...",,,
4,Pattern 1,0,"{""S.01.02.01.01,R0100,C0010""} = ""REGULAR REPOR...",0,1,0.0,not defined,{},"df[(df['S.01.02.01.01,R0100,C0010']=='REGULAR ...","df[~(df['S.01.02.01.01,R0100,C0010']=='REGULAR...",,,
5,Pattern 1,0,"IF {""S.01.03.01.01,C0040""}<>"" "" THEN {""S.01.03...",1,0,1.0,not defined,{},"df[(df['S.01.03.01.01,C0040']!=' ') & (df['S.0...","df[(df['S.01.03.01.01,C0040']!=' ') & ~(df['S....",,,
6,Pattern 1,0,"IF {""S.01.03.01.01,C0040""}<>"" "" THEN {""S.01.03...",1,0,1.0,not defined,{},"df[(df['S.01.03.01.01,C0040']!=' ') & (df['S.0...","df[(df['S.01.03.01.01,C0040']!=' ') & ~(df['S....",,,
7,Pattern 1,0,"IF {""S.01.03.01.01,C0040""}<>"" "" THEN {""S.01.03...",1,0,1.0,not defined,{},"df[(df['S.01.03.01.01,C0040']!=' ') & (df['S.0...","df[(df['S.01.03.01.01,C0040']!=' ') & ~(df['S....",,,
8,Pattern 1,0,"IF {""S.01.03.01.01,C0040""}<>"" "" THEN {""S.01.03...",1,0,1.0,not defined,{},"df[(df['S.01.03.01.01,C0040']!=' ') & (df['S.0...","df[(df['S.01.03.01.01,C0040']!=' ') & ~(df['S....",,,
9,Pattern 1,0,"IF {""S.01.03.01.02,C0100""}<>"" "" THEN {""S.01.03...",1,0,1.0,not defined,{},"df[(df['S.01.03.01.02,C0100']!=' ') & (df['S.0...","df[(df['S.01.03.01.02,C0100']!=' ') & ~(df['S....",,,


In [9]:
# Print the evaluator's result report (one numbered line per rule, judging by the output)
evalu.print_result()

0: Not all templates in instance: ['S.01.01.01.01', 'S.01.01.01.01']
1: Correctly parsed (#co=0, #ex=0)
2: Correctly parsed (#co=1, #ex=0)
3: Not all templates in instance: ['S.02.01.01.01', 'S.02.01.01.01', 'S.02.01.01.01', 'S.01.01.01.01']
4: Not all templates in instance: ['S.02.01.01.01', 'S.02.01.01.01', 'S.02.01.01.01', 'S.01.01.04.01']
5: Correctly parsed (#co=0, #ex=1)
6: Not all templates in instance: ['S.01.02.04.01']
7: Correctly parsed (#co=0, #ex=1)
8: Not all templates in instance: ['S.01.02.04.01', 'S.01.02.04.01']
9: Correctly parsed (#co=0, #ex=1)
10: Not all templates in instance: ['S.01.02.04.01']
11: Not all templates in instance: ['S.01.02.04.01', 'S.01.01.04.01']
12: Not all templates in instance: ['S.01.03.04.01', 'S.01.03.04.01']
13: Correctly parsed (#co=1, #ex=0)
14: Not all templates in instance: ['S.01.03.04.01', 'S.01.03.04.01']
15: Correctly parsed (#co=1, #ex=0)
16: Not all templates in instance: ['S.01.03.04.01', 'S.01.03.04.01']
17: Correctly parsed (#c

140: Not all templates in instance: ['S.23.01.04.01', 'S.23.01.04.01', 'S.23.01.04.01', 'S.23.01.04.01', 'S.23.01.04.01']
141: Not all templates in instance: ['S.23.01.04.01', 'S.23.01.04.01', 'S.23.01.04.01', 'S.23.01.04.01']
142: Not all templates in instance: ['S.23.01.04.01', 'S.23.01.04.01', 'S.23.01.04.01']
143: Not all templates in instance: ['S.23.01.04.01', 'S.23.01.04.01', 'S.23.01.04.01']
144: Not all templates in instance: ['S.23.03.01.04', 'S.23.03.01.04', 'S.23.03.01.04', 'S.23.03.01.04']
145: Not all templates in instance: ['S.23.03.04.04', 'S.23.03.04.04', 'S.23.03.04.04', 'S.23.03.04.04']
146: Not all templates in instance: ['S.23.04.01.01', 'S.23.04.01.01']
147: Not all templates in instance: ['S.23.04.01.01', 'S.23.04.01.01']
148: Not all templates in instance: ['S.23.04.01.01', 'S.23.04.01.01']
149: Not all templates in instance: ['S.23.04.01.01', 'S.23.04.01.01']
150: Not all templates in instance: ['S.23.04.01.01', 'S.23.04.01.01']
151: Not all templates in instan

503: Correctly parsed (#co=0, #ex=0)
504: Correctly parsed (#co=0, #ex=0)
505: Correctly parsed (#co=0, #ex=0)
506: Correctly parsed (#co=1, #ex=0)
507: Datapoints not found: ['S.30.01.01.02,C0200', 'S.30.01.01.02,C0240']
508: Datapoints not found: ['S.30.01.01.02,C0200', 'S.30.01.01.02,C0250']
509: Datapoints not found: ['S.30.01.01.02,C0200', 'S.30.01.01.02,C0260']
510: Datapoints not found: ['S.30.01.01.02,C0200', 'S.30.01.01.02,C0270']
511: Datapoints not found: ['S.30.01.01.02,C0200', 'S.30.01.01.02,C0280']
512: Datapoints not found: ['S.30.01.01.02,C0200', 'S.30.01.01.02,C0290']
513: Datapoints not found: ['S.30.01.01.02,C0200', 'S.30.01.01.02,C0310']
514: Datapoints not found: ['S.30.02.01.01,C0030', 'S.30.02.01.01,C0100']
515: Datapoints not found: ['S.30.02.01.01,C0030', 'S.30.02.01.01,C0110']
516: Datapoints not found: ['S.30.02.01.01,C0030', 'S.30.02.01.01,C0120']
517: Datapoints not found: ['S.30.02.01.02,C0170', 'S.30.02.01.02,C0230']
518: Datapoints not found: ['S.30.02.0