# Tutorial - Evaluate DNBs additional Rules (QRS)

This notebook is a tutorial on evaluating DNB's additional validation rules for the quarterly Solvency II reports for solo entities (QRS).

## Import packages

In [None]:
from arelle import ModelManager, Cntlr, ModelFormulaObject, ModelXbrl, ViewFileFormulae, XbrlConst, ViewFileRenderedGrid
from arelle import RenderingEvaluator

import pandas as pd  # dataframes
import numpy as np  # mathematical functions, arrays and matrices
from os.path import join, isfile  # some os dependent functionality
import re  # regular expressions
from src import Evaluator  # conversion from 'rules' to expressions for the data-patterns packages
import data_patterns  # evaluation of patterns
from pprint import pprint  # pretty print
import logging

## General parameters

In [None]:
# directory that contains the Excel file with the additional rules
RULES_PATH = join('..', 'solvency2-rules')  
# directory with the source data (one pickle file per reported table)
INSTANCES_DATA_PATH = join('..', 'data', 'instances', 'all')
# directory in which the results are written
RESULTS_PATH = join('..', 'results') 
# input parameters passed to the evaluator
PARAMETERS = {'decimal': 0}  
# Currently only 'decimal' is available; it specifies the tolerance used during evaluation of patterns.
# decimal: 0 means tolerance = abs(1.5e-0) (= 1.5)

In [None]:
# Log records of level INFO and higher are written to the file rules.log,
# located in the same directory as the source data (INSTANCES_DATA_PATH)
logging.basicConfig(
    filename=join(INSTANCES_DATA_PATH, 'rules.log'),
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)

## Read file with all possible datapoints

We use a simplified taxonomy with all possible datapoints, located in the data/datapoints directory

In [None]:
DATAPOINTS_PATH = join('..', 'data', 'datapoints')  # directory with the datapoint definitions
FILENAME_DATAPOINTS = 'QRS.csv'  # semicolon-separated file listing all possible QRS datapoints
df_datapoints = pd.read_csv(join(DATAPOINTS_PATH, FILENAME_DATAPOINTS), sep=";")  # load file to dataframe
df_datapoints.head()  # show the first rows as a quick check

In [None]:
# Add columns with only the R(ow) and C(olumn) reference of each datapoint.
# The patterns are raw strings: in a normal string literal "\d" is an invalid
# escape sequence, which recent Python versions warn about.
# str.extract returns the first match of the capture group per value and NaN
# when there is no match; a missing reference is stored as an empty string.
df_datapoints['rij'] = df_datapoints['datapunt'].str.extract(r"(R\d\d\d\d)", expand=False).fillna("")
df_datapoints['kolom'] = df_datapoints['datapunt'].str.extract(r"(C\d\d\d\d)", expand=False).fillna("")
df_datapoints.head()

## Read input data

We distinguish 2 types of tables: with a closed axis, and with an open axis.

An example of a table with an open axis is the list of assets: an entity reports several 'rows of data' in the relevant table. An example of a closed axis is the balance sheet: an entity reports only 1 balance sheet per period.

### Read tables from source path

We combine all tables with closed axes into one DataFrame. This DataFrame is then used for all validation rules for closed axes tables. 

Tables with an open axis are put in a dictionary of DataFrames. The validation rules are evaluated separately for each table with an open axis.

In [None]:
# Sorted list of all distinct QRS table codes in the datapoint definitions
tables_complete_set = df_datapoints.tabelcode.sort_values().unique().tolist()
# QRS tables for which a pickle file is present in the input folder
tables = [code for code in tables_complete_set
          if isfile(join(INSTANCES_DATA_PATH, code + '.pickle'))]
tables_closed_axis = []  # names of the input tables with a closed axis
tables_open_axis = []  # names of the input tables with an open axis
df_closed_axis = pd.DataFrame()  # all closed axis tables joined into one dataframe
dict_open_axis = {}  # one dataframe per open axis table, keyed by table name

for table in tables:
    df = pd.read_pickle(join(INSTANCES_DATA_PATH, table + '.pickle'))

    # At most 2 index levels (entity, period) --> closed axis
    if df.index.nlevels <= 2:
        tables_closed_axis.append(table)
        if len(df_closed_axis) == 0:
            # Nothing collected yet: start from a copy of this table
            df_closed_axis = df.copy()
        else:
            # Add the columns of this table to the data collected so far
            df_closed_axis = df_closed_axis.join(df)
        continue

    # More than 2 index levels --> open axis.
    # The index levels other than entity and period identify a 'table row'.
    index_columns_open_axis = [name for name in df.index.names
                               if name not in ['entity', 'period']]

    # Move those levels to ordinary data columns and put a string copy of each
    # back into the index under the name 'index_col_<i>'
    df = df.reset_index(level=index_columns_open_axis)
    for i, source_column in enumerate(index_columns_open_axis):
        key_column = 'index_col_' + str(i)
        df[key_column] = df[source_column].astype(str)
        df = df.set_index([key_column], append=True)

    tables_open_axis.append(table)
    dict_open_axis[table] = df

print('Closed-axis tables:')
pprint(tables_closed_axis)
print()
print('Open-axis tables:')
pprint(tables_open_axis)

### Add not reported datapoints as 0's to the dataframes

Here we perform some necessary data cleaning.

In [None]:
# List with all possible datapoints, as column labels 'tabelcode,rij,kolom'
# (a double comma caused by an empty row or column reference is collapsed)
all_datapoints = [x.replace(',,', ',') for x in
                  df_datapoints['tabelcode'] + ',' + df_datapoints['rij'] + ',' + df_datapoints['kolom']]

# The table code is the part of the label before the first comma. Splitting on
# the comma (rather than taking the first 13 characters) also works for table
# codes of a different length.
closed_tables = set(tables_closed_axis)
open_tables = set(tables_open_axis)
# List with all possible datapoints for closed axis tables:
all_datapoints_closed = [x for x in all_datapoints if x.split(',', 1)[0] in closed_tables]
# List with all possible datapoints for open axis tables:
all_datapoints_open = [x for x in all_datapoints if x.split(',', 1)[0] in open_tables]

# Add not reported datapoints to the dataframe with data from closed axis tables.
# np.nan is used because the np.NaN alias no longer exists in NumPy 2.0.
reported_columns = set(df_closed_axis.columns)
for column in [column for column in all_datapoints_closed if column not in reported_columns]:
    df_closed_axis[column] = np.nan
# Replace every missing value (new columns and gaps in reported data) by 0
df_closed_axis.fillna(0, inplace=True)

# Add not reported datapoints to the dataframes with data from open axis tables
for table, df_table in dict_open_axis.items():
    all_datapoints_table = [x for x in all_datapoints_open if x.split(',', 1)[0] == table]
    reported_columns = set(df_table.columns)
    for column in [column for column in all_datapoints_table if column not in reported_columns]:
        df_table[column] = np.nan
    df_table.fillna(0, inplace=True)

### Compare reported tables with the complete set

In [None]:
# Tables from the complete set for which no input file was found
diff = list(np.setdiff1d(tables_complete_set, tables))
if diff:
    print('No file found for the following tables:', diff)
else:
    print('Found files for all possible tables')

## Read DNBs Additional Validation Rules

DNB's additional validation rules are currently published as an Excel file on the DNB statistics website. A copy of that Excel file is included in this project.

Here we read the Excel and perform some data cleaning.

In [None]:
# Name of the Excel file with the additional validation rules (located in RULES_PATH)
FILENAME_RULES = '2020-01-22 Set aanvullende controleregels Solvency II_tcm46-387021.xlsx'

def read_rules(entrypoint):
    """Load the additional rules for one entrypoint (e.g. 'QRS') into a dataframe.

    The Excel file FILENAME_RULES in RULES_PATH is read with its second row as
    header. Only rules whose 'Standaard' is either 'SOLVENCY' or the given
    entrypoint are kept; duplicate lines are removed and empty cells are
    replaced by an empty string.

    Returns the resulting dataframe, indexed by 'ControleRegelCode'.
    """
    all_rules = pd.read_excel(join(RULES_PATH, FILENAME_RULES), header=1)
    standard = all_rules['Standaard']
    applicable = (standard == 'SOLVENCY') | (standard == entrypoint)
    return (
        all_rules[applicable]
        .drop_duplicates()  # remove double lines
        .fillna("")
        .set_index('ControleRegelCode')
    )

In [None]:
# Load the rules with 'Standaard' equal to 'SOLVENCY' or 'QRS' and show the first rows
df_rules = read_rules('QRS')
df_rules.head()

## Evaluate rules for tables with a closed axis

The evaluator is a piece of Python code, which takes the Additional Validation Rules as input, and transforms it to expressions that can be interpreted by the data_patterns package.

The data-patterns package is also called from within the evaluator, to evaluate the results.

Here we evaluate the validation rules for tables with closed axes (that are put into a single dataframe).

In [None]:
# Create the evaluator for the combined closed-axis data; its df_patterns and
# df_results attributes are inspected in the cells that follow
evaluator = Evaluator(df_closed_axis, df_rules, df_datapoints, PARAMETERS)

In [None]:
evaluator.df_patterns.head()  # first rows of the resulting patterns for the closed axis tables

In [None]:
evaluator.df_results.head()  # first rows of the results for the closed axis tables

## Evaluate rules for tables with an open axis

Then we perform the relevant validation rules for each table with an open axis separately.

### Check if there are rules for tables with an open axis

In [None]:
# Each entry of the 'templates' column is a collection of table names;
# gather every table name that occurs in at least one rule
tables_with_rules = {template
                     for templates in evaluator.df_rules.templates.tolist()
                     for template in templates}
# Keep only the tables that were also read as open-axis input tables
tables_with_open_axis_and_rules = tables_with_rules & set(tables_open_axis)
print(tables_with_open_axis_and_rules)

### Evaluate rules for tables with an open axis

In [None]:
# Per open axis table that has rules: the input data, the applicable rules,
# and the patterns and results returned by the evaluator
output_open_axis = {}
for table in tables_with_open_axis_and_rules:
    table_data = dict_open_axis[table]
    # Rules whose 'templates' entry contains the current table
    applies_to_table = evaluator.df_rules['templates'].apply(lambda templates: table in templates)
    rule_indexes = evaluator.df_rules[applies_to_table].index
    table_rules = df_rules.loc[rule_indexes]
    # Run the evaluator on the data and rules of this table only
    evaluator_open_axis = Evaluator(table_data, table_rules, df_datapoints, PARAMETERS)
    info = {
        'data': table_data,
        'rules': table_rules,
        'patterns': evaluator_open_axis.df_patterns,
        'results': evaluator_open_axis.df_results,
    }
    output_open_axis[table] = info

In [None]:
# If there are rules for tables with an open axis: show the rules of the first such table
if tables_with_open_axis_and_rules:
    first_table = next(iter(tables_with_open_axis_and_rules))
    display(output_open_axis[first_table]['rules'])

In [None]:
# If there are rules for tables with an open axis: show the patterns of the first such table
if tables_with_open_axis_and_rules:
    first_table = next(iter(tables_with_open_axis_and_rules))
    display(output_open_axis[first_table]['patterns'])

In [None]:
# If there are rules for tables with an open axis: show the first results of the first such table
if tables_with_open_axis_and_rules:
    first_table = next(iter(tables_with_open_axis_and_rules))
    display(output_open_axis[first_table]['results'].head())

## Combine and export results for closed and open axis tables

Now we have the results from tables with closed and open axes and we can combine the results.

In [None]:
def transform_results_open_axis(df):
    """Make open-axis results appendable to the closed-axis results.

    Every index level beyond the first two is moved out of the index and into
    an ordinary data column. Columns that come out named 'level_<n>' (the name
    pandas gives to an unnamed index level at position n) are renamed to
    'id_column_<n - 1>'.

    A dataframe with at most two index levels is returned unchanged; otherwise
    a new dataframe is returned and the input is left untouched.
    """
    level_count = df.index.nlevels
    if level_count <= 2:
        return df

    extra_levels = list(range(2, level_count))
    flattened = df.reset_index(level=extra_levels)
    new_names = {'level_' + str(position): 'id_column_' + str(position - 1)
                 for position in extra_levels}
    return flattened.rename(columns=new_names)

In [None]:
df_all_patterns = evaluator.df_patterns  # patterns for closed axis tables
df_all_results = evaluator.df_results  # results for closed axis tables
# For all open axis tables with rules: put their patterns and results in front of
# what has been collected so far and sort again.
# pd.concat is used because DataFrame.append no longer exists in pandas 2.0.
for table in output_open_axis:
    df_all_patterns = (
        pd.concat([output_open_axis[table]['patterns'], df_all_patterns])
        .sort_values(by=['pattern_id'])
        .reset_index(drop=True)
    )
    # The extra index levels of the open-axis results are converted to data
    # columns first, so both sets of results share the same index structure
    df_all_results = (
        pd.concat([transform_results_open_axis(output_open_axis[table]['results']), df_all_results],
                  sort=False)
        .sort_values(by=['pattern_id'])
        .sort_index()
    )

## Save results

The dataframe df_all_results contains all output of the evaluation of the validation rules. 

In [None]:
df_all_patterns.to_excel(join(RESULTS_PATH, "patterns.xlsx"))  # export all patterns to patterns.xlsx in the results folder

In [None]:
# Which rows to save is selected with the 'result_type' column:
#   all results:        df_all_results
#   all exceptions:     df_all_results[df_all_results['result_type']==False]
#   all confirmations:  df_all_results[df_all_results['result_type']==True]

# Here we save only the exceptions to the validation rules
df_results = df_all_results[df_all_results['result_type']==False]
df_results.to_excel(join(RESULTS_PATH, "results.xlsx"))  # export the exceptions to results.xlsx in the results folder

In [None]:
df_all_patterns.head()  # first rows of the combined patterns (closed and open axis tables)

In [None]:
df_results.head()  # first rows of the exceptions that were exported to results.xlsx

In [None]:
# Get the pandas code from the first pattern and evaluate it.
# The generated code refers to its data as `df`; here it is pointed at df_closed_axis.
# Only the stand-alone name `df` is substituted (word boundaries), so other
# identifiers or labels that merely contain the letters "df" are left intact.
# NOTE(review): this assumes the first pattern belongs to a closed-axis table;
# after combining, row 0 could be an open-axis pattern -- confirm before relying on it.
# NOTE: eval executes generated code; only use this with rules from a trusted source.
s = re.sub(r'\bdf\b', 'df_closed_axis', df_all_patterns.loc[0, 'pandas ex'])
print(s)
with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # print whole dataframe
    display(eval(s).T)