# Fiancial Domain Classification Experiment

In this notebook, we will attempt to reproduce the experimental setup for the FinSim-3 2021 shared task of classifying financial terms. The experiment takes inspiration from the paper [Yseop at FinSim-3 Shared Task 2021: Specializing Financial Domain Learning with Phrase Representations](https://arxiv.org/abs/2108.09485) describing the phrase representation model [FinISH (Finance-Identifying Sroberta for Hypernyms)](https://huggingface.co/yseop/roberta-base-finance-hypernym-identification).

### Data model

The data model is based on the FIBO ontology for financial terms.

In [32]:
from lnn import And, Equivalent, Implies, Propositions

# Test on a single input
input_dict = [{'term': 'Alternative Debenture', 'label': 'Bonds'}, {'term': 'Bearer bond', 'label': 'Bonds'}, {'term': 'Bond coupon', 'label': 'Bonds'}, {'term': 'Callable bond', 'label': 'Bonds'}, {'term': 'CoCo Bonds', 'label': 'Bonds'}, {'term': 'Collateralized Debt Obligation', 'label': 'Bonds'}, {'term': 'Collateralized Loan Obligation', 'label': 'Bonds'}, {'term': 'Collateralized Mortgage Obligation', 'label': 'Bonds'}, {'term': 'Contingent Convertible Bonds', 'label': 'Bonds'}, {'term': 'Convertible Bond', 'label': 'Bonds'}, {'term': 'Convertible bond', 'label': 'Bonds'}, {'term': 'Corporate Bond', 'label': 'Bonds'}, {'term': 'Corporate bond', 'label': 'Bonds'}, {'term': 'Corporate Note', 'label': 'Bonds'}, {'term': 'Covered Bond', 'label': 'Bonds'}, {'term': 'Credit linked notes', 'label': 'Bonds'}, {'term': 'Debenture', 'label': 'Bonds'}, {'term': 'Enhanced Capital Note', 'label': 'Bonds'}, {'term': 'Eurobond', 'label': 'Bonds'}, {'term': 'Exchangeable Bond', 'label': 'Bonds'}, {'term': 'Floating Rate Note', 'label': 'Bonds'}, {'term': 'Floating rate note', 'label': 'Bonds'}, {'term': 'Government Bond', 'label': 'Bonds'}, {'term': 'Government bond', 'label': 'Bonds'}, {'term': 'Government Bond Index Linked', 'label': 'Bonds'}, {'term': 'Green bond', 'label': 'Bonds'}, {'term': 'High grade bond', 'label': 'Bonds'}, {'term': 'High-risk bonds', 'label': 'Bonds'}, {'term': 'Inflation Index Security', 'label': 'Bonds'}, {'term': 'Inflation indexed bond', 'label': 'Bonds'}, {'term': 'Interest Rate', 'label': 'Bonds'}, {'term': 'Junk bonds', 'label': 'Bonds'}, {'term': 'Listed bond', 'label': 'Bonds'}, {'term': 'Long term bond', 'label': 'Bonds'}, {'term': 'Low-risk bonds', 'label': 'Bonds'}, {'term': 'Mortgage Backed Security', 'label': 'Bonds'}, {'term': 'Mortgage bond', 'label': 'Bonds'}, {'term': 'Municipal Bond', 'label': 'Bonds'}, {'term': 'Municipal bonds', 'label': 'Bonds'}, {'term': 'Participation Note', 'label': 'Bonds'}, {'term': 'Pay-in-kind bonds', 'label': 'Bonds'}, {'term': 'Perpetual bond', 'label': 'Bonds'}, {'term': 'Promissory Note', 'label': 'Bonds'}, {'term': 'Promissory note', 'label': 'Bonds'}, {'term': 'Reverse Convertible Bond', 'label': 'Bonds'}, {'term': 'Reverse convertible bond', 'label': 'Bonds'}, {'term': 'Securitised Debenture', 'label': 'Bonds'}, {'term': 'Subordinated bond', 'label': 'Bonds'}, {'term': 'Sukuk', 'label': 'Bonds'}, {'term': 'Supranational Bond', 'label': 'Bonds'}, {'term': 'To Be Announced', 'label': 'Bonds'}, {'term': 'Treasury Strip', 'label': 'Bonds'}, {'term': 'Warrant-linked bond', 'label': 'Bonds'}, {'term': 'Yankee bonds', 'label': 'Bonds'}, {'term': 'Zero coupon bond', 'label': 'Bonds'}, {'term': 'Currency Forward', 'label': 'Forward'}, {'term': 'Foreign Exchange Forward', 'label': 'Forward'}, {'term': 'Forwards', 'label': 'Forward'}, {'term': 'FX Forward', 'label': 'Forward'}, {'term': 'Global Depositary Receipt', 'label': 'Forward'}, {'term': 'Index Forward', 'label': 'Forward'}, {'term': 'NDF', 'label': 'Forward'}]

# Experiment 1: create FOL rules for terms and labels by tokenizing them
# Rules
alternative, dubenture, bearer, bond, BONDS, coupon, FUTURE = Propositions("alternative", "debenture", "bearer", "bond", "Bonds", "coupon", "Future")
alternative_debenture = And(alternative, dubenture)
bearer_bond = And(bearer, bond)
bond_coupon = And(bond, coupon)

alternative_dubenture_BONDS = Implies(alternative_debenture, BONDS)
bearer_bond_BONDS = Implies(bearer_bond, BONDS)
bond_coupon_BONDS = Implies(bond_coupon, BONDS)


bond_coupon_FUTURE = Implies(bond_coupon, FUTURE)

# Data
alternative.add_data((0.0, 0.2))
dubenture.add_data((0.0, 0.2))
alternative_debenture.add_data((0.0, 0.04))

bearer.add_data((0.0, 0.2))
bond.add_data((0.0, 0.4))
coupon.add_data((0.0, 0.2))
bearer_bond.add_data((0.0, 0.08))
bond_coupon.add_data((0.0, 0.08))

BONDS.add_data((0.0, 0.058))
FUTURE.add_data((0.0, 0.058))

# Reasoning
alternative_debenture.downward()
alternative_debenture.print()

bearer_bond.downward()
bearer_bond.print()

alternative_dubenture_BONDS.upward()
alternative_dubenture_BONDS.print()

bearer_bond_BONDS.upward()
bearer_bond_BONDS.print()

bond_coupon_BONDS.upward()
bond_coupon_BONDS.print()

bond_coupon_FUTURE.upward()
bond_coupon_FUTURE.print()

OPEN And: (alternative ∧ debenture)                 APPROX_FALSE (0.0, 0.04)

OPEN And: (bearer ∧ bond)                           APPROX_FALSE (0.0, 0.08)

OPEN Implies: ((alternative ∧ debenture) → Bonds)    APPROX_TRUE (0.96, 1.0)

OPEN Implies: ((bearer ∧ bond) → Bonds)              APPROX_TRUE (0.92, 1.0)

OPEN Implies: ((bond ∧ coupon) → Bonds)              APPROX_TRUE (0.92, 1.0)

OPEN Implies: ((bond ∧ coupon) → Future)             APPROX_TRUE (0.92, 1.0)



In [34]:
from lnn import And, Equivalent, Implies, Propositions

# Test on a single input
input_dict = [{'term': 'Alternative Debenture', 'label': 'Bonds'}, {'term': 'Bearer bond', 'label': 'Bonds'}, {'term': 'Bond coupon', 'label': 'Bonds'}, {'term': 'Callable bond', 'label': 'Bonds'}, {'term': 'CoCo Bonds', 'label': 'Bonds'}, {'term': 'Collateralized Debt Obligation', 'label': 'Bonds'}, {'term': 'Collateralized Loan Obligation', 'label': 'Bonds'}, {'term': 'Collateralized Mortgage Obligation', 'label': 'Bonds'}, {'term': 'Contingent Convertible Bonds', 'label': 'Bonds'}, {'term': 'Convertible Bond', 'label': 'Bonds'}, {'term': 'Convertible bond', 'label': 'Bonds'}, {'term': 'Corporate Bond', 'label': 'Bonds'}, {'term': 'Corporate bond', 'label': 'Bonds'}, {'term': 'Corporate Note', 'label': 'Bonds'}, {'term': 'Covered Bond', 'label': 'Bonds'}, {'term': 'Credit linked notes', 'label': 'Bonds'}, {'term': 'Debenture', 'label': 'Bonds'}, {'term': 'Enhanced Capital Note', 'label': 'Bonds'}, {'term': 'Eurobond', 'label': 'Bonds'}, {'term': 'Exchangeable Bond', 'label': 'Bonds'}, {'term': 'Floating Rate Note', 'label': 'Bonds'}, {'term': 'Floating rate note', 'label': 'Bonds'}, {'term': 'Government Bond', 'label': 'Bonds'}, {'term': 'Government bond', 'label': 'Bonds'}, {'term': 'Government Bond Index Linked', 'label': 'Bonds'}, {'term': 'Green bond', 'label': 'Bonds'}, {'term': 'High grade bond', 'label': 'Bonds'}, {'term': 'High-risk bonds', 'label': 'Bonds'}, {'term': 'Inflation Index Security', 'label': 'Bonds'}, {'term': 'Inflation indexed bond', 'label': 'Bonds'}, {'term': 'Interest Rate', 'label': 'Bonds'}, {'term': 'Junk bonds', 'label': 'Bonds'}, {'term': 'Listed bond', 'label': 'Bonds'}, {'term': 'Long term bond', 'label': 'Bonds'}, {'term': 'Low-risk bonds', 'label': 'Bonds'}, {'term': 'Mortgage Backed Security', 'label': 'Bonds'}, {'term': 'Mortgage bond', 'label': 'Bonds'}, {'term': 'Municipal Bond', 'label': 'Bonds'}, {'term': 'Municipal bonds', 'label': 'Bonds'}, {'term': 'Participation Note', 'label': 'Bonds'}, {'term': 'Pay-in-kind bonds', 'label': 'Bonds'}, {'term': 'Perpetual bond', 'label': 'Bonds'}, {'term': 'Promissory Note', 'label': 'Bonds'}, {'term': 'Promissory note', 'label': 'Bonds'}, {'term': 'Reverse Convertible Bond', 'label': 'Bonds'}, {'term': 'Reverse convertible bond', 'label': 'Bonds'}, {'term': 'Securitised Debenture', 'label': 'Bonds'}, {'term': 'Subordinated bond', 'label': 'Bonds'}, {'term': 'Sukuk', 'label': 'Bonds'}, {'term': 'Supranational Bond', 'label': 'Bonds'}, {'term': 'To Be Announced', 'label': 'Bonds'}, {'term': 'Treasury Strip', 'label': 'Bonds'}, {'term': 'Warrant-linked bond', 'label': 'Bonds'}, {'term': 'Yankee bonds', 'label': 'Bonds'}, {'term': 'Zero coupon bond', 'label': 'Bonds'}, {'term': 'Currency Forward', 'label': 'Forward'}, {'term': 'Foreign Exchange Forward', 'label': 'Forward'}, {'term': 'Forwards', 'label': 'Forward'}, {'term': 'FX Forward', 'label': 'Forward'}, {'term': 'Global Depositary Receipt', 'label': 'Forward'}, {'term': 'Index Forward', 'label': 'Forward'}, {'term': 'NDF', 'label': 'Forward'}]

# Experiment 1: create FOL rules for terms and labels by tokenizing them
# Rules
alternative, dubenture, bearer, bond, BONDS, coupon, FUTURE = Propositions("alternative", "debenture", "bearer", "bond", "Bonds", "coupon", "Future")
alternative_debenture = And(alternative, dubenture)
bearer_bond = And(bearer, bond)
bond_coupon = And(bond, coupon)

alternative_dubenture_BONDS = Implies(alternative_debenture, BONDS)
bearer_bond_BONDS = Implies(bearer_bond, BONDS)
bond_coupon_BONDS = Implies(bond_coupon, BONDS)


bond_coupon_FUTURE = Implies(bond_coupon, FUTURE)

# Data
alternative_debenture.add_data((1.0, 1.0))

bearer_bond.add_data((1.0, 1.0))
bond_coupon.add_data((1.0, 1.0))

BONDS.add_data((0.058, 1.0))
FUTURE.add_data((0.0, 0.058))

# Reasoning
alternative_debenture.downward()
alternative_debenture.print()

bearer_bond.downward()
bearer_bond.print()

alternative_dubenture_BONDS.upward()
alternative_dubenture_BONDS.print()

bearer_bond_BONDS.upward()
bearer_bond_BONDS.print()

bond_coupon_BONDS.upward()
bond_coupon_BONDS.print()

bond_coupon_FUTURE.upward()
bond_coupon_FUTURE.print()

OPEN And: (alternative ∧ debenture)                         TRUE (1.0, 1.0)

OPEN And: (bearer ∧ bond)                                   TRUE (1.0, 1.0)

OPEN Implies: ((alternative ∧ debenture) → Bonds)  APPROX_UNKNOWN (0.058, 1.0)

OPEN Implies: ((bearer ∧ bond) → Bonds)            APPROX_UNKNOWN (0.058, 1.0)

OPEN Implies: ((bond ∧ coupon) → Bonds)            APPROX_UNKNOWN (0.058, 1.0)

OPEN Implies: ((bond ∧ coupon) → Future)            APPROX_FALSE (0.0, 0.058)



In [4]:
# load fibo ontology csv file containing flat ontology
import pandas as pd

onto_csv = pd.read_csv(r"C:\Users\Hanna\Desktop\LNN\tutorials\Chapter 1 - Reasoning\data_01\fibo\ontology\master\2021Q1-FULL\glossary-prod - glossary-prod.tsv", sep="\t")

onto_csv.head()
# transform all data to lowercase
onto_csv = onto_csv.apply(lambda x: x.astype(str).str.lower())
onto_csv.head()
## types = onto_csv['Type'].unique()
## print(types)

Unnamed: 0,Term,Type,Ontology,Synonyms,Definition,GeneratedDefinition,Examples,Explanations,Maturity
0,02079k107,namedindividual,equities example individuals ontology,,alphabet inc. class c common share cusip,,,,release
1,02079k305,namedindividual,equities example individuals ontology,,alphabet inc. class a common share cusip,,,,release
2,37833100,namedindividual,equities example individuals ontology,,apple inc. common share cusip,,,,release
3,172967424,namedindividual,equities example individuals ontology,,citigroup inc. common share cusip,,,,release
4,191216100,namedindividual,equities example individuals ontology,,the coca-cola company common share cusip,,,,release


In [19]:
# some stats

# check how many synonyms we have
print("#SYNONYMS:", onto_csv['Synonyms'].isnull().sum())

# check how many definitions we have
print("#DEFINITIONS:", onto_csv['Definition'].isnull().values.any())

# check how many definitions we have
print("#GENRATED_DEFINITIONS:", onto_csv['GeneratedDefinition'].isnull().values.any())

# check how many definitions we have
print("#EXAMPLES:", onto_csv['Examples'].isnull().values.any())

# check how many definitions we have
print("#EXPLANATIONS:", onto_csv['Explanations'].isnull().values.any())

# find count of all 'nan' strings
print("nan values in SYNONYMS:",len(onto_csv[onto_csv['Synonyms'] == 'nan'].index))
print("nan values in DEFINITIONS:",len(onto_csv[onto_csv['Definition'] == 'nan'].index))
print("nan values in GENRATED_DEFINITIONS:",len(onto_csv[onto_csv['GeneratedDefinition'] == 'nan'].index))
print("nan values in EXAMPLES:",len(onto_csv[onto_csv['Examples'] == 'nan'].index))
print("nan values in EXPLANATIONS:",len(onto_csv[onto_csv['Explanations'] == 'nan'].index))

print("good values in SYNONYMS:",len(onto_csv['Synonyms']) - len(onto_csv[onto_csv['Synonyms'] == 'nan'].index))
print("good values in DEFINITIONS:",len(onto_csv['Definition']) - len(onto_csv[onto_csv['Definition'] == 'nan'].index))
print("good values in GENRATED_DEFINITIONS:",len(onto_csv['GeneratedDefinition']) - len(onto_csv[onto_csv['GeneratedDefinition'] == 'nan'].index))
print("good values in EXAMPLES:",len(onto_csv['Examples']) - len(onto_csv[onto_csv['Examples'] == 'nan'].index))
print("good values in EXPLANATIONS:",len(onto_csv['Explanations']) - len(onto_csv[onto_csv['Explanations'] == 'nan'].index))
onto_csv.describe()


# Getting first 2 rows of columns Age and Marks from df
df_first_2 = onto_csv[['Synonyms', 'Definition', 'GeneratedDefinition', 'Examples', 'Explanations']].tail(15)
print(df_first_2)

#SYNONYMS: 0
#DEFINITIONS: False
#GENRATED_DEFINITIONS: False
#EXAMPLES: False
#EXPLANATIONS: False
nan values in SYNONYMS: 9263
nan values in DEFINITIONS: 4810
nan values in GENRATED_DEFINITIONS: 7796
nan values in EXAMPLES: 9373
nan values in EXPLANATIONS: 8380
good values in SYNONYMS: 221
good values in DEFINITIONS: 4674
good values in GENRATED_DEFINITIONS: 1688
good values in EXAMPLES: 111
good values in EXPLANATIONS: 1104
     Synonyms                                         Definition  \
9469      nan  an explicit recurrence interval of one week, o...   
9470      nan  basket whose constituents have some relative i...   
9471      nan  component of a basket whose relative importanc...   
9472      nan  expression or function that determines the rel...   
9473      nan                                                nan   
9474      nan                                                nan   
9475      nan  obligation-specific credit event whereby the b...   
9476      nan  formal con

In [2]:
# Create FOL knowledge for ontology terms
from lnn import (Predicate, Variable, Join, And,
                 Exists, Equivalent, Implies, ForAll, Model, Fact, World)

model = Model()  # Instantiate a model
x, y, z, w = map(Variable, ['x', 'y', 'z', 'w'])

# Define and add predicates to the model.
term = Predicate('term')
hasType = Predicate('hasType', 2) # binary predicate
inOntology = Predicate('inOntology', 2)
hasSynonym = Predicate('hasSynonym', 2)
hasDefinition = Predicate('hasDefinition', 2)
hasGeneratedDefinition = Predicate('hasGeneratedDefinition', 2)
hasExample = Predicate('hasExample', 2)
hasExplanation = Predicate('hasExplanation', 2)
hasMaturity = Predicate('hasMaturity', 2)
hasLabel = Predicate('hasLabel', 2)

# Add predicates and rules to the model
model.add_knowledge(term, hasType, inOntology, hasSynonym, hasDefinition, hasGeneratedDefinition, hasExample, hasExplanation, hasMaturity, hasLabel)

In [9]:
# Axioms declarations
rule_get_label_from_synonym = ForAll(x, y, z, Implies(And(inOntology(y, z), term(y), term(x), hasSynonym(x, y)), hasLabel(x, z)))
rule_get_label_from_class = ForAll(x, y, Implies(And(term(x), hasType(x, "class"), inOntology(x, y)), hasLabel(x, y)))

model.add_knowledge(rule_get_label_from_synonym)
model.add_knowledge(rule_get_label_from_class)

# Query
query =  Exists(x, hasLabel(x, 'bonds ontology'))
model.add_knowledge(query)

# Add facts to the model
model.add_data({
    inOntology: {('unsecured bond', 'bonds ontology'): Fact.TRUE},
    term: {'unsecured bond': Fact.TRUE, 'debenture': Fact.TRUE},
    hasSynonym: {('debenture', 'unsecured bond'): Fact.TRUE}})

# Perform inference
steps, facts_inferred = model.infer()

# Inspect the query node
print(model[query].true_groundings)

{('debenture', 'bonds ontology')}


In [10]:
# Add facts to the model
model.add_data({
    inOntology: {('bond coupon', 'bonds ontology'): Fact.TRUE},
    term: {'bond coupon': Fact.TRUE},
    hasType: {('bond coupon', 'class'): Fact.TRUE}})

# Perform inference
steps, facts_inferred = model.infer()

# Inspect the query node
print(model[query].true_groundings)

{('debenture', 'bonds ontology'), ('bond coupon', 'bonds ontology')}


In [11]:
result = model[query2].true_groundings
for key, val in result:
    print("*** KEY:", key)
    print("*** VAL:", val)

NameError: name 'query2' is not defined

In [3]:
# now load all ontology terms in model
for index, row in onto_csv.iterrows():
    # Add facts to the model
    model.add_data({
        inOntology: {(row['Term'], row['Ontology']): Fact.TRUE},
        term: {row['Term']: Fact.TRUE},
        hasSynonym: {(row['Term'], row['Synonyms']): Fact.TRUE},
        hasDefinition: {(row['Term'], row['Definition']): Fact.TRUE},
        hasGeneratedDefinition: {(row['Term'], row['GeneratedDefinition']): Fact.TRUE},
        hasExample: {(row['Term'], row['Examples']): Fact.TRUE},
        hasExplanation: {(row['Term'], row['Explanations']): Fact.TRUE},
        hasMaturity: {(row['Term'], row['Maturity']): Fact.TRUE},
        hasType: {(row['Term'], row['Type']): Fact.TRUE}})

In [4]:
# finally load terms that need to be labeled in model and ask questions about the labels
import json

training_data_path = r"C:\Users\Hanna\Desktop\LNN\tutorials\Chapter 1 - Reasoning\data_01\finish_data\data\terms\train.json"

with open(training_data_path, 'r') as j:
     training_data = json.loads(j.read())
        
print(training_data[0]['term'])

Alternative Debenture


In [5]:
for item in training_data:
        model.add_data({
        term: {item['term'].lower().replace("'", ""): Fact.TRUE}})   # remove quotes to avoid model inference output error

In [None]:
# Axioms declarations
axioms = [
    ForAll(x, y, z, Implies(And(inOntology(y, z), term(y), term(x), hasSynonym(x, y)), hasLabel(x, z)), name="rule-label-from-synonym"),
    ForAll(x, y, Implies(And(term(x), hasType(x, "class"), inOntology(x, y)), hasLabel(x, y)), name="rule-label-from-class")
]

model.add_knowledge(*axioms)

# now query the model for the labels
queries = [
    Exists(x, hasLabel(x, 'bonds ontology'), name="query-bonds-class")
]

model.add_knowledge(*queries)

# Perform inference
steps, facts_inferred = model.infer()

# Inspect the query node
for query in queries:
    print(f"Query {query.name} result:")
    print(model[query.name].true_groundings)

In [None]:
# print the model
model.print()

In [48]:
label_query_result = model[bonds_label_query].true_groundings
for key, val in label_query_result:
    print("*** KEY:", key)
    print("*** VAL:", val)

*** KEY: bond coupon
*** VAL: bonds ontology
*** KEY: debenture
*** VAL: bonds ontology
