In [1]:
import checklist
import numpy as np
import spacy
from checklist.editor import Editor
# Checklist Editor instance used by the template-filling cells in this notebook.
editor = Editor()

In [2]:
# !python -m spacy download en_core_web_sm

In [3]:
# Custom Utils for this Notebook
def show_random(ret, n=3):
    """Return ``n`` randomly chosen entries from ``ret.data``.

    Args:
        ret: Any object exposing a ``data`` sequence (for example the value
            returned by ``editor.template``).
        n: How many entries to draw. Defaults to 3.

    Returns:
        The array produced by ``np.random.choice(ret.data, n)``. Sampling is
        done with replacement (the ``np.random.choice`` default), so the same
        entry can show up more than once.
    """
    return np.random.choice(ret.data, n)

In [4]:
# Checklist example: {first_name} and {country} are not passed in here, so the
# Editor fills them itself; only the profession list is supplied by us.
ret = editor.template(
    templates='{first_name} is {a:profession} from {country}.',
    profession=['lawyer', 'doctor', 'accountant'],
)
show_random(ret)

array(['Suzanne is a lawyer from Senegal.',
       'Thomas is an accountant from Guyana.',
       'Philip is a doctor from Sierra Leone.'], dtype='<U67')

In [5]:
# Checklist example: the {mask} slot is left for the Editor's masked language
# model to fill, while the adjectives come from the list below.
ret = editor.template(
    templates='This is {a:adj} {mask}.',
    adj=['good', 'bad', 'great', 'terrible'],
)
show_random(ret)

Some weights of RobertaForMaskedLM were not initialized from the model checkpoint at roberta-base and are newly initialized: ['lm_head.decoder.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


array(['This is a bad song.', 'This is a bad test.',
       'This is a good pick.'], dtype='<U32')

In [6]:
# Define Our Banking Dictionary & Universe
# Each list below supplies the candidate values for the template placeholder
# of the same name (card_synonyms is passed to the templates as "cards").

# --- Accounts ---
account_types = [
    "savings", "salary", "ADIB", "Ghina", "current",
    "recurring deposit", "fixed deposit", "loan", "demat",
]
account_info_terms = [
    "features", "interest rates", "charges", "details", "fees", "balance",
]

# --- Who is asking / what they want to do ---
qualifiers = [
    "I",
    "we",
]
action = [
    "apply",
    "get",
    "open",
]

# --- Loans and debt ---
loan_types = [
    "car",
    "travel",
    "personal",
    "student",
    "education",
    "housing",
    "home",
]
debt_synonyms = [
    "financing",
    "debt",
    "loan",
    "finance",
]
debt_info_terms = ["amount due", "due amount", "payment date", "installment"]

# --- Cards ---
card_synonyms = [
    "card",
    "covered card",
    "credit card",
    "debit card",
    "atm card",
]
card_brands = [
    "Smiles", "Emirates", "Cashback", "Skywards", "Rewards", "Etihad",
    "Skyward", "Booking.com", "Spice",
    "amex", "diners", "discover",
    "jcb", "jcb15", "jcb16",
    "maestro", "mastercard",
    "visa", "visa13", "visa16", "visa19",
]

# --- Locations ---
close = [
    "nearest",
    "nearby",
]
find_close = [
    "how close is the",
    "where is the",
    "need to go to",
    "ADIB",
]
find_what = [
    "ATM", "branch", "branches", "loan agent",
    "cash deposit machine", "cheque deposit machine",
    "cash withdrawal", "ccdm", "cash withdraw machine",
]
# Sentence openers used for the {inform} placeholder in the templates.
inform = [
    "What is my",
    "show me my",
    "find",
    "can you show me the",
    "show",
    "my",
    "show me the",
    "looking for my",
    "Check",
    "Remaining",
    "I want to know",
    "What is",
    "get",
    "Tell me",
    # These two were previously fused into "Where is yourWhat are the available"
    # by implicit string-literal concatenation (missing comma between them).
    "Where is your",
    "What are the available",
]

In [7]:
# Sentence Templates with Intent Labels
# Each entry is unpacked straight into editor.template(), so the keys must be
# "templates" (the sentence pattern) and "labels" (the intent for that pattern).
# A label written as "{action}" is a placeholder: it is meant to resolve to the
# same action value sampled for the sentence, so the intent becomes
# apply / get / open rather than the literal word "action".
REQUEST_INFO_INTENT = "ask_for_info"
templates_info_banking = [
    {
        "templates": "{find_close} {mask} {close} {find_what}",
        "labels": REQUEST_INFO_INTENT,
    },
    {
        "templates": "{inform} {mask} {account_types} {pron:mask} {account_info_terms}",
        "labels": REQUEST_INFO_INTENT,
    },
    {
        "templates": "{inform} {account_types} {account_info_terms}",
        "labels": REQUEST_INFO_INTENT,
    },
    {"templates": "{inform} {cards} {debt_info_terms}", "labels": REQUEST_INFO_INTENT},
    {"templates": "{loan_types} {debt_synonyms}", "labels": REQUEST_INFO_INTENT},
    {"templates": "{action} {card_brands} {cards}", "labels": "{action}"},
    {
        "templates": "How {verb:mask} {qualifiers} {action} {pron:mask} {card_brands} {cards}",
        # Was the literal "action"; use the placeholder like the template above.
        "labels": "{action}",
    },
    # Was the literal "action"; use the placeholder so the label tracks the verb.
    {"templates": "{action} {account_types} account", "labels": "{action}"},
    # The verb is fixed to "open" in this pattern, so the label is a literal.
    {"templates": "{mask} {mask} open {account_types} account", "labels": "open"},
    {
        "templates": "{account_types} account {account_info_terms}",
        "labels": REQUEST_INFO_INTENT,
    },
]

In [8]:
def get_banking_generations(template, nsamples=10):
    """
    Fill one sentence template using the notebook's banking vocabulary.

    Args:
        template: Mapping unpacked as keyword arguments into
            ``editor.template`` (the entries in this notebook carry
            ``"templates"`` and ``"labels"`` keys).
        nsamples: Forwarded as ``nsamples`` to ``editor.template``;
            defaults to 10.

    Returns:
        Whatever ``editor.template`` returns for the given template, called
        with ``remove_duplicates=True`` and ``meta=True``.
    """
    # Placeholder name -> candidate values. Note that the {cards} placeholder
    # is fed from the card_synonyms list.
    banking_universe = {
        "account_types": account_types,
        "qualifiers": qualifiers,
        "loan_types": loan_types,
        "debt_synonyms": debt_synonyms,
        "close": close,
        "action": action,
        "card_brands": card_brands,
        "cards": card_synonyms,
        "find_close": find_close,
        "inform": inform,
        "find_what": find_what,
        "debt_info_terms": debt_info_terms,
        "account_info_terms": account_info_terms,
    }
    generation_options = {
        "remove_duplicates": True,
        "nsamples": nsamples,
        "meta": True,
    }
    return editor.template(**template, **banking_universe, **generation_options)

In [9]:
# Sample three sentences from the last template in the list and show the
# generated text, the values chosen for each placeholder, and the labels.
filled_in_info = get_banking_generations(
    templates_info_banking[-1],
    nsamples=3,
)
(filled_in_info.data, filled_in_info.meta, filled_in_info.labels)

(['fixed deposit account balance',
  'ADIB account charges',
  'current account fees'],
 [{'account_types': 'fixed deposit', 'account_info_terms': 'balance'},
  {'account_types': 'ADIB', 'account_info_terms': 'charges'},
  {'account_types': 'current', 'account_info_terms': 'fees'}],
 ['ask_for_info', 'ask_for_info', 'ask_for_info'])

In [10]:
import checklist
from checklist.editor import Editor
from checklist.perturb import Perturb
from checklist.test_types import MFT, INV, DIR

# spaCy English pipeline and a new Editor instance for the perturbation demo.
nlp = spacy.load("en_core_web_sm")
editor = Editor()

# Small set of movie-review style sentences to perturb.
dataset = [
    "This was a very nice movie directed by John Smith.",
    "Mary Keen was brilliant.",
    "I hated everything about this.",
    "This movie was very bad.",
    "I really liked this movie.",
    "just bad.",
    "amazing.",
]

# Pass 1: apply add_typos to every sentence in the dataset.
t1 = Perturb.perturb(dataset, Perturb.add_typos)

# Pass 2: apply contractions to the element at index 1 of each pass-1 entry.
t2 = Perturb.perturb(
    [entry[1] for entry in t1.data],
    Perturb.contractions,
)

In [11]:
# Collect the index-1 element of every entry, first from t1 and then from t2.
combined_data = [
    entry[1]
    for perturbed in (t1, t2)
    for entry in perturbed.data
]

In [12]:
# Display the combined list of perturbed sentences.
combined_data

['This was a very nice movie directe dby John Smith.',
 'Mary Keen was brilliatn.',
 'I hated everything about htis.',
 'This movie was veryb ad.',
 ' Ireally liked this movie.',
 'jus tbad.',
 'amaizng.']