In [24]:
import os
import json
import logging
import pandas as pd
import en_core_web_sm
nlp = en_core_web_sm.load()
from collections import Counter

import spacy
from spacy import displacy

input_dir = "data/raw"
logger = logging.getLogger(__name__)



In [25]:
ner_desc_df = pd.read_csv("data/preprocess/FinancialPhraseBank-v1.0/ner_descriptions.csv")

In [26]:
ner_desc_df

Unnamed: 0,Entities,Descriptions
0,AUTO,"car, auto-loan, automotive, vehicle, hirepurch..."
1,CC SERVICE,"Balance Transfer, Installments, Ezypay, EPP, P..."
2,FD,"fixed deposit, fd, FD"
3,INVESTMENT,"investment, gold, stocks, bonds, silver, commo..."
4,MORTGAGE,"house finance, property, mortgage, home loan, ..."
5,NEW CC,"new cc, petrol, cashback, movie tickets, air m..."
6,SEGMENT,"RAKElite, Premier Account, Elite Account, Rela..."
7,SERVICE,"RAK Token, Forgot Password, Forgot ID, Replace..."


In [27]:
ner_desc_df["Entities"][5]

'NEW CC'

In [2]:
file = os.path.join(input_dir, "{}.json".format("chat"))
with open(file) as f:
    data = json.loads(f.read())

logger.info("  putting dialogues into mutliple lists format...")
parsed_dials = []
for line in data:
    dialogue = line["dialogue"]
    
    all_text = []
    for i, text in enumerate(dialogue):
        text = text["text"]
        all_text.append(text)
        
    parsed_dials.append({"persona1": [], "persona2": [], "turns": all_text})
    
dialogues_lists = [d["turns"] for d in parsed_dials]
flattened_data = [item for sublist in dialogues_lists for item in sublist]
df = pd.DataFrame(flattened_data, columns=["sentence"])
    
# # load best maodel
nlp_rak = spacy.load("data/models/ner-model-best")



In [3]:
ner_entities_dict = {
    "AUTO": ["car, auto-loan, automotive, vehicle, hirepurchase, car insurance, car financing"],
    "CC SERVICE": ["Balance Transfer, Installments, Ezypay, EPP, Payment Plans"],
    "FD": ["fixed deposit, fd, FD"],
    "INVESTMENT": ["investment, gold, stocks, bonds, silver, commodities, trading account, Unit Trust, Mutual Fund"],
    "MORTGAGE": ["house finance, property, mortgage, home loan, home financing"],
    "NEW CC": ["new cc, petrol, cashback, movie tickets, air miles, groceries cashback, points"],
    "SEGMENT": ["RAKElite, Premier Account, Elite Account, Relationship Manager"],
    "SERVICE": ["RAK Token, Forgot Password, Forgot ID, Replace Credit Card"],
}

ner_desc_df = (pd.DataFrame.from_dict(ner_entities_dict, orient="index")
               .reset_index()
               .rename(columns={"index": "Entities", 0: "Descriptions"}))
ner_desc_df.to_csv(r"C:\Users\kewjs\Documents\02-Self_Learning\01-Data_Science\07-Chatbot\data\preprocess\FinancialPhraseBank-v1.0\ner_descriptions.csv", index=False)

In [4]:
ner_table = pd.read_csv(r"data\preprocess\FinancialPhraseBank-v1.0\ner_table.csv")
    
auto_sum = int(ner_table[ner_table["Entities"]=="AUTO"]["Sum"])
cc_service_sum = int(ner_table[ner_table["Entities"]=="CC SERVICE"]["Sum"])
fd_sum = int(ner_table[ner_table["Entities"]=="FD"]["Sum"])
investment_sum = int(ner_table[ner_table["Entities"]=="INVESTMENT"]["Sum"])
mortgage_sum = int(ner_table[ner_table["Entities"]=="MORTGAGE"]["Sum"])
new_cc_sum = int(ner_table[ner_table["Entities"]=="NEW CC"]["Sum"])
segment_sum = int(ner_table[ner_table["Entities"]=="SEGMENT"]["Sum"])
service_sum = int(ner_table[ner_table["Entities"]=="SERVICE"]["Sum"])

total_queries = auto_sum + cc_service_sum + fd_sum + investment_sum + mortgage_sum + new_cc_sum + segment_sum + service_sum


In [5]:
total_queries

1133

In [5]:
nlp_rak.get_pipe("ner").labels

('AUTO',
 'CC SERVICE',
 'FD',
 'INVESTMENT',
 'MORTGAGE',
 'NEW CC',
 'SEGMENT',
 'SERVICE')

In [20]:
text = {
    "sentence": ["How do I become an Elite customer? What Investment products do you offer? Suggest to me a cc with air miles benefit. How do I apply for Auto Loan with rakbank?", "How do I become an Elite customer? What Investment products do you offer? How do I apply for Car Loan with rakbank?", "I want a credit card with supermarket cashback. How do I get car loan from Rakbank. Also, I forgot my Raktoken how do I reset it?", "Can I open a trading account with Rakbank? What is the lowest rate you can offer for mortgage loan?", "I want a cc with petrol cashback", "I want to convert my CC purchase into installments. How can I do a Balance Transfer from my ADCB?", "Can I invest with rakbank?", "How do I apply for Car Loan? Can I do it online?", "How can I do a Balance Transfer", "I would like to apply for mortgage loan. What is your latest interest rate?"]
}

df = pd.DataFrame.from_dict(text)
tokens = nlp("".join(str(df.sentence.tolist())))
items = [x.text for x in tokens.ents]
Counter(items)

Counter({'Elite': 2,
         'Auto Loan': 1,
         'Car Loan': 2,
         'Rakbank': 2,
         'Balance Transfer': 1,
         'ADCB': 1,
         "Balance Transfer'": 1})

In [27]:
items

['Elite',
 'Auto Loan',
 'Elite',
 'Car Loan',
 'Rakbank',
 'Rakbank',
 'Balance Transfer',
 'ADCB',
 'Car Loan',
 "Balance Transfer'"]

In [6]:
text = "How do I become an Elite customer? What Investment products do you offer? Suggest to me a cc with air miles benefit. How do I apply for Auto Loan with rakbank?"
doc = nlp_rak(text)

for entity in doc.ents:
    print(entity.text, entity.label_)

Elite SEGMENT
Investment INVESTMENT
Auto AUTO


In [7]:
#Set colors
colors = {"SEGMENT": "#85C1E9", "INVESTMENT": "#ff6961", "AUTO": "lightgreen", "CC SERVICE": "#ffe4b5", "FD": "lightpurple", "MORTGAGE": "lightpink", "NEW CC": "#b0c4de", "SERVICE": "#cd5c5c"}
options = {"ents": ["SEGMENT", "INVESTMENT", "AUTO", "CC SERVICE", "FD", "MORTGAGE", "NEW CC", "SERVICE"], "colors": colors}


In [19]:
text1 = "How do I become an Elite customer? What Investment products do you offer? Suggest to me a cc with air miles benefit. How do I apply for Auto Loan with rakbank?"
doc1 = nlp_rak(text1)
html_doc1 = displacy.render(doc1, style="ent", options=options)

text1 = "How do I become an Elite customer? What Investment products do you offer? Suggest to me a cc with air miles benefit. How do I apply for Auto Loan with rakbank?"
doc1 = nlp_rak(text1)
html_doc1 = displacy.render(doc1, style="ent", options=options)

text2 = "Can I open a trading account with Rakbank? What is the lowest rate you can offer for mortgage loan?"
doc2 = nlp_rak(text2)
html_doc2 = displacy.render(doc2, style="ent", options=options)

text3 = "I want a credit card with supermarket cashback. How do I get car loan from Rakbank. Also, I forgot my Raktoken how do I reset it?"
doc3 = nlp_rak(text3)
html_doc3 = displacy.render(doc3, style="ent", options=options)

text4 = "How do I become an Elite customer? What Investment products do you offer? How do I apply for Car Loan with rakbank?"
doc4 = nlp_rak(text4)
html_doc4 = displacy.render(doc4, style="ent", options=options)

In [23]:
from IPython.display import display

ner_output_dict = {
    "sentence": [html_doc1],
} 

ner_output_df = pd.DataFrame(ner_output_dict)

display(ner_output_df)

Unnamed: 0,sentence
0,


In [21]:
ner_output_dict

{'sentence': [None, None, None, None]}

In [8]:
#Visualize the output with displacy (need to use jupyter notebook)
#from spacy import displacy
displacy.render(doc, style='ent',options=options)

In [9]:
text = "How do I become an Elite customer? What Investment products do you offer? How do I apply for Car Loan with rakbank?"
doc = nlp_rak(text)
displacy.render(doc, style='ent',options=options)

In [10]:
text = "I want a credit card with supermarket cashback. How do I get car loan from Rakbank. Also, I forgot my Raktoken how do I reset it?"
doc = nlp_rak(text)
displacy.render(doc, style='ent',options=options)

In [11]:
text = "Can I open a trading account with Rakbank? What is the lowest rate you can offer for mortgage loan?"
doc = nlp_rak(text)
displacy.render(doc, style='ent',options=options)

In [12]:
text = "I want a cc with petrol cashback"
doc = nlp_rak(text)
displacy.render(doc, style='ent',options=options)

In [13]:
text = "I want to convert my CC purchase into installments. How can I do a Balance Transfer from my ADCB?"
doc = nlp_rak(text)
displacy.render(doc, style='ent',options=options)

In [14]:
text = "Can I invest with rakbank?"
doc = nlp_rak(text)
displacy.render(doc, style='ent',options=options)

In [15]:
text = "How do I apply for Car Loan? Can I do it online?"
doc = nlp_rak(text)
displacy.render(doc, style='ent',options=options)

In [16]:
text = "How can I do a Balance Transfer"
doc = nlp_rak(text)
displacy.render(doc, style='ent',options=options)

In [17]:
text = "I would like to apply for mortgage loan. What is your latest interest rate?"
doc = nlp_rak(text)
displacy.render(doc, style='ent',options=options)