In [1]:
dict = {0 	:'activate_my_card',
1 	:'age_limit',
2 	:'apple_pay_or_google_pay',
3 	:'atm_support',
4 	:'automatic_top_up',
5 	:'balance_not_updated_after_bank_transfer',
6 	:'balance_not_updated_after_cheque_or_cash_deposit',
7 	:'beneficiary_not_allowed',
8 	:'cancel_transfer',
9 	:'card_about_to_expire',
10 	:'card_acceptance',
11 	:'card_arrival',
12 	:'card_delivery_estimate',
13 	:'card_linking',
14 	:'card_not_working',
15 	:'card_payment_fee_charged',
16 	:'card_payment_not_recognised',
17 	:'card_payment_wrong_exchange_rate',
18 	:'card_swallowed',
19 	:'cash_withdrawal_charge',
20 	:'cash_withdrawal_not_recognised',
21 	:'change_pin',
22 	:'compromised_card',
23 	:'contactless_not_working',
24 	:'country_support',
25 	:'declined_card_payment',
26 	:'declined_cash_withdrawal',
27 	:'declined_transfer',
28 	:'direct_debit_payment_not_recognised',
29 	:'disposable_card_limits',
30 	:'edit_personal_details',
31 	:'exchange_charge',
32 	:'exchange_rate',
33 	:'exchange_via_app',
34 	:'extra_charge_on_statement',
35 	:'failed_transfer',
36 	:'fiat_currency_support',
37 	:'get_disposable_virtual_card',
38 	:'get_physical_card',
39 	:'getting_spare_card',
40 	:'getting_virtual_card',
41 	:'lost_or_stolen_card',
42 	:'lost_or_stolen_phone',
43 	:'order_physical_card',
44 	:'passcode_forgotten',
45 	:'pending_card_payment',
46 	:'pending_cash_withdrawal',
47 	:'pending_top_up',
48 	:'pending_transfer',
49 	:'pin_blocked',
50 	:'receiving_money',
51 	:'Refund_not_showing_up',
52 	:'request_refund',
53 	:'reverted_card_payment',
54 	:'supported_cards_and_currencies',
55 	:'terminate_account',
56 	:'top_up_by_bank_transfer_charge',
57 	:'top_up_by_card_charge',
58 	:'top_up_by_cash_or_cheque',
59 	:'top_up_failed',
60 	:'top_up_limits',
61 	:'top_up_reverted',
62 	:'topping_up_by_card',
63 	:'transaction_charged_twice',
64 	:'transfer_fee_charged',
65 	:'transfer_into_account',
66 	:'transfer_not_received_by_recipient',
67 	:'transfer_timing',
68 	:'unable_to_verify_identity',
69 	:'verify_my_identity',
70 	:'verify_source_of_funds',
71 	:'verify_top_up',
72 	:'virtual_card_not_working',
73 	:'visa_or_mastercard',
74 	:'why_verify_identity',
75 	:'wrong_amount_of_cash_received',
76 	:'wrong_exchange_rate_for_cash_withdrawal'}

# Banking77 Intent Detection System Evaluation

The objective of this project is to evaluate and compare the performance of various models on the BANKING77 dataset to determine which models achieve the best results. We have selected a set of distinct models and seek to identify which model achieves the highest accuracy in understanding and addressing customer questions. We hope that this project will give us a greater understanding of intent detection and its applications for specific domains, as well as certain models that can be used for it.

This project will focus on comparing the overall accuracy, precision, recall, and F1-scores of selected models, while also considering their complexity. The results of this work could help provide insights into the strengths and weaknesses of each model for real-world use.

## Step 1 - Setup

We load the BANKING77 dataset and save the texts and labels of the test set to a Pandas DataFrame (for easier evaluation).

In [2]:
import pandas as pd
from datasets import load_dataset

dataset = load_dataset("banking77")

texts = []
labels = []

for x in dataset['test']:
    texts.append(x['text'])
    labels.append(dict[x['label']])

df = pd.DataFrame({'text': texts, 'label': labels})
df.to_csv('banking77.csv', index=False)

Found cached dataset banking77 (C:/Users/Sean/.cache/huggingface/datasets/banking77/default/1.1.0/ff44c4421d7e70aa810b0fa79d36908a38b87aff8125d002cd44f7fcd31f493c)


  0%|          | 0/2 [00:00<?, ?it/s]

## Step 2 - System Evaluation

We can now begin evaluating systems and saving the results to the dataset

### System 1 - DistilBERT-Banking77

The first system we will evaluate is the DistilBERT-BANKING77 model. This model is a fine-tuned version of DistilBERT which performs intent detection as a multi-class classification. This is a transformers-based model that can be easily loaded and evaluated in Python using the `transformers` package.

In [3]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

model_id = 'philschmid/DistilBERT-Banking77'
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(model_id)
classifier = pipeline('text-classification', tokenizer=tokenizer, model=model)

# Iterate over the test set and create a list of predictions
predictions = []
for x in dataset['test']:
    predictions.append(classifier(x['text'])[0]['label'])

df['predictions-distilbert'] = predictions
df.head()

Unnamed: 0,text,label,predictions-distilbert
0,How do I locate my card?,card_arrival,card_arrival
1,"I still have not received my new card, I order...",card_arrival,card_arrival
2,I ordered a card but it has not arrived. Help ...,card_arrival,card_arrival
3,Is there a way to know when my card will arrive?,card_arrival,card_arrival
4,My card has not arrived yet.,card_arrival,card_arrival


In [4]:
classifier('What is the base of the exchange rates?')

[{'label': 'exchange_rate', 'score': 0.9791280627250671}]

### System 2 - RoBERTa-Banking77

In [5]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

model_id = 'philschmid/RoBERTa-Banking77'
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(model_id)
classifier = pipeline('text-classification', tokenizer=tokenizer, model=model)

# Iterate over the test set and create a list of predictions
predictions = []
for x in dataset['test']:
    predictions.append(classifier(x['text'])[0]['label'])

df['predictions-roberta'] = predictions
df.head()

Unnamed: 0,text,label,predictions-distilbert,predictions-roberta
0,How do I locate my card?,card_arrival,card_arrival,fiat_currency_support
1,"I still have not received my new card, I order...",card_arrival,card_arrival,atm_support
2,I ordered a card but it has not arrived. Help ...,card_arrival,card_arrival,atm_support
3,Is there a way to know when my card will arrive?,card_arrival,card_arrival,automatic_top_up
4,My card has not arrived yet.,card_arrival,card_arrival,atm_support


In [6]:
classifier('What is the base of the exchange rates?')

[{'label': 'declined_cash_withdrawal', 'score': 0.9911164045333862}]

### System 3 - GPT2-Fine-Tuned-BANKING77

In [23]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

model_id = 'Kwaku/gpt2-finetuned-banking77'
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(model_id)
classifier = pipeline('text-classification', tokenizer=tokenizer, model=model)

# Iterate over the test set and create a list of predictions
predictions = []
for x in dataset['test']:
    predictions.append(classifier(x['text'])[0]['label'])

df['predictions-gpt2'] = predictions
df.head()

Some weights of the model checkpoint at Kwaku/gpt2-finetuned-banking77 were not used when initializing GPT2ForSequenceClassification: ['lm_head.weight']
- This IS expected if you are initializing GPT2ForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing GPT2ForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at Kwaku/gpt2-finetuned-banking77 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Unnamed: 0,text,label,predictions-distilbert,predictions-roberta,predictions-gpt2
0,How do I locate my card?,card_arrival,card_arrival,fiat_currency_support,card_arrival
1,"I still have not received my new card, I order...",card_arrival,card_arrival,atm_support,card_arrival
2,I ordered a card but it has not arrived. Help ...,card_arrival,card_arrival,atm_support,card_arrival
3,Is there a way to know when my card will arrive?,card_arrival,card_arrival,automatic_top_up,card_arrival
4,My card has not arrived yet.,card_arrival,card_arrival,atm_support,card_arrival


## Step 3 - Evaluate Results

We save the results to a CSV for future use, if needed. We can also compare the labels predicted by systems with the actual results and find the accuracy, F1-score, etc.

In [9]:
from sklearn.metrics import classification_report

expected = df['label'].tolist()

predicted_1 = df['predictions-distilbert'].tolist()
print(classification_report(expected, predicted_1))

                                                  precision    recall  f1-score   support

                           Refund_not_showing_up       1.00      0.90      0.95        40
                                activate_my_card       1.00      0.97      0.99        40
                                       age_limit       1.00      1.00      1.00        40
                         apple_pay_or_google_pay       1.00      1.00      1.00        40
                                     atm_support       0.97      0.97      0.97        40
                                automatic_top_up       1.00      0.90      0.95        40
         balance_not_updated_after_bank_transfer       0.82      0.70      0.76        40
balance_not_updated_after_cheque_or_cash_deposit       0.97      0.95      0.96        40
                         beneficiary_not_allowed       0.97      0.88      0.92        40
                                 cancel_transfer       1.00      0.97      0.99        40
         

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [27]:
predicted_2 = df['predictions-roberta'].tolist()
print(classification_report(expected, predicted_2))

                                                  precision    recall  f1-score   support

                           Refund_not_showing_up       0.00      0.00      0.00        40
                                activate_my_card       1.00      0.97      0.99        40
                                       age_limit       1.00      1.00      1.00        40
                         apple_pay_or_google_pay       0.00      0.00      0.00        40
                                     atm_support       0.00      0.00      0.00        40
                                automatic_top_up       0.00      0.00      0.00        40
         balance_not_updated_after_bank_transfer       0.00      0.00      0.00        40
balance_not_updated_after_cheque_or_cash_deposit       0.00      0.00      0.00        40
                         beneficiary_not_allowed       0.00      0.00      0.00        40
                                 cancel_transfer       0.00      0.00      0.00        40
         

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [25]:
predicted_3 = df['predictions-gpt2'].tolist()
print(classification_report(expected, predicted_3))

                                                  precision    recall  f1-score   support

                           Refund_not_showing_up       0.74      0.70      0.72        40
                                activate_my_card       0.73      0.68      0.70        40
                                       age_limit       0.70      0.75      0.72        40
                         apple_pay_or_google_pay       0.63      0.68      0.65        40
                                     atm_support       0.63      0.60      0.62        40
                                automatic_top_up       0.68      0.62      0.65        40
         balance_not_updated_after_bank_transfer       0.51      0.53      0.52        40
balance_not_updated_after_cheque_or_cash_deposit       0.69      0.55      0.61        40
                         beneficiary_not_allowed       0.74      0.57      0.65        40
                                 cancel_transfer       0.78      0.72      0.75        40
         

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [11]:
df.to_csv('banking77-with-predictions.csv', index=False)