In [4]:
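# Insert your project token here (notebook toolbar: More > Insert project token).
# The inserted code is expected to define `wslib`, which later cells use to load the trained models and save the results.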

# Classifying customer notes with Watson NLP

This notebook was created using the [sample notebooks](https://dataplatform.cloud.ibm.com/exchange/public/entry/view/636001e59902133a4a23fd89f010e4cb?context=wx) in the Gallery. In this example we score customer complaints using the models we trained in the *Classify_notes_model_build* notebook. 

In [5]:
import watson_nlp

from watson_core.data_model.streams.resolver import DataStreamResolver
from watson_core.toolkit import fileio
from watson_nlp.blocks.classification.svm import SVM

### 1. Load customer interaction notes for scoring

**Important:** Regenerate the data access code for the file in your project, and rename the generated DataFrame to `complaint_df`.

The CSV file that we will use for scoring is *notes_scoring.csv*.

In [8]:
import os, types
import pandas as pd
from botocore.client import Config
import ibm_boto3

def __iter__(self):
    """Stub bound onto the COS response body so it exposes an ``__iter__`` attribute."""
    return 0

# @hidden_cell
# The following code accesses a file in your IBM Cloud Object Storage.
# The API key is read from the environment so that it is never stored in (or
# shared with) the notebook. Set IBM_COS_API_KEY before running this cell.
# NOTE(review): any key that was previously embedded in this cell should be rotated.
cos_api_key = os.environ.get('IBM_COS_API_KEY')
if not cos_api_key:
    raise RuntimeError(
        "Set the IBM_COS_API_KEY environment variable to your IBM Cloud Object Storage API key."
    )

cos_client = ibm_boto3.client(service_name='s3',
    ibm_api_key_id=cos_api_key,
    ibm_auth_endpoint="https://iam.cloud.ibm.com/oidc/token",
    config=Config(signature_version='oauth'),
    endpoint_url='https://s3.private.us-south.cloud-object-storage.appdomain.cloud')

bucket = 'llmworkshopyi-donotdelete-pr-zvg5eylmuyhhek'
object_key = 'notes_scoring.csv'

body = cos_client.get_object(Bucket=bucket,Key=object_key)['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

complaint_df = pd.read_csv(body)
complaint_df.head()

Unnamed: 0,Customer_id,Consumer complaint narrative
0,1,I am writing you this statement to delete the ...
1,2,"On XX/XX/2023, I entered a Bank of America Bra..."
2,3,"Folks, I am in XXXX XXXX. I was living and wor..."
3,4,"On XX/XX/XXXX, XX/XX/XXXX and XX/XX/XXXX I was..."
4,5,I placed an order for a product with Best Buy ...


In [9]:
# Name of the DataFrame column that holds the customer notes to be scored
text_col = "Consumer complaint narrative"

### 2. Load models

In [10]:
# Stock Watson NLP models that are used together with our trained models:
#   - syntax_model: the 'syntax_izumo_en_stock' syntax model
#   - use_model:    the 'embedding_use_en_stock' USE embedding model
# The generator loads them in the order listed (syntax first, then embedding).
syntax_model, use_model = (
    watson_nlp.load(stock_model_name)
    for stock_model_name in ('syntax_izumo_en_stock', 'embedding_use_en_stock')
)

In [11]:
# Fetch the two classifiers that were previously saved in the project,
# then hand each stored asset to Watson NLP to load it for scoring.
svm_asset = wslib.load_data('classify_notes_svm')
svm_model = watson_nlp.load(svm_asset)

ensemble_asset = wslib.load_data('classify_notes_ensemble')
ensemble_model = watson_nlp.load(ensemble_asset)

###  3. Invoke Scoring

In [12]:
# Scoring function applied to each customer note
def predict_product(text):
    """Classify one customer note with both the SVM and the ensemble model.

    Args:
        text: The note text to score.

    Returns:
        A ``(predicted_svm, predicted_ensemble)`` tuple. Each element is the
        ``class_name`` of the first entry in that model's ``classes`` list.
    """
    def first_class_name(prediction):
        # NOTE(review): entry 0 is presumably the highest-confidence class - confirm.
        return prediction.to_dict()["classes"][0]["class_name"]

    # SVM path: syntax analysis -> USE embedding -> SVM classifier
    syntax_analysis = syntax_model.run(text)
    embedding = use_model.run(syntax_analysis, doc_embed_style='raw_text')
    svm_label = first_class_name(svm_model.run(embedding))

    # Ensemble path: the ensemble model is run on the text directly
    ensemble_label = first_class_name(ensemble_model.run(text))

    return (svm_label, ensemble_label)

In [13]:
# Invoke scoring: each note yields a (SVM label, ensemble label) tuple
predictions = complaint_df[text_col].apply(predict_product)

# Build the frame on complaint_df's own index so that the index-based merge
# with the original dataset lines up row for row, whatever that index is.
predictions_df = pd.DataFrame(
    predictions.tolist(),
    index=complaint_df.index,
    columns=['Predicted SVM', 'Predicted Ensemble'],
)

# Display the results (for testing only)
predictions_df.head()

Unnamed: 0,Predicted SVM,Predicted Ensemble
0,Debt collection,"Credit reporting, credit repair services, or o..."
1,Checking or savings account,Checking or savings account
2,Checking or savings account,Checking or savings account
3,Debt collection,Debt collection
4,Credit card or prepaid card,Credit card or prepaid card


In [14]:
# Attach the predicted labels to the original notes, matching rows by index
# (left join: every row of complaint_df is kept)
result_df = pd.merge(
    complaint_df,
    predictions_df,
    left_index=True,
    right_index=True,
    how='left',
)
result_df.head()

Unnamed: 0,Customer_id,Consumer complaint narrative,Predicted SVM,Predicted Ensemble
0,1,I am writing you this statement to delete the ...,Debt collection,"Credit reporting, credit repair services, or o..."
1,2,"On XX/XX/2023, I entered a Bank of America Bra...",Checking or savings account,Checking or savings account
2,3,"Folks, I am in XXXX XXXX. I was living and wor...",Checking or savings account,Checking or savings account
3,4,"On XX/XX/XXXX, XX/XX/XXXX and XX/XX/XXXX I was...",Debt collection,Debt collection
4,5,I placed an order for a product with Best Buy ...,Credit card or prepaid card,Credit card or prepaid card


### 4. Save Scoring Results in the project

In [13]:
# Write the scoring results to the project as a CSV data asset.
# result_df is serialized without its index and encoded to bytes for upload.
csv_payload = result_df.to_csv(index=False).encode()

# overwrite=True lets this cell be re-run: without it, saving fails once
# "notes_scoring_results.csv" already exists in the project.
wslib.save_data("notes_scoring_results.csv", csv_payload, overwrite=True)

{'name': 'notes_scoring_results.csv',
 'asset_type': 'data_asset',
 'asset_id': '9d63c8ca-283b-47e2-becf-080a02d5c509',
 'attachment_id': '28a99bac-47a2-44d3-a353-0cd63a7edece',
 'filepath': 'notes_scoring_results.csv',
 'data_size': None,
 'mime': 'text/csv',
 'summary': ['created file', 'created data asset', 'created attachment']}