# Introduction

This notebook demonstrates the model's ability to predict the level of a consumer complaint and to produce the corresponding recommended response.

# Import Libraries

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model


# for text preprocess
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt')

# save model
import pickle

2024-07-05 00:13:26.278086: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/nathanaelh/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/nathanaelh/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/nathanaelh/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


# Load Model

In [2]:
# Load the saved Keras model. 'model.keras' is a relative path, so the file
# must be present in the notebook's working directory.
model = load_model('model.keras')

# Create Dummy Data

In [3]:
# Three hand-written complaint records used to demonstrate the model.
# Each key is a column name; each list holds one value per record, so all
# lists must have the same length (3).
data_dummy = {
    'date_received': [
        '2018-07-29', '2020-03-20', '2022-04-19'
    ],
    'product': [
        'Debt collection', 'Debt collection', 'Debt collection'
    ],
    'sub_product': [
        'Medical', 'Credit card', 'I do not know'
    ],
    # 'issue', 'sub_issue' and 'consumer_complaint_narrative' are the columns
    # that get concatenated into the text passed to the model.
    'issue': [
        "Cont'd attempts collect debt not owed", "False statements or representation", "Communication tactics"
    ],
    'sub_issue': [
        "Debt was paid", "Attempted to collect wrong amount", "Frequent or repeated calls"
    ],
    'consumer_complaint_narrative': [
        "I do not own anymore debt from this bank anymore. Please dont send me any more debt bill. ", "Why am I must pay double the amount of my bill. This is a harrasment and a scam in progress!", "STOP CALLING ME FOR NO REASON! I REALLY DESPISE THIS KIND OF MARKETING TACTICS!"
    ],
    'company_public_response': [
        "We're sorry to hear that. We will communicate with billing division to solve this problem.", "We're sorry to hear that. We will communicate with billing division to check for this problem", "Hi, sorry to hear that. We will evaluate our program to suit best for our customer's need. Thank you"
    ],
    'company': [
        "Genesis Lending", "Paypal", "Roquemore Holdings LLC"
    ],
    'state': [
        "AE", "NJ", "NY"
    ],
    'zip_code': [
        "092XX", "076XX", "146XX"
    ],
    'tags': [
        "Older American", "Older American", "Older American"
    ],
    'consumer_consent_provided?': [
        "Consent provided", "Consent provided", "Consent provided"
    ],
    'submitted_via': [
        "Web", "Web", "Web"
    ],
    'date_sent_to_company': [
        "2018-07-12", "2020-03-10", "2022-04-10"
    ],
    'company_response_to_consumer': [
        "Closed", "Closed", "Closed"
    ],
    'timely_response?': [
        "Yes", "Yes", "Yes"
    ],
    'consumer_disputed?': [
        "No", "No", "No"
    ],
    'complaint_id': [
        1807128, 2003104, 2204107
    ],


}

# Build a DataFrame with one row per dummy complaint
df_data_dummy = pd.DataFrame(data_dummy)

# Show the rows as plain text
print(df_data_dummy.head())

  date_received          product    sub_product  \
0    2018-07-29  Debt collection        Medical   
1    2020-03-20  Debt collection    Credit card   
2    2022-04-19  Debt collection  I do not know   

                                   issue                          sub_issue  \
0  Cont'd attempts collect debt not owed                      Debt was paid   
1     False statements or representation  Attempted to collect wrong amount   
2                  Communication tactics         Frequent or repeated calls   

                        consumer_complaint_narrative  \
0  I do not own anymore debt from this bank anymo...   
1  Why am I must pay double the amount of my bill...   
2  STOP CALLING ME FOR NO REASON! I REALLY DESPIS...   

                             company_public_response                 company  \
0  We're sorry to hear that. We will communicate ...         Genesis Lending   
1  We're sorry to hear that. We will communicate ...                  Paypal   
2  Hi, sorry 

# Create Combined Narrative

In [4]:
# Build the text fed to the model: issue, sub_issue and the complaint
# narrative, joined with single spaces into a new 'sentence' column.
df_data_dummy['sentence'] = df_data_dummy['issue'].str.cat(
    [df_data_dummy['sub_issue'], df_data_dummy['consumer_complaint_narrative']],
    sep=' ',
)

# Text Preprocessing

In [5]:
# English stop-word list from NLTK, stored as a set for fast membership tests
stop_words = set(stopwords.words('english'))

# WordNet lemmatizer (lemmatization, not stemming)
lemmatizer = WordNetLemmatizer()

In [6]:
# Create A Function for Text Preprocessing
def text_preprocessing(text):
  """Clean one piece of text and return it as space-separated lemmas.

  Steps, in order: lower-casing; removal of @mentions, #hashtags, escaped
  newlines (a backslash followed by "n"), URLs, and every character that is
  not an ASCII letter, whitespace or apostrophe; tokenization; English
  stop-word removal; WordNet lemmatization.

  Args:
    text: The raw string to clean.

  Returns:
    The cleaned text as a single string, tokens joined by one space.
  """
  # NOTE(review): inference-time cleaning should match what the model was
  # trained on -- confirm the training pipeline uses the same patterns.

  # Case folding
  text = text.lower()

  # Mention removal
  text = re.sub(r"@[A-Za-z0-9_]+", " ", text)

  # Hashtag removal
  text = re.sub(r"#[A-Za-z0-9_]+", " ", text)

  # Escaped-newline removal: matches the two characters backslash + "n".
  # Real newline characters are left alone here (they count as whitespace).
  text = re.sub(r"\\n", " ", text)

  # Leading/trailing whitespace removal
  text = text.strip()

  # URL removal. The dot after "www" is escaped so it only matches a literal
  # "."; unescaped, "www" followed by a space also matched and the next word
  # was deleted along with it.
  text = re.sub(r"http\S+", " ", text)
  text = re.sub(r"www\.\S+", " ", text)

  # Non-letter removal (such as emoticon, symbol (like μ, $, 兀), etc.
  # Raw string: "\s" in a normal string literal is an invalid escape sequence
  # and triggers a SyntaxWarning on recent Python versions.
  text = re.sub(r"[^A-Za-z\s']", " ", text)
  # Apostrophes are dropped without inserting a space ("don't" -> "dont")
  text = re.sub("'", "", text)

  # Tokenization
  tokens = word_tokenize(text)

  # Stopwords removal
  tokens = [word for word in tokens if word not in stop_words]

  # Lemmatization
  tokens = [lemmatizer.lemmatize(word) for word in tokens]

  # Combining tokens back into one string
  text = " ".join(tokens)

  return text

In [7]:
# Clean every combined sentence; the function is passed directly to apply
sentence = df_data_dummy['sentence'].apply(text_preprocessing)
sentence

0    contd attempt collect debt owed debt paid anym...
1    false statement representation attempted colle...
2    communication tactic frequent repeated call st...
Name: sentence, dtype: object

# Make Prediction

In [12]:
# Prediction: one row of class probabilities per input sentence
predict_proba = model.predict(sentence)

# Label names and decision thresholds, both in output-column order
# (column 0 = light, column 1 = harsh, column 2 = mild).
LABEL_NAMES = ['light complaint', 'harsh complaint', 'mild complaint']
# Same dtype as the predictions so the comparison is done at the same
# precision as the probabilities themselves.
thresholds = np.array([0.1, 0.1, 0.75], dtype=predict_proba.dtype)

# Replace each probability with 1 if it is above its column's threshold,
# else 0. The dtype is kept so the printed vector looks like [0. 1. 1.].
predict_proba = (predict_proba > thresholds).astype(predict_proba.dtype)

# Only the first complaint is reported below
label_1 = predict_proba[0]

print(f'Prediction: {label_1}')
# Multi-label output: every class whose flag is set is reported
predicted_label = [name for name, flag in zip(LABEL_NAMES, label_1) if flag == 1]

print('label for predicted sentence')
for x in predicted_label:
    print(f"- {x}")

Prediction: [0. 1. 1.]
label for predicted sentence
- harsh complaint
- mild complaint


In [13]:
def response(prediction):
    """Translate a thresholded prediction vector into a handling recommendation.

    Args:
        prediction: Three 0/1 flags in the order (light, harsh, mild), as
            produced by the thresholding step. A NumPy array, list or tuple
            is accepted.

    Returns:
        'Requires Immediate Attention' if the flags match one of the urgent
        patterns listed below, otherwise 'Follow The Standard Time Protocol'.
    """
    # Flag combinations that are escalated; every other combination follows
    # the standard protocol.
    urgent_patterns = (
        (0, 1, 1),
        (1, 0, 1),
        (0, 1, 0),
        (1, 1, 1),
    )

    # asarray makes the element-wise comparison work for plain lists/tuples
    # too; comparing a list directly yields a bool that has no .all().
    flags = np.asarray(prediction)

    if any((flags == pattern).all() for pattern in urgent_patterns):
        return 'Requires Immediate Attention'
    return 'Follow The Standard Time Protocol'

# Generate the response for label_1, the thresholded prediction of the first
# dummy complaint
response_message = response(label_1)
print(response_message)

Requires Immediate Attention


Based on this output, the model has successfully predicted the complaint level.