# 7. Model Inference

## Import Libraries

In [14]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense, Concatenate, Input, Dropout
from tensorflow.keras.models import load_model, Sequential, Model
from tensorflow.keras.layers import TextVectorization


# for text preprocess
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
nltk.download('stopwords')
nltk.download('punkt')

# save model
import pickle

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/fredericksembiring/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/fredericksembiring/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Import Model

In [15]:
# load model
model_1 = load_model('model.keras')

In [16]:
#load TextVectorization model (digunakan jika local tidak bisa masukin textvectorization ke model)

# vectorization_data = pickle.load(open('vectorizer.pkl', 'rb'))
# vectorizer = TextVectorization.from_config(vectorization_data['config'])

## Create Data Dummy

In [18]:
# Assuming data_dummy is your dictionary
data_dummy = {
    'date_received': [
        '2018-07-29', '2020-03-20', '2022-04-19'
    ],
    'product': [
        'Debt collection', 'Debt collection', 'Debt collection'
    ],
    'sub_product': [
        'Medical', 'Credit card', 'I do not know'
    ],
    'issue': [
        "Cont'd attempts collect debt not owed", "False statements or representation", "Communication tactics"
    ],
    'sub_issue': [
        "Debt was paid", "Attempted to collect wrong amount", "Frequent or repeated calls"
    ],
    'consumer_complaint_narrative': [
        "I do not own anymore debt from this bank anymore. Please dont send me any more debt bill. ", "Why am I must pay double the amount of my bill. This is a harrasment and a scam in progress!", "STOP CALLING ME FOR NO REASON! I REALLY DESPISE THIS KIND OF MARKETING TACTICS!"
    ],
    'company_public_response': [
        "We're sorry to hear that. We will communicate with billing division to solve this problem.", "We're sorry to hear that. We will communicate with billing division to check for this problem", "Hi, sorry to hear that. We will evaluate our program to suit best for our customer's need. Thank you"
    ],
    'company': [
        "Genesis Lending", "Paypal", "Roquemore Holdings LLC"
    ],
    'state': [
        "AE", "NJ", "NY"
    ],
    'zip_code': [
        "092XX", "076XX", "146XX"
    ],
    'tags': [
        "Older American", "Older American", "Older American"
    ],
    'consumer_consent_provided?': [
        "Consent provided", "Consent provided", "Consent provided"
    ],
    'submitted_via': [
        "Web", "Web", "Web"
    ],
    'date_sent_to_company': [
        "2018-07-12", "2020-03-10", "2022-04-10"
    ],
    'company_response_to_consumer': [
        "Closed", "Closed", "Closed"
    ],
    'timely_response?': [
        "Yes", "Yes", "Yes"
    ],
    'consumer_disputed?': [
        "No", "No", "No"
    ],
    'complaint_id': [
        1807128, 2003104, 2204107
    ],


}

# Create DataFrame
df_data_dummy = pd.DataFrame(data_dummy)

# Display DataFrame
print(df_data_dummy.head())

  date_received          product    sub_product  \
0    2018-07-29  Debt collection        Medical   
1    2020-03-20  Debt collection    Credit card   
2    2022-04-19  Debt collection  I do not know   

                                   issue                          sub_issue  \
0  Cont'd attempts collect debt not owed                      Debt was paid   
1     False statements or representation  Attempted to collect wrong amount   
2                  Communication tactics         Frequent or repeated calls   

                        consumer_complaint_narrative  \
0  I do not own anymore debt from this bank anymo...   
1  Why am I must pay double the amount of my bill...   
2  STOP CALLING ME FOR NO REASON! I REALLY DESPIS...   

                             company_public_response                 company  \
0  We're sorry to hear that. We will communicate ...         Genesis Lending   
1  We're sorry to hear that. We will communicate ...                  Paypal   
2  Hi, sorry 

## Create Combined Narrative

In [19]:
#combine issue, sub_issue, and consumer_complaint_narrative into one column
df_data_dummy['sentence'] = df_data_dummy['issue'] + ' ' + df_data_dummy['sub_issue'] + ' ' + df_data_dummy['consumer_complaint_narrative']

## Text Cleaning

In [20]:
# Define Stopwords
stop_words = set(stopwords.words('english'))

# Define Stemming
lemmatizer = WordNetLemmatizer()

In [21]:
# Create A Function for Text Preprocessing
def text_preprocessing(text):
  # Case folding
  text = text.lower()

  # Mention removal
  text = re.sub("@[A-Za-z0-9_]+", " ", text)

  # Hashtags removal
  text = re.sub("#[A-Za-z0-9_]+", " ", text)

  # Newline removal (\n)
  text = re.sub(r"\\n", " ",text)

  # Whitespace removal
  text = text.strip()

  # URL removal
  text = re.sub(r"http\S+", " ", text)
  text = re.sub(r"www.\S+", " ", text)

  # Non-letter removal (such as emoticon, symbol (like μ, $, 兀), etc
  text = re.sub("[^A-Za-z\s']", " ", text)
  text = re.sub("'", "", text)

  # Tokenization
  tokens = word_tokenize(text)

  # Stopwords removal
  tokens = [word for word in tokens if word not in stop_words]

  # Stemming
  tokens = [lemmatizer.lemmatize(word) for word in tokens]

  # Combining Tokens
  text = " ".join(tokens)

  return text

In [22]:
sentence = df_data_dummy['sentence'].apply(lambda x: text_preprocessing(x))
sentence

0    contd attempt collect debt owed debt paid anym...
1    false statement representation attempted colle...
2    communication tactic frequent repeated call st...
Name: sentence, dtype: object

## Inference

### Text Vectorizer

In [27]:
#Adapt and set weight for TextVectorizer (digunakan jika local tidak bisa masukin textvectorization ke model)

# vectorizer.adapt(sentence)
# vectorizer.set_weights(vectorization_data['weights'])

In [28]:
#vectorize data before prediction (digunakan jika local tidak bisa masukin textvectorization ke model)

# sentence_vectd = vectorizer(sentence)

### Prediction

In [32]:
#prediction
predict_proba = model_1.predict(sentence)


predict_proba[:,0] = np.where(predict_proba[:,0] > 0.1, 1, 0)  # Thresholding light complaints at 0.1
predict_proba[:,1] = np.where(predict_proba[:,1] > 0.1, 1, 0)  # Thresholding harsh complaints at 0.1
predict_proba[:,2] = np.where(predict_proba[:,2] > 0.75, 1, 0)  # Thresholding mid complaints at 0.75

prediction = predict_proba[0]
print(f'Prediction: {prediction}')
predicted_label = []
if prediction[0] == 1:
    predicted_label.append('light complaint')
if prediction[1] == 1:
    predicted_label.append('harsh complaint')
if prediction[2] == 1:
    predicted_label.append('mid complaint')

print('label for predicted sentence')
for x in predicted_label:
    print(f"-{x}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
Prediction: [1. 1. 1.]
