In [1]:
import numpy as np
import pandas as pd
import timeit
import boto3
from tqdm import tqdm
from teradataml import *
from langchain.llms.bedrock import Bedrock

# Cap the number of rows shown when a DataFrame is displayed at 5.
# NOTE(review): `display` is presumably teradataml's display-options object pulled in by the
# star import above — confirm.
display.max_rows = 5

In [2]:
# Run the shared startup notebook inside this kernel's namespace (-i). The cell output shows it
# prompting for a password, so `password` used below is presumably defined there — verify in startup.ipynb.
%run -i ../startup.ipynb
# Open the database connection as demo_user and keep the returned engine so it can be printed.
eng = create_context(host = 'host.docker.internal', username='demo_user', password = password)
print(eng)
# Set the session query band so this session's queries are tagged with the demo notebook name.
execute_sql('''SET query_band='DEMO=Topic_Modelling.ipynb;' UPDATE FOR SESSION;''')

Performing setup ...
Setup complete



Enter password:  ········


... Logon successful
Connected as: xxxxxsql://demo_user:xxxxx@host.docker.internal/dbc
Engine(teradatasql://demo_user:***@host.docker.internal)


TeradataCursor uRowsHandle=15 bClosed=False

In [3]:
# Load the demo data by calling the get_data procedure through the run_procedure.py helper.
# Two variants exist (cloud / local); keep exactly one of the two lines active.
# %run -i ../run_procedure.py "call get_data('DEMO_ComplaintAnalysis_cloud');"        # Takes 1 minute
%run -i ../run_procedure.py "call get_data('DEMO_ComplaintAnalysis_local');"        # Takes 2 minutes

Database DEMO_ComplaintAnalysis_local exists


In [4]:
# Reference the Consumer_Complaints table in the DEMO_ComplaintAnalysis schema as a DataFrame.
df = DataFrame(in_schema('DEMO_ComplaintAnalysis', 'Consumer_Complaints'))

In [5]:
def configure_aws():
    """Prompt for AWS credentials and store them with ``aws configure set``.

    The access key and secret key are read with hidden input; the region is
    not a secret and is read with a normal, echoed prompt so typos are visible.
    Surrounding whitespace (common when pasting keys) is stripped.

    Each value is handed to the AWS CLI as a separate argument instead of being
    interpolated into a shell command line, so characters that are special to
    the shell cannot alter the command that is executed.

    Raises:
        ValueError: if any of the three values is empty after stripping.
        FileNotFoundError: if the ``aws`` executable cannot be found.
        subprocess.CalledProcessError: if an ``aws configure set`` call fails.
    """
    # Local imports: the notebook's import cell does not import these modules explicitly.
    import getpass
    import subprocess

    print("configure the AWS CLI")
    # enter the access_key/secret_key
    settings = {
        "aws_access_key_id": getpass.getpass("aws_access_key_id ").strip(),
        "aws_secret_access_key": getpass.getpass("aws_secret_access_key ").strip(),
        "default.region": input("region name").strip(),
    }

    # Fail early instead of writing an empty setting into the CLI configuration.
    missing = [name for name, value in settings.items() if not value]
    if missing:
        raise ValueError("No value entered for: " + ", ".join(missing))

    # Persist each setting; list-form arguments bypass the shell entirely.
    for name, value in settings.items():
        subprocess.run(["aws", "configure", "set", name, value], check=True)

In [6]:
does_access_key_exists = !aws configure get aws_access_key_id

if len(does_access_key_exists) == 0:
    configure_aws()

In [7]:
# Print the effective AWS CLI configuration to confirm the credentials and region are in place
# (the CLI masks the key values in its output).
!aws configure list

      Name                    Value             Type    Location
      ----                    -----             ----    --------
   profile                <not set>             None    None
access_key     ****************GXKN shared-credentials-file    
secret_key     ****************u8mf shared-credentials-file    
    region                us-east-1      config-file    ~/.aws/config


In [8]:
# Keep only the complaints whose product is 'Mortgage'.
# Note: this rebinds `df`, so every later cell works on the filtered frame.
df = df[df.product == 'Mortgage']

In [9]:
# Count complaints per (issue, sub_issue) pair and show the most frequent pairs first.
(
    df.select(['issue', 'sub_issue', 'complaint_id'])
      .groupby(['issue', 'sub_issue'])
      .agg(['count'])
      .sort('count_complaint_id', ascending = False)
)

issue,sub_issue,count_complaint_id
Applying for a mortgage or refinancing an existing mortgage,,15
Trouble during payment process,,11
Closing on a mortgage,,3
Struggling to pay mortgage,,2
Incorrect information on your report,Account information incorrect,2


<!-- - Incorrect information on your report: Information belongs to someone else
- Problem with a purchase shown on your statement: Credit card company isn't resolving a dispute about a purchase on your statement
- Problem with a credit reporting company's investigation into an existing problem: Their investigation did not fix an error on your report
- Improper use of your report: Reporting company used your report improperly
- Improper use of your report: Credit inquiries on your report that you don't recognize

<hr>
<hr>

- Incorrect Information: Belongs to someone else.
- Purchase Dispute: Credit card company issue.
- Credit Report Error: Investigation didn’t fix it.
- Improper Use: Reporting company misuse.
- Unauthorized Inquiries: Unrecognized credit inquiries. -->

**Issues found among the mortgage complaints:**

- Applying for a mortgage or refinancing an existing mortgage
- Trouble during payment process
- Closing on a mortgage
- Incorrect information on your report
- Struggling to pay mortgage

<hr>
<hr>

**Short topic labels for these issues (the options offered to the LLM in the prompt below):**

- Mortgage Application: Applying or refinancing.
- Payment Trouble: Issues during payment.
- Mortgage Closing: Finalizing the mortgage.
- Report Inaccuracy: Incorrect information.
- Payment Struggle: Difficulty paying.

In [10]:
## Bedrock Clients
# boto3 client for the "bedrock-runtime" service; get_llm() below passes it to the LLM wrapper.
# The region is fixed to us-east-1 here rather than taken from a variable or prompt.
bedrock=boto3.client(service_name="bedrock-runtime", region_name='us-east-1')

def get_llm(model_id="ai21.j2-mid-v1", temperature=0.9, max_tokens=5, stop_sequences=("$$",)):
    """Build a Bedrock LLM wrapper bound to the module-level ``bedrock`` client.

    All arguments are optional; calling ``get_llm()`` with no arguments yields
    exactly the configuration that used to be hard-coded.

    Args:
        model_id: Bedrock model identifier.
        temperature: Sampling temperature passed to the model. Lower values
            make repeated runs of the classification prompt more repeatable.
        max_tokens: Upper bound on generated tokens ('maxTokens'). The topic
            labels are short, so a small limit keeps the answer to the label.
        stop_sequences: Sequences at which generation stops.

    Returns:
        A ``Bedrock`` instance that can be called with ``prompt=...``.
    """
    return Bedrock(
        model_id=model_id,
        client=bedrock,
        model_kwargs={
            'temperature': temperature,
            'maxTokens': max_tokens,
            "stopSequences": list(stop_sequences),
            # Penalties are explicitly disabled (scale 0).
            "countPenalty": {"scale": 0},
            "presencePenalty": {"scale": 0},
        },
    )

In [11]:
# Create the LLM wrapper once; the cells below call it with each prompt.
ai21 = get_llm()

In [12]:
# Convert the filtered complaints to a pandas DataFrame so each narrative can be
# read row by row when building prompts.
pd_df = df.to_pandas()

In [13]:
# Trial run: classify only the first complaint to check the prompt before looping over all rows.
first_narrative = pd_df['consumer_complaint_narrative'][0]

prompt = f'''
    User prompt:
    The following is text from a complaint:

    “{first_narrative}”

    Identify the topic of the complaint and categorize into one of the following topics. Only output one of the following options:

    - Mortgage Application
    - Payment Trouble
    - Mortgage Closing
    - Report Inaccuracy
    - Payment Struggle
    
    Return just one of the above options
'''

# The model's answer is the cell's last expression, so it is displayed as the cell output.
ai21(prompt=prompt)

'Mortgage Application'

In [14]:
# Add an empty string column that will hold the model's topic label for each complaint.
pd_df['Predicted_Topic'] = ''

In [15]:
# Classify every complaint narrative with the LLM.
# Results are collected in a list and written to the column in a single assignment:
#   * the earlier chained form pd_df['Predicted_Topic'][i] = ... triggers
#     SettingWithCopyWarning and does not update the frame under pandas Copy-on-Write;
#   * iterating the column directly is positional, so it does not rely on the
#     index being exactly 0..n-1.
predicted_topics = []
for narrative in tqdm(pd_df['consumer_complaint_narrative'], total=len(pd_df)):
    prompt = f'''
    User prompt:
    The following is text from a complaint:

    “{narrative}”

    Identify the topic of the complaint and categorize into one of the following topics. Only output one of the following options:

    - Mortgage Application
    - Payment Trouble
    - Mortgage Closing
    - Report Inaccuracy
    - Payment Struggle
    
    Return just one of the above options
'''
    predicted_topics.append(ai21(prompt = prompt))

# One label per row, in row order.
pd_df['Predicted_Topic'] = predicted_topics

100%|██████████| 43/43 [00:21<00:00,  2.02it/s]


In [16]:
# Display the frame to compare the new Predicted_Topic column with the original issue column.
pd_df

Unnamed: 0,date_received,product,sub_product,issue,sub_issue,consumer_complaint_narrative,company_public_response,company,state,zip_code,tags,consumer_consent_provided,submitted_via,date_sent_to_company,company_response_to_consumer,timely_response,consumer_disputed,complaint_id,Predicted_Topic
0,2022-12-04,Mortgage,Home equity loan or line of credit (HELOC),Applying for a mortgage or refinancing an exis...,,I received a postcard from XXXX about a Home E...,,DISCOVER BANK,FL,33771,,Consent provided,Web,2022-12-04,Closed with non-monetary relief,Yes,,6279246,Mortgage Application
1,2024-01-11,Mortgage,Home equity loan or line of credit (HELOC),Closing on a mortgage,Delays with the closing process,I received an email from XXXX XXXX the senior ...,,DISCOVER BANK,PA,183XX,Servicemember,Consent provided,Web,2024-01-11,Closed with explanation,Yes,,8150578,Mortgage Application
2,2022-08-10,Mortgage,Home equity loan or line of credit (HELOC),Struggling to pay mortgage,,I have a mortgage with Discover Home Loans. I ...,,DISCOVER BANK,FL,34982,Older American,Consent provided,Web,2022-08-10,Closed with explanation,Yes,,5859414,Payment Trouble
3,2022-11-14,Mortgage,Home equity loan or line of credit (HELOC),Trouble during payment process,,On XX/XX/2022 - Discover took duplicate paymen...,,DISCOVER BANK,GA,30040,Servicemember,Consent provided,Web,2022-11-14,Closed with explanation,Yes,,6200804,Payment Trouble
4,2018-01-23,Mortgage,Home equity loan or line of credit (HELOC),Closing on a mortgage,,I am employed as a XXXX XXXX for a XXXX XXXX X...,,DISCOVER BANK,FL,33193,,Consent provided,Web,2018-01-23,Closed with explanation,Yes,,2790846,Payment Trouble
5,2022-03-08,Mortgage,Home equity loan or line of credit (HELOC),Applying for a mortgage or refinancing an exis...,,Discover Bank advertises Home Equity loan rate...,,DISCOVER BANK,TX,75043,,Consent provided,Web,2022-03-08,Closed with explanation,Yes,,5298752,Mortgage Application
6,2018-10-14,Mortgage,Home equity loan or line of credit (HELOC),Applying for a mortgage or refinancing an exis...,,Our first mortgage is privately held. It is re...,,DISCOVER BANK,CA,91730,,Consent provided,Web,2018-10-14,Closed with explanation,Yes,,3045578,Mortgage Application
7,2015-10-13,Mortgage,Home equity loan or line of credit,"Loan servicing, payments, escrow account",,I am currently in the process of a refinance. ...,,DISCOVER BANK,CA,91913,,Consent provided,Web,2015-10-15,Closed with monetary relief,Yes,No,1604243,Mortgage Application
8,2019-11-11,Mortgage,Home equity loan or line of credit (HELOC),Applying for a mortgage or refinancing an exis...,,Discover Home Loans advertisers a process in ...,,DISCOVER BANK,AL,350XX,,Consent provided,Web,2019-11-11,Closed with explanation,Yes,,3434676,Mortgage Application
9,2017-01-12,Mortgage,Home equity loan or line of credit,"Application, originator, mortgage broker",,I applied for a Home Equity Loan through Disco...,,DISCOVER BANK,OH,44313,,Consent provided,Web,2017-01-17,Closed with explanation,Yes,Yes,2286272,Mortgage Application


In [17]:
# Write the labelled complaints back to the database as table 'topic_prediction';
# if_exists='replace' overwrites a table of that name left by an earlier run.
copy_to_sql(df = pd_df, table_name = 'topic_prediction', if_exists = 'replace')

In [18]:
# Tear down the database context opened by create_context at the top of the notebook.
remove_context()

True