## Load CFPB Complaints data - 1K records ##


In [None]:
import pandas as pd
import numpy as np
import json
from teradataml import *

from collections import OrderedDict
from teradatasqlalchemy.types import *

# Read the session settings (credentials and host) from the local JSON file.
with open('vars-vs_demo.json') as vars_file:
    session_vars = json.load(vars_file)

# Pick the SYSDBA credentials and the environment host out of the settings.
sysdba_entry = session_vars['hierarchy']['SYSDBA']
name, pwd = sysdba_entry['username'], sysdba_entry['password']
host = session_vars['environment']['host']

# Open the teradataml connection context with those credentials.
eng = create_context(host=host, username=name, password=pwd)

# Database (schema) that the tables in the following cells are written to.
database = 'demo_ofs'

In [None]:
# Load the topics-of-interest rows from the local CSV.
df_topics = pd.read_csv('Data/topics_of_interest.csv')

# Drop any previous copy of the table so the CREATE below starts clean.
try:
    execute_sql(f'DROP TABLE {database}.topics_of_interest;')
except Exception as err:
    # Teradata error 3807 means the object does not exist, i.e. there was
    # nothing to drop on a first run. Any other failure is re-raised.
    if "3807" not in str(err.args):
        raise

# Explicit DDL: column-partitioned, no-primary-index table on TD_OFSSTORAGE.
qry = f'''
CREATE MULTISET TABLE {database}.topics_of_interest,
     STORAGE = TD_OFSSTORAGE 
     (
      id BIGINT,
      txt VARCHAR(1024) CHARACTER SET UNICODE NOT CASESPECIFIC)
NO PRIMARY INDEX 
PARTITION BY COLUMN;'''

execute_sql(qry)

# if_exists='append' inserts into the table created above, so the explicit
# column definitions are kept.
copy_to_sql(
    df_topics,
    table_name='topics_of_interest',
    schema_name=database,
    if_exists='append',
)

In [None]:
# Read the topic embeddings CSV and write it to <database>.topics_embeddings;
# if_exists='replace' recreates the table when it is already there.
df_topics_embeddings = pd.read_csv('Data/Topics_Embeddings.csv')
copy_to_sql(
    df_topics_embeddings,
    schema_name=database,
    table_name='topics_embeddings',
    if_exists='replace',
)

In [None]:
# Load the complaints sample from the local CSV.
df_complaints = pd.read_csv('Data/CFPB_Complaints.csv')

# Force every object-dtype column to plain Python strings before the upload.
# NOTE(review): astype(str) also stringifies missing values (NaN most likely
# becomes the literal text 'nan' rather than NULL) - confirm this is intended.
for column in df_complaints.columns:
    if df_complaints[column].dtype == 'O':
        df_complaints[column] = df_complaints[column].astype(str)

# Drop any previous copy of the table so the CREATE below starts clean.
try:
    execute_sql(f'DROP TABLE {database}.CFPB_Complaints_1K;')
except Exception as err:
    # Teradata error 3807 means the object does not exist, i.e. there was
    # nothing to drop on a first run. Any other failure is re-raised.
    if "3807" not in str(err.args):
        raise

# Explicit DDL: no-primary-index table on TD_OFSSTORAGE with one column per
# CSV field (quoted identifiers keep the original names with spaces / '?').
qry = f'''
CREATE MULTISET TABLE {database}.CFPB_Complaints_1K,
STORAGE = TD_OFSSTORAGE   (
    "Date received" DATE,
    Product VARCHAR(100) CHARACTER SET UNICODE NOT CASESPECIFIC,     
    "Sub-product" VARCHAR(100) CHARACTER SET UNICODE NOT CASESPECIFIC,
    Issue VARCHAR(200) CHARACTER SET UNICODE NOT CASESPECIFIC,
    "Sub-issue" VARCHAR(200) CHARACTER SET UNICODE NOT CASESPECIFIC,
    txt VARCHAR(1024) CHARACTER SET UNICODE NOT CASESPECIFIC,
    "Company public response" VARCHAR(1024) CHARACTER SET UNICODE NOT CASESPECIFIC,
    Company VARCHAR(100) CHARACTER SET UNICODE NOT CASESPECIFIC,
    State VARCHAR(3) CHARACTER SET UNICODE NOT CASESPECIFIC,
    "ZIP code" VARCHAR(10) CHARACTER SET UNICODE NOT CASESPECIFIC,
    Tags VARCHAR(100) CHARACTER SET UNICODE NOT CASESPECIFIC,
    "Consumer consent provided?" VARCHAR(5) CHARACTER SET UNICODE NOT CASESPECIFIC,
    "Submitted via" VARCHAR(10) CHARACTER SET UNICODE NOT CASESPECIFIC,
    "Date sent to company" DATE,
    "Company response to consumer" VARCHAR(1024) CHARACTER SET UNICODE NOT CASESPECIFIC,
    "Timely response?" VARCHAR(2) CHARACTER SET UNICODE NOT CASESPECIFIC,
    "Consumer disputed?" VARCHAR(2) CHARACTER SET UNICODE NOT CASESPECIFIC,
    id BIGINT)
    NO PRIMARY INDEX ;'''

execute_sql(qry)

# if_exists='append' inserts into the table created above, so the explicit
# column definitions are kept.
copy_to_sql(
    df_complaints,
    table_name='CFPB_Complaints_1K',
    schema_name=database,
    if_exists='append',
)

In [None]:
# Read the complaint embeddings CSV and write it to <database>.CFPB_embeddings_1K;
# if_exists='replace' recreates the table when it is already there.
df_complaints_embeddings = pd.read_csv('Data/CFPB_Embeddings.csv')
copy_to_sql(
    df_complaints_embeddings,
    schema_name=database,
    table_name='CFPB_embeddings_1K',
    if_exists='replace',
)

In [None]:
# Tear down the teradataml context that create_context() opened at the start of the notebook.
remove_context()