In [1]:
import phoenix as px
import llama_index
from llama_index.callbacks import arize_phoenix
from sqlalchemy import create_engine
from glob import glob
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Start the local Phoenix app and register "arize_phoenix" as llama_index's
# global handler (presumably so later llama_index calls are traced in Phoenix).
# NOTE (original author): this setup does not work with the JSON Query Engine.
px.launch_app()
llama_index.core.set_global_handler("arize_phoenix")

🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


In [3]:
# API keys read by the OpenAI, Tavily and Cohere clients.
# setdefault() leaves a key that is already exported in the environment
# untouched; a plain assignment would overwrite a real key with the empty
# placeholder and break every later API call. Each variable is still guaranteed
# to exist afterwards, so later os.environ[...] lookups do not raise KeyError.
# Never paste real keys into this cell -- export them before starting Jupyter.
for _key_name in ("OPENAI_API_KEY", "TAVILY_API_KEY", "COHERE_API_KEY"):
    os.environ.setdefault(_key_name, "")

# Create Pseudo DB

In [4]:
# create_engine() needs a full database URL ("dialect:///path"); a bare file
# path cannot be parsed as one. The ".db" file is assumed to be SQLite --
# TODO confirm, and replace the placeholder path with the real location.
engine = create_engine("sqlite:///path_to_/my_database_no_agg.db")

In [5]:
from llama_index.core import SQLDatabase

# Wrap the SQLAlchemy engine in llama_index's SQLDatabase.
sql_database = SQLDatabase(engine)

In [27]:
import os
import json

# Folder holding the table-description JSON files. Each file is expected to
# provide "table_name" and "table_summary" keys, which are read later when the
# SQLTableSchema objects are built.
folder_path = "../../misc/TableInfo"

# Parsed contents of every *.json file found in folder_path.
table_infos = []

# os.listdir() returns entries in arbitrary order; sorting keeps the order of
# table_infos (and everything derived from it) stable between runs and machines.
for filename in sorted(os.listdir(folder_path)):
    # Skip anything that is not a JSON file.
    if not filename.endswith(".json"):
        continue

    file_path = os.path.join(folder_path, filename)

    # Explicit UTF-8: without it open() falls back to the locale encoding,
    # which can mis-decode non-ASCII JSON text on some platforms.
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    table_infos.append(data)

# Show what was loaded.
print(table_infos)

[{'table_name': 'applprev', 'table_summary': 'The table contains information about case_id, max_actualdpd_943P, max_annuity_853A, max_credacc_actualbalance_314A, max_credacc_credlmt_575A, and other related data for different cases.'}, {'table_name': 'base', 'table_summary': 'The table contains case_id, date_decision, MONTH, WEEK_NUM, and target columns with corresponding data.'}, {'table_name': 'credit_bureau_a', 'table_summary': "The table contains dept 1 information about credit bureau of each case_id from datasource 'a'. Contains columns of case IDs and various credit-related attributes such as credit limits, debt outstanding, overdue amounts, installment amounts, dates, financial institutions, and more."}, {'table_name': 'credit_bureau_b', 'table_summary': "The table contains dept 1 information about credit bureau of each case_id from datasource 'b'. Contains columns of case IDs and various credit-related attributes such as maximum amounts, credit limits, debt values, installment a

In [29]:
from llama_index.core.objects import (
    SQLTableNodeMapping,
    ObjectIndex,
    SQLTableSchema,
)
from llama_index.core import SQLDatabase, VectorStoreIndex
# create_engine() needs a full database URL ("dialect:///path"); a bare file
# path cannot be parsed as one. The ".db" file is assumed to be SQLite --
# TODO confirm, and replace the placeholder path with the real location.
engine = create_engine("sqlite:///path_to_/my_database_no_agg.db")
sql_database = SQLDatabase(engine)

# Mapping helper bound to the database, plus one SQLTableSchema per loaded
# table description: the table name and its summary text as context_str.
table_node_mapping = SQLTableNodeMapping(sql_database)
table_schema_objs = [
    SQLTableSchema(table_name=t["table_name"], context_str=t["table_summary"])
    for t in table_infos
]

# Add Data description

In [44]:
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings

# Configure llama_index's global Settings object with the OpenAI chat model
# and the OpenAI embedding model to use.
Settings.llm = OpenAI(model="gpt-3.5-turbo")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")

In [71]:
from llama_index.core import SimpleDirectoryReader

# Raw string: in a normal literal the backslash sequences in this Windows path
# (\C, \K, \l, \d) are invalid escapes, which newer Python versions warn about
# and will eventually reject. The resulting path value is unchanged.
# NOTE(review): absolute, machine-specific path -- consider a configurable,
# relative data directory so the notebook runs elsewhere.
reader = SimpleDirectoryReader(r"D:\Coding\KBTG_HACK\langchain\data_description")

In [72]:
# Load the contents of the data-description directory via the reader; `docs`
# is what the vector index is built from.
docs = reader.load_data()

# VectorStore

In [74]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.response.pprint_utils import pprint_response

In [75]:
# Build a vector store index over the loaded data-description documents.
index = VectorStoreIndex.from_documents(documents=docs)

# RAG

In [76]:
from llama_index.postprocessor.cohere_rerank import CohereRerank
# Read the Cohere key from the environment (raises KeyError if the variable is
# not set) and build a Cohere reranker configured with top_n=2.
api_key = os.environ["COHERE_API_KEY"]
cohere_rerank = CohereRerank(api_key=api_key, top_n=2)

In [92]:
# Query engine over the vector index: requests similarity_top_k=3 and applies
# the Cohere reranker as a node post-processor.
vector_engine_options = {
    "similarity_top_k": 3,
    "node_postprocessors": [cohere_rerank],
}
vector_engine = index.as_query_engine(**vector_engine_options)


In [105]:
# Feature-engineering question for the reranked vector query engine.
# The prompt wording is kept exactly as originally written.
feature_question = "If I were to create Machine Learning to Predict credit score, I want you to act as an expert and tell me 10 features that expert would extract by modified table to make the model better and how to create from the columns. Assume I turn the table in to pandas DF then also write the pandas Executable"
response = vector_engine.query(feature_question)

In [106]:
# Pretty-print the answer; show_source=True also requests the source nodes.
pprint_response(response, show_source=True)

Final Response: The expert would likely extract the following 10
features from the modified table to enhance the credit score
prediction model:  1. Average Debt Past Due Value 2. Total Credit
Limit 3. Number of Credit Contracts 4. Maximum Days Past Due 5.
Installment Amount 6. Effective Interest Rate 7. Number of Pending
Payments 8. Purpose of Credit 9. Residual Amount 10. Total Amount of
Credit  To create these features from the columns in the table, you
can use aggregation functions like mean, sum, max, etc. based on the
specific requirements of each feature. Below is the pandas executable
code to create these features:  ```python # Assuming 'df' is the
pandas DataFrame containing the table 'credit_bureau_b'  # Feature 1:
Average Debt Past Due Value df['avg_debt_past_due'] =
df[['debtpastduevalue_732A', 'dpd_550P', 'dpd_733P']].mean(axis=1)  #
Feature 2: Total Credit Limit df['total_credit_limit'] =
df[['credlmt_1052A', 'credlmt_228A', 'credlmt_3940954A']].sum(axis=1)
# Feature 3: Nu

# Decompose Queries RAG

In [88]:
from llama_index.core.indices.query.query_transform.base import (
    StepDecomposeQueryTransform,
)

In [89]:
# Separate gpt-3.5-turbo client with temperature=0, used by the
# query-decomposition transform and the multi-step query engine.
gpt35 = OpenAI(temperature=0, model="gpt-3.5-turbo")

In [90]:
# Step-decomposition query transform driven by gpt35. With verbose=True the
# "Current query" / "New query" lines are echoed while a query runs.
step_decompose_transform_gpt3 = StepDecomposeQueryTransform(llm=gpt35, verbose=True)

In [65]:

from llama_index.core.query_engine import MultiStepQueryEngine

# Plain query engine over the vector index that answers with the gpt35 client;
# it is the inner engine wrapped by the multi-step engine below.
base_query_engine = index.as_query_engine(llm=gpt35)

# Multi-step engine: combines the inner engine with the step-decomposition
# transform. (A stray, incomplete `query_` token inside this call made the
# cell a syntax error; it has been removed.)
query_engine = MultiStepQueryEngine(
    query_engine=base_query_engine,
    query_transform=step_decompose_transform_gpt3,
    index_summary="use to answer questions about the database",
)


In [67]:
# Ask the multi-step engine for a brief summary of three credit-bureau columns.
# The prompt text is left exactly as written (typos included).
response = query_engine.query("tell be breif summary 3 columns meaning from the table credit bureau")

[1;3;33m> Current query: tell be breif summary 3 columns meaning from the table credit bureau
[0m[1;3;38;5;200m> New query: What are the three columns in the credit bureau table and what do they represent?
[0m[1;3;33m> Current query: tell be breif summary 3 columns meaning from the table credit bureau
[0m[1;3;38;5;200m> New query: What is the significance of the 'classificationofcontr_1114M' column in the credit bureau table?
[0m[1;3;33m> Current query: tell be breif summary 3 columns meaning from the table credit bureau
[0m[1;3;38;5;200m> New query: What information does the 'contracttype_653M' column in the credit bureau table represent?
[0m

In [68]:
# Show the final answer as plain text.
print(response)

The three columns in the credit bureau table provide essential information about the contracts and credit details associated with clients. The 'classificationofcontr_1114M' column likely categorizes the contracts, the 'contracttype_653M' column specifies the type of contract (e.g., loan, credit card), and the 'credor_3940957M' column identifies the creditor linked to the contract agreements.


In [69]:
# Pretty-print the same response; show_source=True also lists its source nodes.
pprint_response(response, show_source=True)

Final Response: The three columns in the credit bureau table provide
essential information about the contracts and credit details
associated with clients. The 'classificationofcontr_1114M' column
likely categorizes the contracts, the 'contracttype_653M' column
specifies the type of contract (e.g., loan, credit card), and the
'credor_3940957M' column identifies the creditor linked to the
contract agreements.
______________________________________________________________________
Source Node 1/9
Node ID: 11b0cfc3-4334-4abb-b9f7-baa87456f895
Similarity: None
Text: Question: What are the three columns in the credit bureau table
and what do they represent? Answer: The three columns in the credit
bureau table are 'classificationofcontr_1114M', 'contracttype_653M',
and 'credor_3940957M'. They represent the classification of the
contract, the type of contract, and the creditor associated with the
contract, res...
______________________________________________________________________
Source Node