In [1]:
import os
import sys
# Put the shared `common` sources and this service's own sources on the import path.
# (The tools_service entry, ../../tools_service/src, is currently disabled.)
for extra_path in ("../../common/src", "../src"):
    sys.path.append(extra_path)

# Switch the working directory to the service's src folder.
os.chdir("../src")

In [2]:
# Project ID taken from the environment; raises KeyError if PROJECT_ID is not set.
# It is used below to build service URLs and the BigQuery connection string.
PROJECT_ID = os.environ["PROJECT_ID"]

In [3]:
from common import config 
from config import (VERTEX_LLM_TYPE_BISON_CHAT,
                    VERTEX_LLM_TYPE_BISON_TEXT,
                    VERTEX_AI_MODEL_GARDEN_LLAMA2_CHAT,
                    OPENAI_LLM_TYPE_GPT3_5,
                    OPENAI_LLM_TYPE_GPT4,
                    LLM_BACKEND_ROBOT_USERNAME,
                    LLM_BACKEND_ROBOT_PASSWORD,
                    LANGCHAIN_LLM,
                    REGION)
from common.utils.token_handler import UserCredentials
# Both endpoints live on the same host, so build the base URL once. The
# original literals were malformed: a doubled dot in the tools-service host
# and a single slash after "https:" in the auth URL.
base_url = f"https://{PROJECT_ID}.cloudpssolutions.com"

# Point the imported config module at the deployed tools service.
config.TOOLS_SERVICE_BASE_URL = f"{base_url}/tools-service/api/v1"

# Credentials object built from the robot account and the same host.
# NOTE(review): the third argument is presumably the base URL used for
# authentication - confirm against UserCredentials.
config.auth_client = UserCredentials(LLM_BACKEND_ROBOT_USERNAME,
                                     LLM_BACKEND_ROBOT_PASSWORD,
                                     base_url)

INFO: [config/config.py:54 - <module>()] Namespace File not found, setting job namespace as default
INFO: [config/config.py:87 - get_environ_flag()] ENABLE_GOOGLE_LLM = True
INFO: [config/config.py:87 - get_environ_flag()] ENABLE_GOOGLE_MODEL_GARDEN = True
INFO: [config/config.py:87 - get_environ_flag()] ENABLE_LLAMA2CPP_LLM = False
INFO: [config/config.py:87 - get_environ_flag()] ENABLE_OPENAI_LLM = True
INFO: [config/config.py:87 - get_environ_flag()] ENABLE_COHERE_LLM = True
INFO: [config/config.py:87 - get_environ_flag()] ENABLE_TRUSS_LLAMA2 = True
INFO: [config/config.py:249 - <module>()] Can't load llm_service_models.json: 404 Secret [projects/877759600672/secrets/llm_service_password_Llama2cpp] not found or has no versions.
INFO: [config/config.py:261 - <module>()] LLM types loaded ['OpenAI-GPT3.5', 'OpenAI-GPT4', 'Cohere', 'VertexAI-Text', 'VertexAI-Chat-V1', 'VertexAI-Chat']
INFO: [config/config.py:286 - <module>()] Embedding models loaded ['VertexAI-Embedding', 'OpenAI-Embedd

In [6]:
from common.utils.secrets import get_secret

In [7]:
# Fetch the OpenAI API key through the project's secrets helper.
# NOTE(review): within this view the key is only referenced by a commented-out
# line further down - confirm it is still needed.
OPENAI_API_KEY = get_secret("openai-api-key")

In [8]:
from google.cloud import bigquery
from sqlalchemy import *
from sqlalchemy.engine import create_engine
from sqlalchemy.schema import *
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.sql_database import SQLDatabase
from langchain.llms.openai import OpenAI
from langchain.agents import AgentExecutor
from langchain.chat_models import ChatVertexAI
from langchain_experimental.sql import SQLDatabaseChain


# BigQuery dataset that the SQL tools will query.
dataset = "fqhc_medical_transactions"

# Service account key file used for the connection.
# Change to where your service account key file is located.
service_account_file = "./data/gcp-mira-develop-gce-service-account.json"

# SQLAlchemy URL: bigquery://<project>/<dataset>?credentials_path=<key file>
sqlalchemy_url = (
    f"bigquery://{PROJECT_ID}/{dataset}"
    f"?credentials_path={service_account_file}"
)

In [9]:
# Wrap the BigQuery dataset in LangChain's SQLDatabase, using the URL built above.
db = SQLDatabase.from_uri(sqlalchemy_url)



In [10]:
# Select the GPT-4 entry from the LANGCHAIN_LLM mapping imported from config.
llm =  LANGCHAIN_LLM[OPENAI_LLM_TYPE_GPT4]

In [11]:
# Disabled alternative: construct the OpenAI LLM directly instead of using LANGCHAIN_LLM.
#llm = OpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)
# Toolkit that gives the agent its SQL tools over `db`
# (the run output below shows sql_db_list_tables and sql_db_schema being used).
toolkit = SQLDatabaseToolkit(db=db, llm=llm)

In [12]:
# Assemble the SQL agent from the LLM and toolkit defined in the previous cells.
# verbose=True prints the agent's intermediate steps.
# NOTE(review): top_k=300 is presumably the row limit the agent is asked to
# apply to its queries - confirm against create_sql_agent.
agent_executor = create_sql_agent(
    llm=llm,
    toolkit=toolkit,
    top_k=300,
    verbose=True,
)

In [13]:
# First query: ask the agent a question and request the answer as a JSON object
# with a "columns" entry and a "data" entry.
first_question = "Which CDT codes are medicaid approved? Parse the results into a JSON object with the first value as columns that contains the names of all the columns, and the second value as data that contains the result of the SQL query"
returnVal = agent_executor.run(first_question)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3mCdtCode, ClaimTransaction, FqhcProvider[0m
Thought:[32;1m[1;3mThe tables CdtCode and ClaimTransaction seem relevant to the question. I should check their schemas to understand the structure of the data.
Action: sql_db_schema
Action Input: CdtCode, ClaimTransaction[0m
Observation: [33;1m[1;3m
CREATE TABLE `CdtCode` (
	`cdtCodeId` STRING, 
	`codeDescription` STRING, 
	`medicaidApproved` BOOL
)

/*
3 rows from CdtCode table:
cdtCodeId	codeDescription	medicaidApproved
D0145	oral evaluation for a patient under three years of age and counseling with primary caregiver	False
D0171	re-evaluation – post-operative office visit	False
D0190	screening of a patient	False
*/


CREATE TABLE `ClaimTransaction` (
	`claimTransactionId` INT64, 
	`fqhcProviderId` INT64, 
	`cdtCodeId` STRING, 
	`userId` INT64, 
	`userEmail` STRING, 
	`submitDateTime` TIMESTAMP
)

/*

In [14]:
# Display the string returned by the first agent query.
returnVal

'{"columns": ["cdtCodeId", "codeDescription"], "data": [["D0120", "periodic oral evaluation - established patient"], ["D0140", "limited oral evaluation - problem focused"], ["D0150", "comprehensive oral evaluation - new or established patient"], ["D0160", "detailed and extensive oral evaluation - problem focused, by report"], ["D0170", "re-evaluation - limited, problem focused (established patient; not post-operative visit)"], ["D0180", "comprehensive periodontal evaluation - new or established patient"], ["D0210", "intraoral - complete series of radiographic images"], ["D0220", "intraoral - periapical first radiographic image"], ["D0230", "intraoral - periapical each additional radiographic image"], ["D0240", "intraoral - occlusal radiographic image"], ["D0250", "extra-oral – 2D projection radiographic image created using a stationary radiation source, and detector"], ["D0270", "bitewing - single radiographic image"], ["D0320", "temporomandibular joint arthrogram, including injection"

In [15]:
# Second query
agent_executor.run("""Provide the FQHC id and names that had a Claim Transaction that used an invalide CDT code. 
Group the results by FQHC ID. Parse the results into a JSON object with the first value as columns that contains the names of all the columns, and the second value as data that contains the result of the SQL query """)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3mCdtCode, ClaimTransaction, FqhcProvider[0m
Thought:[32;1m[1;3mThe tables CdtCode, ClaimTransaction, and FqhcProvider seem relevant to the question. I should check their schemas to understand their structure and the relationships between them.
Action: sql_db_schema
Action Input: "CdtCode, ClaimTransaction, FqhcProvider"[0m
Observation: [33;1m[1;3m
CREATE TABLE `CdtCode` (
	`cdtCodeId` STRING, 
	`codeDescription` STRING, 
	`medicaidApproved` BOOL
)

/*
3 rows from CdtCode table:
cdtCodeId	codeDescription	medicaidApproved
D0145	oral evaluation for a patient under three years of age and counseling with primary caregiver	False
D0171	re-evaluation – post-operative office visit	False
D0190	screening of a patient	False
*/


CREATE TABLE `ClaimTransaction` (
	`claimTransactionId` INT64, 
	`fqhcProviderId` INT64, 
	`cdtCodeId` STRING, 
	`userId` INT64, 

'{"columns": ["fqhcProviderId", "providerName"], "data": [[245029, "ASTOR HOME FOR CHILDREN"], [275085, "ACCESS SUPPORTS FOR LIVING INC"], [354114, "ALICE HYDE MEDICAL CENTER"], [384969, "ANTHONY L JORDAN HEALTH CTR"], [542567, "ASTOR HOME FOR CHILDREN"], [730483, "BED STUY FAMILY HLTH CTR"], [740423, "ALLEGANY REHABILITATION ASSOC"], [1551555, "AIDS CTR OF QUEENS COUNTY INC"], [1928569, "AHRC HEALTH CARE INC"], [2977099, "ALLEGANY REHABILITATION ASSOC INC"], [2994741, "ALLEGANY REHABILITATION ASSOC"], [2994769, "ASTOR HOME FOR CHILDREN"], [2996601, "AIDS CENTER QUEENS COUNTY INC"], [3801892, "ANTHONY L JORDAN HEALTH CENTER"], [4073630, "ANTHONY L JORDAN HEALTH CENTER"], [5260779, "121ST STREET FAMILY HEALTH CENTER"]]}'

In [None]:
# Alternative to the agent: use SQLDatabaseChain directly on the same LLM and database.
# verbose=True prints the chain's intermediate steps.
db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True)

In [None]:
# Ask the chain the first question, this time without the JSON-formatting instruction.
# Note: this rebinds returnVal, replacing the agent's earlier result.
returnVal =db_chain.run("Which CDT codes are medicaid approved? ")

In [None]:
# Ask the chain for providers whose claim transactions used a CDT code that is
# not Medicaid approved, grouped by provider; the result is shown as the cell output.
db_chain.run("""Provide the FQHC id and names that had a Claim Transaction that used a CDT code that was not Medicaid approved. 
Group the results by FQHC ID.  """)

In [None]:
#Use VertexAI

In [None]:
# Model name for the Vertex AI section of the notebook.
# Disabled alternative: take the text-model name from config instead.
#MODEL_NAME= VERTEX_LLM_TYPE_BISON_TEXT
MODEL_NAME = "codechat-bison"

In [None]:
# Select the Vertex AI chat entry from the LANGCHAIN_LLM mapping imported from config.
# NOTE(review): the ChatVertexAI cell that follows rebinds `llm`, so this
# assignment only takes effect if that cell is skipped.
llm =  LANGCHAIN_LLM[VERTEX_LLM_TYPE_BISON_CHAT]

In [None]:
# Build the Vertex AI chat model directly. MODEL_NAME (set two cells above) was
# defined but never passed, so the model silently fell back to the library
# default; pass it explicitly so the selected model is the one actually used.
# temperature=0.0 and max_output_tokens=256 are unchanged from the original call.
llm = ChatVertexAI(model_name=MODEL_NAME,
                   project=PROJECT_ID,
                   temperature=0.0,
                   max_output_tokens=256)

In [None]:
# Rebuild the SQL toolkit so it uses the Vertex AI `llm` (rebinds `toolkit`).
toolkit = SQLDatabaseToolkit(db=db, llm=llm)

In [None]:
# Recreate the SQL agent on top of the Vertex AI LLM and toolkit, with the same
# settings as the earlier agent (this rebinds agent_executor).
agent_executor = create_sql_agent(
    llm=llm,
    toolkit=toolkit,
    top_k=300,
    verbose=True,
)

In [None]:
# Ask the Vertex AI-backed agent the first question; the answer is shown as the cell output.
agent_executor.run("Which CDT codes are medicaid approved? ")
