# AI Cyoda configurations Q&A with RAG and LangChain

This notebook is a playground for experimenting with connection generation.

Install requirements

In [None]:
pip install -r ../requirements.txt

### Load environment variables

In [None]:
from dotenv import load_dotenv
import os

# Load variables from a local .env file (if any) into the process environment,
# then read every setting the notebook needs. A missing variable raises
# KeyError here, rather than failing later in an unrelated cell.
load_dotenv()
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
WORK_DIR = os.environ["WORK_DIR"]
API_KEY = os.environ["CYODA_API_KEY"]
API_SECRET = os.environ["CYODA_API_SECRET"]
# Strip a trailing slash so the result never contains "//api".
API_URL = os.environ["CYODA_API_URL"].rstrip("/") + "/api"
GRPC_ADDRESS = os.environ["GRPC_ADDRESS"]
# Bearer token; filled in by the authentication cell.
TOKEN = ""

In [None]:
import logging
# Configure the root logger first, then create this notebook's own logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [None]:
%%script echo skipping
##for google colab (optional)
# This cell is optional and can be skipped
from google.colab import userdata
# Read the secrets from the Colab user-data store. The OpenAI key must go into
# OPENAI_API_KEY: API_KEY holds the Cyoda login used by the authentication cell.
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
WORK_DIR = userdata.get('WORK_DIR')

### Handle unsupported version of sqlite3 (optional)

In [None]:
pip install pysqlite3-binary

In [None]:
import sys

# Import pysqlite3 and register it in sys.modules under the name "sqlite3",
# so any later `import sqlite3` in this process receives pysqlite3 instead.
# This must run before anything imports sqlite3.
__import__("pysqlite3")
sys.modules["sqlite3"] = sys.modules["pysqlite3"]

### Initialize ChatOpenAI

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import GitLoader, DirectoryLoader, TextLoader
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.schema import HumanMessage

In [None]:
# Chat model shared by the history-aware retriever and the answering chain.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    openai_api_key=OPENAI_API_KEY,
    temperature=0.45,
    max_tokens=4000,
)

### Load instructions and entities from the official cyoda repository

In [None]:
%%script echo skipping
# Load only the connection templates from the checked-out repository branch.
loader = GitLoader(
    branch="cyoda-ai-configurations-3.0.x",
    repo_path=WORK_DIR,
    file_filter=lambda file_path: file_path.startswith(f"{WORK_DIR}/data/rag/v1/connections/templates"),
)
docs = loader.load()
print(f"Number of documents loaded: {len(docs)}")

In [None]:
# Read every file under the connections RAG directory as plain text.
loader = DirectoryLoader(f"{WORK_DIR}/data/rag/v1/connections", loader_cls=TextLoader)
docs = loader.load()
print(f"Number of documents loaded: {len(docs)}")


### Split documents and create vectorstore

In [None]:
# Chunk the loaded documents, embed the chunks into a Chroma store and expose
# a retriever that returns the 10 best-matching chunks per query.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=10000)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
retriever = vectorstore.as_retriever(search_kwargs={"k": 10})

In [None]:
# Sanity check: print how many entries the vector store's collection holds.
# NOTE: `_collection` is a private attribute of the vector store object.
count = vectorstore._collection.count()
print(count)

### Define prompts for contextualizing question and answering question

In [None]:
# System instruction for the question-rewriting step: turn the latest user
# question into a standalone one, without answering it.
contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""
# Prompt layout: system instruction, then the prior messages supplied under
# "chat_history", then the current question supplied under "input".
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

In [None]:
# Retriever that first rewrites the question using the chat history.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)

### Answer question

In [None]:
# System instruction for the answering step. The trailing {context} placeholder
# is the prompt variable where the retrieved documents are inserted.
qa_system_prompt = """You are a connection generation assistant. \
You will be asked to generate connection configurations. \
First, analyse the human message and choose a template to fill in: [Connections Questionnaire, HttpConnectionDetailsDto, HttpEndpointDto] \
Then fill in the values inside $ with curly brackets in the template. Other values in the template should be preserved. Treat it like a test where you need to fill in the blanks. But you cannot modify values out of the scope of your test. \
Construct and return only the json for the bean you are asked for. Return the resulting json without any comments.  
{context}"""

# Prompt layout: system instruction, then the prior messages supplied under
# "chat_history", then the current question supplied under "input".
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
# Chain that passes the documents to the model through qa_prompt.
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

### Create retrieval chain

In [None]:
# Full RAG pipeline: history-aware retrieval feeding the answering chain.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)

### Initialize chat history and relevant functions

In [None]:
# Maps a chat-session id to that session's list of messages.
chat_history = dict()

In [None]:
# Record one question/answer exchange in a session's chat history
def add_to_chat_history(id, question, message):
    """Append the question (as a HumanMessage) and ``message`` to session ``id``.

    The session's list is created on first use.
    """
    exchange = [HumanMessage(content=question), message]
    chat_history.setdefault(id, []).extend(exchange)

In [None]:
# Forget everything recorded for one chat session
def clear_chat_history(id):
    """Remove session ``id`` from the chat history; unknown ids are ignored."""
    chat_history.pop(id, None)

In [None]:
def ask_question(id, question):
    """Send ``question`` through the RAG chain and record the exchange.

    Args:
        id: Chat-session key used to look up and update ``chat_history``.
        question: The user question passed to the chain as "input".

    Returns:
        The value stored under "answer" in the chain's result.
    """
    # Imported locally from the module this file already takes HumanMessage from.
    from langchain.schema import AIMessage

    ai_msg = rag_chain.invoke(
        {"input": question, "chat_history": chat_history.get(id, [])}
    )
    answer = ai_msg["answer"]
    # Store the answer as an AI message. A bare string in the history is
    # coerced by LangChain into a human message, which would present the
    # model's earlier answers as user input on later turns.
    add_to_chat_history(id, question, AIMessage(content=answer))
    return answer

### Start a chat session

In [None]:
import uuid

# Generate a unique ID for the chat session.
# NOTE: this module-level name shadows the built-in id(); it is kept because
# other cells pass `id` to ask_question() and clear_chat_history().
id = uuid.uuid1()

In [None]:
# Clear the chat history of the current session if necessary.
clear_chat_history(id)

In [None]:
def parse_json(result):
    """Extract the JSON payload from a model answer.

    The answer may be bare JSON, or JSON wrapped in a Markdown code fence
    (with or without a language tag such as ``json``), optionally with prose
    around it. The text is returned as a string and is not parsed here.

    Args:
        result: Raw answer text.

    Returns:
        ``result`` unchanged when it already starts with "{" or contains no
        code fence; otherwise the stripped content of the first fenced block.
        An unterminated fence yields everything after the opening fence.
    """
    import re

    stripped = result.strip()
    if stripped.startswith("{"):
        return result

    # Opening fence, optional language tag, optional newline, then the shortest
    # body up to the closing fence, or up to end of text if the fence is unclosed.
    fenced = re.search(
        r"```[A-Za-z0-9_+-]*[^\S\n]*\n?(.*?)(?:```|\Z)", stripped, re.DOTALL
    )
    if fenced:
        return fenced.group(1).strip()
    return result

In [None]:
def generate_uuid() -> uuid.UUID:
    """Return a new version-1 UUID produced by ``uuid.uuid1()``."""
    return uuid.uuid1()

generate_uuid()

In [None]:
import uuid
from typing import Optional 
from langchain.agents import tool

# Tool wrapper around generate_uuid(). The `meta` argument is accepted but not
# used. The docstring is left unchanged because @tool takes it as the tool's
# description.
@tool
def generate_uuid_tool(meta: Optional[str]) -> uuid.UUID:
    """Returns random uuid."""
    return generate_uuid()


generate_uuid_tool.invoke("test")

In [None]:
import uuid
from typing import Optional 
from langchain.agents import tool

# NOTE(review): despite its name, this tool does not fetch anything. It ignores
# `url` and returns a fresh UUID, exactly like generate_uuid_tool. It looks
# like a placeholder; confirm the intended behaviour before relying on it.
@tool
def get_web_page_by_url(url: Optional[str]) -> uuid.UUID:
    """Returns random uuid."""
    return generate_uuid()


generate_uuid_tool.invoke("test")

In [None]:
from langchain.agents import tool


# Asks the RAG chain for a connection config, printing the raw answer and the
# extracted JSON text before returning the latter.
@tool
def generate_connection(config: str) -> str:
    """Generates a com.cyoda.plugins.datasource.dtos.connection.HttpConnectionDetailsDto connection for the given data source API configuration"""
    raw_answer = ask_question(
        id,
        f"Create a com.cyoda.plugins.datasource.dtos.connection.HttpConnectionDetailsDto connection for data source: {config}",
    )
    print(raw_answer)
    extracted = parse_json(raw_answer)
    print(extracted)
    return extracted


#generate_connection.invoke("ARTISTS_API")

In [None]:
from langchain.agents import tool


# Asks the RAG chain for one endpoint config and returns the extracted JSON text.
@tool
def generate_endpoint(config: str, endpoint: str) -> str:
    """Generates a com.cyoda.plugins.datasource.dtos.endpoint.HttpEndpointDto endpoint config for the given data source API configuration"""
    raw_answer = ask_question(
        id,
        f"Create a com.cyoda.plugins.datasource.dtos.endpoint.HttpEndpointDto endpoint for data source: {config} endpoint {endpoint}",
    )
    return parse_json(raw_answer)


#generate_endpoint.invoke("ARTISTS_API", "get_by_username")

In [None]:
import json
import jsonschema
from jsonschema import validate


def validate_result(parsed_result: str, file_path: str) -> bool:
    """Check that ``parsed_result`` is JSON that conforms to a JSON schema.

    Args:
        parsed_result: JSON text to check.
        file_path: Path of the JSON-schema file to validate against.

    Returns:
        True when the text parses and satisfies the schema. False when it is
        not valid JSON or violates the schema; the reason is printed.

    Raises:
        OSError: The schema file cannot be opened.
        json.JSONDecodeError: The schema file itself is not valid JSON.
    """
    # Parse the candidate first: malformed JSON is an "invalid" verdict for a
    # validator, not a crash.
    try:
        json_data = json.loads(parsed_result)
    except json.JSONDecodeError as err:
        print("JSON is invalid:", err.msg)
        return False

    with open(file_path, 'r', encoding="utf-8") as schema_file:
        schema = json.load(schema_file)

    try:
        validate(instance=json_data, schema=schema)
    except jsonschema.exceptions.ValidationError as err:
        print("JSON is invalid:", err.message)
        return False

    print("JSON is valid.")
    return True

In [None]:
from langchain.agents import tool


# Tool wrapper: validates a connection config against the connection schema
# file. It returns the boolean verdict of validate_result(), so the return
# annotation is bool.
@tool
def validate_result_connection(connection_dto_config: str) -> bool:
    """Validates the resulting com.cyoda.plugins.datasource.dtos.connection.HttpConnectionDetailsDto config"""
    return validate_result(connection_dto_config, f'{WORK_DIR}/data/v1/connections/connection_json_schema.json')


In [None]:
from langchain.agents import tool


# Tool wrapper: validates an endpoint config against the endpoint schema file.
# It returns the boolean verdict of validate_result(), so the return
# annotation is bool.
@tool
def validate_result_endpoint(endpoint_dto_config: str) -> bool:
    """Validates the resulting com.cyoda.plugins.datasource.dtos.endpoint.HttpEndpointDto config"""
    return validate_result(endpoint_dto_config, f'{WORK_DIR}/data/v1/connections/endpoint_json_schema.json')


In [None]:
# Tools offered to an agent. Every entry must be a @tool-decorated object, so
# the UUID generator is listed through generate_uuid_tool, not the plain
# generate_uuid function.
tools = [generate_uuid_tool, generate_connection, validate_result_connection, generate_endpoint, validate_result_endpoint]

In [None]:
# Step 1: have the model fill in the questionnaire for the requested API.
question = "Fill in Connections Questionnaire json based on the user question: \"Generate connection for ARTISTS_API\". Return only Questionnnaire json."
questionnaire_result = ask_question(id, question)
print(questionnaire_result)
# Pre-bind so the handler never reads an unbound name or a value left over
# from an earlier run of this cell.
parsed_questionnaire_result = None
try:
    parsed_questionnaire_result = parse_json(questionnaire_result)
    print(parsed_questionnaire_result)
    parsed_questionnaire_result_json = json.loads(parsed_questionnaire_result)
except Exception:
    print("error")
    print(parsed_questionnaire_result)
    raise

In [None]:
# Pull the individual questionnaire answers into their own variables.
connection_name = parsed_questionnaire_result_json['connection_name']
print(connection_name)
connection_type = parsed_questionnaire_result_json['connection_type']
connection_base_url = parsed_questionnaire_result_json['connection_base_url']
connection_endpoints = parsed_questionnaire_result_json['connection_endpoints']

In [None]:
# Step 2: generate the connection config and validate it against its schema.
question = f"Write com.cyoda.plugins.datasource.dtos.connection.HttpConnectionDetailsDto connection config for api {connection_name} with base_url {connection_base_url}. Return only com.cyoda.plugins.datasource.dtos.connection.HttpConnectionDetailsDto json."
connection_result = ask_question(id, question)
print(connection_result)
# Pre-bind so the handler never reads an unbound name or a value left over
# from an earlier run of this cell.
parsed_connection_result = None
try:
    parsed_connection_result = parse_json(connection_result)
    print(parsed_connection_result)
    result = validate_result(parsed_connection_result, f'{WORK_DIR}/data/v1/connections/connection_json_schema.json')
    if not result:
        print("error")
        print(parsed_connection_result)
except Exception:
    print("error")
    print(parsed_connection_result)
    raise

In [None]:
# Debug aid: show the Python type of the extracted connection config.
print(type(parsed_connection_result))

In [None]:
# The endpoints named in the questionnaire drive the generation loop below.
endpoints_list = connection_endpoints
print(type(endpoints_list))

In [None]:
# Step 3: generate one endpoint config per questionnaire endpoint and keep
# only those that pass schema validation.
endpoint_configs = []
for endpoint_name in endpoints_list:
    print(endpoint_name)
    question = f"Now generate com.cyoda.plugins.datasource.dtos.endpoint.HttpEndpointDto endpoint config for {endpoint_name}. Only one endpoint. Return only com.cyoda.plugins.datasource.dtos.endpoint.HttpEndpointDto json."
    endpoint_result = ask_question(id, question)
    print(endpoint_result)
    # Reset per iteration so the handler never prints the previous endpoint's
    # value or hits an unbound name.
    parsed_endpoint_result = None
    try:
        parsed_endpoint_result = parse_json(endpoint_result)
        print(parsed_endpoint_result)
        result = validate_result(parsed_endpoint_result, f'{WORK_DIR}/data/v1/connections/endpoint_json_schema.json')
        if result:
            endpoint_configs.append(parsed_endpoint_result)
        else:
            print("error")
            print(parsed_endpoint_result)
    except Exception:
        print("error")
        print(parsed_endpoint_result)
        raise

In [None]:
# Debug aid: show the accepted endpoint configs, the element type and the count.
print(endpoint_configs)
# The list is empty when no endpoint passed validation; skip the element-type
# line instead of raising IndexError.
if endpoint_configs:
    print(type(endpoint_configs[0]))
print(len(endpoint_configs))

In [None]:
def build_result_connection(connection_template_file_path: str, name: str, connection, endpoints):
    """Fill the data-source template with a generated connection and endpoints.

    The template's first entry under "dataSources" gets ``name``, a fresh id
    from ``generate_uuid()``, ``connection`` appended to its "connections"
    list and ``endpoints`` appended to its "endpoints" list.

    Args:
        connection_template_file_path: Path of the JSON template file.
        name: Value stored as the data source name.
        connection: Connection config to append.
        endpoints: Iterable of endpoint configs to append.

    Returns:
        The completed document as a JSON string indented by 4 spaces (also
        printed), or None when the template cannot be read, is not valid JSON,
        or lacks the expected structure. Failures are logged.
    """
    try:
        with open(connection_template_file_path, 'r') as file:
            data = json.load(file)
    except json.JSONDecodeError as e:
        logger.error(f"Failed to decode JSON: {e}")
        return None
    except Exception as e:
        logger.error(f"Failed to read JSON file: {e}")
        return None

    try:
        data_source = data['dataSources'][0]
        data_source['name'] = name
        data_source['id'] = str(generate_uuid())
        data_source['connections'].append(connection)
        data_source['endpoints'].extend(endpoints)
    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # Dict and list access raises these, never json.JSONDecodeError.
        logger.error(f"Connection template has an unexpected structure: {e}")
        return None

    # Convert the modified dictionary back to a JSON string
    data = json.dumps(data, indent=4)
    print(data)
    return data

In [None]:
# Turn the validated JSON strings into Python objects and assemble the final
# data-source document from the template.
parsed_connection_result_json = json.loads(parsed_connection_result)
endpoint_configs_json = [json.loads(raw_endpoint) for raw_endpoint in endpoint_configs]
endpoint_configs_str = json.dumps(endpoint_configs, indent=4)
print(type(parsed_connection_result_json))
print(type(endpoint_configs_json))
result = build_result_connection(
    connection_template_file_path=f'{WORK_DIR}/data/v1/connections/connection_dto_template.json',
    name='ARTISTS_API',
    connection=parsed_connection_result_json,
    endpoints=endpoint_configs_json,
)

In [None]:
# Debug aid: dump the chat history of every session.
print(chat_history)

In [None]:
# Clear the chat history of the current session if necessary.
clear_chat_history(id)

In [None]:
import requests
import json

# Log in to the API with the configured credentials and keep the bearer token
# for later requests.
api_url = API_URL + "/auth/login"
headers = {"Content-Type": "application/json", "X-Requested-With": "XMLHttpRequest"}
auth_data = {"username": API_KEY, "password": API_SECRET}
# Requests applies no timeout by default, so an unresponsive server would
# block this cell indefinitely; bound the wait.
response = requests.post(api_url, headers=headers, data=json.dumps(auth_data), timeout=30)
if response.status_code == 200:
    logger.info("Authentication successful!")
    TOKEN = response.json().get("token")
else:
    # A failed login is an error, and the status code says why it failed.
    logger.error(
        "Authentication failed (HTTP %s). Please check your API credentials.",
        response.status_code,
    )
    
def send_post_request(
    token: str, api_url: str, path: str, data, json, timeout: Optional[float] = None
) -> Optional[requests.Response]:
    """POST to ``{api_url}/{path}`` with a bearer token.

    Args:
        token: Bearer token placed in the Authorization header.
        api_url: Base URL, without a trailing slash.
        path: Path (and optional query string) appended after a "/".
        data: Request body passed to ``requests.post`` as ``data``.
        json: JSON-serialisable body passed to ``requests.post`` as ``json``.
        timeout: Seconds to wait for the server. The default None keeps the
            original behaviour of waiting indefinitely.

    Returns:
        The response, whatever its status code, or None when the request could
        not be performed; the error is logged.
    """
    url = f"{api_url}/{path}"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {token}",
    }
    try:
        return requests.post(url, headers=headers, data=data, json=json, timeout=timeout)
    except requests.exceptions.RequestException as req_err:
        # requests.post() raises connection, timeout and similar errors, all of
        # which derive from RequestException. HTTPError only comes from
        # raise_for_status(), which is not called here.
        logger.error(f"Request error occurred: {req_err}")
    except Exception as err:
        logger.error(f"Other error occurred: {err}")
    return None

In [None]:
def save_data(data):
    """POST ``data`` to the COBI config import endpoint and return the response.

    The request uses the module-level TOKEN and API_URL; the response (or None
    on a transport failure) is logged before being returned.
    """
    import_path = "data-source-config/import-cobi-config?cleanBeforeImport=false&doPostProcess=false"
    import_response = send_post_request(TOKEN, API_URL, import_path, data, None)
    logger.info(import_response)
    return import_response

In [None]:
# Show the assembled document and its type, then upload it.
print(result, type(result), sep="\n")
save_data(result)