# AI Cyoda configurations Q&A with RAG LangChain

This is a playground for experimenting with workflow generation.

### Install requirements

In [None]:
pip install -r ../requirements.txt

### Load environment variables

In [None]:
from dotenv import load_dotenv
import os

# Load environment variables via python-dotenv before reading them below.
load_dotenv()

# Every setting is mandatory: a missing variable raises KeyError right here.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
WORK_DIR = os.environ["WORK_DIR"]
# Base URL for all Cyoda API calls: the configured URL with "/api" appended.
API_URL = f'{os.environ["CYODA_API_URL"]}/api'
API_KEY = os.environ["CYODA_API_KEY"]
API_SECRET = os.environ["CYODA_API_SECRET"]

In [None]:
import logging
# Logger shared by the cells below; the root logger is configured at INFO level.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

In [None]:
%%script echo skipping
##for google colab (optional)
# This cell is optional and can be skipped
from google.colab import userdata
API_KEY = userdata.get('OPENAI_API_KEY')
WORK_DIR = userdata.get('WORK_DIR')

### Handle unsupported version of sqlite3 (optional)

In [None]:
pip install pysqlite3-binary

In [None]:
import sys

# Import pysqlite3 and register it under the name "sqlite3" so that any later
# `import sqlite3` resolves to pysqlite3. Run this before importing libraries
# that import sqlite3 themselves.
__import__("pysqlite3")
sys.modules["sqlite3"] = sys.modules["pysqlite3"]

### Initialize ChatOpenAI

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import GitLoader, DirectoryLoader, TextLoader
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.schema import HumanMessage

In [None]:
# Chat model used both for question reformulation and for answering.
# The API key comes from the environment cell above.
llm = ChatOpenAI(
    temperature=0.7,
    max_tokens=16000,
    model="gpt-4o-mini",
    openai_api_key=OPENAI_API_KEY,
)

In [None]:
# Sanity check: show the working directory that data paths are built from.
print(WORK_DIR)

In [None]:
# Load every file under the RAG workflows directory as a text document.
loader = DirectoryLoader(
    f"{WORK_DIR}/data/rag/v1/workflows", loader_cls=TextLoader
)
docs = loader.load()
print(f"Number of documents loaded: {len(docs)}")

In [None]:
%%script echo skipping
# Alternative document source (cell is skipped by the line above): load the
# documents from the develop branch of the cyoda-ai-chat repository instead of
# the local directory. Note that it rebinds `loader` and `docs`.
loader = GitLoader(repo_path="/tmp/cyoda-ai-chat",
                   clone_url="https://github.com/Cyoda-platform/cyoda-ai-chat",
                   branch="develop")
docs = loader.load()
print(f"Number of documents loaded: {len(docs)}")

### Split documents and create vectorstore

In [None]:
# Split the loaded documents into chunks of up to 1000 characters with a
# 200-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
# Embed the chunks with OpenAI embeddings and index them in a Chroma store.
vectorstore = Chroma.from_documents(
            documents=splits, embedding=OpenAIEmbeddings()
        )
# Retriever that returns 10 chunks per query (search_kwargs k=10).
retriever = vectorstore.as_retriever(
            search_kwargs={"k": 10}
        )

In [None]:
# Print the number of entries in the vector store.
# NOTE(review): `_collection` is a private attribute of the Chroma wrapper and
# may change between library versions.
count = vectorstore._collection.count()
print(count)

### Define prompts for contextualizing question and answering question

In [None]:
# System instruction for the reformulation step: turn the latest user question
# into a standalone question without answering it.
contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""
# Prompt layout: system instruction, then the prior messages supplied under
# "chat_history", then the new user input supplied under "input".
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

In [None]:
# Retriever that uses the LLM and the contextualize prompt together with the
# base retriever, so follow-up questions can take the chat history into account.
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)

In [None]:
def read_file_to_string(file_path):
    """Return the entire contents of a text file as a string.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file's text, decoded as UTF-8.
    """
    # Explicit encoding so the result does not depend on the platform locale.
    with open(file_path, 'r', encoding="utf-8") as file:
        return file.read()

### Answer question

In [None]:
# System message for the answering step: the caller-supplied "prompt" text
# followed by the retrieved documents ("context").
# NOTE(review): the " /" after {prompt} is sent to the model literally; it may
# have been meant as a "\" line continuation like the prompt above — confirm.
qa_system_prompt = """{prompt} /
{context}"""
# Prompt layout: system message, prior messages ("chat_history"), user input.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
# Chain that fills the retrieved documents into the prompt and calls the LLM.
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

### Create retrieval chain

In [None]:
# Full pipeline: history-aware retrieval followed by the question-answer chain.
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

### Initialize chat history and relevant functions

In [None]:
# In-memory chat histories: maps a session id to that session's message list.
chat_history = {}

In [None]:
# Function to add a message to the chat history
def add_to_chat_history(id, question, message):
    """Append one question/answer exchange to the history of chat session ``id``.

    Args:
        id: Key of the chat session in the module-level ``chat_history`` dict.
        question: The user's question text; stored as a ``HumanMessage``.
        message: The model's reply. A plain string is wrapped in an
            ``AIMessage``; any other object is stored unchanged.
    """
    # Imported locally so this cell relies only on packages already in use.
    from langchain.schema import AIMessage

    # A bare string in a message list is interpreted by LangChain as a human
    # message, which would mislabel the model's reply on the next turn.
    if isinstance(message, str):
        message = AIMessage(content=message)
    chat_history.setdefault(id, []).extend([HumanMessage(content=question), message])

In [None]:
# Function to clear chat history
def clear_chat_history(id):
    """Forget the stored conversation for session ``id``; do nothing if none exists."""
    chat_history.pop(id, None)

In [None]:
# System prompt text, read once when this cell runs.
prompt = read_file_to_string(f"{WORK_DIR}/data/v1/workflows/prompt.txt")


def ask_question(id, question):
    """Send ``question`` through the RAG chain and record the exchange.

    Args:
        id: Chat session key; selects which stored history is passed along.
        question: The user's question text.

    Returns:
        The value stored under "answer" in the chain's result.
    """
    previous_turns = chat_history.get(id, [])
    chain_input = {"input": question, "chat_history": previous_turns, "prompt": prompt}
    answer = rag_chain.invoke(chain_input)["answer"]
    add_to_chat_history(id, question, answer)
    return answer

### Start a chat session

In [None]:
import uuid

# Generate a unique ID for the chat session.
# NOTE(review): this rebinds the builtin name `id` for the rest of the
# notebook; later cells rely on this variable, so rename with care.
id = uuid.uuid1()

In [None]:
# NOTE(review): the next comment looks like a leftover alternative question — confirm it is still wanted.
##Rewrite CyodaCalculationMemberClient to java. Produce ready to use code. Leave spring boot
# Ask a first question in this session and print the answer.
question = "hello, how r u"
result = ask_question(id, question)
print(result)

In [None]:
# Inspect everything stored so far, for all sessions.
print(chat_history)

In [None]:
# Clear the chat history of the current session if necessary.
clear_chat_history(id)

In [None]:
import requests
import json

# Log in to the Cyoda API with the configured credentials and keep the token
# for the request helpers defined below.
api_url = API_URL + "/auth/login"
headers = {"Content-Type": "application/json", "X-Requested-With": "XMLHttpRequest"}
auth_data = {"username": API_KEY, "password": API_SECRET}
logger.info(api_url)
response = requests.post(api_url, headers=headers, data=json.dumps(auth_data))
if response.status_code != 200:
    logger.error("Authentication failed. Please check your API credentials.")
    # Stop here: without a token every later API call would fail with a
    # NameError on TOKEN instead of a meaningful HTTP error.
    response.raise_for_status()
    # raise_for_status() only raises for 4xx/5xx; treat anything else as fatal too.
    raise RuntimeError(f"Unexpected status code {response.status_code} from {api_url}")
logger.info("Authentication successful!")
TOKEN = response.json().get("token")

In [None]:
def send_post_request(path, data):
    """POST ``data`` to ``{API_URL}/{path}`` using the bearer token.

    Args:
        path: Endpoint path appended to ``API_URL``.
        data: Request body, passed to ``requests.post`` as ``data``.

    Returns:
        The ``requests`` response object; the status is not checked here.
    """
    target = f"{API_URL}/{path}"
    print(target)
    return requests.post(
        target,
        headers={"Content-Type": "application/json", "Authorization": f"Bearer {TOKEN}"},
        data=data,
    )

In [None]:
def send_get_request(path):
    """GET ``{API_URL}/{path}`` using the bearer token.

    Args:
        path: Endpoint path appended to ``API_URL``.

    Returns:
        The ``requests`` response object; the status is not checked here.
    """
    auth_headers = {"Content-Type": "application/json", "Authorization": f"Bearer {TOKEN}"}
    return requests.get(f"{API_URL}/{path}", headers=auth_headers)

In [None]:
def read_file(file_path):
    """Read a text file and return its contents as a string (no JSON parsing).

    Args:
        file_path: Path of the file to read.

    Returns:
        The file's full text, decoded as UTF-8.

    Raises:
        Exception: Any error raised while opening or reading the file is
            logged and then re-raised unchanged.
    """
    try:
        # Explicit encoding so the result does not depend on the platform locale.
        with open(file_path, 'r', encoding="utf-8") as file:
            return file.read()
    except Exception as e:
        logger.error(f"Failed to read file {file_path}: {e}")
        raise

In [None]:

def read_json_file(file_path):
    """Read a JSON file and return the parsed data.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load``.

    Raises:
        Exception: Any error raised while opening, decoding or parsing the
            file is logged and then re-raised unchanged.
    """
    try:
        # Explicit encoding so parsing does not depend on the platform locale.
        with open(file_path, 'r', encoding="utf-8") as file:
            return json.load(file)
    except Exception as e:
        logger.error(f"Failed to read JSON file {file_path}: {e}")
        raise

In [None]:
def get_workflow(file_path):
    """POST the workflow definition stored in ``file_path`` and return its id.

    Args:
        file_path: Path of the file whose raw text is sent as the request body.

    Returns:
        The value under ``'id'`` in the JSON response.

    Raises:
        requests.HTTPError: Raised by ``raise_for_status`` for an error status.
    """
    payload = read_file(file_path)
    response = send_post_request("platform-api/statemachine/persisted/workflows", payload)
    succeeded = response.status_code // 100 == 2
    if not succeeded:
        logger.error(f"POST request failed with status code {response.status_code}")
        response.raise_for_status()
    return response.json()['id']

def get_empty_transition(workflow_id, file_path):
    """POST the transition template in ``file_path`` for a workflow and return its id.

    Args:
        workflow_id: Id of the workflow; written into the payload as
            ``workflowId`` and used in the endpoint path.
        file_path: Path of the JSON file holding the transition payload.

    Returns:
        The value under ``['Data']['id']`` in the JSON response.

    Raises:
        requests.HTTPError: Raised by ``raise_for_status`` for an error status.
    """
    data = read_json_file(file_path)
    data['workflowId'] = workflow_id
    path = f"platform-api/statemachine/persisted/workflows/{workflow_id}/transitions"
    response = send_post_request(path, json.dumps(data))
    if response.status_code // 100 != 2:
        # Log the raw body: an error response is not guaranteed to be JSON, and
        # parsing it here could raise and hide the real HTTP failure.
        logger.error(
            f"POST request failed with status code {response.status_code}: {response.text}"
        )
        response.raise_for_status()
    return response.json()['Data']['id']

def get_existing_states(workflow_id):
    """Fetch the states of the given workflow and return the parsed JSON response.

    Raises:
        requests.HTTPError: Raised by ``raise_for_status`` for an error status.
    """
    response = send_get_request(
        f"platform-api/statemachine/persisted/workflows/{workflow_id}/states"
    )
    request_ok = response.status_code // 100 == 2
    if not request_ok:
        logger.error(f"GET request failed with status code {response.status_code}")
        response.raise_for_status()
    return response.json()

def create_state_mapping(workflow_states):
    """Build name-keyed lookup tables from a states response.

    Args:
        workflow_states: Mapping whose ``'Data'`` entry is a list of items
            with ``'name'``, ``'description'`` and ``'id'`` keys.

    Returns:
        A ``(descriptions_by_name, ids_by_name)`` tuple of dicts. The id map
        always maps the key ``'None'`` to ``"noneState"``.
    """
    descriptions_by_name = {}
    ids_by_name = {}
    for state in workflow_states['Data']:
        state_name = state['name']
        descriptions_by_name[state_name] = state['description']
        ids_by_name[state_name] = state['id']
    # Fixed entry added last, so it wins over any state literally named 'None'.
    ids_by_name['None'] = "noneState"
    return descriptions_by_name, ids_by_name

def update_states(workflow_id, file_path, existing_state_id_map):
    """Create every end state from ``file_path`` that is not in the id map yet.

    Args:
        workflow_id: Id of the workflow the states belong to.
        file_path: JSON file with a ``'transitions'`` list; each item supplies
            ``'end_state'`` and ``'end_state_description'``.
        existing_state_id_map: Name-to-id dict; updated in place with the id
            of each newly created state.

    Raises:
        requests.HTTPError: Raised by ``raise_for_status`` for an error status.
    """
    transitions = read_json_file(file_path)['transitions']
    wanted_states = {}
    for transition in transitions:
        wanted_states[transition['end_state']] = transition['end_state_description']

    # States are posted under a freshly created transition built from the
    # initial-transition template.
    empty_transition_id = get_empty_transition(workflow_id, f"{WORK_DIR}/data/v1/workflows/initial_transition.json")
    path = f"platform-api/statemachine/persisted/workflows/{workflow_id}/transitions/{empty_transition_id}/states"
    state_template = read_json_file(f"{WORK_DIR}/data/v1/workflows/state.json")

    for state_name, state_description in wanted_states.items():
        if state_name in existing_state_id_map:
            continue
        state_template["name"] = state_name
        state_template["description"] = state_description
        response = send_post_request(path, json.dumps(state_template))
        if response.status_code // 100 != 2:
            logger.error(f"POST request failed with status code {response.status_code}")
            response.raise_for_status()
        existing_state_id_map[state_name] = response.json()["Data"]["id"]

def update_transitions(file_path, existing_state_id_map, workflow_id):
    """POST one transition per entry of the ``'transitions'`` list in ``file_path``.

    Args:
        file_path: JSON file with a ``'transitions'`` list; each item supplies
            ``'name'``, ``'description'``, ``'start_state'`` and ``'end_state'``.
        existing_state_id_map: Name-to-id dict used to resolve both state names.
        workflow_id: Id of the workflow the transitions are added to.

    Raises:
        KeyError: If a start or end state name is missing from the id map.
        requests.HTTPError: Raised by ``raise_for_status`` for an error status.
    """
    transitions = read_json_file(file_path)
    payload = read_json_file(f"{WORK_DIR}/data/v1/workflows/transition.json")
    endpoint = f"platform-api/statemachine/persisted/workflows/{workflow_id}/transitions"

    for entry in transitions['transitions']:
        # The template dict is reused; these fields are overwritten every pass.
        payload['name'] = entry['name']
        payload['description'] = entry['description']
        payload['startStateId'] = existing_state_id_map[entry['start_state']]
        payload['endStateId'] = existing_state_id_map[entry['end_state']]
        payload['workflowId'] = workflow_id

        response = send_post_request(endpoint, json.dumps(payload))
        if response.status_code // 100 != 2:
            logger.error(f"POST request failed with status code {response.status_code}")
            response.raise_for_status()
        print(response.json())

In [None]:
# Step 1: create the workflow from its definition file.
workflow_file_path = f"{WORK_DIR}/data/v1/workflows/workflow.json"
workflow_id = get_workflow(workflow_file_path)

# Step 2: fetch the states the workflow already has and index them by name.
workflow_states = get_existing_states(workflow_id)
existing_state_dscr_map, existing_state_id_map = create_state_mapping(workflow_states)

# Step 3: create the states that are missing; existing_state_id_map is
# updated in place with their ids.
state_file_path = f"{WORK_DIR}/data/test-inputs/v1/workflows/test_res.json"
update_states(workflow_id, state_file_path, existing_state_id_map)

# Step 4: create the transitions (same input file as the states step).
transition_file_path = f"{WORK_DIR}/data/test-inputs/v1/workflows/test_res.json"
update_transitions(transition_file_path, existing_state_id_map, workflow_id)


In [None]:
# Provide the URL of the image to analyse; it must be set before running this
# cell, because the download below uses it as-is.
image_url = ""

import base64
import httpx
# Download the image and base64-encode its bytes as a UTF-8 string.
image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")

In [None]:
# Ask the model directly (not through the RAG chain) to generate a workflow
# from the image, with the prompt file's text embedded in the instruction.
prompt = read_file_to_string(f"{WORK_DIR}/data/v1/workflows/prompt.txt")
message = HumanMessage(
    content=[
        {"type": "text", "text": f"Generate the workflow from the image. Base on the system prompt: {prompt}"},
        {
            "type": "image_url",
            # The image is sent inline as a base64 data URL declared as JPEG.
            "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
        },
    ],
)
# Note: this rebinds `response`, which earlier held the login HTTP response.
response = llm.invoke([message])
print(response.content)

In [None]:
# Debug aid: show the concrete type of the assembled RAG chain.
print(type(rag_chain))

In [None]:
def parse_json(result):
    """Strip a Markdown code fence from a model reply and return the payload text.

    Handled inputs:
      * text that starts with a fence (with or without a language tag),
      * text that contains a ```json fence somewhere after leading prose,
      * anything else (including text that already starts with "{"), which
        is returned unchanged.

    Trailing text or newlines after the closing fence are ignored, and a
    fence that is never closed yields everything after its opening line.

    Args:
        result: The raw reply text.

    Returns:
        The fenced content with surrounding whitespace removed, or ``result``
        itself when no fence applies. No JSON validation is performed.
    """
    fence = "```"
    if result.startswith(fence):
        open_at = 0
    elif result.startswith("{"):
        return result
    else:
        open_at = result.find(fence + "json")
        if open_at == -1:
            return result

    # The payload begins on the line after the opening fence, which skips the
    # fence itself and any language tag that follows it.
    header_end = result.find("\n", open_at)
    if header_end == -1:
        return ""  # a fence was opened but nothing follows it

    body_start = header_end + 1
    close_at = result.find(fence, body_start)
    if close_at == -1:
        # Unterminated fence: keep the rest instead of slicing with -1, which
        # would silently drop the last character.
        close_at = len(result)
    return result[body_start:close_at].strip()

In [None]:
# Strip any Markdown code fence from the image-based reply.
answer = parse_json(response.content)

In [None]:
# Store the image-derived answer in the session history so follow-up questions can refer to it.
# NOTE(review): `question` still holds whatever was last asked, not the image instruction — confirm this pairing is intended.
add_to_chat_history(id, question, answer)

In [None]:
# NOTE(review): the next comment looks like a leftover alternative question — confirm it is still wanted.
##Rewrite CyodaCalculationMemberClient to java. Produce ready to use code. Leave spring boot
# Follow-up question answered through the RAG chain with the session history.
question = "how many transitions in the workflow"
result = ask_question(id, question)
print(result)