In [1]:
# make commit message
# commit message: [langchain_scripts] update

In [13]:
import random
import psycopg2
import streamlit as st
import os
import openai
import psycopg2
from itertools import zip_longest
import streamlit as st
import pandas as pd
import json
import glob
import openai
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.utilities import SQLDatabase
from langchain.prompts import ChatPromptTemplate
from langchain.cache import SQLAlchemyCache
from langchain.globals import set_llm_cache
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from langchain.schema import (
    SystemMessage,
    HumanMessage,
    AIMessage
)


In [2]:
# Install packages
!pip install --upgrade openai==1.1.0 # Need to use 1.1.0 version only for compatibility!!
!pip install langchain langchain-experimental
!pip install psycopg2-binary
!pip install streamlit
!pip install streamlit-chat
!pip install pandas
!pip install sqlalchemy



In [2]:
# After installing langchain, you need to run 'openai migrate' command in cli for the first time.
import random
import psycopg2
import streamlit as st
import os
import openai
import psycopg2
import streamlit as st
import pandas as pd
import json
import glob
import openai
import dotenv
from langchain.chat_models import ChatOpenAI
from langchain.utilities import SQLDatabase
from langchain.agents.openai_assistant import OpenAIAssistantRunnable
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.utilities import SQLDatabase
from langchain.prompts import ChatPromptTemplate
from langchain.cache import SQLAlchemyCache
from langchain.globals import set_llm_cache
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from langchain.chains import LLMChain # These are not working on openai>=1.2.0
from langchain_experimental.sql import SQLDatabaseChain # These are not working on openai>=1.2.0

In [3]:
# Define some functions

def init_connection():
    """
    Initialize a SQL database connection using the credentials stored in st.secrets.
    Returns:
    connection (psycopg2.connection): The SQL database connection.
    """
	# the contents of st.secrets are in .streamlit/secrets.toml
    connection = psycopg2.connect(
        user = st.secrets['username'],
		password = st.secrets['password'],
		host = st.secrets['host'],
		port = st.secrets['port'],
		database = st.secrets['database']
	)
    return connection
     

def init_db(include_tables = []):
    """
    Initialize a SQL database db object for langchain using the credentials stored in st.secrets.

    Returns:
    db (SQLDatabase): The SQL database object.
    pg_uri (str): The PostgreSQL URI used for the connection.
    """

    # Retrieve the credentials from st.secrets
    user = st.secrets['username']
    password = st.secrets['password']
    host = st.secrets['host']
    port = st.secrets['port']
    database = st.secrets['database']
    
    # Create the PostgreSQL URI
    pg_uri = f"postgresql+psycopg2://{user}:{password}@{host}:{port}/{database}"
    
    # Create the SQLDatabase object
    if include_tables == []:    
        db = SQLDatabase.from_uri(pg_uri)
    else:
        db = SQLDatabase.from_uri(pg_uri, include_tables=include_tables,sample_rows_in_table_info=2)
    
    return db, pg_uri

### Task 1. Raw Date Processing and insert into SQL DB

In [8]:
# assume student_id and problem_id are given 
problem_id = 1
student_id = 12345678 

connection = init_connection()
cursor = connection.cursor()

def get_problem(cursor, problem_id):
    query = """
    SELECT
        p.problem_id,
        p.question,
        p.solution,
        p.hint,
        p.level,
        p.knowledge_score,
        ARRAY_AGG(DISTINCT su.sub_unit_name) AS sub_unit,
        ARRAY_AGG(DISTINCT mu.main_unit_name) AS main_unit,
        ARRAY_AGG(DISTINCT a.area_name) AS area
    FROM
        knowledge_map_db.problem p
    INNER JOIN knowledge_map_db.knowledge_problem kp
        ON p.problem_id = kp.problem_id
    INNER JOIN knowledge_map_db.knowledge k
        ON kp.knowledge_id = k.knowledge_id
    INNER JOIN knowledge_map_db.sub_unit su
        ON k.sub_unit_id = su.sub_unit_id
    INNER JOIN knowledge_map_db.main_unit mu
        ON su.main_unit_id = mu.main_unit_id
    INNER JOIN knowledge_map_db.area a
        ON mu.area_id = a.area_id
    WHERE
        p.problem_id = {}
    GROUP BY
        p.problem_id;
    """.format(problem_id)

    cursor.execute(query)
    query_data = cursor.fetchall()

    # make problem dictionary
    problem = {}
    problem['problem_id'] = query_data[0][0]
    problem['question'] = query_data[0][1]
    problem['solution'] = query_data[0][2]
    problem['hint'] = query_data[0][3]
    problem['level'] = query_data[0][4]
    problem['knowledge_score'] = query_data[0][5]
    problem['sub_unit'] = query_data[0][6]
    problem['main_unit'] = query_data[0][7]
    problem['area'] = query_data[0][8]
    
    return problem

def get_student_answer(cursor, student_id, problem_id):
    query = """
    SELECT
        student_answer,
        knowledge_score,
        feedback
    FROM
        Student_DB.Problem_Progress
    WHERE
        Student_ID = {} AND
        Problem_ID = {}
    ORDER BY
        Timestamp DESC
    LIMIT 1;
    """.format(student_id, problem_id)

    cursor.execute(query)
    query_data = cursor.fetchall()

    # make problem dictionary
    student_answer = {}
    student_answer['student_answer'] = query_data[0][0]
    student_answer['knowledge_score'] = query_data[0][1]
    student_answer['feedback'] = query_data[0][2]
    
    return student_answer

def get_system_prompt(problem, student_answer):
    prompt = """
    Mathematics Problem Interactive Feedback Session

    Context: You are a chatbot tasked with discussing a mathematics problem with a student. Your role is to provide interactive feedback on the student's submitted answer, guide them to understand any mistakes, and encourage learning.

    Problem Details:
    - Question: {}
    - Correct Solution: {}
    - Hint for the problem: {}
    - Related knowledge and its score: {}
    - Related high-school math unit: {}
    
    Student's Submission:
    - Answer: {}
    - student's score for each knowledge: {}
    - Feedback: {}

    Instructions:
    - During the conversation, you should use Korean language.
    - Engage in a friendly and supportive conversation with the student.
    - Discuss the student's answer, highlighting what was done well and where improvements can be made.
    - Use the provided hint and correct solution to guide the student towards understanding any errors.
    - Offer constructive suggestions on how to approach similar problems in the future.
    - Encourage the student to ask questions and express any confusion for further clarification.

    """.format(
        problem['question'], problem['solution'], problem['hint'],
        problem['knowledge_score'], problem['main_unit'],
        student_answer['student_answer'], student_answer['knowledge_score'],
        student_answer['feedback']
    )
    
    return prompt

# check the existence of connection and cursor 
if 'cursor' not in globals():
    if 'connection' not in globals():
        connection = init_connection()
    cursor = connection.cursor()

# get problem and student answer
problem = get_problem(cursor, problem_id)
student_answer = get_student_answer(cursor, student_id, problem_id)

# get system prompt
system_prompt = get_system_prompt(problem, student_answer)

print(system_prompt)

IndexError: list index out of range

In [5]:
# Connect to the SQL database and get the knowledge and sub_unit tables
connection = init_connection()
# create a table in the database
cursor = connection.cursor()
# load data from sql table 'knowledge_map_db.knowledge'.
query = "SELECT knowledge_id, knowledge_name FROM knowledge_map_db.knowledge;"
cursor.execute(query)
knowledge_table = 'knowledge_map_db.knowledge'
knowledge_columns = [description[0] for description in cursor.description]
knowledge_data = cursor.fetchall()

query = "SELECT sub_unit_id, sub_unit_name FROM knowledge_map_db.sub_unit;"
cursor.execute(query)
sub_unit_table = 'knowledge_map_db.sub_unit'
sub_unit_data = cursor.fetchall()
sub_unit_columns = [description[0] for description in cursor.description]
knowledge_str = f"{knowledge_table}, {knowledge_columns}, {knowledge_data}"
sub_unit_str = f"{sub_unit_table}, {sub_unit_columns}, {sub_unit_data}"# modify print as var
print(knowledge_str)
print(sub_unit_str)

knowledge_map_db.knowledge, ['knowledge_id', 'knowledge_name'], [(1, ' 다항식의 사칙연산'), (2, ' 다항식 관련 용어'), (3, ' 곱셈공식의 변형'), (4, ' 곱셈공식'), (5, ' 항등식의 정의'), (6, ' 항등식의 성질'), (7, ' 미정계수법'), (8, ' 나머지정리'), (9, ' 조립제법'), (10, ' 인수정리'), (11, ' 공통인수 묶기'), (12, ' 삼차식의 인수분해 공식'), (13, ' 이차식의 인수분해 공식'), (14, ' 치환에 의한 인수분해'), (15, ' 고차식의 인수분해'), (16, ' 복잡한 식의 인수분해'), (17, ' 켤레복소수'), (18, ' 복소수의 정의'), (19, ' 복소수가 서로 같을 조건'), (20, ' 복소수의 사칙연산'), (21, ' 켤레복소수의 성질'), (22, ' 허수단위의 순환성'), (23, ' 음수의 제곱근'), (24, ' 이차방정식의 실근의 부호 판단'), (25, ' 이차방정식과 이차함수의 관계'), (26, ' 이차방정식의 실근의 위치에 따른 조건'), (27, ' 이차함수의 그래프와 직선의 위치 관계'), (28, '이차함수의 최솟값'), (29, '이차함수의 최댓값'), (30, ' 삼차방정식의 켤레근'), (31, ' 방정식의 허근의 성질'), (32, ' 삼차방정식과 사차방정식의 풀이'), (33, ' 삼차방정식의 근과 계수의 관계'), (34, ' 세 근을 알 때 삼차방정식 구하는 방법'), (35, ' 대칭연립방정식'), (36, ' 해가 없거나 무수히 많은 연립일차방정식'), (37, ' 미지수가 2개인 연립이차방정식의 풀이'), (38, ' 공통근'), (39, ' 부정방정식의 풀이'), (40, ' 절댓값 기호를 포함한 부등식의 풀이 방법'), (41, ' 연립부등식의 풀이 방법'), (42, ' 부등식 ax>b의 풀이 방법'), (43, ' 연립이차부등식의 풀이 방법'), (44,

In [6]:
# Define the templates for raw data processing prompts

templates = {}

templates["give problem"] = """
You are a high school mathematics instructor. Evaluate if every mathematical question in the MATH dataset is relevant to a topic commonly included in the high school curriculum and aligns with the level of high school education.

Details:
- Exemplary data format includes:
  - Problem: 'We roll a fair 6-sided die 5 times. What is the probability that we get a 6 in at most 2 of the rolls?'
  - Level: 'Level 5'
  - Type: 'Counting & Probability'
  - Solution: [Complex mathematical formula]

This is given problem
{problem}

Assess whether this math question is relevant to a topic commonly included in the high school curriculum and aligns with the level of high school education. Answer yes as 1 or no as 0.
"""

templates["knowledge_mapping"] = """
You are a high school mathematics instructor.
We need to map the given math question to the knowledge_map_db.knowledge table.

Exemplary data format includes:
Problem: "We roll a fair 6-sided die 5 times. What is the probability that we get a 6 in at most 2 of the rolls?"
Level: "Level 5"
Type: "Counting & Probability"
Solution: [Complex mathematical formula]

You need to identify 1-3 knowledge areas within the knowledge_map_db.knowledge table that are related to each question.
Table content is as follows:
{knowledge_str}

Simply print the ('knowledge_id', 'knowledge_name') in json format for each given problem.
""".format(knowledge_str=knowledge_str)


templates["unit_mapping"] = """
You are a high school mathematics instructor.
We need to map each given problem to the knowledge_map_db.sub_unit table.

Exemplary data format is as follows:
Problem: "We roll a fair 6-sided die 5 times. What is the probability that we get a 6 in at most 2 of the rolls?"
Level: "Level 5"
Type: "Counting & Probability"
Solution: [Complex mathematical formula]

You need to identify 1 unit within knowledge_map_db.sub_unit table that is related to each question.
Table content is as follows:
{sub_unit_str}  

Simply print the ('sub_unit_id', 'sub_unit_name') in json format for each given problem.
""".format(sub_unit_str=sub_unit_str)

templates["postprocess"] = """
You are a high school mathematics instructor. We will be processing data from the MATH dataset.

Exemplary data format is as follows:
Problem: "We roll a fair 6-sided die 5 times. What is the probability that we get a 6 in at most 2 of the rolls?"
Level: "Level 5"
Type: "Counting & Probability"
Solution: [Complex mathematical formula]

Expected data format for the knowledge_map_db.problem db is as follows:
question: text (Translate the original problem into Korean and encapsulate LaTeX expressions with $)
solution: text (Translate the original solution into Korean and encapsulate LaTeX expressions with $)
hint: text (Create 1 hint providing a concise sentence that emphasizes the key concept or guides problem-solving
level: int (Use the original level.)
step_criteria: json (Create 2-5 steps based on the solution. Format: '{"criteria1": …, "criteria2": …, …}'
step_score: 'int[]' (Assign scores to each step for 'step_criteria', totaling 10.)
competence: 'numeric[]' (Create a relevance score vector for five competence elements: 'problem-solving', 'information processing', 'reasoning', 'communication', 'connection', according to the Korean mathematics curriculum and your own estimation. The total should sum up to 1.)

Simply print the ('question', 'solution', 'hint', 'level', 'step_criteria', 'step_score', 'competence') in json format.
"""

templates["insert_problem"] = """
You are a sql database Chatbot.
For each given data, we want to make a sql query to insert data into the knowledge_map_db.problem table.
make the insert query for the knowledge_map_db.problem table.
Exemplary data format is as follows:
problem_id: serial (constraint: primary_key, you don't need to insert this column)
question: text
solution: text
hint: text
level: int
step_criteria: json
step_score: 'int[]'
competence: 'numeric[]'
sub_unit_id: int

Please answer insert query for the knowledge_map_db.problem table without any other message.
Also, Do not use ```sql{answer}``` format , just print text.
"""

templates["insert_knowledge_problem"] = """
You are a sql database Chatbot.
For given data, we want to make a sql query to insert data into the knowledge_map_db.knowledge_problem which shows the relationship between knowledge and problem.

Exemplary data format is as follows:
knowledge_problem table:
knowledge_id: int 
problem_id: int (we don't know )

Based on given data, make the insert query for the knowledge_map_db.knowledge_problem table without any other message.
Do not use ```sql{answer}``` format , just print text.
Template :
WITH inserted_problem AS (
    SELECT problem_id
    FROM knowledge_map_db.problem
    WHERE question = '{{question}}'
)
INSERT INTO knowledge_map_db.knowledge_problem (knowledge_id, problem_id)
SELECT knowledge_id, ip.problem_id
FROM inserted_problem ip, (VALUES ({{knowledge_id}}), ({{knowledge_id}}), ({knowledge_id})) AS knowledge(knowledge_id);
"""

In [10]:
# Connect to the SQL database and get the knowledge and sub_unit tables as dataframes
connection = init_connection()

# Initialize the chat models
gpt_4 = ChatOpenAI(model_name="gpt-4-1106-preview", temperature=0.3, api_key=st.secrets["apikey"])
gpt_3 = ChatOpenAI(model_name="gpt-3.5-turbo-1106", temperature=0.3, api_key=st.secrets["apikey"])

# Define the function to run a single chat interaction with the given chat model with prompt
def langchain_single_chat(chat, system_message, human_message):
  """
  Run a single chat interaction with the given chat model.

  Args:
  chat (ChatOpenAI): The chat model.
  system_message (SystemMessage): The system message to send to the chat model.
  human_message (HumanMessage): The human message to send to the chat model.

  Returns:
  response (AIMessage): The response from the chat model.
  """
  # define the messages
  messages = [SystemMessage(content=system_message), HumanMessage(content=human_message)]
  # get the response from the chat model
  response = chat(messages)
  # return the response as a string
  return response.content

# get all the files in the nested directory ./MATH/
files = glob.glob("./MATH/" + "**/*.json", recursive=True)
# Shuffle the files
random.shuffle(files)

# Load a json file and print
for file_path in files[:10]:
  with open(file_path) as f:
    raw_data = json.load(f)
    raw_data = str(raw_data)
    print(raw_data)
    isHighSchoolLevel = langchain_single_chat(gpt_4, templates["give problem"], raw_data)
    print("isHighSchoolLevel:", isHighSchoolLevel)
    # Print the response content
    if isHighSchoolLevel == "1":
      relatedKnolwedge = langchain_single_chat(gpt_4, templates["knowledge_mapping"], raw_data)
      print("response:", relatedKnolwedge)
      relatedUnit = langchain_single_chat(gpt_4, templates["unit_mapping"], raw_data)
      print("response:", relatedUnit)
      postprocess_response = langchain_single_chat(gpt_4, templates["postprocess"], raw_data)
      print("postprocessed data:", postprocess_response)
      insert_problem_query = langchain_single_chat(gpt_3, templates["insert_problem"], f"{postprocess_response}, {relatedUnit}")
      print("insert_problem_query:", insert_problem_query)
      insert_knowledge_problem_query = langchain_single_chat(gpt_3, templates["insert_knowledge_problem"], f"{postprocess_response}, {relatedKnolwedge}")
      print("insert_knowledge_problem_query:", insert_knowledge_problem_query)
      # Conduct insert query on the database
      try:
        cursor = connection.cursor()
        cursor.execute(insert_problem_query)
        cursor.execute(insert_knowledge_problem_query)
      except (Exception, psycopg2.Error) as error:
        print("Error while connecting to PostgreSQL", error)
        connection.rollback()
      finally:
        cursor.close()
        connection.commit()
    # delete the file to avoid duplication
    os.remove(file_path)

{'problem': 'Find the largest value of $\\frac{y}{x}$ for pairs of real numbers $(x,y)$ that satisfy\n\\[(x - 3)^2 + (y - 3)^2 = 6.\\]', 'level': 'Level 5', 'type': 'Intermediate Algebra', 'solution': 'Let $k = \\frac{y}{x}.$  Then $y = kx,$ so\n\\[(x - 3)^2 + (kx - 3)^2 = 6.\\]Expressing this as a quadratic in $x,$ we get\n\\[(k^2 + 1) x^2 - (6k + 6) k + 12 = 0.\\]This quadratic has real roots when its discriminant is nonnegative:\n\\[(6k + 6)^2 - 4(k^2 + 1)(12) \\ge 0.\\]This simplifies to $k^2 - 6k + 1 \\le 0.$  The roots of the corresponding equation $k^2 - 6k + 1 = 0$ are\n\\[3 \\pm 2 \\sqrt{2},\\]so the solution to $k^2 - 6k + 1 \\le 0$ is $3 - 2 \\sqrt{2} \\le k \\le 3 + 2 \\sqrt{2}.$\n\nTherefore, the largest possible value of $k = \\frac{y}{x}$ is $\\boxed{3 + 2 \\sqrt{2}}.$'}
isHighSchoolLevel: 1
response: ```json
[
  {"knowledge_id": 24, "knowledge_name": "이차방정식과 이차함수의 관계"},
  {"knowledge_id": 28, "knowledge_name": "이차함수의 최댓값"},
  {"knowledge_id": 67, "knowledge_name": "원의 방

RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4-1106-preview in organization org-ClAjD0poWvAlqKcGLYA0dAAu on tokens per day (TPD): Limit 500000, Used 499274, Requested 2429. Please try again in 4m54.278s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}

In [8]:
insert_problem_query = """
INSERT INTO knowledge_map_db.problem (question, solution, hint, level, step_criteria, step_score, competence, sub_unit_id) 
VALUES (
  '두 보각 A와 B의 크기가 각각 7:23의 비율로 주어집니다. A의 보각의 크기와 B의 보각의 크기의 비율을 기약분수로 표현하시오.',
  '각 A의 보각은 B이고, 각 B의 보각은 A입니다. 따라서 우리는 B와 A의 비율을 구하고자 하는데, 이는 A와 B의 비율의 역수이므로 $\\boxed{\\frac{23}{7}}$입니다.',
  '보각의 개념을 사용하여 각도의 비율을 찾으세요.',
  5,
  '{"1": "보각의 정의를 이해하고 적용하기", "2": "주어진 비율을 이용하여 각도의 보각을 찾기", "3": "비율의 역수를 계산하기"}',
  '{3, 4, 3}',
  '[0.25, 0.2, 0.25, 0.15, 0.15]',
  73
);
"""
cursor.execute(insert_problem_query)
cursor.execute(insert_knowledge_problem_query)
cursor.close()
connection.commit()


InFailedSqlTransaction: current transaction is aborted, commands ignored until end of transaction block


In [9]:
from concurrent.futures import ThreadPoolExecutor
import json
import glob
import os

def process_file(file_path):
    # Load the JSON file
    with open(file_path) as f:
        raw_data = json.load(f)
        raw_data_str = str(raw_data)
    
    # Interact with the chat model
    isHighSchoolLevel = langchain_single_chat(gpt_4, templates["give problem"], raw_data_str)

    if isHighSchoolLevel == "1":
        relatedKnowledge = langchain_single_chat(gpt_4, templates["knowledge_mapping"], raw_data_str)
        relatedUnit = langchain_single_chat(gpt_4, templates["unit_mapping"], raw_data_str)
        postprocess_response = langchain_single_chat(gpt_4, templates["postprocess"], raw_data_str)
        
        insert_problem_query = langchain_single_chat(gpt_3, templates["insert_problem"], f"{postprocess_response}, {relatedUnit}")
        insert_knowledge_problem_query = langchain_single_chat(gpt_3, templates["insert_knowledge_problem"], f"{postprocess_response}, {relatedKnowledge}")

        # Conduct insert queries on the database
        try:
            local_connection = init_connection()
            local_cursor = local_connection.cursor()
            local_cursor.execute(insert_problem_query)
            local_cursor.execute(insert_knowledge_problem_query)
        except (Exception, psycopg2.Error) as error:
            print("Error while connecting to PostgreSQL", error)
            local_connection.rollback()
        finally:
            local_cursor.close()
            local_connection.commit()
    # Delete the file to avoid duplication
    os.remove(file_path)

# get all the files in the nested directory ./MATH/
files = glob.glob("./MATH/" + "**/*.json", recursive=True)
random.shuffle(files)

# Use ThreadPoolExecutor to parallelize file processing
with ThreadPoolExecutor(max_workers=10) as executor:
    executor.map(process_file, files[:1000])

Error while connecting to PostgreSQL connection to server at "147.47.200.145", port 34543 failed: server closed the connection unexpectedly
	This probably means the server terminated abnormally
	before or while processing the request.

Error while connecting to PostgreSQL invalid input syntax for type json
LINE 7:   '{"step1": "코사인 함수의 주기성을 인식하고 이를 이용하여 ...
          ^
DETAIL:  Escape sequence "\s" is invalid.
CONTEXT:  JSON data, line 1: ...기", "step2": "계산된 코사인 값으로 $\s...

Error while connecting to PostgreSQL malformed array literal: "[3, 4, 3]"
LINE 8:   '[3, 4, 3]',
          ^
DETAIL:  Missing "]" after array dimensions.

Error while connecting to PostgreSQL malformed array literal: "[3, 4, 3]"
LINE 8:   '[3, 4, 3]',
          ^
DETAIL:  Missing "]" after array dimensions.

Error while connecting to PostgreSQL unterminated quoted string at or near "'{"criteria1": "두 점의 x좌표를 더하고 2로 나눈다", "criteria2": "두 점의 y좌표를 더하고 2로 나"
LINE 7:   '{"criteria1": "두 점의 x좌표를 더하고 2로 나눈다", "crite...
  

### Task 2. SQL database chain

In [12]:
# Ref
# https://github.com/langchain-ai/langchain/issues/6918
# Talk to your Database using RAG and LLMs
# (ref: https://medium.com/@shivansh.kaushik/talk-to-your-database-using-rag-and-llms-42eb852d2a3c)

# Status: In progress. It just works for a single question not for retrival questions.
# To Do:
# 1. Add memory to the SQLDatabaseChain object.
# 2. Add some examples SQL queries to the memory.
# 3. Test the pgvector extension and similarity search.

llm = ChatOpenAI(model_name="gpt-4-1106-preview", temperature=0.3, api_key=st.secrets["apikey"])

# Initialize the SQL database connection
db, pg_uri = init_db()

# Create the SQLDatabaseChain object
db_chain = SQLDatabaseChain(llm=llm, database=db, verbose=True, top_k=3)

# Define the question to run the SQL query
question = "Find the all schemes in our db."

PROMPT = """ 
Given an input question, first create a syntactically correct postgresql query to run,  
then look at the results of the query and return the answer.  
The question: {question}
""".format(question=question)
# Run the SQL query using the db_chain object
db_chain.run(PROMPT)





[1m> Entering new SQLDatabaseChain chain...[0m
 
Given an input question, first create a syntactically correct postgresql query to run,  
then look at the results of the query and return the answer.  
The question: Find the all schemes in our db.

SQLQuery:[32;1m[1;3mSince you have not provided the schema of the database with the table names and columns that would contain information about "schemes," I cannot create a syntactically correct PostgreSQL query to answer the question. The tables provided (full_llm_cache and full_md5_llm_cache) do not contain any information about "schemes" in a database context. 

In a typical PostgreSQL database, "schemes" might refer to "schemas," which are collections of database objects, including tables, views, and functions. If you are asking for the list of schemas in a PostgreSQL database, the query would involve querying the `information_schema.schemata` system catalog. However, since this is not part of the tables you've provided, I cannot r

ProgrammingError: (psycopg2.errors.SyntaxError) syntax error at or near "Since"
LINE 1: Since you have not provided the schema of the database with ...
        ^

[SQL: Since you have not provided the schema of the database with the table names and columns that would contain information about "schemes," I cannot create a syntactically correct PostgreSQL query to answer the question. The tables provided (full_llm_cache and full_md5_llm_cache) do not contain any information about "schemes" in a database context. 

In a typical PostgreSQL database, "schemes" might refer to "schemas," which are collections of database objects, including tables, views, and functions. If you are asking for the list of schemas in a PostgreSQL database, the query would involve querying the `information_schema.schemata` system catalog. However, since this is not part of the tables you've provided, I cannot run such a query.

If you can provide the correct table or system catalog that contains the information about "schemes" or "schemas," I can then create the appropriate query.]
(Background on this error at: https://sqlalche.me/e/20/f405)

### Task 3. Chat with Memory

Prompt

너는 한국어 및 한국어 교육과정을 기반으로 한 고등학교 수학 도우미야.
너는 앞으로 수학 교사로서 행동해야 해.
1. 지금 풀고 있는 문제의 지식 요소는 다음과 같아
2. 지금 풀고 있는 문제에 관한 학생의 답은 다음과 같아
3. 지금 풀고 있는 문제에 관한 
4. 지금 풀고 있는 문제의 정답
5. 지금 풀고 있는 문제의 채점 기준은 다음과 같아.
4. 지금부터 제일 처음에는 '안녕하세요. 문제에 관해 물어보세요.'라고 시작해.



In [None]:
# To DO:
# 1. Test SQL memory in https://python.langchain.com/docs/integrations/memory/sql_chat_message_history
# 2. Save conversation history in the SQL database.
# 3. Save and load memory from the SQL database.
# 4. Test the connection with SQLdatabaseChain and openai assistant api.
# 5. Make a chatbot in UI using streamlit.
# 6. Test the cache in langchain.

from langchain.chains import LLMChain
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
    SystemMessagePromptTemplate,
)
from langchain.memory import PostgresChatMessageHistory
from sqlalchemy import create_engine
# engine = create_engine(pg_uri)
# set_llm_cache(SQLAlchemyCache(engine))

# LLM
llm = ChatOpenAI()

# Prompt
prompt = ChatPromptTemplate(
    messages=[
        SystemMessagePromptTemplate.from_template(
            "You are a nice chatbot having a conversation with a human."
        ),
        # The `variable_name` here is what must align with memory
        MessagesPlaceholder(variable_name="chat_history"),
        HumanMessagePromptTemplate.from_template("{question}"),
    ]
)

# Notice that we `return_messages=True` to fit into the MessagesPlaceholder
# Notice that `"chat_history"` aligns with the MessagesPlaceholder name
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
conversation = 52(llm=llm, prompt=prompt, verbose=True, memory=memory)

# Notice that we just pass in the `question` variables - `chat_history` gets populated by memory
conversation({"question": "hi"})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are a nice chatbot having a conversation with a human.
Human: hi[0m

[1m> Finished chain.[0m


{'question': 'hi',
 'chat_history': [HumanMessage(content='hi'),
  AIMessage(content='Hello! How can I assist you today?')],
 'text': 'Hello! How can I assist you today?'}

In [None]:
conversation({"question": "Can you save previous messages?"})



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are a nice chatbot having a conversation with a human.
Human: hi
AI: Hello! How can I assist you today?
Human: Translate this sentence from English to French: I love programming.
AI: Sure! The translation of "I love programming" from English to French is "J'adore programmer."
Human: Can you save previous messages?[0m

[1m> Finished chain.[0m


{'question': 'Can you save previous messages?',
 'chat_history': [HumanMessage(content='hi'),
  AIMessage(content='Hello! How can I assist you today?'),
  HumanMessage(content='Translate this sentence from English to French: I love programming.'),
  AIMessage(content='Sure! The translation of "I love programming" from English to French is "J\'adore programmer."'),
  HumanMessage(content='Can you save previous messages?'),
  AIMessage(content="As an AI language model, I don't have the capability to save or recall previous messages. Each interaction is treated as a separate query. Is there something specific you would like to refer back to?")],
 'text': "As an AI language model, I don't have the capability to save or recall previous messages. Each interaction is treated as a separate query. Is there something specific you would like to refer back to?"}

In [None]:
# Test the cache in langchain

from langchain.globals import set_llm_cache
from langchain.cache import InMemoryCache, SQLAlchemyCache
from sqlalchemy import create_engine

cache_type = 'sqlalchemy' # or 'InMemoryCache'
if cache_type == 'sqlalchemy':
    engine = create_engine(pg_uri)
    set_llm_cache(SQLAlchemyCache(engine))
elif cache_type == 'InMemoryCache':
    set_llm_cache(InMemoryCache())

OperationalError: (psycopg2.OperationalError) connection to server at "147.47.200.145", port 34543 failed: server closed the connection unexpectedly
	This probably means the server terminated abnormally
	before or while processing the request.

(Background on this error at: https://sqlalche.me/e/20/e3q8)

### Task 4. use Openai assistant api in Langchain

In [7]:
# Ref1: https://python.langchain.com/docs/modules/agents/agent_types/openai_assistants
# Ref2: https://github.com/langchain-ai/langchain/blob/master/cookbook/openai_v1_cookbook.ipynb
# Ref3: https://www.youtube.com/watch?v=IZGBshGqB3g&t=1s
# Status: In progress. It could be used for the math assistant chatbot and auto-grading system.
# To Do:
# 1. GPT-4 vision test

import dotenv
dotenv.load_dotenv()

# This is a test code for langchain with OpenAI Assistant api
OPENAI_API_KEY = st.secrets["apikey"]
openai.api_key = OPENAI_API_KEY
interpreter_assistant = OpenAIAssistantRunnable.create_assistant(
    name="langchain assistant",
    instructions="You are a personal math tutor. Write and run code to answer math questions.",
    tools=[{"type": "code_interpreter"}],
    model="gpt-4-1106-preview",
)
output = interpreter_assistant.invoke({"content": "What's 10 - 4 raised to the 2.7"})

In [10]:
print(output)

[ThreadMessage(id='msg_yzFwuO5Roh4mMnYuqOxKEI78', assistant_id='asst_RZm8SzXSZsYVVDvt1R6T7RFN', content=[MessageContentText(text=Text(annotations=[], value='It looks like there was an error due to too many requests. Let me try that calculation again for you.'), type='text')], created_at=1701764036, file_ids=[], metadata={}, object='thread.message', role='assistant', run_id='run_zswMUbg5x9bZgD7kt39p4YXi', thread_id='thread_MJJiGjGWiVwKGxl0z4w1JgE2'), ThreadMessage(id='msg_FTCmSMmCyBrryY5pw7weGlDs', assistant_id='asst_RZm8SzXSZsYVVDvt1R6T7RFN', content=[MessageContentText(text=Text(annotations=[], value="I apologize for the inconvenience. It seems we are experiencing some technical difficulties with running the code. However, I can explain how you can calculate the result manually.\n\nThe expression 10 - 4 raised to the 2.7 means you first need to calculate 4 raised to the power of 2.7 and then subtract that result from 10.\n\nHere's how you can do it with a calculator for instance:\n\n1