In [2]:
from langchain_openai import OpenAIEmbeddings
from dotenv import dotenv_values
import json
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_redis import RedisConfig, RedisVectorStore

# Load settings (Redis URL, API keys) from the local .env file
env_vars = dotenv_values(".env")
REDIS_URL = env_vars["REDIS_URL"]

# Embedding model used for every text added to the store
embeddings = OpenAIEmbeddings(model="text-embedding-3-small", api_key=env_vars["OPENAI_API_KEY"])

# Index configuration: index name, connection URL, and a tag-typed
# "category" metadata field
config = RedisConfig(
    index_name="documents",
    redis_url=REDIS_URL,
    metadata_schema=[{"name": "category", "type": "tag"}],
)

# Redis-backed vector store built from the embeddings and config above
vector_store = RedisVectorStore(embeddings, config=config)

# handling JSON files
def json_adder(filename, vector_store):
    """Load records from a JSON file and add them to the vector store.

    The file must contain a JSON array of objects, each with a ``"content"``
    entry (the text to add) and a ``"category"`` entry (stored as metadata).
    Any other keys in a record are ignored.

    Args:
        filename: Path to the JSON file.
        vector_store: Object exposing ``add_texts(texts, metadatas)``.

    Raises:
        KeyError: If a record lacks ``"content"`` or ``"category"``.
    """
    # Read as UTF-8 explicitly so the result does not depend on the platform locale
    with open(filename, "r", encoding="utf-8") as f:
        json_data = json.load(f)

    # Nothing to add: skip the store call instead of sending empty lists
    if not json_data:
        return

    # Extract texts and metadata from the JSON records
    json_texts = [item["content"] for item in json_data]
    json_metadata = [{"category": item["category"]} for item in json_data]

    # Add JSON data to the vector store
    vector_store.add_texts(json_texts, json_metadata)

# handling PDF files
def pdf_adder(filename, vector_store, chunk_size=1000, chunk_overlap=200):
    """Split a PDF into overlapping text chunks and add them to the vector store.

    Each chunk is stored with metadata
    ``{"page_number": <loader "page" value or "unknown">, "category": "pdf"}``.

    Args:
        filename: Path to the PDF file.
        vector_store: Object exposing ``add_texts(texts, metadatas)``.
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter``.
    """
    # Load the PDF
    pdf_docs = PyPDFLoader(filename).load()

    # Split the PDF into smaller chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    pdf_splits = text_splitter.split_documents(pdf_docs)

    # No chunks were produced: skip the store call instead of sending empty lists
    if not pdf_splits:
        return

    # Extract texts and metadata from PDF splits
    pdf_texts = [chunk.page_content for chunk in pdf_splits]
    pdf_metadata = [
        {"page_number": chunk.metadata.get("page", "unknown"), "category": "pdf"}
        for chunk in pdf_splits
    ]

    vector_store.add_texts(pdf_texts, pdf_metadata)

# Ingest the JSON records into the vector store
json_adder(filename="sit_data.json", vector_store=vector_store)

# Ingest the handbook PDF (chunked) into the same vector store
# NOTE(review): absolute, machine-specific path; consider making it configurable
pdf_location = "/home/krispy_noodles/chat_bot/assets/SIT_handbook.pdf"
pdf_adder(filename=pdf_location, vector_store=vector_store)

In [3]:
import redis

# Connect to the hosted Redis instance; the password is read from the .env values
r = redis.Redis(
    host="redis-19030.c1.ap-southeast-1-1.ec2.redns.redis-cloud.com",
    port=19030,
    password=env_vars["REDIS_PW"],
)

# Write the vector store's index schema to a local YAML file
yml_file_name = "redis_schema.yaml"
vector_store.index.schema.to_yaml(yml_file_name)

# Load the YAML text back from disk
with open(yml_file_name, "r") as schema_file:
    schema_yaml = schema_file.read()

# Save the schema text in Redis under the "vector_store_schema" key
r.set("vector_store_schema", schema_yaml)

True

In [1]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
import redis
from langchain_redis import RedisVectorStore
from dotenv import dotenv_values

# Load settings (Redis URL, passwords, API keys) from the local .env file
env_vars = dotenv_values(".env")
REDIS_URL = env_vars["REDIS_URL"]

r = redis.Redis(
    host="redis-19030.c1.ap-southeast-1-1.ec2.redns.redis-cloud.com",
    port=19030,
    password=env_vars["REDIS_PW"],
)

# Chat model, using the API key from the env file
llm = ChatOpenAI(
    api_key=env_vars["OPENAI_API_KEY"],
)

# Embedding model, using the same API key
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
    api_key=env_vars["OPENAI_API_KEY"],
)

# Retrieve the YAML schema stored in Redis
schema_yaml_from_redis = r.get("vector_store_schema")
if schema_yaml_from_redis is None:
    # r.get returns None for a missing key (e.g. after the database was flushed);
    # fail with a clear message instead of an AttributeError on .decode
    raise KeyError(
        'Redis key "vector_store_schema" not found; '
        "run the cell that stores the index schema first"
    )
schema_yaml_str = schema_yaml_from_redis.decode("utf-8")

# Write the schema to disk with the same encoding it was decoded with
with open("retrieved_vector_store_schema.yaml", "w", encoding="utf-8") as f:
    f.write(schema_yaml_str)

# Rebuild the vector store from the retrieved schema file
new_vector_store = RedisVectorStore(
    embeddings,
    redis_url=REDIS_URL,
    schema_path="retrieved_vector_store_schema.yaml",
)

# Similarity retriever configured with k=3
retriever = new_vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})

def format_docs(docs):
    """Join the page text of the retrieved documents into one context string.

    Without this step the prompt receives the Python repr of a list of
    Document objects (metadata included) rather than the text itself.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Prompt
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "human",
            """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
            Question: {question} 
            Context: {context} 
            Answer:""",
        ),
    ]
)

# The question is passed through unchanged; the context is the retrieved
# documents' text, joined by format_docs
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

rag_chain.invoke("what engineering courses are there?")

22:48:27 redisvl.index.index INFO   Index already exists, not overwriting.


'There are engineering courses in Mechatronics and Software Engineering, Robotics and Machine Learning, Systems Engineering and Project Management, as well as Sustainable Building Engineering. These courses cover areas such as mechanical engineering, electronics, control and automation, software technology, decarbonisation, energy efficiency, and sustainable urban systems. Additionally, career opportunities include roles like Embedded Systems Engineer, Systems Engineer, Project Engineer, and Software Engineer.'

In [1]:
# clearing redis database

import redis
from dotenv import dotenv_values

# Read the password from the .env file, as the other cells do, instead of
# embedding it in the notebook. The password that was previously written here
# has been exposed and should be rotated.
env_vars = dotenv_values(".env")

r = redis.Redis(
    host="redis-19030.c1.ap-southeast-1-1.ec2.redns.redis-cloud.com",
    port=19030,
    password=env_vars["REDIS_PW"],
)

# WARNING: destructive. Deletes all keys in the current db, including the
# stored "vector_store_schema" key.
r.flushdb()

True