# RAG Application Using Typesense

In [2]:
import typesense
from dotenv import load_dotenv
import os


# Pull the variables defined in the local .env file into the process environment.
load_dotenv()

# Typesense API key; this is None when TYPESENSE_API_KEY is not set.
typesense_api_key = os.environ.get("TYPESENSE_API_KEY")



In [14]:
# Typesense client for the hosted node, authenticated with the key read from
# the environment. Requests give up after 2 seconds.
client = typesense.Client({
    'nodes': [{
        'host': 'birm815qe3acvnkgp-1.a1.typesense.net',
        'port': '443',
        'protocol': 'https'
    }],
    'api_key':typesense_api_key,
    'connection_timeout_seconds': 2
})


# Schema for the `books` collection. `authors` and `publication_year` are
# declared as facets; `ratings_count` is the default sort field.
books_schema = {
    'name' : 'books',
    'fields': [
        {'name': 'title', 'type': 'string'},
        {'name': 'authors', 'type': 'string[]', 'facet': True},
        {'name': 'publication_year', 'type': 'int32', 'facet': True},
        {'name': 'ratings_count', 'type': 'int32'},
        {'name': 'average_rating', 'type': 'float'},
    ],
    'default_sorting_field': 'ratings_count'
}

# The collection lives on the server, so creating it a second time (for example
# after "Restart & Run All") raises ObjectAlreadyExists. Fall back to showing
# the existing collection so the cell can be re-run safely.
try:
    print(client.collections.create(books_schema))
except typesense.exceptions.ObjectAlreadyExists:
    print(client.collections[books_schema['name']].retrieve())

{'created_at': 1760031446, 'default_sorting_field': 'ratings_count', 'enable_nested_fields': False, 'fields': [{'facet': False, 'index': True, 'infix': False, 'locale': '', 'name': 'title', 'optional': False, 'sort': False, 'stem': False, 'stem_dictionary': '', 'store': True, 'type': 'string'}, {'facet': True, 'index': True, 'infix': False, 'locale': '', 'name': 'authors', 'optional': False, 'sort': False, 'stem': False, 'stem_dictionary': '', 'store': True, 'type': 'string[]'}, {'facet': True, 'index': True, 'infix': False, 'locale': '', 'name': 'publication_year', 'optional': False, 'sort': True, 'stem': False, 'stem_dictionary': '', 'store': True, 'type': 'int32'}, {'facet': False, 'index': True, 'infix': False, 'locale': '', 'name': 'ratings_count', 'optional': False, 'sort': True, 'stem': False, 'stem_dictionary': '', 'store': True, 'type': 'int32'}, {'facet': False, 'index': True, 'infix': False, 'locale': '', 'name': 'average_rating', 'optional': False, 'sort': True, 'stem': Fal

In [6]:
# Bare expression: the notebook displays the client object's repr.
client

<typesense.client.Client at 0x1122f2450>

In [None]:
import json

# Read the whole JSONL file (one book document per line) into memory.
with open('../data/books.jsonl', 'r', encoding='utf-8') as jsonl_file:
    data = jsonl_file.read()

# The upload does not need the file handle, so it runs after the file is closed.
import_response = client.collections['books'].documents.import_(data)

# The import endpoint answers with one JSON object per submitted document.
# Individual failures do not raise, so inspect the results explicitly
# instead of discarding them.
import_results = [
    json.loads(line) for line in import_response.splitlines() if line.strip()
]
failed_imports = [result for result in import_results if not result.get('success')]

print(f"Imported {len(import_results) - len(failed_imports)} of {len(import_results)} documents")
if failed_imports:
    # Show only a handful of failures to keep the cell output readable.
    print(f"{len(failed_imports)} documents failed; first failures:")
    for failure in failed_imports[:5]:
        print(failure)



In [16]:
# Keyword search for "harry potter" over the title and authors fields,
# with the most-rated books listed first.
search_parameters = dict(
    q='harry potter',
    query_by='title, authors',
    sort_by='ratings_count:desc',
)
client.collections['books'].documents.search(search_parameters)

{'facet_counts': [],
 'found': 17,
 'hits': [{'document': {'authors': ['J.K. Rowling', ' Mary GrandPré'],
    'average_rating': 4.44,
    'id': '2',
    'image_url': 'https://images.gr-assets.com/books/1474154022m/3.jpg',
    'publication_year': 1997,
    'ratings_count': 4602479,
    'title': "Harry Potter and the Philosopher's Stone"},
   'highlight': {'title': {'matched_tokens': ['Harry', 'Potter'],
     'snippet': "<mark>Harry</mark> <mark>Potter</mark> and the Philosopher's Stone"}},
   'highlights': [{'field': 'title',
     'matched_tokens': ['Harry', 'Potter'],
     'snippet': "<mark>Harry</mark> <mark>Potter</mark> and the Philosopher's Stone"}],
   'text_match': 1157451471441100921,
   'text_match_info': {'best_field_score': '2211897868288',
    'best_field_weight': 15,
    'fields_matched': 1,
    'num_tokens_dropped': 0,
    'score': '1157451471441100921',
    'tokens_matched': 2,
    'typo_prefix_score': 0}},
  {'document': {'authors': ['J.K. Rowling', ' Mary GrandPré', ' R

In [18]:
# Same "harry potter" query, restricted to books whose publication_year
# is earlier than 1998; results are still ordered by ratings_count, descending.
search_parameters = dict(
    q='harry potter',
    query_by='title, authors',
    filter_by='publication_year:<1998',
    sort_by='ratings_count:desc',
)
client.collections['books'].documents.search(search_parameters)

{'facet_counts': [],
 'found': 1,
 'hits': [{'document': {'authors': ['J.K. Rowling', ' Mary GrandPré'],
    'average_rating': 4.44,
    'id': '2',
    'image_url': 'https://images.gr-assets.com/books/1474154022m/3.jpg',
    'publication_year': 1997,
    'ratings_count': 4602479,
    'title': "Harry Potter and the Philosopher's Stone"},
   'highlight': {'title': {'matched_tokens': ['Harry', 'Potter'],
     'snippet': "<mark>Harry</mark> <mark>Potter</mark> and the Philosopher's Stone"}},
   'highlights': [{'field': 'title',
     'matched_tokens': ['Harry', 'Potter'],
     'snippet': "<mark>Harry</mark> <mark>Potter</mark> and the Philosopher's Stone"}],
   'text_match': 1157451471441100921,
   'text_match_info': {'best_field_score': '2211897868288',
    'best_field_weight': 15,
    'fields_matched': 1,
    'num_tokens_dropped': 0,
    'score': '1157451471441100921',
    'tokens_matched': 2,
    'typo_prefix_score': 0}}],
 'out_of': 9979,
 'page': 1,
 'request_params': {'collection_name

In [19]:
# Search titles for the misspelled term 'experyment' and request facet
# counts on authors; hits are ordered by ratings_count, descending.
search_parameters = dict(
    q='experyment',
    query_by='title',
    facet_by='authors',
    sort_by='ratings_count:desc',
)
client.collections['books'].documents.search(search_parameters)

{'facet_counts': [{'counts': [{'count': 1,
     'highlighted': ' Käthe Mazur',
     'value': ' Käthe Mazur'},
    {'count': 1, 'highlighted': 'Mahatma Gandhi', 'value': 'Mahatma Gandhi'},
    {'count': 1, 'highlighted': 'Gretchen Rubin', 'value': 'Gretchen Rubin'},
    {'count': 1,
     'highlighted': 'James Patterson',
     'value': 'James Patterson'}],
   'field_name': 'authors',
   'sampled': False,
   'stats': {'total_values': 4}}],
 'found': 3,
 'hits': [{'document': {'authors': ['James Patterson'],
    'average_rating': 4.08,
    'id': '569',
    'image_url': 'https://images.gr-assets.com/books/1339277875m/13152.jpg',
    'publication_year': 2005,
    'ratings_count': 172302,
    'title': 'The Angel Experiment'},
   'highlight': {'title': {'matched_tokens': ['Experiment'],
     'snippet': 'The Angel <mark>Experiment</mark>'}},
   'highlights': [{'field': 'title',
     'matched_tokens': ['Experiment'],
     'snippet': 'The Angel <mark>Experiment</mark>'}],
   'text_match': 5787300

In [20]:
# LangChain + Typesense + Groq LLM: RAG application
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Typesense
from langchain_text_splitters import CharacterTextSplitter
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain_groq import ChatGroq


In [24]:
import os
from dotenv import load_dotenv
load_dotenv()

# Get the API key from environment variables
groq_api_key = os.getenv("GROQ_API_KEY")

# Load the source text with an explicit encoding so decoding does not depend
# on the platform's locale default (the other data file in this notebook is
# also read as UTF-8).
loader = TextLoader('../data/test.txt', encoding='utf-8')
documents = loader.load()

# Split into chunks of about 1000 characters, with 100 characters of overlap
# between neighbouring chunks.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
docs = text_splitter.split_documents(documents)

# Embedding model with the library's default settings.
embeddings = HuggingFaceBgeEmbeddings()

  embeddings = HuggingFaceBgeEmbeddings()
  embeddings = HuggingFaceBgeEmbeddings()


In [31]:
import os


# Pull the variables defined in the local .env file into the process environment.
load_dotenv()

# Typesense API key; this is None when TYPESENSE_API_KEY is not set.
typesense_api_key = os.environ.get("TYPESENSE_API_KEY")

# Build the vector store from the split documents and the embedding model,
# pointing it at the hosted Typesense node and the 'lang-chain' collection.
docsearch = Typesense.from_documents(
    docs,
    embeddings,
    typesense_client_params=dict(
        host='birm815qe3acvnkgp-1.a1.typesense.net',
        port='443',
        protocol='https',
        typesense_api_key=typesense_api_key,
        typesense_collection_name='lang-chain',
    ),
)




In [33]:
# Ask the vector store for the documents most similar to the question,
# then print the text of the first match.
query = "what is artificial intelligence"
found_docs = docsearch.similarity_search(query=query)
print(found_docs[0].page_content)

Artificial intelligence (AI) is the capability of computational systems to perform tasks typically associated with human intelligence, such as learning, reasoning, problem-solving, perception, and decision-making. It is a field of research in computer science that develops and studies methods and software that enable machines to perceive their environment and use learning and intelligence to take actions that maximize their chances of achieving defined goals.[1]


In [None]:
# Retriever: wrap the vector store in a retriever object.
retriever = docsearch.as_retriever()
retriever  # bare expression so the notebook displays the retriever's repr

VectorStoreRetriever(tags=['Typesense', 'HuggingFaceBgeEmbeddings'], vectorstore=<langchain_community.vectorstores.typesense.Typesense object at 0x307e4a2a0>, search_kwargs={})

In [35]:
# Run the question through the retriever and show the first returned document.
query = "what is artificial intelligence"
retriever.invoke(input=query)[0]

Document(metadata={'source': '../data/test.txt'}, page_content='Artificial intelligence (AI) is the capability of computational systems to perform tasks typically associated with human intelligence, such as learning, reasoning, problem-solving, perception, and decision-making. It is a field of research in computer science that develops and studies methods and software that enable machines to perceive their environment and use learning and intelligence to take actions that maximize their chances of achieving defined goals.[1]')