In [1]:
from pinecone import Pinecone, ServerlessSpec
from pinecone_text.sparse import SpladeEncoder
from langchain_community.document_loaders import JSONLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import PromptTemplate
from langchain_fireworks import ChatFireworks
from langchain_pinecone import PineconeVectorStore
import torch
from sentence_transformers import SentenceTransformer
import pandas as pd
from tqdm import tqdm
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment so the
# os.getenv(...) lookups in the next cell can find the API keys.
# Left as the last expression on purpose: the cell displays the call's return value.
load_dotenv()

  from tqdm.autonotebook import tqdm


True

In [2]:
# --- Configuration -----------------------------------------------------------
# Credentials are read from the environment; a missing variable yields None.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
FIREWORKS_API_KEY = os.environ.get("FIREWORKS_API_KEY")

# Name of the Pinecone index queried by this notebook.
INDEX_NAME = "salechatbot"

# Use the GPU when torch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cuda


In [3]:
# Pinecone client, authenticated with the key read from the environment.
pc = Pinecone(
    api_key=PINECONE_API_KEY,
)

In [4]:
# Handle to the existing index; this does not create the index.
index = pc.Index(name=INDEX_NAME)

In [5]:
# Sparse (SPLADE) encoder used for the lexical side of the hybrid search.
# The device selected in the config cell is passed explicitly so that every
# model in the notebook runs on the same, visibly configured device instead of
# each library's own auto-detection.
splade = SpladeEncoder(device=device)

BertForMaskedLM has generative capabilities, as `prepare_inputs_for_generation` is explicitly overwritten. However, it doesn't directly inherit from `GenerationMixin`. From 👉v4.50👈 onwards, `PreTrainedModel` will NOT inherit from `GenerationMixin`, and this model will lose the ability to call `generate` and other related functions.
  - If you are the owner of the model architecture code, please modify your model class such that it inherits from `GenerationMixin` (after `PreTrainedModel`, otherwise you'll get an exception).
  - If you are not the owner of the model architecture class, please contact the model code owner to update it.


In [6]:
from langchain_huggingface import HuggingFaceEmbeddings

# Dense encoder used for the semantic side of the hybrid search.
# NOTE(review): this presumably has to be the same model that produced the
# vectors already stored in the Pinecone index — confirm against the indexing
# code before changing the model name.
# The device selected in the config cell is forwarded to the underlying
# sentence-transformers model so the `device` setting is actually honoured.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/clip-ViT-B-32",
    model_kwargs={"device": device},
)

In [7]:
from langchain_community.retrievers import PineconeHybridSearchRetriever

# Hybrid retriever: combines the dense embeddings and the sparse SPLADE encoder
# against the Pinecone index and returns the 10 best matches per query.
retriever = PineconeHybridSearchRetriever(
    embeddings=embeddings,
    sparse_encoder=splade,
    index=index,
    top_k=10,
)

In [8]:
# Prompt for the RAG chain. `{context}` receives the formatted retrieved
# documents and `{question}` the raw user query; both names must match the keys
# of the mapping that feeds this prompt in the chain definition.
template = """You are a sales assistant.
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}

Question: {question}

Helpful Answer:"""

# from_template infers the input variables (context, question) from the braces.
custom_rag_prompt = PromptTemplate.from_template(template)

In [15]:
# Chat model served by Fireworks; temperature 0.0 keeps answers as repeatable
# as possible, and failed requests are retried up to three times.
llm = ChatFireworks(
    model="accounts/fireworks/models/llama-v3p1-8b-instruct",
    max_retries=3,
    temperature=0.0,
)

In [16]:
def format_docs(docs):
    """Concatenate the text of retrieved documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined by a blank line, in input order.
        An empty iterable yields an empty string.
    """
    separator = "\n\n"
    page_texts = [doc.page_content for doc in docs]
    return separator.join(page_texts)


# Inputs for the prompt: the query is sent through the retriever and flattened
# into a single context string, while the same query is passed through
# unchanged as the question.
rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Full pipeline: gather inputs -> fill the prompt -> call the LLM -> plain text.
rag_chain = rag_inputs | custom_rag_prompt | llm | StrOutputParser()

In [23]:
# End-to-end smoke test of the chain. The query is Vietnamese for
# "I want to buy a gaming keyboard."; it must be stored as proper UTF-8 text,
# otherwise the retriever and the LLM receive garbled characters.
# Chunks are printed as they arrive (no newline, flushed) to show streaming.
for chunk in rag_chain.stream("Tôi muốn mua bàn phím gaming."):
    print(chunk, end="", flush=True)

Bạn có thể xem xét bàn phím gaming Tech77 với giá 620.000 đồng hoặc bàn phím game Fuhlen L411 với giá 200.000 đồng. Cả hai đều có sẵn. Thanks for asking!

In [24]:
# Run the retriever on its own to inspect what the chain sees as context.
# The query is Vietnamese for "I want to buy a gaming keyboard."; it must be
# stored as proper UTF-8 text so the encoders receive the intended words.
examples = retriever.invoke("Tôi muốn mua bàn phím gaming.")

In [26]:
# Print the product name of every retrieved document, one per line.
# A plain loop replaces the side-effect-only list comprehension: it avoids
# building a throwaway list of None values and no longer rebinds `_`, which
# IPython uses for the last cell output.
for example in examples:
    print(example.metadata["name"])

Bàn Phím Cơ Gaming dây usb GK102 Hotswap chống ồn cho máy tính laptop hàng nhập khẩu
Bàn Phím Game Fuhlen L411 - Hàng Chính Hãng
Bàn phím giả cơ K518 kèm chuột, bàn phím máy tính chống nước led rgb chơi game làm việc văn phòng cho PC laptop - hàng chính hãng
Bộ Bàn Phím Chuột Không Dây Besti BTY01 Nhỏ Gọn, Tiện Lợi Mang Theo- Hàng Chính Hãng
Miếng dán bàn phím tiếng Hàn Quốc Hàng Nhập Khẩu
Bao da Book cover bàn phím dành cho samsung Tab S7+/TAB S7 FE/TAB S8+ SLIM EF-DT730 Hàng chính hãng
Cuộn mút xốp nhiều màu bọc góc bàn chữ U bo góc bảo vệ em bé tránh va chạm trầy xước Legaxi
Bàn phím cơ không dây Newmen GM610 - Hàng chính hãng
Bàn phím Magic Keyboard Apple MK2A3 (US keyboard)
Đầu Android TVbox Mytv net Phiên bản Ram 2G/16G 4G/32G điều khiển IR- Xem 200 Kênh truyền hình miễn phí - Hàng Chính Hãng


In [27]:
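# Optional interactive chat loop. It is kept commented out so that
# "Restart Kernel -> Run All" does not block waiting on input();
# uncomment the lines below to chat with the bot (type q, quit or bye to stop).
# while True:
#     user_input = input("User")
#     print("User:", user_input)
#     if user_input in ["q", "quit", "bye"]:
#         break

#     print("Chatbot:", end=" ", flush=True)
#     for chunk in rag_chain.stream(user_input):
#         print(chunk, end="", flush=True)
#     print()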