## Installing dependencies

In [None]:
# Install the third-party packages this notebook relies on:
# langchain, cohere, qdrant-client and PyPDF2.
!pip install langchain
!pip install cohere
!pip install qdrant-client
!pip install PyPDF2



In [None]:
# Install langchain-community, upgrading (-U) it if a version is already present.
!pip install -U langchain-community

## Recommendation model using Cohere AI

1. Set the `cohere_api_key` variable to your own Cohere API key.
2. Download the Amazon dataset, upload it, and set `file_path` to the correct path.

In [None]:
import os
import re
import tempfile
from io import BytesIO
from typing import List, Union

import PyPDF2
import requests
from langchain.chains import RetrievalQA
from langchain.embeddings.cohere import CohereEmbeddings
from langchain.llms import Cohere
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Qdrant

def parse_txt(file_path):
    """Read a UTF-8 text file and normalise its blank-line runs.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The file contents as a single string in which every run of
        whitespace-only lines has been collapsed to exactly one blank line.
    """
    with open(file_path, encoding="utf-8") as handle:
        raw = handle.read()
    # Two newlines separated only by whitespace become a single paragraph break.
    return re.sub(r"\n\s*\n", "\n\n", raw)

def parse_pdf(file_path):
    """Extract and tidy the text of every page in a PDF.

    Args:
        file_path: Location of the PDF, handed straight to ``PyPDF2.PdfReader``.

    Returns:
        A list with one cleaned-up string per page, in page order.
    """

    def _tidy(raw):
        # Re-join words that were hyphenated across a line break.
        joined = re.sub(r"(\w+)-\n(\w+)", r"\1\2", raw)
        # Turn lone newlines (line wraps inside a sentence) into spaces.
        flowed = re.sub(r"(?<!\n\s)\n(?!\s\n)", " ", joined.strip())
        # Collapse runs of blank lines into a single paragraph break.
        return re.sub(r"\n\s*\n", "\n\n", flowed)

    reader = PyPDF2.PdfReader(file_path)
    return [_tidy(page.extract_text()) for page in reader.pages]

def text_to_docs(text: Union[str, List[str]]) -> List[Document]:
    """Split raw text into chunk-sized ``Document`` objects with provenance metadata.

    Args:
        text: Either a single string, which is treated as one page, or a
            list of strings with one entry per page.

    Returns:
        One ``Document`` per chunk, in page order. Each document's metadata
        holds ``page`` (1-based page index), ``chunk`` (0-based index of the
        chunk within its page) and ``source`` (the string ``"<page>-<chunk>"``).
    """
    # A bare string is handled as a one-page document.
    page_texts = [text] if isinstance(text, str) else text

    # The splitter settings are the same for every page, so build it once
    # instead of once per page.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=4000,
        separators=["\n\n", "\n", ".", "!", "?", ",", " "],
        chunk_overlap=0,
    )

    doc_chunks = []
    for page_number, page_text in enumerate(page_texts, start=1):
        for chunk_index, chunk in enumerate(text_splitter.split_text(page_text)):
            doc_chunks.append(
                Document(
                    page_content=chunk,
                    metadata={
                        "page": page_number,
                        "chunk": chunk_index,
                        # Human-readable origin of the chunk: "<page>-<chunk>".
                        "source": f"{page_number}-{chunk_index}",
                    },
                )
            )
    return doc_chunks

# Cohere API initialisation.
# Paste your key below, or leave it empty to fall back to the COHERE_API_KEY
# environment variable so the secret does not have to be stored in this file.
cohere_api_key = ''  # Replace with your actual Cohere API key
if not cohere_api_key:
    cohere_api_key = os.environ.get("COHERE_API_KEY", "")

# Provide the path to the file here
file_path = '/content/amazon.txt'  # Replace with the actual path to your PDF or TXT file

# Choose the parser from the file extension. The check is case-insensitive so
# that a name such as "DATA.TXT" is not handed to the PDF parser; any path that
# does not end in ".txt" is treated as a PDF.
if file_path.lower().endswith(".txt"):
    doc = parse_txt(file_path)
else:
    doc = parse_pdf(file_path)

# Convert the parsed document to smaller documents
pages = text_to_docs(doc)

def chat_bot(pages, cohere_api_key):
    """Run an interactive console question-answering loop over document chunks.

    The chunks are embedded with Cohere and indexed in an in-memory Qdrant
    collection. Each question typed by the user is answered by a ``stuff``
    RetrievalQA chain that uses the Cohere ``command`` model and the
    recommender prompt defined below. The loop ends when the user types
    ``exit`` (case-insensitive), or when input is closed or interrupted.

    Args:
        pages: The documents to index. If empty, a notice is printed and the
            function returns without contacting any service.
        cohere_api_key: API key passed to both the Cohere embeddings and the
            Cohere LLM.

    Returns:
        None. Answers are printed to standard output.
    """
    # Nothing to index: report it before building any prompt, model or store.
    if not pages:
        print("No file found. Upload a file to chat!")
        return

    # Create our own prompt template
    prompt_template = """Text: {context}

    Question: {question}

    Answer the question based on the text provided. If the text doesn't contain the answer, reply that the answer is not available.
    You are an ecommerce recommender chatbot analyze the question and provide top 5 products and their details like price category and description matching the user needs.
    if user asks for cheap or economical sort by price of the product mentioned.
    try to answer to only the specific question user asks , only reccommend top 5 for general reccomendation and searching queries"""

    PROMPT = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"]
    )

    embeddings = CohereEmbeddings(
        model="multilingual-22-12", cohere_api_key=cohere_api_key
    )
    store = Qdrant.from_documents(
        pages,
        embeddings,
        location=":memory:",
        collection_name="my_documents",
        distance_func="Dot",
    )

    # The chain does not depend on the question, so build it once up front
    # instead of re-creating the LLM and chain for every message.
    qa = RetrievalQA.from_chain_type(
        llm=Cohere(model="command", temperature=0, cohere_api_key=cohere_api_key),
        chain_type="stuff",
        retriever=store.as_retriever(),
        chain_type_kwargs={"prompt": PROMPT},
        return_source_documents=True,
    )

    while True:
        try:
            question = input("Type your message here (or type 'exit' to quit): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupt ends the session instead of crashing.
            break

        if question.lower() == 'exit':
            break
        if not question:
            # Ignore blank input rather than sending an empty query.
            continue

        answer = qa({"query": question})
        # Flatten the reply onto one line. Joining on whitespace keeps words that
        # were on separate lines apart; deleting the newlines would fuse them.
        result = " ".join(answer["result"].replace("Answer:", "").split())
        print(f"Bot: {result}")

# Launch the interactive session with the parsed chunks and the configured API key
chat_bot(pages=pages, cohere_api_key=cohere_api_key)